Files
hadrian/proxychain/responsemodifiers/rewriters/block_third_party_scripts.go

70 lines
1.7 KiB
Go

package rewriters
import (
_ "embed"
"fmt"
"log"
"net/url"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// BlockThirdPartyScriptsRewriter implements HTMLTokenRewriter
// and blocks 3rd party JS in script tags by replacing the src attribute value "blocked"
type BlockThirdPartyScriptsRewriter struct {
baseURL *url.URL
proxyURL string // ladder URL, not proxied site URL
}
// NewBlockThirdPartyScriptsRewriter creates a new instance of BlockThirdPartyScriptsRewriter.
// This rewriter will strip out 3rd party JS URLs from script tags.
func NewBlockThirdPartyScriptsRewriter(baseURL *url.URL, proxyURL string) *BlockThirdPartyScriptsRewriter {
return &BlockThirdPartyScriptsRewriter{
baseURL: baseURL,
proxyURL: proxyURL,
}
}
func (r *BlockThirdPartyScriptsRewriter) ShouldModify(token *html.Token) bool {
if token.DataAtom != atom.Script {
return false
}
// check for 3p .js urls in html elements
for i := range token.Attr {
attr := token.Attr[i]
switch {
case attr.Key != "src":
continue
case strings.HasPrefix(attr.Val, "/"):
return false
case !strings.HasPrefix(attr.Val, "http"):
return false
case strings.HasPrefix(attr.Val, r.proxyURL):
return false
case strings.HasPrefix(attr.Val, fmt.Sprintf("%s://%s", r.baseURL.Scheme, r.baseURL.Hostname())):
return false
}
}
return true
}
func (r *BlockThirdPartyScriptsRewriter) ModifyToken(token *html.Token) (string, string) {
for i := range token.Attr {
attr := &token.Attr[i]
if attr.Key != "src" {
continue
}
if !strings.HasPrefix(attr.Val, "http") {
continue
}
log.Printf("INFO: blocked 3P js: '%s' on '%s'\n", attr.Val, r.baseURL.String())
attr.Key = "blocked"
}
return "", ""
}