add subdomain url extractor; add 3p script blocker modifier
This commit is contained in:
@@ -0,0 +1,69 @@
|
||||
package rewriters
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
// BlockThirdPartyScriptsRewriter implements HTMLTokenRewriter
|
||||
// and blocks 3rd party JS in script tags by replacing the src attribute value "blocked"
|
||||
type BlockThirdPartyScriptsRewriter struct {
|
||||
baseURL *url.URL
|
||||
proxyURL string // ladder URL, not proxied site URL
|
||||
}
|
||||
|
||||
// NewBlockThirdPartyScriptsRewriter creates a new instance of BlockThirdPartyScriptsRewriter.
|
||||
// This rewriter will strip out 3rd party JS URLs from script tags.
|
||||
func NewBlockThirdPartyScriptsRewriter(baseURL *url.URL, proxyURL string) *BlockThirdPartyScriptsRewriter {
|
||||
return &BlockThirdPartyScriptsRewriter{
|
||||
baseURL: baseURL,
|
||||
proxyURL: proxyURL,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *BlockThirdPartyScriptsRewriter) ShouldModify(token *html.Token) bool {
|
||||
if token.DataAtom != atom.Script {
|
||||
return false
|
||||
}
|
||||
|
||||
// check for 3p .js urls in html elements
|
||||
for i := range token.Attr {
|
||||
attr := token.Attr[i]
|
||||
switch {
|
||||
case attr.Key != "src":
|
||||
continue
|
||||
case strings.HasPrefix(attr.Val, "/"):
|
||||
return false
|
||||
case !strings.HasPrefix(attr.Val, "http"):
|
||||
return false
|
||||
case strings.HasPrefix(attr.Val, r.proxyURL):
|
||||
return false
|
||||
case strings.HasPrefix(attr.Val, fmt.Sprintf("%s://%s", r.baseURL.Scheme, r.baseURL.Hostname())):
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (r *BlockThirdPartyScriptsRewriter) ModifyToken(token *html.Token) (string, string) {
|
||||
for i := range token.Attr {
|
||||
attr := &token.Attr[i]
|
||||
if attr.Key != "src" {
|
||||
continue
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(attr.Val, "http") {
|
||||
continue
|
||||
}
|
||||
log.Printf("INFO: blocked 3P js: '%s' on '%s'\n", attr.Val, r.baseURL.String())
|
||||
attr.Key = "blocked"
|
||||
}
|
||||
return "", ""
|
||||
}
|
||||
Reference in New Issue
Block a user