70 lines
1.7 KiB
Go
70 lines
1.7 KiB
Go
package rewriters
|
|
|
|
import (
|
|
_ "embed"
|
|
"fmt"
|
|
"log"
|
|
"net/url"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
"golang.org/x/net/html/atom"
|
|
)
|
|
|
|
// BlockThirdPartyScriptsRewriter implements HTMLTokenRewriter.
//
// It neutralises third-party JavaScript referenced from script tags: the
// offending attribute keeps its value but has its key renamed from "src" to
// "blocked".
type BlockThirdPartyScriptsRewriter struct {
	// baseURL is the URL the rewriter was created for; script sources are
	// compared against its scheme and hostname to decide whether they are
	// first-party.
	baseURL *url.URL

	// proxyURL is the ladder URL, not the proxied site URL. Script sources
	// that start with it are never blocked.
	proxyURL string
}
|
|
|
|
// NewBlockThirdPartyScriptsRewriter creates a new instance of BlockThirdPartyScriptsRewriter.
|
|
// This rewriter will strip out 3rd party JS URLs from script tags.
|
|
func NewBlockThirdPartyScriptsRewriter(baseURL *url.URL, proxyURL string) *BlockThirdPartyScriptsRewriter {
|
|
return &BlockThirdPartyScriptsRewriter{
|
|
baseURL: baseURL,
|
|
proxyURL: proxyURL,
|
|
}
|
|
}
|
|
|
|
func (r *BlockThirdPartyScriptsRewriter) ShouldModify(token *html.Token) bool {
|
|
if token.DataAtom != atom.Script {
|
|
return false
|
|
}
|
|
|
|
// check for 3p .js urls in html elements
|
|
for i := range token.Attr {
|
|
attr := token.Attr[i]
|
|
switch {
|
|
case attr.Key != "src":
|
|
continue
|
|
case strings.HasPrefix(attr.Val, "/"):
|
|
return false
|
|
case !strings.HasPrefix(attr.Val, "http"):
|
|
return false
|
|
case strings.HasPrefix(attr.Val, r.proxyURL):
|
|
return false
|
|
case strings.HasPrefix(attr.Val, fmt.Sprintf("%s://%s", r.baseURL.Scheme, r.baseURL.Hostname())):
|
|
return false
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
func (r *BlockThirdPartyScriptsRewriter) ModifyToken(token *html.Token) (string, string) {
|
|
for i := range token.Attr {
|
|
attr := &token.Attr[i]
|
|
if attr.Key != "src" {
|
|
continue
|
|
}
|
|
|
|
if !strings.HasPrefix(attr.Val, "http") {
|
|
continue
|
|
}
|
|
log.Printf("INFO: blocked 3P js: '%s' on '%s'\n", attr.Val, r.baseURL.String())
|
|
attr.Key = "blocked"
|
|
}
|
|
return "", ""
|
|
}
|