add subdomain url extractor; add 3p script blocker modifier

This commit is contained in:
Kevin Pham
2023-12-06 14:18:40 -06:00
parent 0e940ec217
commit 6a5b85f260
9 changed files with 189 additions and 114 deletions

View File

@@ -0,0 +1,69 @@
package rewriters
import (
_ "embed"
"fmt"
"log"
"net/url"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// BlockThirdPartyScriptsRewriter implements HTMLTokenRewriter
// and blocks 3rd party JS in script tags by renaming the src attribute to "blocked",
// which leaves the script element without a src attribute.
type BlockThirdPartyScriptsRewriter struct {
// baseURL supplies the scheme and hostname that script URLs are compared
// against to decide whether they are first-party; it is also printed in the
// log line written when a script is blocked.
baseURL *url.URL
// proxyURL identifies the proxy itself; script URLs that point at it are never blocked.
proxyURL string // ladder URL, not proxied site URL
}
// NewBlockThirdPartyScriptsRewriter builds a BlockThirdPartyScriptsRewriter
// for the page at baseURL, served through the proxy at proxyURL.
// The returned rewriter disables script tags whose src points at a 3rd party.
func NewBlockThirdPartyScriptsRewriter(baseURL *url.URL, proxyURL string) *BlockThirdPartyScriptsRewriter {
	rewriter := new(BlockThirdPartyScriptsRewriter)
	rewriter.baseURL = baseURL
	rewriter.proxyURL = proxyURL
	return rewriter
}
// ShouldModify reports whether token is a <script> tag that carries at least
// one src attribute pointing at a third-party origin. Inline scripts, end
// tags and scripts loaded from the proxied site or from the proxy itself are
// left alone.
func (r *BlockThirdPartyScriptsRewriter) ShouldModify(token *html.Token) bool {
	if token.DataAtom != atom.Script {
		return false
	}

	for i := range token.Attr {
		attr := token.Attr[i]
		if attr.Key == "src" && isThirdPartyScriptSrc(attr.Val, r.baseURL, r.proxyURL) {
			return true
		}
	}

	return false
}

// ModifyToken disables every third-party src attribute on token by renaming
// the attribute key to "blocked"; the original URL is kept as the value and
// each blocked URL is logged. It never injects markup, so both returned
// strings are always empty.
func (r *BlockThirdPartyScriptsRewriter) ModifyToken(token *html.Token) (string, string) {
	for i := range token.Attr {
		attr := &token.Attr[i]
		if attr.Key != "src" || !isThirdPartyScriptSrc(attr.Val, r.baseURL, r.proxyURL) {
			continue
		}

		log.Printf("INFO: blocked 3P js: '%s' on '%s'\n", attr.Val, r.baseURL.String())
		attr.Key = "blocked"
	}

	return "", ""
}

// isThirdPartyScriptSrc decides whether a script src value refers to an
// origin other than the proxied site (baseURL) or the proxy (proxyURL).
//
// The decision is made on the parsed host rather than on a string prefix, so
// that look-alike URLs such as "https://example.com.evil.net/x.js" or
// "https://example.com@evil.net/x.js" are not mistaken for first-party ones,
// and protocol-relative URLs ("//cdn.example/x.js") are classified by their
// host instead of being treated as local paths.
func isThirdPartyScriptSrc(src string, baseURL *url.URL, proxyURL string) bool {
	// Leading/trailing whitespace around a URL attribute is not significant,
	// so it must not be usable to dodge the check.
	src = strings.TrimSpace(src)

	u, err := url.Parse(src)
	if err != nil {
		// net/url can reject values a browser would still fetch. Err on the
		// side of blocking anything that looks absolute.
		return strings.HasPrefix(src, "http") || strings.HasPrefix(src, "//")
	}

	// No host means a path-only reference (or data:, blob:, ...), which is
	// never a third-party network fetch.
	if u.Host == "" {
		return false
	}

	switch u.Scheme {
	case "":
		// Protocol-relative: only the host can differ, so borrow the scheme
		// of the proxied site for the origin comparison below.
		u.Scheme = baseURL.Scheme
	case "http", "https":
	default:
		return false
	}

	if isProxyHost(u, proxyURL) {
		return false
	}

	return scriptSrcOrigin(u) != scriptSrcOrigin(baseURL)
}

// scriptSrcOrigin renders the scheme and hostname of u in lower case. The
// port is deliberately left out, so URLs differing only by port share an origin.
func scriptSrcOrigin(u *url.URL) string {
	return fmt.Sprintf("%s://%s", strings.ToLower(u.Scheme), strings.ToLower(u.Hostname()))
}

// isProxyHost reports whether u is served by the proxy described by proxyURL.
// An empty or host-less proxyURL never matches, so a missing proxy setting
// cannot silently mark every script as first-party.
func isProxyHost(u *url.URL, proxyURL string) bool {
	p, err := url.Parse(proxyURL)
	if err != nil || p.Host == "" {
		return false
	}

	// A proxy URL without an explicit port matches the host on any port.
	if p.Port() == "" {
		return strings.EqualFold(u.Hostname(), p.Hostname())
	}

	return strings.EqualFold(u.Host, p.Host)
}

View File

@@ -6,6 +6,8 @@ import (
"sort"
"strings"
"crypto/md5"
"encoding/hex"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
@@ -14,8 +16,9 @@ import (
// ScriptInjectorRewriter is a struct that injects JS into the page.
// It uses an HTML tokenizer to process HTML content and injects JS at a specified location.
//
// NOTE(review): this span looks like a rendered diff hunk: the first
// execTime/script pair appears to be the pre-change text of the pair that
// follows it. Confirm against the real file, where each field should be
// declared only once.
type ScriptInjectorRewriter struct {
execTime ScriptExecTime
script string
// execTime selects which <script> wrapper ModifyToken emits.
execTime ScriptExecTime
// script is the JavaScript text placed inside the emitted <script> element.
script string
// scriptMD5 is written as the id attribute of the emitted <script> element.
scriptMD5 string
}
// ScriptExecTime identifies the point in page loading at which an injected
// script is meant to run (for example BeforeDOMContentLoaded,
// AfterDOMContentLoaded or AfterDOMIdle).
type ScriptExecTime int
@@ -37,20 +40,27 @@ var afterDomIdleScriptInjector string
// ModifyToken ignores the token it receives. It returns an empty first string
// and, as the second string, r.script wrapped in a <script> element whose
// shape depends on r.execTime:
//   - BeforeDOMContentLoaded: the script is emitted as-is.
//   - AfterDOMContentLoaded: the script is wrapped in a DOMContentLoaded listener.
//   - AfterDOMIdle: the script replaces the first '{{AFTER_DOM_IDLE_SCRIPT}}'
//     placeholder in afterDomIdleScriptInjector.
//
// Any other execTime yields two empty strings.
//
// NOTE(review): this span looks like a rendered diff hunk: in each case the
// first of two consecutive return/Sprintf lines appears to be the pre-change
// text and the second (the one that adds id='%s' from r.scriptMD5) the
// post-change text. Confirm against the real file.
func (r *ScriptInjectorRewriter) ModifyToken(_ *html.Token) (string, string) {
switch {
case r.execTime == BeforeDOMContentLoaded:
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", r.script)
return "", fmt.Sprintf("\n<script id='%s'>\n%s\n</script>\n", r.scriptMD5, r.script)
case r.execTime == AfterDOMContentLoaded:
return "", fmt.Sprintf("\n<script>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.script)
return "", fmt.Sprintf("\n<script id='%s'>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.scriptMD5, r.script)
case r.execTime == AfterDOMIdle:
s := strings.Replace(afterDomIdleScriptInjector, `'{{AFTER_DOM_IDLE_SCRIPT}}'`, r.script, 1)
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", s)
return "", fmt.Sprintf("\n<script id='%s'>\n%s\n</script>\n", r.scriptMD5, s)
default:
return "", ""
}
}
// generateMD5Hash returns the MD5 digest of input, encoded as a lowercase
// hexadecimal string.
func generateMD5Hash(input string) string {
	digest := md5.Sum([]byte(input))
	return hex.EncodeToString(digest[:])
}
// applies parameters by string replacement of the template script
func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
// Sort the keys by length in descending order
@@ -71,9 +81,13 @@ func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
// NewScriptInjectorRewriter implements a HtmlTokenRewriter
// and injects JS into the page for execution at a particular time.
//
// The script is wrapped in an "execute once" guard keyed by the MD5 of the
// script text: the wrapped code runs only when no element with id
// "x-<md5>" exists, and afterwards renames the element with id "<md5>" to
// "x-<md5>".
//
// NOTE(review): the wrapper appends "; document.getElementById(...)" on the
// same line as the end of script, so a script whose last line is a "//"
// comment without a trailing newline would appear to comment out the id
// rename — confirm whether callers can pass such scripts.
//
// NOTE(review): this span looks like a rendered diff hunk: the first
// execTime/script pair in the literal appears to be the pre-change text of
// the pair that follows it. Confirm against the real file.
func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
// Hash the script exactly as passed in; the hash doubles as the DOM id.
scriptMD5 := generateMD5Hash(script)
executeOnceScript := fmt.Sprintf(`if (!document.getElementById("x-%s")) { %s; document.getElementById("%s").id = "x-%s" };`, scriptMD5, script, scriptMD5, scriptMD5)
return &ScriptInjectorRewriter{
execTime: execTime,
script: script,
execTime: execTime,
script: executeOnceScript,
scriptMD5: scriptMD5,
}
}
@@ -83,10 +97,7 @@ func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptIn
// the params map represents the key-value pair of the params.
// the key will be string replaced with the value
//
// NOTE(review): the rewriter is built by NewScriptInjectorRewriter before
// applyParams runs, so the MD5 id is derived from the un-parameterized
// template. Two injections of the same template with different params would
// therefore appear to share one id and be treated as the same script by the
// execute-once guard — confirm whether that is intended.
//
// NOTE(review): this span looks like a rendered diff hunk: the struct-literal
// assignment to rr appears to be the pre-change text and the
// NewScriptInjectorRewriter call the post-change text. Confirm against the
// real file.
func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter {
rr := &ScriptInjectorRewriter{
execTime: execTime,
script: script,
}
rr := NewScriptInjectorRewriter(script, execTime)
rr.applyParams(params)
return rr
}