add subdomain url extractor; add 3p script blocker modifier

This commit is contained in:
Kevin Pham
2023-12-06 14:18:40 -06:00
parent 0e940ec217
commit 6a5b85f260
9 changed files with 189 additions and 114 deletions

View File

@@ -0,0 +1,33 @@
package responsemodifiers
import (
_ "embed"
"fmt"
"strings"
"ladder/proxychain"
"ladder/proxychain/responsemodifiers/rewriters"
)
// BlockThirdPartyScripts returns a response modification that rewrites HTML
// responses so that third party JS referenced by script tags is blocked.
// Responses whose content-type does not start with "text/html" are passed
// through untouched.
func BlockThirdPartyScripts() proxychain.ResponseModification {
	// TODO: monkey patch fetch and XMLHttpRequest to firewall 3P JS as well.
	installRewriter := func(chain *proxychain.ProxyChain) error {
		// only HTML documents get the rewriter
		if contentType := chain.Response.Header.Get("content-type"); !strings.HasPrefix(contentType, "text/html") {
			return nil
		}

		// ladderURL is the origin of the ladder itself (e.g. http://localhost:8080),
		// built from the incoming request, not from the proxied site
		requestURI := chain.Context.Request().URI()
		ladderURL := fmt.Sprintf("%s://%s", requestURI.Scheme(), requestURI.Host())

		// swap the response body for a rewriter that wraps the original body
		// and modifies the script tags as the HTML passes through
		chain.Response.Body = rewriters.NewHTMLRewriter(
			chain.Response.Body,
			rewriters.NewBlockThirdPartyScriptsRewriter(chain.Request.URL, ladderURL),
		)
		return nil
	}
	return installRewriter
}

View File

@@ -1,34 +0,0 @@
package responsemodifiers
import (
_ "embed"
"io"
"strings"
"ladder/proxychain"
)
// gaPatch holds the contents of vendor/patch_google_analytics.js,
// embedded into the binary at build time.
//
//go:embed vendor/patch_google_analytics.js
var gaPatch string
// PatchGoogleAnalytics returns a response modification that swaps the response
// body for the embedded no-op Google Analytics stub.
// Some sites will not display content until GA is loaded, so a fake one is served instead.
// The stub is served when the request host is a google-analytics.com host
// or when the request path ends in "analytics.js".
// Credit to Raymond Hill @ github.com/gorhill/uBlock
func PatchGoogleAnalytics() proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		target := chain.Request.URL

		// preflight check: does the request look like a GA script at all?
		hostMatches := false
		switch target.Host {
		case "www.google-analytics.com", "google-analytics.com":
			hostMatches = true
		}
		if !hostMatches && !strings.HasSuffix(target.Path, "analytics.js") {
			return nil
		}

		// reply with the stub functions from patch_google_analytics.js
		// instead of the upstream payload
		chain.Response.Body = io.NopCloser(strings.NewReader(gaPatch))
		chain.Context.Set("content-type", "text/javascript")
		return nil
	}
}

View File

@@ -0,0 +1,69 @@
package rewriters
import (
_ "embed"
"fmt"
"log"
"net/url"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// BlockThirdPartyScriptsRewriter implements HTMLTokenRewriter.
// It blocks 3rd party JS in script tags by renaming the tag's src attribute
// to "blocked".
type BlockThirdPartyScriptsRewriter struct {
	// baseURL is the URL of the proxied site; scripts served from it are first party
	baseURL *url.URL
	// proxyURL is the ladder URL, not the proxied site URL
	proxyURL string
}

// NewBlockThirdPartyScriptsRewriter builds a BlockThirdPartyScriptsRewriter for
// the proxied site at baseURL, served through the ladder at proxyURL.
// The returned rewriter disables 3rd party JS URLs found in script tags.
func NewBlockThirdPartyScriptsRewriter(baseURL *url.URL, proxyURL string) *BlockThirdPartyScriptsRewriter {
	rewriter := new(BlockThirdPartyScriptsRewriter)
	rewriter.baseURL = baseURL
	rewriter.proxyURL = proxyURL
	return rewriter
}
// ShouldModify reports whether token is a script tag that needs rewriting.
// Tokens other than script tags are never modified. A script token is skipped
// as soon as one of its src attributes turns out to be first party (see
// isThirdPartySrc). Script tokens without a src attribute are still reported
// as modifiable, but ModifyToken leaves them unchanged.
func (r *BlockThirdPartyScriptsRewriter) ShouldModify(token *html.Token) bool {
	if token.DataAtom != atom.Script {
		return false
	}

	// check for 3p .js urls in html elements
	for i := range token.Attr {
		attr := &token.Attr[i]
		if attr.Key != "src" {
			continue
		}
		if !r.isThirdPartySrc(attr.Val) {
			return false
		}
	}
	return true
}

// ModifyToken disables every third party src attribute of token by renaming
// the attribute key from "src" to "blocked"; the attribute value is kept.
// Each blocked URL is logged. It always returns two empty strings
// (presumably the text to prepend/append around the token — confirm against
// the HTMLTokenRewriter contract).
func (r *BlockThirdPartyScriptsRewriter) ModifyToken(token *html.Token) (string, string) {
	for i := range token.Attr {
		// take a pointer so the rename below mutates the token itself
		attr := &token.Attr[i]
		if attr.Key != "src" || !r.isThirdPartySrc(attr.Val) {
			continue
		}
		log.Printf("INFO: blocked 3P js: '%s' on '%s'\n", attr.Val, r.baseURL.String())
		attr.Key = "blocked"
	}
	return "", ""
}

// isThirdPartySrc reports whether the script source src points at a host other
// than the proxied site (baseURL) or the ladder itself (proxyURL).
//
// Relative sources are first party. Protocol-relative sources ("//host/x.js")
// are absolute URLs without a scheme, so they are compared by host instead of
// being mistaken for root-relative paths. Hosts are compared exactly after
// parsing, so "https://example.com.evil.net" or "https://example.com@evil.net"
// are not accepted as "https://example.com".
func (r *BlockThirdPartyScriptsRewriter) isThirdPartySrc(src string) bool {
	if strings.HasPrefix(src, "//") {
		// protocol-relative URL that points back at the ladder
		if i := strings.Index(r.proxyURL, "//"); i >= 0 && strings.HasPrefix(src, r.proxyURL[i:]) {
			return false
		}
		// give it the proxied site's scheme so it is compared like any other absolute URL
		src = fmt.Sprintf("%s:%s", r.baseURL.Scheme, src)
	}

	switch {
	case strings.HasPrefix(src, "/"):
		// root-relative path on the proxied site
		return false
	case !strings.HasPrefix(src, "http"):
		// relative path or non-http scheme
		return false
	case strings.HasPrefix(src, r.proxyURL):
		// already routed through the ladder
		return false
	}

	parsed, err := url.Parse(src)
	if err != nil {
		// an absolute URL that cannot be parsed cannot be shown to be first party
		return true
	}
	sameScheme := parsed.Scheme == r.baseURL.Scheme
	sameHost := strings.EqualFold(parsed.Hostname(), r.baseURL.Hostname())
	return !(sameScheme && sameHost)
}

View File

@@ -6,6 +6,8 @@ import (
"sort"
"strings"
"crypto/md5"
"encoding/hex"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
@@ -14,8 +16,9 @@ import (
// ScriptInjectorRewriter is a struct that injects JS into the page
// It uses an HTML tokenizer to process HTML content and injects JS at a specified location
type ScriptInjectorRewriter struct {
execTime ScriptExecTime
script string
execTime ScriptExecTime
script string
scriptMD5 string
}
type ScriptExecTime int
@@ -37,20 +40,27 @@ var afterDomIdleScriptInjector string
func (r *ScriptInjectorRewriter) ModifyToken(_ *html.Token) (string, string) {
switch {
case r.execTime == BeforeDOMContentLoaded:
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", r.script)
return "", fmt.Sprintf("\n<script id='%s'>\n%s\n</script>\n", r.scriptMD5, r.script)
case r.execTime == AfterDOMContentLoaded:
return "", fmt.Sprintf("\n<script>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.script)
return "", fmt.Sprintf("\n<script id='%s'>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.scriptMD5, r.script)
case r.execTime == AfterDOMIdle:
s := strings.Replace(afterDomIdleScriptInjector, `'{{AFTER_DOM_IDLE_SCRIPT}}'`, r.script, 1)
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", s)
return "", fmt.Sprintf("\n<script id='%s'>\n%s\n</script>\n", r.scriptMD5, s)
default:
return "", ""
}
}
// generateMD5Hash returns the MD5 digest of input as a lowercase hexadecimal string.
func generateMD5Hash(input string) string {
	digest := md5.Sum([]byte(input))
	return hex.EncodeToString(digest[:])
}
// applies parameters by string replacement of the template script
func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
// Sort the keys by length in descending order
@@ -71,9 +81,13 @@ func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
// NewScriptInjectorRewriter creates a ScriptInjectorRewriter, an HTMLTokenRewriter
// that injects JS into the page for execution at a particular time
func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
scriptMD5 := generateMD5Hash(script)
executeOnceScript := fmt.Sprintf(`if (!document.getElementById("x-%s")) { %s; document.getElementById("%s").id = "x-%s" };`, scriptMD5, script, scriptMD5, scriptMD5)
return &ScriptInjectorRewriter{
execTime: execTime,
script: script,
execTime: execTime,
script: executeOnceScript,
scriptMD5: scriptMD5,
}
}
@@ -83,10 +97,7 @@ func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptIn
// the params map represents the key-value pair of the params.
// the key will be string replaced with the value
func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter {
rr := &ScriptInjectorRewriter{
execTime: execTime,
script: script,
}
rr := NewScriptInjectorRewriter(script, execTime)
rr.applyParams(params)
return rr
}

View File

@@ -134,7 +134,7 @@
// monkey patch xmlhttprequest
const oldOpen = XMLHttpRequest.prototype.open;
XMLHttpRequest.prototype.open = function (
XMLHttpRequest.prototype.open = function(
method,
url,
async = true,
@@ -150,7 +150,7 @@
);
const oldSend = XMLHttpRequest.prototype.send;
XMLHttpRequest.prototype.send = function (method, url) {
XMLHttpRequest.prototype.send = function(method, url) {
return oldSend.call(this, method, rewriteURL(url));
};
hideMonkeyPatch(
@@ -160,6 +160,7 @@
);
// monkey patch service worker registration
/*
const oldRegister = ServiceWorkerContainer.prototype.register;
ServiceWorkerContainer.prototype.register = function (scriptURL, options) {
return oldRegister.call(this, rewriteURL(scriptURL), options);
@@ -169,10 +170,11 @@
"register",
"function register() { [native code] }",
);
*/
// monkey patch URL.toString() method
const oldToString = URL.prototype.toString;
URL.prototype.toString = function () {
URL.prototype.toString = function() {
let originalURL = oldToString.call(this);
return rewriteURL(originalURL);
};
@@ -184,7 +186,7 @@
// monkey patch URL.toJSON() method
const oldToJson = URL.prototype.toString;
URL.prototype.toString = function () {
URL.prototype.toString = function() {
let originalURL = oldToJson.call(this);
return rewriteURL(originalURL);
};
@@ -200,11 +202,11 @@
"href",
);
Object.defineProperty(URL.prototype, "href", {
get: function () {
get: function() {
let originalHref = originalHrefDescriptor.get.call(this);
return rewriteURL(originalHref);
},
set: function (newValue) {
set: function(newValue) {
originalHrefDescriptor.set.call(this, rewriteURL(newValue));
},
});
@@ -283,7 +285,7 @@
// monkey-patching Element.setAttribute
const originalSetAttribute = Element.prototype.setAttribute;
Element.prototype.setAttribute = function (name, value) {
Element.prototype.setAttribute = function(name, value) {
const isMatchingElement = elements.some((element) => {
return this.tagName.toLowerCase() === element.tag &&
name.toLowerCase() === element.attribute;