add subdomain url extractor; add 3p script blocker modifier
This commit is contained in:
33
proxychain/responsemodifiers/block_third_party_scripts.go
Normal file
33
proxychain/responsemodifiers/block_third_party_scripts.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"ladder/proxychain"
|
||||
"ladder/proxychain/responsemodifiers/rewriters"
|
||||
)
|
||||
|
||||
// BlockThirdPartyScripts rewrites HTML and injects JS to block all third party JS from loading.
|
||||
func BlockThirdPartyScripts() proxychain.ResponseModification {
|
||||
// TODO: monkey patch fetch and XMLHttpRequest to firewall 3P JS as well.
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// don't add rewriter if it's not even html
|
||||
ct := chain.Response.Header.Get("content-type")
|
||||
if !strings.HasPrefix(ct, "text/html") {
|
||||
return nil
|
||||
}
|
||||
|
||||
// proxyURL is the URL of the ladder: http://localhost:8080 (ladder)
|
||||
originalURI := chain.Context.Request().URI()
|
||||
proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())
|
||||
|
||||
// replace http.Response.Body with a readcloser that wraps the original, modifying the html attributes
|
||||
rr := rewriters.NewBlockThirdPartyScriptsRewriter(chain.Request.URL, proxyURL)
|
||||
blockJSRewriter := rewriters.NewHTMLRewriter(chain.Response.Body, rr)
|
||||
chain.Response.Body = blockJSRewriter
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
//go:embed vendor/patch_google_analytics.js
|
||||
var gaPatch string
|
||||
|
||||
// PatchGoogleAnalytics replaces any request to google analytics with a no-op stub function.
|
||||
// Some sites will not display content until GA is loaded, so we fake one instead.
|
||||
// Credit to Raymond Hill @ github.com/gorhill/uBlock
|
||||
func PatchGoogleAnalytics() proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
|
||||
// preflight check
|
||||
isGADomain := chain.Request.URL.Host == "www.google-analytics.com" || chain.Request.URL.Host == "google-analytics.com"
|
||||
isGAPath := strings.HasSuffix(chain.Request.URL.Path, "analytics.js")
|
||||
if !(isGADomain || isGAPath) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// send modified js payload to client containing
|
||||
// stub functions from patch_google_analytics.js
|
||||
gaPatchReader := io.NopCloser(strings.NewReader(gaPatch))
|
||||
chain.Response.Body = gaPatchReader
|
||||
chain.Context.Set("content-type", "text/javascript")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
package rewriters
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
// BlockThirdPartyScriptsRewriter implements HTMLTokenRewriter.
// It blocks third-party JS referenced from <script> tags by renaming the
// tag's src attribute to "blocked".
type BlockThirdPartyScriptsRewriter struct {
	// baseURL is the URL of the proxied page; its scheme and hostname define
	// what counts as first-party.
	baseURL *url.URL
	// proxyURL is the URL of ladder itself, not of the proxied site.
	proxyURL string
}
|
||||
|
||||
// NewBlockThirdPartyScriptsRewriter creates a new instance of BlockThirdPartyScriptsRewriter.
|
||||
// This rewriter will strip out 3rd party JS URLs from script tags.
|
||||
func NewBlockThirdPartyScriptsRewriter(baseURL *url.URL, proxyURL string) *BlockThirdPartyScriptsRewriter {
|
||||
return &BlockThirdPartyScriptsRewriter{
|
||||
baseURL: baseURL,
|
||||
proxyURL: proxyURL,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *BlockThirdPartyScriptsRewriter) ShouldModify(token *html.Token) bool {
|
||||
if token.DataAtom != atom.Script {
|
||||
return false
|
||||
}
|
||||
|
||||
// check for 3p .js urls in html elements
|
||||
for i := range token.Attr {
|
||||
attr := token.Attr[i]
|
||||
switch {
|
||||
case attr.Key != "src":
|
||||
continue
|
||||
case strings.HasPrefix(attr.Val, "/"):
|
||||
return false
|
||||
case !strings.HasPrefix(attr.Val, "http"):
|
||||
return false
|
||||
case strings.HasPrefix(attr.Val, r.proxyURL):
|
||||
return false
|
||||
case strings.HasPrefix(attr.Val, fmt.Sprintf("%s://%s", r.baseURL.Scheme, r.baseURL.Hostname())):
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (r *BlockThirdPartyScriptsRewriter) ModifyToken(token *html.Token) (string, string) {
|
||||
for i := range token.Attr {
|
||||
attr := &token.Attr[i]
|
||||
if attr.Key != "src" {
|
||||
continue
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(attr.Val, "http") {
|
||||
continue
|
||||
}
|
||||
log.Printf("INFO: blocked 3P js: '%s' on '%s'\n", attr.Val, r.baseURL.String())
|
||||
attr.Key = "blocked"
|
||||
}
|
||||
return "", ""
|
||||
}
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
@@ -14,8 +16,9 @@ import (
|
||||
// ScriptInjectorRewriter is a struct that injects JS into the page
|
||||
// It uses an HTML tokenizer to process HTML content and injects JS at a specified location
|
||||
type ScriptInjectorRewriter struct {
|
||||
execTime ScriptExecTime
|
||||
script string
|
||||
execTime ScriptExecTime
|
||||
script string
|
||||
scriptMD5 string
|
||||
}
|
||||
|
||||
type ScriptExecTime int
|
||||
@@ -37,20 +40,27 @@ var afterDomIdleScriptInjector string
|
||||
func (r *ScriptInjectorRewriter) ModifyToken(_ *html.Token) (string, string) {
|
||||
switch {
|
||||
case r.execTime == BeforeDOMContentLoaded:
|
||||
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", r.script)
|
||||
return "", fmt.Sprintf("\n<script id='%s'>\n%s\n</script>\n", r.scriptMD5, r.script)
|
||||
|
||||
case r.execTime == AfterDOMContentLoaded:
|
||||
return "", fmt.Sprintf("\n<script>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.script)
|
||||
return "", fmt.Sprintf("\n<script id='%s'>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.scriptMD5, r.script)
|
||||
|
||||
case r.execTime == AfterDOMIdle:
|
||||
s := strings.Replace(afterDomIdleScriptInjector, `'{{AFTER_DOM_IDLE_SCRIPT}}'`, r.script, 1)
|
||||
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", s)
|
||||
return "", fmt.Sprintf("\n<script id='%s'>\n%s\n</script>\n", r.scriptMD5, s)
|
||||
|
||||
default:
|
||||
return "", ""
|
||||
}
|
||||
}
|
||||
|
||||
// generateMD5Hash returns the MD5 digest of input as a lowercase hexadecimal string.
func generateMD5Hash(input string) string {
	digest := md5.Sum([]byte(input))
	return hex.EncodeToString(digest[:])
}
|
||||
|
||||
// applies parameters by string replacement of the template script
|
||||
func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
|
||||
// Sort the keys by length in descending order
|
||||
@@ -71,9 +81,13 @@ func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
|
||||
// NewScriptInjectorRewriter implements a HtmlTokenRewriter
|
||||
// and injects JS into the page for execution at a particular time
|
||||
func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
|
||||
scriptMD5 := generateMD5Hash(script)
|
||||
executeOnceScript := fmt.Sprintf(`if (!document.getElementById("x-%s")) { %s; document.getElementById("%s").id = "x-%s" };`, scriptMD5, script, scriptMD5, scriptMD5)
|
||||
|
||||
return &ScriptInjectorRewriter{
|
||||
execTime: execTime,
|
||||
script: script,
|
||||
execTime: execTime,
|
||||
script: executeOnceScript,
|
||||
scriptMD5: scriptMD5,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,10 +97,7 @@ func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptIn
|
||||
// the params map represents the key-value pair of the params.
|
||||
// the key will be string replaced with the value
|
||||
func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter {
|
||||
rr := &ScriptInjectorRewriter{
|
||||
execTime: execTime,
|
||||
script: script,
|
||||
}
|
||||
rr := NewScriptInjectorRewriter(script, execTime)
|
||||
rr.applyParams(params)
|
||||
return rr
|
||||
}
|
||||
|
||||
@@ -134,7 +134,7 @@
|
||||
|
||||
// monkey patch xmlhttprequest
|
||||
const oldOpen = XMLHttpRequest.prototype.open;
|
||||
XMLHttpRequest.prototype.open = function (
|
||||
XMLHttpRequest.prototype.open = function(
|
||||
method,
|
||||
url,
|
||||
async = true,
|
||||
@@ -150,7 +150,7 @@
|
||||
);
|
||||
|
||||
const oldSend = XMLHttpRequest.prototype.send;
|
||||
XMLHttpRequest.prototype.send = function (method, url) {
|
||||
XMLHttpRequest.prototype.send = function(method, url) {
|
||||
return oldSend.call(this, method, rewriteURL(url));
|
||||
};
|
||||
hideMonkeyPatch(
|
||||
@@ -160,6 +160,7 @@
|
||||
);
|
||||
|
||||
// monkey patch service worker registration
|
||||
/*
|
||||
const oldRegister = ServiceWorkerContainer.prototype.register;
|
||||
ServiceWorkerContainer.prototype.register = function (scriptURL, options) {
|
||||
return oldRegister.call(this, rewriteURL(scriptURL), options);
|
||||
@@ -169,10 +170,11 @@
|
||||
"register",
|
||||
"function register() { [native code] }",
|
||||
);
|
||||
*/
|
||||
|
||||
// monkey patch URL.toString() method
|
||||
const oldToString = URL.prototype.toString;
|
||||
URL.prototype.toString = function () {
|
||||
URL.prototype.toString = function() {
|
||||
let originalURL = oldToString.call(this);
|
||||
return rewriteURL(originalURL);
|
||||
};
|
||||
@@ -184,7 +186,7 @@
|
||||
|
||||
// monkey patch URL.toJSON() method
// (patches toJSON itself; previously this re-patched toString a second time,
// leaving toJSON unpatched and running rewriteURL twice inside toString)
const oldToJson = URL.prototype.toJSON;
URL.prototype.toJSON = function() {
  let originalURL = oldToJson.call(this);
  return rewriteURL(originalURL);
};
|
||||
@@ -200,11 +202,11 @@
|
||||
"href",
|
||||
);
|
||||
Object.defineProperty(URL.prototype, "href", {
|
||||
get: function () {
|
||||
get: function() {
|
||||
let originalHref = originalHrefDescriptor.get.call(this);
|
||||
return rewriteURL(originalHref);
|
||||
},
|
||||
set: function (newValue) {
|
||||
set: function(newValue) {
|
||||
originalHrefDescriptor.set.call(this, rewriteURL(newValue));
|
||||
},
|
||||
});
|
||||
@@ -283,7 +285,7 @@
|
||||
|
||||
// monkey-patching Element.setAttribute
|
||||
const originalSetAttribute = Element.prototype.setAttribute;
|
||||
Element.prototype.setAttribute = function (name, value) {
|
||||
Element.prototype.setAttribute = function(name, value) {
|
||||
const isMatchingElement = elements.some((element) => {
|
||||
return this.tagName.toLowerCase() === element.tag &&
|
||||
name.toLowerCase() === element.attribute;
|
||||
|
||||
Reference in New Issue
Block a user