From 6a5b85f260a8091a0692f5002ef77dc79221fc1a Mon Sep 17 00:00:00 2001 From: Kevin Pham Date: Wed, 6 Dec 2023 14:18:40 -0600 Subject: [PATCH] add subdomain url extractor; add 3p script blocker modifier --- handlers/proxy.go | 8 ++- proxychain/proxychain.go | 50 +++++++++++++- .../requestmodifiers/spoof_ja3_fingerprint.go | 52 -------------- .../block_third_party_scripts.go | 33 +++++++++ .../patch_google_analytics.go | 34 --------- .../rewriters/block_third_party_scripts.go | 69 +++++++++++++++++++ .../rewriters/script_injector_rewriter.go | 33 ++++++--- .../vendor/patch_dynamic_resource_urls.js | 16 +++-- proxychain/ruleset/rule_resmod_types.gen.go | 8 +-- 9 files changed, 189 insertions(+), 114 deletions(-) delete mode 100644 proxychain/requestmodifiers/spoof_ja3_fingerprint.go create mode 100644 proxychain/responsemodifiers/block_third_party_scripts.go delete mode 100644 proxychain/responsemodifiers/patch_google_analytics.go create mode 100644 proxychain/responsemodifiers/rewriters/block_third_party_scripts.go diff --git a/handlers/proxy.go b/handlers/proxy.go index 6e3b750..7a04fa4 100644 --- a/handlers/proxy.go +++ b/handlers/proxy.go @@ -34,7 +34,7 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler { SetRequestModifications( //rx.SpoofJA3fingerprint(ja3, "Googlebot"), rx.AddCacheBusterQuery(), - rx.MasqueradeAsGoogleBot(), + //rx.MasqueradeAsGoogleBot(), rx.ForwardRequestHeaders(), rx.DeleteOutgoingCookies(), rx.SpoofReferrerFromRedditPost(), @@ -44,15 +44,17 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler { ). 
AddResponseModifications(
 				//tx.ForwardResponseHeaders(),
+				//tx.BlockThirdPartyScripts(),
 				tx.DeleteIncomingCookies(),
 				tx.DeleteLocalStorageData(),
 				tx.DeleteSessionStorageData(),
 				tx.BypassCORS(),
 				tx.BypassContentSecurityPolicy(),
 				tx.RewriteHTMLResourceURLs(),
-				tx.PatchTrackerScripts(),
 				tx.PatchDynamicResourceURLs(),
-				//tx.BlockElementRemoval(".article-content"),
+				tx.PatchTrackerScripts(),
+				//tx.BlockElementRemoval(".article-content"), // techcrunch
+				tx.BlockElementRemoval(".available-content"), // substack
 				// tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"),
 			)
 
diff --git a/proxychain/proxychain.go b/proxychain/proxychain.go
index ceb0712..4f3df50 100644
--- a/proxychain/proxychain.go
+++ b/proxychain/proxychain.go
@@ -234,9 +234,53 @@ func preventRecursiveProxyRequest(urlQuery *url.URL, baseProxyURL string) *url.U
 	return preventRecursiveProxyRequest(fixedURL, baseProxyURL)
 }
 
-// extractURL extracts a URL from the request ctx. If the URL in the request
-// is a relative path, it reconstructs the full URL using the referer header.
+// extractURL extracts the target URL from the request ctx: from the path for local, "/http", "/api" and "/outline" requests and for hosts without a subdomain, otherwise from the subdomain.
 func (chain *ProxyChain) extractURL() (*url.URL, error) {
+	base, path := chain.Context.BaseURL(), chain.Context.Path()
+	isLocal := strings.HasPrefix(base, "http://localhost") || strings.HasPrefix(base, "http://127.0.0.1")
+	isReqPath := strings.HasPrefix(path, "/http")
+	isAPI := strings.HasPrefix(path, "/api")
+	isOutline := strings.HasPrefix(path, "/outline")
+	if isLocal || isReqPath || isAPI || isOutline {
+		return chain.extractURLFromPath()
+	}
+
+	u, err := url.Parse(base)
+	if err != nil {
+		return &url.URL{}, err
+	}
+	// Subdomain extraction drops the last two host labels (the proxy's own domain),
+	// so a host needs at least three labels before a subdomain can carry a target.
+	if len(strings.Split(u.Hostname(), ".")) < 3 {
+		return chain.extractURLFromPath()
+	}
+
+	return chain.extractURLFromSubdomain()
+}
+
+// extractURLFromSubdomain extracts a URL from the request ctx if subdomains are used. 
+// The target host is decoded from the subdomain labels; the path is taken from the request.
+func (chain *ProxyChain) extractURLFromSubdomain() (*url.URL, error) {
+	u, err := url.Parse(chain.Context.BaseURL())
+	if err != nil {
+		return &url.URL{}, err
+	}
+	parts := strings.Split(u.Hostname(), ".")
+	if len(parts) < 3 {
+		// fewer than three labels: no subdomain set, fallback to path extraction
+		return chain.extractURLFromPath()
+	}
+	// The labels before the proxy's own two-label domain encode the target host:
+	// "-" stands for "." and "--" for a literal "-" (www-my--site-com -> www.my-site.com).
+	host := strings.NewReplacer("--", "-", "-", ".").Replace(strings.Join(parts[:len(parts)-2], "."))
+	// BaseURL() is protocol+host only (Fiber docs), so use the request path. NOTE(review): query string is not forwarded here - confirm.
+	path := strings.TrimPrefix(chain.Context.Path(), "/")
+	return url.Parse(fmt.Sprintf("https://%s/%s", host, path))
+}
+
+// extractURLFromPath extracts a URL from the request ctx. If the URL in the request
+// is a relative path, it reconstructs the full URL using the referer header.
+func (chain *ProxyChain) extractURLFromPath() (*url.URL, error) {
 	reqURL := chain.Context.Params("*")
 
 	fmt.Println("XXXXXXXXXXXXXXXX")
@@ -316,7 +360,7 @@ func (chain *ProxyChain) validateCtxIsSet() error {
 	if chain.Context != nil {
 		return nil
 	}
-	err := errors.New("proxyChain was called without setting a fiber Ctx. Use ProxyChain.SetCtx()")
+	err := errors.New("proxyChain was called without setting a fiber Ctx. 
Use ProxyChain.SetFiberCtx()") chain.abortErr = chain.abort(err) return chain.abortErr } diff --git a/proxychain/requestmodifiers/spoof_ja3_fingerprint.go b/proxychain/requestmodifiers/spoof_ja3_fingerprint.go deleted file mode 100644 index 84a70a7..0000000 --- a/proxychain/requestmodifiers/spoof_ja3_fingerprint.go +++ /dev/null @@ -1,52 +0,0 @@ -package requestmodifiers - -// removed due to using a different TLS spoofing technique - -/* -import ( - //"github.com/Danny-Dasilva/CycleTLS/cycletls" - //http "github.com/Danny-Dasilva/fhttp" - //http "github.com/bogdanfinn/fhttp" - - "golang.org/x/net/proxy" - "ladder/proxychain" -) - -// SpoofJA3fingerprint modifies the TLS client and user agent to spoof a particular JA3 fingerprint -// Some anti-bot WAFs such as cloudflare can fingerprint the fields of the TLS hello packet, and the order in which they appear -// https://web.archive.org/web/20231126224326/https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/ -// https://web.archive.org/web/20231119065253/https://developers.cloudflare.com/bots/concepts/ja3-fingerprint/ -func SpoofJA3fingerprint(ja3 string, userAgent string) proxychain.RequestModification { - //fmt.Println(ja3) - return func(chain *proxychain.ProxyChain) error { - // deep copy existing client while modifying http transport - ja3SpoofClient := &http.Client{ - Transport: cycletls.NewTransport(ja3, userAgent), - Timeout: chain.Client.Timeout, - CheckRedirect: chain.Client.CheckRedirect, - } - - chain.SetOnceHTTPClient(ja3SpoofClient) - return nil - } -} - -// SpoofJA3fingerprintWithProxy modifies the TLS client and user agent to spoof a particular JA3 fingerprint and use a proxy.ContextDialer from the "golang.org/x/net/proxy" -// Some anti-bot WAFs such as cloudflare can fingerprint the fields of the TLS hello packet, and the order in which they appear -// 
https://web.archive.org/web/20231126224326/https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/ -// https://web.archive.org/web/20231119065253/https://developers.cloudflare.com/bots/concepts/ja3-fingerprint/ -func SpoofJA3fingerprintWithProxy(ja3 string, userAgent string, proxy proxy.ContextDialer) proxychain.RequestModification { - return func(chain *proxychain.ProxyChain) error { - - // deep copy existing client while modifying http transport - ja3SpoofClient := &http.Client{ - Transport: cycletls.NewTransportWithProxy(ja3, userAgent, proxy), - Timeout: chain.Client.Timeout, - CheckRedirect: chain.Client.CheckRedirect, - } - - chain.SetOnceHTTPClient(ja3SpoofClient) - return nil - } -} -*/ diff --git a/proxychain/responsemodifiers/block_third_party_scripts.go b/proxychain/responsemodifiers/block_third_party_scripts.go new file mode 100644 index 0000000..398a6cc --- /dev/null +++ b/proxychain/responsemodifiers/block_third_party_scripts.go @@ -0,0 +1,33 @@ +package responsemodifiers + +import ( + _ "embed" + "fmt" + "strings" + + "ladder/proxychain" + "ladder/proxychain/responsemodifiers/rewriters" +) + +// BlockThirdPartyScripts rewrites HTML and injects JS to block all third party JS from loading. +func BlockThirdPartyScripts() proxychain.ResponseModification { + // TODO: monkey patch fetch and XMLHttpRequest to firewall 3P JS as well. 
+ return func(chain *proxychain.ProxyChain) error { + // don't add rewriter if it's not even html + ct := chain.Response.Header.Get("content-type") + if !strings.HasPrefix(ct, "text/html") { + return nil + } + + // proxyURL is the URL of the ladder: http://localhost:8080 (ladder) + originalURI := chain.Context.Request().URI() + proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host()) + + // replace http.Response.Body with a readcloser that wraps the original, modifying the html attributes + rr := rewriters.NewBlockThirdPartyScriptsRewriter(chain.Request.URL, proxyURL) + blockJSRewriter := rewriters.NewHTMLRewriter(chain.Response.Body, rr) + chain.Response.Body = blockJSRewriter + + return nil + } +} diff --git a/proxychain/responsemodifiers/patch_google_analytics.go b/proxychain/responsemodifiers/patch_google_analytics.go deleted file mode 100644 index 4427ea5..0000000 --- a/proxychain/responsemodifiers/patch_google_analytics.go +++ /dev/null @@ -1,34 +0,0 @@ -package responsemodifiers - -import ( - _ "embed" - "io" - "strings" - - "ladder/proxychain" -) - -//go:embed vendor/patch_google_analytics.js -var gaPatch string - -// PatchGoogleAnalytics replaces any request to google analytics with a no-op stub function. -// Some sites will not display content until GA is loaded, so we fake one instead. 
-// Credit to Raymond Hill @ github.com/gorhill/uBlock -func PatchGoogleAnalytics() proxychain.ResponseModification { - return func(chain *proxychain.ProxyChain) error { - - // preflight check - isGADomain := chain.Request.URL.Host == "www.google-analytics.com" || chain.Request.URL.Host == "google-analytics.com" - isGAPath := strings.HasSuffix(chain.Request.URL.Path, "analytics.js") - if !(isGADomain || isGAPath) { - return nil - } - - // send modified js payload to client containing - // stub functions from patch_google_analytics.js - gaPatchReader := io.NopCloser(strings.NewReader(gaPatch)) - chain.Response.Body = gaPatchReader - chain.Context.Set("content-type", "text/javascript") - return nil - } -} diff --git a/proxychain/responsemodifiers/rewriters/block_third_party_scripts.go b/proxychain/responsemodifiers/rewriters/block_third_party_scripts.go new file mode 100644 index 0000000..82e54e0 --- /dev/null +++ b/proxychain/responsemodifiers/rewriters/block_third_party_scripts.go @@ -0,0 +1,69 @@ +package rewriters + +import ( + _ "embed" + "fmt" + "log" + "net/url" + "strings" + + "golang.org/x/net/html" + "golang.org/x/net/html/atom" +) + +// BlockThirdPartyScriptsRewriter implements HTMLTokenRewriter +// and blocks 3rd party JS in script tags by replacing the src attribute value "blocked" +type BlockThirdPartyScriptsRewriter struct { + baseURL *url.URL + proxyURL string // ladder URL, not proxied site URL +} + +// NewBlockThirdPartyScriptsRewriter creates a new instance of BlockThirdPartyScriptsRewriter. +// This rewriter will strip out 3rd party JS URLs from script tags. 
+func NewBlockThirdPartyScriptsRewriter(baseURL *url.URL, proxyURL string) *BlockThirdPartyScriptsRewriter {
+	return &BlockThirdPartyScriptsRewriter{baseURL: baseURL, proxyURL: proxyURL}
+}
+
+// ShouldModify reports whether token is a script tag to process; only a relative or first-party src exempts it.
+func (r *BlockThirdPartyScriptsRewriter) ShouldModify(token *html.Token) bool {
+	if token.DataAtom != atom.Script {
+		return false
+	}
+	for i := range token.Attr {
+		if token.Attr[i].Key == "src" && !r.isThirdParty(token.Attr[i].Val) {
+			return false
+		}
+	}
+	return true
+}
+
+// isThirdParty reports whether src is an absolute or protocol-relative http(s) URL on a host other than the proxied site or ladder.
+func (r *BlockThirdPartyScriptsRewriter) isThirdParty(src string) bool {
+	if strings.HasPrefix(src, "//") {
+		src = fmt.Sprintf("%s:%s", r.baseURL.Scheme, src) // protocol-relative: inherits the page scheme, host may still be foreign
+	}
+	if !strings.HasPrefix(src, "http") {
+		return false // relative path or non-http scheme
+	}
+	u, err := url.Parse(src)
+	if err != nil {
+		return true // cannot be shown to be first party
+	}
+	if p, perr := url.Parse(r.proxyURL); perr == nil && strings.EqualFold(u.Host, p.Host) {
+		return false
+	}
+	return !strings.EqualFold(u.Hostname(), r.baseURL.Hostname()) // compare parsed hosts, not string prefixes
+}
+
+// ModifyToken renames the src attribute of third-party scripts to "blocked" so they never load; it injects no markup.
+func (r *BlockThirdPartyScriptsRewriter) ModifyToken(token *html.Token) (string, string) {
+	for i := range token.Attr {
+		attr := &token.Attr[i]
+		if attr.Key != "src" || !r.isThirdParty(attr.Val) {
+			continue
+		}
+		log.Printf("INFO: blocked 3P js: '%s' on '%s'\n", attr.Val, r.baseURL.String())
+		attr.Key = "blocked"
+	}
+	return "", ""
+}
diff --git a/proxychain/responsemodifiers/rewriters/script_injector_rewriter.go b/proxychain/responsemodifiers/rewriters/script_injector_rewriter.go
index d6e63b5..be82da5 100644
--- a/proxychain/responsemodifiers/rewriters/script_injector_rewriter.go
+++ b/proxychain/responsemodifiers/rewriters/script_injector_rewriter.go
@@ -6,6 +6,8 @@ import (
 	"sort"
 	"strings"
 
+	"crypto/md5"
+	"encoding/hex"
 	"golang.org/x/net/html"
 	"golang.org/x/net/html/atom"
 )
@@ -14,8 +16,9 @@ import (
 // ScriptInjectorRewriter is a struct that injects JS into the page
 // It uses an HTML tokenizer to process HTML content and injects JS at a specified location
 type ScriptInjectorRewriter struct {
-	execTime ScriptExecTime
-	script   string
+	execTime  ScriptExecTime
+	script    string
+	scriptMD5 string
 }
 
 type ScriptExecTime int
@@ 
-37,20 +40,27 @@ var afterDomIdleScriptInjector string func (r *ScriptInjectorRewriter) ModifyToken(_ *html.Token) (string, string) { switch { case r.execTime == BeforeDOMContentLoaded: - return "", fmt.Sprintf("\n\n", r.script) + return "", fmt.Sprintf("\n\n", r.scriptMD5, r.script) case r.execTime == AfterDOMContentLoaded: - return "", fmt.Sprintf("\n", r.script) + return "", fmt.Sprintf("\n", r.scriptMD5, r.script) case r.execTime == AfterDOMIdle: s := strings.Replace(afterDomIdleScriptInjector, `'{{AFTER_DOM_IDLE_SCRIPT}}'`, r.script, 1) - return "", fmt.Sprintf("\n\n", s) + return "", fmt.Sprintf("\n\n", r.scriptMD5, s) default: return "", "" } } +// GenerateMD5Hash takes a string and returns its MD5 hash as a hexadecimal string +func generateMD5Hash(input string) string { + hasher := md5.New() + hasher.Write([]byte(input)) + return hex.EncodeToString(hasher.Sum(nil)) +} + // applies parameters by string replacement of the template script func (r *ScriptInjectorRewriter) applyParams(params map[string]string) { // Sort the keys by length in descending order @@ -71,9 +81,13 @@ func (r *ScriptInjectorRewriter) applyParams(params map[string]string) { // NewScriptInjectorRewriter implements a HtmlTokenRewriter // and injects JS into the page for execution at a particular time func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter { + scriptMD5 := generateMD5Hash(script) + executeOnceScript := fmt.Sprintf(`if (!document.getElementById("x-%s")) { %s; document.getElementById("%s").id = "x-%s" };`, scriptMD5, script, scriptMD5, scriptMD5) + return &ScriptInjectorRewriter{ - execTime: execTime, - script: script, + execTime: execTime, + script: executeOnceScript, + scriptMD5: scriptMD5, } } @@ -83,10 +97,7 @@ func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptIn // the params map represents the key-value pair of the params. 
// the key will be string replaced with the value func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter { - rr := &ScriptInjectorRewriter{ - execTime: execTime, - script: script, - } + rr := NewScriptInjectorRewriter(script, execTime) rr.applyParams(params) return rr } diff --git a/proxychain/responsemodifiers/vendor/patch_dynamic_resource_urls.js b/proxychain/responsemodifiers/vendor/patch_dynamic_resource_urls.js index 3e04400..74eba11 100644 --- a/proxychain/responsemodifiers/vendor/patch_dynamic_resource_urls.js +++ b/proxychain/responsemodifiers/vendor/patch_dynamic_resource_urls.js @@ -134,7 +134,7 @@ // monkey patch xmlhttprequest const oldOpen = XMLHttpRequest.prototype.open; - XMLHttpRequest.prototype.open = function ( + XMLHttpRequest.prototype.open = function( method, url, async = true, @@ -150,7 +150,7 @@ ); const oldSend = XMLHttpRequest.prototype.send; - XMLHttpRequest.prototype.send = function (method, url) { + XMLHttpRequest.prototype.send = function(method, url) { return oldSend.call(this, method, rewriteURL(url)); }; hideMonkeyPatch( @@ -160,6 +160,7 @@ ); // monkey patch service worker registration + /* const oldRegister = ServiceWorkerContainer.prototype.register; ServiceWorkerContainer.prototype.register = function (scriptURL, options) { return oldRegister.call(this, rewriteURL(scriptURL), options); @@ -169,10 +170,11 @@ "register", "function register() { [native code] }", ); + */ // monkey patch URL.toString() method const oldToString = URL.prototype.toString; - URL.prototype.toString = function () { + URL.prototype.toString = function() { let originalURL = oldToString.call(this); return rewriteURL(originalURL); }; @@ -184,7 +186,7 @@ // monkey patch URL.toJSON() method const oldToJson = URL.prototype.toString; - URL.prototype.toString = function () { + URL.prototype.toString = function() { let originalURL = oldToJson.call(this); return rewriteURL(originalURL); }; @@ 
-200,11 +202,11 @@ "href", ); Object.defineProperty(URL.prototype, "href", { - get: function () { + get: function() { let originalHref = originalHrefDescriptor.get.call(this); return rewriteURL(originalHref); }, - set: function (newValue) { + set: function(newValue) { originalHrefDescriptor.set.call(this, rewriteURL(newValue)); }, }); @@ -283,7 +285,7 @@ // monkey-patching Element.setAttribute const originalSetAttribute = Element.prototype.setAttribute; - Element.prototype.setAttribute = function (name, value) { + Element.prototype.setAttribute = function(name, value) { const isMatchingElement = elements.some((element) => { return this.tagName.toLowerCase() === element.tag && name.toLowerCase() === element.attribute; diff --git a/proxychain/ruleset/rule_resmod_types.gen.go b/proxychain/ruleset/rule_resmod_types.gen.go index dfff064..eb42a02 100644 --- a/proxychain/ruleset/rule_resmod_types.gen.go +++ b/proxychain/ruleset/rule_resmod_types.gen.go @@ -24,6 +24,10 @@ func init() { return tx.BlockElementRemoval(params[0]) } + rsmModMap["BlockThirdPartyScripts"] = func(_ ...string) proxychain.ResponseModification { + return tx.BlockThirdPartyScripts() + } + rsmModMap["BypassCORS"] = func(_ ...string) proxychain.ResponseModification { return tx.BypassCORS() } @@ -92,10 +96,6 @@ func init() { return tx.PatchDynamicResourceURLs() } - rsmModMap["PatchGoogleAnalytics"] = func(_ ...string) proxychain.ResponseModification { - return tx.PatchGoogleAnalytics() - } - rsmModMap["PatchTrackerScripts"] = func(_ ...string) proxychain.ResponseModification { return tx.PatchTrackerScripts() }