From 47def5c610bb13f1fe8266cdabf1e48298910718 Mon Sep 17 00:00:00 2001
From: Kevin Pham
Date: Fri, 1 Dec 2023 16:26:59 -0600
Subject: [PATCH] add patch tracker scripts response modifier

---
 .gitmodules                                   |   3 +
 handlers/proxy.go                             |   6 +-
 .../masquerade_as_trusted_bot.go              |  37 +++---
 .../requestmodifers/vendor/ua-parser-js       |   2 +-
 .../responsemodifers/patch_tracker_scripts.go | 115 ++++++++++++++++++
 .../vendor/ddg-tracker-surrogates             |   1 +
 6 files changed, 144 insertions(+), 20 deletions(-)
 create mode 100644 proxychain/responsemodifers/patch_tracker_scripts.go
 create mode 160000 proxychain/responsemodifers/vendor/ddg-tracker-surrogates

diff --git a/.gitmodules b/.gitmodules
index 66e57b4..4481990 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "proxychain/requestmodifers/vendor/ua-parser-js"]
 	path = proxychain/requestmodifers/vendor/ua-parser-js
 	url = https://github.com/faisalman/ua-parser-js.git
+[submodule "proxychain/responsemodifers/vendor/ddg-tracker-surrogates"]
+	path = proxychain/responsemodifers/vendor/ddg-tracker-surrogates
+	url = https://github.com/duckduckgo/tracker-surrogates
diff --git a/handlers/proxy.go b/handlers/proxy.go
index 07165ac..84e046d 100644
--- a/handlers/proxy.go
+++ b/handlers/proxy.go
@@ -33,10 +33,11 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
 			SetRequestModifications(
 				// rx.SpoofJA3fingerprint(ja3, "Googlebot"),
 				// rx.MasqueradeAsFacebookBot(),
-				rx.MasqueradeAsGoogleBot(),
+				//rx.MasqueradeAsGoogleBot(),
 				rx.DeleteOutgoingCookies(),
 				rx.ForwardRequestHeaders(),
-				rx.SpoofReferrerFromGoogleSearch(),
+				//rx.SpoofReferrerFromGoogleSearch(),
+				rx.SpoofReferrerFromLinkedInPost(),
 				// rx.RequestWaybackMachine(),
 				// rx.RequestArchiveIs(),
 			).
@@ -47,6 +48,7 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
 				// tx.DeleteIncomingCookies(),
 				tx.RewriteHTMLResourceURLs(),
 				tx.PatchDynamicResourceURLs(),
+				tx.PatchTrackerScripts(),
 				// tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"),
 			).
 			Execute()
diff --git a/proxychain/requestmodifers/masquerade_as_trusted_bot.go b/proxychain/requestmodifers/masquerade_as_trusted_bot.go
index aaa1e9d..7186219 100644
--- a/proxychain/requestmodifers/masquerade_as_trusted_bot.go
+++ b/proxychain/requestmodifers/masquerade_as_trusted_bot.go
@@ -85,26 +85,29 @@ func masqueradeAsTrustedBot(botUA string, botIP string, ja3 string) proxychain.R
 			// general / nginx
 			SetRequestHeader("X-Forwarded-For", botIP),
 			SetRequestHeader("X-Real-IP", botIP),
-			// akamai
 			SetRequestHeader("True-Client-IP", botIP),
-
-			// cloudflare
-			// TODO: this seems to cause issues with CF... figure out workaround or remove
-			/*
-				Error 1000
-				Ray ID: xxxxxxxxxxxxxxxx •
-				2023-12-01 20:09:22 UTC
-				DNS points to prohibited IP
-				What happened?
-				You've requested a page on a website (xxxxxxxxxxxxxxxxxxx) that is on the Cloudflare network. Unfortunately, it is resolving to an IP address that is creating a conflict within Cloudflare's system
-			*/
-
-			SetRequestHeader("CF-Connecting-IP", botIP),
-
-			// weblogic
 			SetRequestHeader("WL-Proxy-Client-IP", botIP),
-			// azure
 			SetRequestHeader("X-Cluster-Client-IP", botIP),
+			/*
+				// akamai
+				SetRequestHeader("True-Client-IP", botIP),
+
+				// cloudflare
+				// TODO: this seems to cause issues with CF... figure out workaround or remove
+				Error 1000
+				Ray ID: xxxxxxxxxxxxxxxx •
+				2023-12-01 20:09:22 UTC
+				DNS points to prohibited IP
+				What happened?
+				You've requested a page on a website (xxxxxxxxxxxxxxxxxxx) that is on the Cloudflare network. Unfortunately, it is resolving to an IP address that is creating a conflict within Cloudflare's system
+
+				SetRequestHeader("CF-Connecting-IP", botIP),
+
+				// weblogic
+				SetRequestHeader("WL-Proxy-Client-IP", botIP),
+				// azure
+				SetRequestHeader("X-Cluster-Client-IP", botIP),
+			*/
 
 			DeleteRequestHeader("referrer"),
 			DeleteRequestHeader("origin"),
diff --git a/proxychain/requestmodifers/vendor/ua-parser-js b/proxychain/requestmodifers/vendor/ua-parser-js
index 5173a54..3622b61 160000
--- a/proxychain/requestmodifers/vendor/ua-parser-js
+++ b/proxychain/requestmodifers/vendor/ua-parser-js
@@ -1 +1 @@
-Subproject commit 5173a5442f4af04b4f9c51519587e7969de9fc15
+Subproject commit 3622b614a71749e6d96f241d6810d6086075e2a4
diff --git a/proxychain/responsemodifers/patch_tracker_scripts.go b/proxychain/responsemodifers/patch_tracker_scripts.go
new file mode 100644
index 0000000..88e1b43
--- /dev/null
+++ b/proxychain/responsemodifers/patch_tracker_scripts.go
@@ -0,0 +1,115 @@
+package responsemodifers
+
+import (
+	"embed"
+	"encoding/json"
+	"fmt"
+	"io"
+	"ladder/proxychain"
+	"log"
+	"regexp"
+)
+
+// mappingJSON is the vendored DuckDuckGo table that maps tracker domains to
+// URL patterns and the surrogate script served for each pattern.
+//
+//go:embed vendor/ddg-tracker-surrogates/mapping.json
+var mappingJSON []byte
+
+// surrogateFS holds the vendored surrogate scripts named by mapping.json.
+//
+//go:embed vendor/ddg-tracker-surrogates/surrogates/*
+var surrogateFS embed.FS
+
+// surrogateDir is the directory inside surrogateFS that holds the scripts.
+const surrogateDir = "vendor/ddg-tracker-surrogates/surrogates/"
+
+// rules is the decoded mapping.json, keyed by tracker domain.
+var rules domainRules
+
+func init() {
+	if err := json.Unmarshal(mappingJSON, &rules); err != nil {
+		log.Printf("[ERROR]: PatchTrackerScripts: failed to deserialize ladder/proxychain/responsemodifers/vendor/ddg-tracker-surrogates/mapping.json: %v", err)
+	}
+}
+
+// rule is one mapping.json entry: a URL pattern and the surrogate script
+// served in place of any URL the pattern matches.
+type rule struct {
+	RegexRule *regexp.Regexp `json:"regexRule"`
+	Surrogate string         `json:"surrogate"`
+	Action    string         `json:"action,omitempty"`
+}
+
+// domainRules maps a tracker domain to the rules defined for it.
+type domainRules map[string][]rule
+
+// UnmarshalJSON decodes one mapping.json entry and compiles its regexRule.
+// A pattern that does not compile is returned as an error, not a panic.
+func (r *rule) UnmarshalJSON(data []byte) error {
+	var raw struct {
+		RegexRule string `json:"regexRule"`
+		Surrogate string `json:"surrogate"`
+		Action    string `json:"action,omitempty"`
+	}
+	if err := json.Unmarshal(data, &raw); err != nil {
+		return err
+	}
+
+	re, err := regexp.Compile(raw.RegexRule)
+	if err != nil {
+		return fmt.Errorf("compiling regexRule %q: %w", raw.RegexRule, err)
+	}
+
+	r.RegexRule = re
+	r.Surrogate = raw.Surrogate
+	r.Action = raw.Action
+	return nil
+}
+
+// findSurrogate returns the tracker domain and surrogate file name of the
+// first rule whose pattern matches rawURL; ok is false when none matches.
+func findSurrogate(rawURL string) (domain, surrogate string, ok bool) {
+	for d, rs := range rules {
+		for _, r := range rs {
+			if r.RegexRule != nil && r.RegexRule.MatchString(rawURL) {
+				return d, r.Surrogate, true
+			}
+		}
+	}
+	return "", "", false
+}
+
+// PatchTrackerScripts replaces any request to tracker scripts such as google analytics
+// with a no-op stub that mocks the API structure of the original scripts they replace.
+// Some pages depend on the existence of these structures for proper loading, so this may fix
+// some broken elements.
+// Surrogate script code borrowed from: DuckDuckGo Privacy Essentials browser extension for Firefox, Chrome. (Apache 2.0 license)
+//
+// The returned modification is a no-op for URLs that match no rule and returns
+// an error when the matching surrogate cannot be opened.
+func PatchTrackerScripts() proxychain.ResponseModification {
+	return func(chain *proxychain.ProxyChain) error {
+		domain, surrogate, ok := findSurrogate(chain.Request.URL.String())
+		if !ok {
+			return nil
+		}
+
+		// Serve the nop stub from surrogateDir instead of the upstream body.
+		script, err := surrogateFS.Open(surrogateDir + surrogate)
+		if err != nil {
+			return fmt.Errorf("PatchTrackerScripts: opening surrogate %q for %q: %w", surrogate, domain, err)
+		}
+		log.Printf("INFO: PatchTrackerScripts :: injecting surrogate for '%s' => 'surrogates/%s'\n", domain, surrogate)
+
+		// The upstream body is discarded; close it so it is not leaked.
+		// Its Close error is irrelevant because nothing was read from it here.
+		if upstream, ok := chain.Response.Body.(io.Closer); ok {
+			_ = upstream.Close()
+		}
+
+		chain.Response.Body = script
+		chain.Context.Set("content-type", "text/javascript")
+		return nil
+	}
+}
diff --git a/proxychain/responsemodifers/vendor/ddg-tracker-surrogates b/proxychain/responsemodifers/vendor/ddg-tracker-surrogates
new file mode 160000
index 0000000..ba0d8ce
--- /dev/null
+++ b/proxychain/responsemodifers/vendor/ddg-tracker-surrogates
@@ -0,0 +1 @@
+Subproject commit ba0d8cefe4432723ec75b998241efd2454dff35a