diff --git a/handlers/proxy.go b/handlers/proxy.go
index 6e3b750..7a04fa4 100644
--- a/handlers/proxy.go
+++ b/handlers/proxy.go
@@ -34,7 +34,7 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
SetRequestModifications(
//rx.SpoofJA3fingerprint(ja3, "Googlebot"),
rx.AddCacheBusterQuery(),
- rx.MasqueradeAsGoogleBot(),
+ //rx.MasqueradeAsGoogleBot(),
rx.ForwardRequestHeaders(),
rx.DeleteOutgoingCookies(),
rx.SpoofReferrerFromRedditPost(),
@@ -44,15 +44,17 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
).
AddResponseModifications(
//tx.ForwardResponseHeaders(),
+ //tx.BlockThirdPartyScripts(),
tx.DeleteIncomingCookies(),
tx.DeleteLocalStorageData(),
tx.DeleteSessionStorageData(),
tx.BypassCORS(),
tx.BypassContentSecurityPolicy(),
tx.RewriteHTMLResourceURLs(),
- tx.PatchTrackerScripts(),
tx.PatchDynamicResourceURLs(),
- //tx.BlockElementRemoval(".article-content"),
+ tx.PatchTrackerScripts(),
+ //tx.BlockElementRemoval(".article-content"), // techcrunch
+ tx.BlockElementRemoval(".available-content"), // substack
// tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"),
)
diff --git a/proxychain/proxychain.go b/proxychain/proxychain.go
index ceb0712..4f3df50 100644
--- a/proxychain/proxychain.go
+++ b/proxychain/proxychain.go
@@ -234,9 +234,53 @@ func preventRecursiveProxyRequest(urlQuery *url.URL, baseProxyURL string) *url.U
return preventRecursiveProxyRequest(fixedURL, baseProxyURL)
}
-// extractURL extracts a URL from the request ctx. If the URL in the request
-// is a relative path, it reconstructs the full URL using the referer header.
+// extractURL resolves the upstream URL that the current request should be proxied to.
+//
+// Path-based extraction (extractURLFromPath) is used when ladder is served from
+// localhost / 127.0.0.1 over http, when the request path starts with "/http",
+// "/api" or "/outline", or when the request host has no subdomain labels in
+// front of a two-label base domain. In every other case the target is decoded
+// from the subdomain (extractURLFromSubdomain).
+//
+// On a malformed base URL it returns an empty *url.URL together with the parse error.
+func (chain *ProxyChain) extractURL() (*url.URL, error) {
+	baseURL := chain.Context.BaseURL()
+	reqPath := chain.Context.Path()
+
+	isLocal := strings.HasPrefix(baseURL, "http://localhost") || strings.HasPrefix(baseURL, "http://127.0.0.1")
+	isReqPath := strings.HasPrefix(reqPath, "/http")
+	isAPI := strings.HasPrefix(reqPath, "/api")
+	isOutline := strings.HasPrefix(reqPath, "/outline")
+
+	if isLocal || isReqPath || isAPI || isOutline {
+		return chain.extractURLFromPath()
+	}
+
+	u, err := url.Parse(baseURL)
+	if err != nil {
+		return &url.URL{}, err
+	}
+
+	// The subdomain decoder treats the last two host labels as ladder's own
+	// domain, so a subdomain only exists when there are at least three labels.
+	// With fewer, there is nothing to decode and the path carries the target.
+	if len(strings.Split(u.Hostname(), ".")) < 3 {
+		return chain.extractURLFromPath()
+	}
+
+	return chain.extractURLFromSubdomain()
+}
+
+// extractURLFromSubdomain extracts the upstream URL from the subdomain labels of the
+// request host. The last two host labels are taken to be ladder's own domain; the
+// labels in front of them encode the target host, where a single "-" stands for "."
+// and a doubled "--" stands for a literal "-" (for example "my--site-example-com"
+// decodes to "my-site.example.com"). The returned URL always uses the https scheme.
+//
+// When the host carries no subdomain it falls back to extractURLFromPath. On a
+// malformed base URL it returns an empty *url.URL together with the parse error.
+func (chain *ProxyChain) extractURLFromSubdomain() (*url.URL, error) {
+	u, err := url.Parse(chain.Context.BaseURL())
+	if err != nil {
+		return &url.URL{}, err
+	}
+
+	labels := strings.Split(u.Hostname(), ".")
+	if len(labels) < 3 {
+		// Only ladder's own domain (or a bare hostname) is present. Slicing off the
+		// last two labels would leave an empty target host, so use the path instead.
+		return chain.extractURLFromPath()
+	}
+
+	encodedHost := strings.Join(labels[:len(labels)-2], ".")
+	// Pairs are tried in argument order at each position, so "--" wins over "-"
+	// and no temporary placeholder character is needed.
+	targetHost := strings.NewReplacer("--", "-", "-", ".").Replace(encodedHost)
+
+	// NOTE(review): u is parsed from BaseURL(); if BaseURL() never includes a path,
+	// u.Path is empty here and the request path is not forwarded upstream — confirm.
+	return url.Parse(fmt.Sprintf("https://%s/%s", targetHost, u.Path))
+}
+
+// extractURLFromPath extracts a URL from the request ctx. If the URL in the request
+// is a relative path, it reconstructs the full URL using the referer header.
+func (chain *ProxyChain) extractURLFromPath() (*url.URL, error) {
reqURL := chain.Context.Params("*")
fmt.Println("XXXXXXXXXXXXXXXX")
@@ -316,7 +360,7 @@ func (chain *ProxyChain) validateCtxIsSet() error {
if chain.Context != nil {
return nil
}
- err := errors.New("proxyChain was called without setting a fiber Ctx. Use ProxyChain.SetCtx()")
+ err := errors.New("proxyChain was called without setting a fiber Ctx. Use ProxyChain.SetFiberCtx()")
chain.abortErr = chain.abort(err)
return chain.abortErr
}
diff --git a/proxychain/requestmodifiers/spoof_ja3_fingerprint.go b/proxychain/requestmodifiers/spoof_ja3_fingerprint.go
deleted file mode 100644
index 84a70a7..0000000
--- a/proxychain/requestmodifiers/spoof_ja3_fingerprint.go
+++ /dev/null
@@ -1,52 +0,0 @@
-package requestmodifiers
-
-// removed due to using a different TLS spoofing technique
-
-/*
-import (
- //"github.com/Danny-Dasilva/CycleTLS/cycletls"
- //http "github.com/Danny-Dasilva/fhttp"
- //http "github.com/bogdanfinn/fhttp"
-
- "golang.org/x/net/proxy"
- "ladder/proxychain"
-)
-
-// SpoofJA3fingerprint modifies the TLS client and user agent to spoof a particular JA3 fingerprint
-// Some anti-bot WAFs such as cloudflare can fingerprint the fields of the TLS hello packet, and the order in which they appear
-// https://web.archive.org/web/20231126224326/https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/
-// https://web.archive.org/web/20231119065253/https://developers.cloudflare.com/bots/concepts/ja3-fingerprint/
-func SpoofJA3fingerprint(ja3 string, userAgent string) proxychain.RequestModification {
- //fmt.Println(ja3)
- return func(chain *proxychain.ProxyChain) error {
- // deep copy existing client while modifying http transport
- ja3SpoofClient := &http.Client{
- Transport: cycletls.NewTransport(ja3, userAgent),
- Timeout: chain.Client.Timeout,
- CheckRedirect: chain.Client.CheckRedirect,
- }
-
- chain.SetOnceHTTPClient(ja3SpoofClient)
- return nil
- }
-}
-
-// SpoofJA3fingerprintWithProxy modifies the TLS client and user agent to spoof a particular JA3 fingerprint and use a proxy.ContextDialer from the "golang.org/x/net/proxy"
-// Some anti-bot WAFs such as cloudflare can fingerprint the fields of the TLS hello packet, and the order in which they appear
-// https://web.archive.org/web/20231126224326/https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/
-// https://web.archive.org/web/20231119065253/https://developers.cloudflare.com/bots/concepts/ja3-fingerprint/
-func SpoofJA3fingerprintWithProxy(ja3 string, userAgent string, proxy proxy.ContextDialer) proxychain.RequestModification {
- return func(chain *proxychain.ProxyChain) error {
-
- // deep copy existing client while modifying http transport
- ja3SpoofClient := &http.Client{
- Transport: cycletls.NewTransportWithProxy(ja3, userAgent, proxy),
- Timeout: chain.Client.Timeout,
- CheckRedirect: chain.Client.CheckRedirect,
- }
-
- chain.SetOnceHTTPClient(ja3SpoofClient)
- return nil
- }
-}
-*/
diff --git a/proxychain/responsemodifiers/block_third_party_scripts.go b/proxychain/responsemodifiers/block_third_party_scripts.go
new file mode 100644
index 0000000..398a6cc
--- /dev/null
+++ b/proxychain/responsemodifiers/block_third_party_scripts.go
@@ -0,0 +1,33 @@
+package responsemodifiers
+
+import (
+ _ "embed"
+ "fmt"
+ "strings"
+
+ "ladder/proxychain"
+ "ladder/proxychain/responsemodifiers/rewriters"
+)
+
+// BlockThirdPartyScripts returns a response modification that, for HTML responses,
+// wraps the response body in an HTML rewriter driven by a BlockThirdPartyScriptsRewriter.
+// Responses whose content-type does not start with "text/html" are left untouched.
+func BlockThirdPartyScripts() proxychain.ResponseModification {
+	// TODO: monkey patch fetch and XMLHttpRequest to firewall 3P JS as well.
+	return func(chain *proxychain.ProxyChain) error {
+		// script tags only need rewriting inside HTML documents
+		contentType := chain.Response.Header.Get("content-type")
+		if strings.HasPrefix(contentType, "text/html") {
+			// ladderURL is the scheme and host ladder itself was reached on,
+			// e.g. http://localhost:8080 (not the proxied site)
+			reqURI := chain.Context.Request().URI()
+			ladderURL := fmt.Sprintf("%s://%s", reqURI.Scheme(), reqURI.Host())
+
+			// swap the response body for a readcloser that wraps the original body
+			scriptBlocker := rewriters.NewBlockThirdPartyScriptsRewriter(chain.Request.URL, ladderURL)
+			chain.Response.Body = rewriters.NewHTMLRewriter(chain.Response.Body, scriptBlocker)
+		}
+
+		return nil
+	}
+}
diff --git a/proxychain/responsemodifiers/patch_google_analytics.go b/proxychain/responsemodifiers/patch_google_analytics.go
deleted file mode 100644
index 4427ea5..0000000
--- a/proxychain/responsemodifiers/patch_google_analytics.go
+++ /dev/null
@@ -1,34 +0,0 @@
-package responsemodifiers
-
-import (
- _ "embed"
- "io"
- "strings"
-
- "ladder/proxychain"
-)
-
-//go:embed vendor/patch_google_analytics.js
-var gaPatch string
-
-// PatchGoogleAnalytics replaces any request to google analytics with a no-op stub function.
-// Some sites will not display content until GA is loaded, so we fake one instead.
-// Credit to Raymond Hill @ github.com/gorhill/uBlock
-func PatchGoogleAnalytics() proxychain.ResponseModification {
- return func(chain *proxychain.ProxyChain) error {
-
- // preflight check
- isGADomain := chain.Request.URL.Host == "www.google-analytics.com" || chain.Request.URL.Host == "google-analytics.com"
- isGAPath := strings.HasSuffix(chain.Request.URL.Path, "analytics.js")
- if !(isGADomain || isGAPath) {
- return nil
- }
-
- // send modified js payload to client containing
- // stub functions from patch_google_analytics.js
- gaPatchReader := io.NopCloser(strings.NewReader(gaPatch))
- chain.Response.Body = gaPatchReader
- chain.Context.Set("content-type", "text/javascript")
- return nil
- }
-}
diff --git a/proxychain/responsemodifiers/rewriters/block_third_party_scripts.go b/proxychain/responsemodifiers/rewriters/block_third_party_scripts.go
new file mode 100644
index 0000000..82e54e0
--- /dev/null
+++ b/proxychain/responsemodifiers/rewriters/block_third_party_scripts.go
@@ -0,0 +1,69 @@
+package rewriters
+
+import (
+ _ "embed"
+ "fmt"
+ "log"
+ "net/url"
+ "strings"
+
+ "golang.org/x/net/html"
+ "golang.org/x/net/html/atom"
+)
+
+// BlockThirdPartyScriptsRewriter implements HTMLTokenRewriter
+// and blocks 3rd party JS in script tags by renaming the src attribute to "blocked",
+// which leaves the tag in place but without a src attribute to load.
+type BlockThirdPartyScriptsRewriter struct {
+	baseURL  *url.URL // URL of the proxied page; relative script sources resolve against it
+	proxyURL string   // ladder URL, not proxied site URL
+}
+
+// NewBlockThirdPartyScriptsRewriter creates a new instance of BlockThirdPartyScriptsRewriter.
+// This rewriter will strip out 3rd party JS URLs from script tags.
+// baseURL is the URL of the proxied page and proxyURL is the scheme://host of ladder itself.
+func NewBlockThirdPartyScriptsRewriter(baseURL *url.URL, proxyURL string) *BlockThirdPartyScriptsRewriter {
+	return &BlockThirdPartyScriptsRewriter{
+		baseURL:  baseURL,
+		proxyURL: proxyURL,
+	}
+}
+
+// isThirdPartySrc reports whether a script src value points at a host other than
+// the proxied site or ladder itself.
+func (r *BlockThirdPartyScriptsRewriter) isThirdPartySrc(src string) bool {
+	src = strings.TrimSpace(src)
+
+	ref, err := url.Parse(src)
+	if err != nil {
+		// Unparsable value: fall back to a textual check and block anything that looks absolute.
+		return strings.HasPrefix(src, "http") || strings.HasPrefix(src, "//")
+	}
+
+	// Resolving against the page URL turns relative paths into first party URLs and
+	// gives protocol-relative sources ("//cdn.example.com/x.js") their real host,
+	// instead of mistaking them for local paths because they start with "/".
+	abs := r.baseURL.ResolveReference(ref)
+	if abs.Scheme != "http" && abs.Scheme != "https" {
+		// data:, blob: and similar sources do not load from a remote host
+		return false
+	}
+
+	// Compare whole hostnames rather than string prefixes, so that
+	// "https://site.com.evil.tld" is not accepted as "https://site.com".
+	origin := func(u *url.URL) string {
+		return fmt.Sprintf("%s://%s", u.Scheme, strings.ToLower(u.Hostname()))
+	}
+	if origin(abs) == origin(r.baseURL) {
+		return false
+	}
+
+	// URLs that already point at ladder are first party as well
+	if proxy, err := url.Parse(r.proxyURL); err == nil && abs.Scheme == proxy.Scheme && strings.EqualFold(abs.Host, proxy.Host) {
+		return false
+	}
+
+	return true
+}
+
+// ShouldModify reports whether token is a script tag with a third party src attribute.
+func (r *BlockThirdPartyScriptsRewriter) ShouldModify(token *html.Token) bool {
+	if token.DataAtom != atom.Script {
+		return false
+	}
+
+	// check for 3p .js urls in html elements
+	for i := range token.Attr {
+		if token.Attr[i].Key == "src" && r.isThirdPartySrc(token.Attr[i].Val) {
+			return true
+		}
+	}
+
+	return false
+}
+
+// ModifyToken renames every third party src attribute on token to "blocked" and logs
+// the blocked URL. It never prepends or appends extra markup, so both returned
+// strings are always empty.
+func (r *BlockThirdPartyScriptsRewriter) ModifyToken(token *html.Token) (string, string) {
+	for i := range token.Attr {
+		attr := &token.Attr[i]
+		if attr.Key != "src" || !r.isThirdPartySrc(attr.Val) {
+			continue
+		}
+
+		log.Printf("INFO: blocked 3P js: '%s' on '%s'\n", attr.Val, r.baseURL.String())
+		attr.Key = "blocked"
+	}
+	return "", ""
+}
diff --git a/proxychain/responsemodifiers/rewriters/script_injector_rewriter.go b/proxychain/responsemodifiers/rewriters/script_injector_rewriter.go
index d6e63b5..be82da5 100644
--- a/proxychain/responsemodifiers/rewriters/script_injector_rewriter.go
+++ b/proxychain/responsemodifiers/rewriters/script_injector_rewriter.go
@@ -6,6 +6,8 @@ import (
"sort"
"strings"
+ "crypto/md5"
+ "encoding/hex"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
@@ -14,8 +16,9 @@ import (
// ScriptInjectorRewriter is a struct that injects JS into the page
// It uses an HTML tokenizer to process HTML content and injects JS at a specified location
type ScriptInjectorRewriter struct {
- execTime ScriptExecTime
- script string
+ execTime ScriptExecTime
+ script string
+ scriptMD5 string
}
type ScriptExecTime int
@@ -37,20 +40,27 @@ var afterDomIdleScriptInjector string
func (r *ScriptInjectorRewriter) ModifyToken(_ *html.Token) (string, string) {
switch {
case r.execTime == BeforeDOMContentLoaded:
- return "", fmt.Sprintf("\n\n", r.script)
+ return "", fmt.Sprintf("\n\n", r.scriptMD5, r.script)
case r.execTime == AfterDOMContentLoaded:
- return "", fmt.Sprintf("\n", r.script)
+ return "", fmt.Sprintf("\n", r.scriptMD5, r.script)
case r.execTime == AfterDOMIdle:
s := strings.Replace(afterDomIdleScriptInjector, `'{{AFTER_DOM_IDLE_SCRIPT}}'`, r.script, 1)
- return "", fmt.Sprintf("\n\n", s)
+ return "", fmt.Sprintf("\n\n", r.scriptMD5, s)
default:
return "", ""
}
}
+// generateMD5Hash returns the MD5 digest of input as a lowercase hexadecimal string.
+func generateMD5Hash(input string) string {
+	digest := md5.Sum([]byte(input))
+	return hex.EncodeToString(digest[:])
+}
+
// applies parameters by string replacement of the template script
func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
// Sort the keys by length in descending order
@@ -71,9 +81,13 @@ func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
// NewScriptInjectorRewriter implements a HtmlTokenRewriter
// and injects JS into the page for execution at a particular time
func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
+ scriptMD5 := generateMD5Hash(script)
+ executeOnceScript := fmt.Sprintf(`if (!document.getElementById("x-%s")) { %s; document.getElementById("%s").id = "x-%s" };`, scriptMD5, script, scriptMD5, scriptMD5)
+
return &ScriptInjectorRewriter{
- execTime: execTime,
- script: script,
+ execTime: execTime,
+ script: executeOnceScript,
+ scriptMD5: scriptMD5,
}
}
@@ -83,10 +97,7 @@ func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptIn
// the params map represents the key-value pair of the params.
// the key will be string replaced with the value
func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter {
- rr := &ScriptInjectorRewriter{
- execTime: execTime,
- script: script,
- }
+ rr := NewScriptInjectorRewriter(script, execTime)
rr.applyParams(params)
return rr
}
diff --git a/proxychain/responsemodifiers/vendor/patch_dynamic_resource_urls.js b/proxychain/responsemodifiers/vendor/patch_dynamic_resource_urls.js
index 3e04400..74eba11 100644
--- a/proxychain/responsemodifiers/vendor/patch_dynamic_resource_urls.js
+++ b/proxychain/responsemodifiers/vendor/patch_dynamic_resource_urls.js
@@ -134,7 +134,7 @@
// monkey patch xmlhttprequest
const oldOpen = XMLHttpRequest.prototype.open;
- XMLHttpRequest.prototype.open = function (
+ XMLHttpRequest.prototype.open = function(
method,
url,
async = true,
@@ -150,7 +150,7 @@
);
const oldSend = XMLHttpRequest.prototype.send;
- XMLHttpRequest.prototype.send = function (method, url) {
+ XMLHttpRequest.prototype.send = function(method, url) {
return oldSend.call(this, method, rewriteURL(url));
};
hideMonkeyPatch(
@@ -160,6 +160,7 @@
);
// monkey patch service worker registration
+ /*
const oldRegister = ServiceWorkerContainer.prototype.register;
ServiceWorkerContainer.prototype.register = function (scriptURL, options) {
return oldRegister.call(this, rewriteURL(scriptURL), options);
@@ -169,10 +170,11 @@
"register",
"function register() { [native code] }",
);
+ */
// monkey patch URL.toString() method
const oldToString = URL.prototype.toString;
- URL.prototype.toString = function () {
+ URL.prototype.toString = function() {
let originalURL = oldToString.call(this);
return rewriteURL(originalURL);
};
@@ -184,7 +186,7 @@
// monkey patch URL.toJSON() method
const oldToJson = URL.prototype.toString;
- URL.prototype.toString = function () {
+ URL.prototype.toString = function() {
let originalURL = oldToJson.call(this);
return rewriteURL(originalURL);
};
@@ -200,11 +202,11 @@
"href",
);
Object.defineProperty(URL.prototype, "href", {
- get: function () {
+ get: function() {
let originalHref = originalHrefDescriptor.get.call(this);
return rewriteURL(originalHref);
},
- set: function (newValue) {
+ set: function(newValue) {
originalHrefDescriptor.set.call(this, rewriteURL(newValue));
},
});
@@ -283,7 +285,7 @@
// monkey-patching Element.setAttribute
const originalSetAttribute = Element.prototype.setAttribute;
- Element.prototype.setAttribute = function (name, value) {
+ Element.prototype.setAttribute = function(name, value) {
const isMatchingElement = elements.some((element) => {
return this.tagName.toLowerCase() === element.tag &&
name.toLowerCase() === element.attribute;
diff --git a/proxychain/ruleset/rule_resmod_types.gen.go b/proxychain/ruleset/rule_resmod_types.gen.go
index dfff064..eb42a02 100644
--- a/proxychain/ruleset/rule_resmod_types.gen.go
+++ b/proxychain/ruleset/rule_resmod_types.gen.go
@@ -24,6 +24,10 @@ func init() {
return tx.BlockElementRemoval(params[0])
}
+ rsmModMap["BlockThirdPartyScripts"] = func(_ ...string) proxychain.ResponseModification {
+ return tx.BlockThirdPartyScripts()
+ }
+
rsmModMap["BypassCORS"] = func(_ ...string) proxychain.ResponseModification {
return tx.BypassCORS()
}
@@ -92,10 +96,6 @@ func init() {
return tx.PatchDynamicResourceURLs()
}
- rsmModMap["PatchGoogleAnalytics"] = func(_ ...string) proxychain.ResponseModification {
- return tx.PatchGoogleAnalytics()
- }
-
rsmModMap["PatchTrackerScripts"] = func(_ ...string) proxychain.ResponseModification {
return tx.PatchTrackerScripts()
}