add subdomain url extractor; add 3p script blocker modifier

This commit is contained in:
Kevin Pham
2023-12-06 14:18:40 -06:00
parent 0e940ec217
commit 6a5b85f260
9 changed files with 189 additions and 114 deletions

View File

@@ -34,7 +34,7 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
SetRequestModifications( SetRequestModifications(
//rx.SpoofJA3fingerprint(ja3, "Googlebot"), //rx.SpoofJA3fingerprint(ja3, "Googlebot"),
rx.AddCacheBusterQuery(), rx.AddCacheBusterQuery(),
rx.MasqueradeAsGoogleBot(), //rx.MasqueradeAsGoogleBot(),
rx.ForwardRequestHeaders(), rx.ForwardRequestHeaders(),
rx.DeleteOutgoingCookies(), rx.DeleteOutgoingCookies(),
rx.SpoofReferrerFromRedditPost(), rx.SpoofReferrerFromRedditPost(),
@@ -44,15 +44,17 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
). ).
AddResponseModifications( AddResponseModifications(
//tx.ForwardResponseHeaders(), //tx.ForwardResponseHeaders(),
//tx.BlockThirdPartyScripts(),
tx.DeleteIncomingCookies(), tx.DeleteIncomingCookies(),
tx.DeleteLocalStorageData(), tx.DeleteLocalStorageData(),
tx.DeleteSessionStorageData(), tx.DeleteSessionStorageData(),
tx.BypassCORS(), tx.BypassCORS(),
tx.BypassContentSecurityPolicy(), tx.BypassContentSecurityPolicy(),
tx.RewriteHTMLResourceURLs(), tx.RewriteHTMLResourceURLs(),
tx.PatchTrackerScripts(),
tx.PatchDynamicResourceURLs(), tx.PatchDynamicResourceURLs(),
//tx.BlockElementRemoval(".article-content"), tx.PatchTrackerScripts(),
//tx.BlockElementRemoval(".article-content"), // techcrunch
tx.BlockElementRemoval(".available-content"), // substack
// tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"), // tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"),
) )

View File

@@ -234,9 +234,53 @@ func preventRecursiveProxyRequest(urlQuery *url.URL, baseProxyURL string) *url.U
return preventRecursiveProxyRequest(fixedURL, baseProxyURL) return preventRecursiveProxyRequest(fixedURL, baseProxyURL)
} }
// extractURL extracts a URL from the request ctx. If the URL in the request // extractURL extracts a URL from the request ctx
// is a relative path, it reconstructs the full URL using the referer header.
func (chain *ProxyChain) extractURL() (*url.URL, error) { func (chain *ProxyChain) extractURL() (*url.URL, error) {
isLocal := strings.HasPrefix(chain.Context.BaseURL(), "http://localhost") || strings.HasPrefix(chain.Context.BaseURL(), "http://127.0.0.1")
isReqPath := strings.HasPrefix(chain.Context.Path(), "/http")
isAPI := strings.HasPrefix(chain.Context.Path(), "/api")
isOutline := strings.HasPrefix(chain.Context.Path(), "/outline")
if isLocal || isReqPath || isAPI || isOutline {
return chain.extractURLFromPath()
}
u, err := url.Parse(chain.Context.BaseURL())
if err != nil {
return &url.URL{}, err
}
parts := strings.Split(u.Hostname(), ".")
if len(parts) < 2 {
fmt.Println("path")
return chain.extractURLFromPath()
}
return chain.extractURLFromSubdomain()
}
// extractURLFromSubdomain extracts the target URL from the request ctx when
// the target host is encoded in the subdomain of the proxy's hostname.
//
// In the encoded subdomain a single "-" stands for "." and "--" stands for a
// literal "-", e.g. "www-my--site-com.<domain>.<tld>" decodes to
// "www.my-site.com". The resulting URL always uses the https scheme.
//
// If the hostname carries no subdomain, extraction falls back to
// extractURLFromPath.
func (chain *ProxyChain) extractURLFromSubdomain() (*url.URL, error) {
	u, err := url.Parse(chain.Context.BaseURL())
	if err != nil {
		return &url.URL{}, err
	}

	parts := strings.Split(u.Hostname(), ".")
	if len(parts) < 3 {
		// no subdomain set (e.g. "localhost" or "domain.tld"); slicing off the
		// last two labels would leave an empty host, so fall back to path extraction
		return chain.extractURLFromPath()
	}

	// NOTE(review): assumes the proxy's own domain is exactly the last two
	// labels; a multi-label public suffix (e.g. "co.uk") would be misread — confirm.
	subdomain := strings.Join(parts[:len(parts)-2], ".")

	// Decode in three steps so an escaped dash ("--") survives the "-" -> "."
	// pass: park it as "|" (not valid in a hostname), then restore it.
	host := strings.ReplaceAll(subdomain, "--", "|")
	host = strings.ReplaceAll(host, "-", ".")
	host = strings.ReplaceAll(host, "|", "-")

	// NOTE(review): u.Path is the path component of BaseURL(), not of the
	// incoming request; if BaseURL() never carries a path, the request path is
	// not propagated to the target — confirm against the caller.
	// The leading slash is trimmed so the format string does not emit "//".
	return url.Parse(fmt.Sprintf("https://%s/%s", host, strings.TrimPrefix(u.Path, "/")))
}
// extractURLFromPath extracts a URL from the request ctx. If the URL in the request
// is a relative path, it reconstructs the full URL using the referer header.
func (chain *ProxyChain) extractURLFromPath() (*url.URL, error) {
reqURL := chain.Context.Params("*") reqURL := chain.Context.Params("*")
fmt.Println("XXXXXXXXXXXXXXXX") fmt.Println("XXXXXXXXXXXXXXXX")
@@ -316,7 +360,7 @@ func (chain *ProxyChain) validateCtxIsSet() error {
if chain.Context != nil { if chain.Context != nil {
return nil return nil
} }
err := errors.New("proxyChain was called without setting a fiber Ctx. Use ProxyChain.SetCtx()") err := errors.New("proxyChain was called without setting a fiber Ctx. Use ProxyChain.SetFiberCtx()")
chain.abortErr = chain.abort(err) chain.abortErr = chain.abort(err)
return chain.abortErr return chain.abortErr
} }

View File

@@ -1,52 +0,0 @@
package requestmodifiers
// removed due to using a different TLS spoofing technique
/*
import (
//"github.com/Danny-Dasilva/CycleTLS/cycletls"
//http "github.com/Danny-Dasilva/fhttp"
//http "github.com/bogdanfinn/fhttp"
"golang.org/x/net/proxy"
"ladder/proxychain"
)
// SpoofJA3fingerprint modifies the TLS client and user agent to spoof a particular JA3 fingerprint
// Some anti-bot WAFs such as cloudflare can fingerprint the fields of the TLS hello packet, and the order in which they appear
// https://web.archive.org/web/20231126224326/https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/
// https://web.archive.org/web/20231119065253/https://developers.cloudflare.com/bots/concepts/ja3-fingerprint/
func SpoofJA3fingerprint(ja3 string, userAgent string) proxychain.RequestModification {
//fmt.Println(ja3)
return func(chain *proxychain.ProxyChain) error {
// deep copy existing client while modifying http transport
ja3SpoofClient := &http.Client{
Transport: cycletls.NewTransport(ja3, userAgent),
Timeout: chain.Client.Timeout,
CheckRedirect: chain.Client.CheckRedirect,
}
chain.SetOnceHTTPClient(ja3SpoofClient)
return nil
}
}
// SpoofJA3fingerprintWithProxy modifies the TLS client and user agent to spoof a particular JA3 fingerprint and use a proxy.ContextDialer from the "golang.org/x/net/proxy"
// Some anti-bot WAFs such as cloudflare can fingerprint the fields of the TLS hello packet, and the order in which they appear
// https://web.archive.org/web/20231126224326/https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/
// https://web.archive.org/web/20231119065253/https://developers.cloudflare.com/bots/concepts/ja3-fingerprint/
func SpoofJA3fingerprintWithProxy(ja3 string, userAgent string, proxy proxy.ContextDialer) proxychain.RequestModification {
return func(chain *proxychain.ProxyChain) error {
// deep copy existing client while modifying http transport
ja3SpoofClient := &http.Client{
Transport: cycletls.NewTransportWithProxy(ja3, userAgent, proxy),
Timeout: chain.Client.Timeout,
CheckRedirect: chain.Client.CheckRedirect,
}
chain.SetOnceHTTPClient(ja3SpoofClient)
return nil
}
}
*/

View File

@@ -0,0 +1,33 @@
package responsemodifiers
import (
_ "embed"
"fmt"
"strings"
"ladder/proxychain"
"ladder/proxychain/responsemodifiers/rewriters"
)
// BlockThirdPartyScripts returns a response modification that wraps HTML
// response bodies in a rewriter which blocks third party JS from loading.
// Responses whose content-type does not start with "text/html" are left untouched.
func BlockThirdPartyScripts() proxychain.ResponseModification {
	// TODO: monkey patch fetch and XMLHttpRequest to firewall 3P JS as well.
	return func(chain *proxychain.ProxyChain) error {
		// only HTML documents get the rewriter attached
		contentType := chain.Response.Header.Get("content-type")
		if strings.HasPrefix(contentType, "text/html") {
			// ladderOrigin is the scheme://host the ladder itself was reached on,
			// e.g. http://localhost:8080
			reqURI := chain.Context.Request().URI()
			ladderOrigin := fmt.Sprintf("%s://%s", reqURI.Scheme(), reqURI.Host())

			// swap the response body for a reader that wraps the original one
			// and rewrites script tags while the HTML streams through
			scriptBlocker := rewriters.NewBlockThirdPartyScriptsRewriter(chain.Request.URL, ladderOrigin)
			chain.Response.Body = rewriters.NewHTMLRewriter(chain.Response.Body, scriptBlocker)
		}

		return nil
	}
}

View File

@@ -1,34 +0,0 @@
package responsemodifiers
import (
_ "embed"
"io"
"strings"
"ladder/proxychain"
)
//go:embed vendor/patch_google_analytics.js
var gaPatch string
// PatchGoogleAnalytics swaps the response for requests to google analytics
// with a no-op stub script.
// Some sites refuse to display content until GA has loaded, so a fake one is served instead.
// Credit to Raymond Hill @ github.com/gorhill/uBlock
func PatchGoogleAnalytics() proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		target := chain.Request.URL

		// preflight check: the request qualifies when it goes to a GA host
		// or when its path ends in analytics.js
		fromGAHost := false
		switch target.Host {
		case "www.google-analytics.com", "google-analytics.com":
			fromGAHost = true
		}
		looksLikeGAScript := strings.HasSuffix(target.Path, "analytics.js")
		if !fromGAHost && !looksLikeGAScript {
			return nil
		}

		// answer with the stub functions from patch_google_analytics.js
		// instead of the upstream payload
		chain.Response.Body = io.NopCloser(strings.NewReader(gaPatch))
		chain.Context.Set("content-type", "text/javascript")

		return nil
	}
}

View File

@@ -0,0 +1,69 @@
package rewriters
import (
_ "embed"
"fmt"
"log"
"net/url"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// BlockThirdPartyScriptsRewriter implements HTMLTokenRewriter.
// It blocks 3rd party JS in script tags by renaming the tag's src attribute
// to "blocked".
type BlockThirdPartyScriptsRewriter struct {
	// baseURL is the URL handed to the constructor as the base of the page being rewritten.
	baseURL *url.URL
	// proxyURL is the URL of the ladder itself, not of the proxied site.
	proxyURL string
}

// NewBlockThirdPartyScriptsRewriter builds a BlockThirdPartyScriptsRewriter
// for the given base URL and ladder URL.
// The resulting rewriter strips 3rd party JS URLs out of script tags.
func NewBlockThirdPartyScriptsRewriter(baseURL *url.URL, proxyURL string) *BlockThirdPartyScriptsRewriter {
	rewriter := new(BlockThirdPartyScriptsRewriter)
	rewriter.baseURL = baseURL
	rewriter.proxyURL = proxyURL
	return rewriter
}
// ShouldModify reports whether token is a script tag that carries at least
// one src attribute pointing at a third-party origin.
// Script tags without a src attribute (inline scripts, end tags) are never
// modified, since there is nothing to block on them.
func (r *BlockThirdPartyScriptsRewriter) ShouldModify(token *html.Token) bool {
	if token.DataAtom != atom.Script {
		return false
	}

	for i := range token.Attr {
		attr := &token.Attr[i]
		if attr.Key == "src" && r.isThirdPartySrc(attr.Val) {
			return true
		}
	}

	return false
}

// ModifyToken disables every third-party src attribute on token by renaming
// the attribute key to "blocked", so the browser no longer fetches the script
// while the original URL stays visible in the markup.
// It never adds content around the token, so both returned strings are empty.
func (r *BlockThirdPartyScriptsRewriter) ModifyToken(token *html.Token) (string, string) {
	for i := range token.Attr {
		attr := &token.Attr[i]
		if attr.Key != "src" || !r.isThirdPartySrc(attr.Val) {
			continue
		}

		// r.baseURL is passed as-is: %s formats it through its String method.
		log.Printf("INFO: blocked 3P js: '%s' on '%s'\n", attr.Val, r.baseURL)
		attr.Key = "blocked"
	}

	return "", ""
}

// isThirdPartySrc reports whether a script src value refers to an origin other
// than the proxied site (baseURL) or the ladder itself (proxyURL).
// Relative references and non-HTTP schemes are treated as first party.
func (r *BlockThirdPartyScriptsRewriter) isThirdPartySrc(src string) bool {
	src = strings.TrimSpace(src)

	u, err := url.Parse(src)
	if err != nil {
		// Unparseable value: stay conservative and block anything that looks
		// like an absolute or protocol-relative URL.
		lower := strings.ToLower(src)
		return strings.HasPrefix(lower, "http") || strings.HasPrefix(lower, "//")
	}

	// No host means a relative reference (or an opaque data:/blob: URL).
	if u.Host == "" {
		return false
	}

	// An empty scheme with a host is a protocol-relative URL ("//cdn.example/x.js"),
	// which loads over the page's scheme and must be checked like an absolute one.
	if u.Scheme != "" && u.Scheme != "http" && u.Scheme != "https" {
		return false
	}

	// Same host as the proxied site: first party. Hosts are compared exactly
	// (not by string prefix) so "example.com.evil.net" cannot pass as "example.com".
	if r.baseURL != nil && strings.EqualFold(u.Hostname(), r.baseURL.Hostname()) {
		return false
	}

	// Same origin as the ladder: first party. proxyURL has the form "scheme://host".
	ladderOrigin := strings.ToLower(strings.TrimRight(r.proxyURL, "/"))
	host := strings.ToLower(u.Host)
	if u.Scheme == "" {
		// scheme is inherited from the page, so only the host part can be compared
		return !strings.HasSuffix(ladderOrigin, fmt.Sprintf("://%s", host))
	}

	return ladderOrigin != fmt.Sprintf("%s://%s", u.Scheme, host)
}

View File

@@ -6,6 +6,8 @@ import (
"sort" "sort"
"strings" "strings"
"crypto/md5"
"encoding/hex"
"golang.org/x/net/html" "golang.org/x/net/html"
"golang.org/x/net/html/atom" "golang.org/x/net/html/atom"
) )
@@ -16,6 +18,7 @@ import (
type ScriptInjectorRewriter struct { type ScriptInjectorRewriter struct {
execTime ScriptExecTime execTime ScriptExecTime
script string script string
scriptMD5 string
} }
type ScriptExecTime int type ScriptExecTime int
@@ -37,20 +40,27 @@ var afterDomIdleScriptInjector string
func (r *ScriptInjectorRewriter) ModifyToken(_ *html.Token) (string, string) { func (r *ScriptInjectorRewriter) ModifyToken(_ *html.Token) (string, string) {
switch { switch {
case r.execTime == BeforeDOMContentLoaded: case r.execTime == BeforeDOMContentLoaded:
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", r.script) return "", fmt.Sprintf("\n<script id='%s'>\n%s\n</script>\n", r.scriptMD5, r.script)
case r.execTime == AfterDOMContentLoaded: case r.execTime == AfterDOMContentLoaded:
return "", fmt.Sprintf("\n<script>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.script) return "", fmt.Sprintf("\n<script id='%s'>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.scriptMD5, r.script)
case r.execTime == AfterDOMIdle: case r.execTime == AfterDOMIdle:
s := strings.Replace(afterDomIdleScriptInjector, `'{{AFTER_DOM_IDLE_SCRIPT}}'`, r.script, 1) s := strings.Replace(afterDomIdleScriptInjector, `'{{AFTER_DOM_IDLE_SCRIPT}}'`, r.script, 1)
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", s) return "", fmt.Sprintf("\n<script id='%s'>\n%s\n</script>\n", r.scriptMD5, s)
default: default:
return "", "" return "", ""
} }
} }
// generateMD5Hash returns the MD5 digest of input, encoded as a lowercase
// hexadecimal string.
func generateMD5Hash(input string) string {
	digest := md5.Sum([]byte(input))
	return hex.EncodeToString(digest[:])
}
// applies parameters by string replacement of the template script // applies parameters by string replacement of the template script
func (r *ScriptInjectorRewriter) applyParams(params map[string]string) { func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
// Sort the keys by length in descending order // Sort the keys by length in descending order
@@ -71,9 +81,13 @@ func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
// NewScriptInjectorRewriter implements a HtmlTokenRewriter // NewScriptInjectorRewriter implements a HtmlTokenRewriter
// and injects JS into the page for execution at a particular time // and injects JS into the page for execution at a particular time
func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter { func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
scriptMD5 := generateMD5Hash(script)
executeOnceScript := fmt.Sprintf(`if (!document.getElementById("x-%s")) { %s; document.getElementById("%s").id = "x-%s" };`, scriptMD5, script, scriptMD5, scriptMD5)
return &ScriptInjectorRewriter{ return &ScriptInjectorRewriter{
execTime: execTime, execTime: execTime,
script: script, script: executeOnceScript,
scriptMD5: scriptMD5,
} }
} }
@@ -83,10 +97,7 @@ func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptIn
// the params map represents the key-value pair of the params. // the params map represents the key-value pair of the params.
// the key will be string replaced with the value // the key will be string replaced with the value
func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter { func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter {
rr := &ScriptInjectorRewriter{ rr := NewScriptInjectorRewriter(script, execTime)
execTime: execTime,
script: script,
}
rr.applyParams(params) rr.applyParams(params)
return rr return rr
} }

View File

@@ -160,6 +160,7 @@
); );
// monkey patch service worker registration // monkey patch service worker registration
/*
const oldRegister = ServiceWorkerContainer.prototype.register; const oldRegister = ServiceWorkerContainer.prototype.register;
ServiceWorkerContainer.prototype.register = function (scriptURL, options) { ServiceWorkerContainer.prototype.register = function (scriptURL, options) {
return oldRegister.call(this, rewriteURL(scriptURL), options); return oldRegister.call(this, rewriteURL(scriptURL), options);
@@ -169,6 +170,7 @@
"register", "register",
"function register() { [native code] }", "function register() { [native code] }",
); );
*/
// monkey patch URL.toString() method // monkey patch URL.toString() method
const oldToString = URL.prototype.toString; const oldToString = URL.prototype.toString;

View File

@@ -24,6 +24,10 @@ func init() {
return tx.BlockElementRemoval(params[0]) return tx.BlockElementRemoval(params[0])
} }
rsmModMap["BlockThirdPartyScripts"] = func(_ ...string) proxychain.ResponseModification {
return tx.BlockThirdPartyScripts()
}
rsmModMap["BypassCORS"] = func(_ ...string) proxychain.ResponseModification { rsmModMap["BypassCORS"] = func(_ ...string) proxychain.ResponseModification {
return tx.BypassCORS() return tx.BypassCORS()
} }
@@ -92,10 +96,6 @@ func init() {
return tx.PatchDynamicResourceURLs() return tx.PatchDynamicResourceURLs()
} }
rsmModMap["PatchGoogleAnalytics"] = func(_ ...string) proxychain.ResponseModification {
return tx.PatchGoogleAnalytics()
}
rsmModMap["PatchTrackerScripts"] = func(_ ...string) proxychain.ResponseModification { rsmModMap["PatchTrackerScripts"] = func(_ ...string) proxychain.ResponseModification {
return tx.PatchTrackerScripts() return tx.PatchTrackerScripts()
} }