add subdomain url extractor; add 3p script blocker modifier

This commit is contained in:
Kevin Pham
2023-12-06 14:18:40 -06:00
parent 0e940ec217
commit 6a5b85f260
9 changed files with 189 additions and 114 deletions

View File

@@ -34,7 +34,7 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
SetRequestModifications(
//rx.SpoofJA3fingerprint(ja3, "Googlebot"),
rx.AddCacheBusterQuery(),
rx.MasqueradeAsGoogleBot(),
//rx.MasqueradeAsGoogleBot(),
rx.ForwardRequestHeaders(),
rx.DeleteOutgoingCookies(),
rx.SpoofReferrerFromRedditPost(),
@@ -44,15 +44,17 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
).
AddResponseModifications(
//tx.ForwardResponseHeaders(),
//tx.BlockThirdPartyScripts(),
tx.DeleteIncomingCookies(),
tx.DeleteLocalStorageData(),
tx.DeleteSessionStorageData(),
tx.BypassCORS(),
tx.BypassContentSecurityPolicy(),
tx.RewriteHTMLResourceURLs(),
tx.PatchTrackerScripts(),
tx.PatchDynamicResourceURLs(),
//tx.BlockElementRemoval(".article-content"),
tx.PatchTrackerScripts(),
//tx.BlockElementRemoval(".article-content"), // techcrunch
tx.BlockElementRemoval(".available-content"), // substack
// tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"),
)

View File

@@ -234,9 +234,53 @@ func preventRecursiveProxyRequest(urlQuery *url.URL, baseProxyURL string) *url.U
return preventRecursiveProxyRequest(fixedURL, baseProxyURL)
}
// extractURL extracts a URL from the request ctx. If the URL in the request
// is a relative path, it reconstructs the full URL using the referer header.
// extractURL extracts the URL to be proxied from the request ctx.
//
// The path-based extractor is used when the proxy is reached via localhost or
// 127.0.0.1, when the request path starts with "/http", "/api" or "/outline",
// or when the proxy host has no subdomain labels in front of its registrable
// domain. In every other case the target is decoded from the subdomain.
//
// On a parse failure of the proxy's own base URL an empty, non-nil *url.URL is
// returned together with the error.
func (chain *ProxyChain) extractURL() (*url.URL, error) {
	baseURL := chain.Context.BaseURL()
	reqPath := chain.Context.Path()

	isLocal := strings.HasPrefix(baseURL, "http://localhost") || strings.HasPrefix(baseURL, "http://127.0.0.1")
	isReqPath := strings.HasPrefix(reqPath, "/http")
	isAPI := strings.HasPrefix(reqPath, "/api")
	isOutline := strings.HasPrefix(reqPath, "/outline")
	if isLocal || isReqPath || isAPI || isOutline {
		return chain.extractURLFromPath()
	}

	u, err := url.Parse(baseURL)
	if err != nil {
		return &url.URL{}, err
	}

	// A subdomain only exists when there are labels in front of the last two
	// (e.g. "www-example-com.ladder.tld"). With two labels or fewer the
	// subdomain would be empty and decoding it would produce "https:///".
	if len(strings.Split(u.Hostname(), ".")) < 3 {
		return chain.extractURLFromPath()
	}

	return chain.extractURLFromSubdomain()
}
// extractURLFromSubdomain extracts the URL to be proxied from the subdomain
// labels of the request host.
//
// Everything in front of the last two host labels is treated as the encoded
// target host: a single "-" stands for "." and a doubled "--" stands for a
// literal "-" (so "my--site-example-com.ladder.tld" maps to
// "my-site.example.com"). The request path is appended and the scheme is
// always https.
//
// If the host has no subdomain labels the path-based extractor is used
// instead. On a parse failure of the proxy's own base URL an empty, non-nil
// *url.URL is returned together with the error.
func (chain *ProxyChain) extractURLFromSubdomain() (*url.URL, error) {
	u, err := url.Parse(chain.Context.BaseURL())
	if err != nil {
		return &url.URL{}, err
	}

	parts := strings.Split(u.Hostname(), ".")
	if len(parts) < 3 {
		// no subdomain set, fall back to path extraction
		return chain.extractURLFromPath()
	}

	encodedHost := strings.Join(parts[:len(parts)-2], ".")
	// Single left-to-right pass: "--" is tried before "-" at every position,
	// so no placeholder character is needed to keep escaped dashes apart.
	targetHost := strings.NewReplacer("--", "-", "-", ".").Replace(encodedHost)

	// The base URL carries only scheme and host, so the path has to come from
	// the request itself. The leading slash is trimmed to avoid "host//path".
	// NOTE(review): the query string is not forwarded here — confirm whether
	// callers expect it to be part of the extracted URL.
	targetPath := strings.TrimPrefix(chain.Context.Path(), "/")

	return url.Parse(fmt.Sprintf("https://%s/%s", targetHost, targetPath))
}
// extractURLFromPath extracts a URL from the request ctx. If the URL in the request
// is a relative path, it reconstructs the full URL using the referer header.
func (chain *ProxyChain) extractURLFromPath() (*url.URL, error) {
reqURL := chain.Context.Params("*")
fmt.Println("XXXXXXXXXXXXXXXX")
@@ -316,7 +360,7 @@ func (chain *ProxyChain) validateCtxIsSet() error {
if chain.Context != nil {
return nil
}
err := errors.New("proxyChain was called without setting a fiber Ctx. Use ProxyChain.SetCtx()")
err := errors.New("proxyChain was called without setting a fiber Ctx. Use ProxyChain.SetFiberCtx()")
chain.abortErr = chain.abort(err)
return chain.abortErr
}

View File

@@ -1,52 +0,0 @@
package requestmodifiers
// removed due to using a different TLS spoofing technique
/*
import (
//"github.com/Danny-Dasilva/CycleTLS/cycletls"
//http "github.com/Danny-Dasilva/fhttp"
//http "github.com/bogdanfinn/fhttp"
"golang.org/x/net/proxy"
"ladder/proxychain"
)
// SpoofJA3fingerprint modifies the TLS client and user agent to spoof a particular JA3 fingerprint
// Some anti-bot WAFs such as cloudflare can fingerprint the fields of the TLS hello packet, and the order in which they appear
// https://web.archive.org/web/20231126224326/https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/
// https://web.archive.org/web/20231119065253/https://developers.cloudflare.com/bots/concepts/ja3-fingerprint/
func SpoofJA3fingerprint(ja3 string, userAgent string) proxychain.RequestModification {
//fmt.Println(ja3)
return func(chain *proxychain.ProxyChain) error {
// deep copy existing client while modifying http transport
ja3SpoofClient := &http.Client{
Transport: cycletls.NewTransport(ja3, userAgent),
Timeout: chain.Client.Timeout,
CheckRedirect: chain.Client.CheckRedirect,
}
chain.SetOnceHTTPClient(ja3SpoofClient)
return nil
}
}
// SpoofJA3fingerprintWithProxy modifies the TLS client and user agent to spoof a particular JA3 fingerprint and use a proxy.ContextDialer from the "golang.org/x/net/proxy"
// Some anti-bot WAFs such as cloudflare can fingerprint the fields of the TLS hello packet, and the order in which they appear
// https://web.archive.org/web/20231126224326/https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/
// https://web.archive.org/web/20231119065253/https://developers.cloudflare.com/bots/concepts/ja3-fingerprint/
func SpoofJA3fingerprintWithProxy(ja3 string, userAgent string, proxy proxy.ContextDialer) proxychain.RequestModification {
return func(chain *proxychain.ProxyChain) error {
// deep copy existing client while modifying http transport
ja3SpoofClient := &http.Client{
Transport: cycletls.NewTransportWithProxy(ja3, userAgent, proxy),
Timeout: chain.Client.Timeout,
CheckRedirect: chain.Client.CheckRedirect,
}
chain.SetOnceHTTPClient(ja3SpoofClient)
return nil
}
}
*/

View File

@@ -0,0 +1,33 @@
package responsemodifiers
import (
_ "embed"
"fmt"
"strings"
"ladder/proxychain"
"ladder/proxychain/responsemodifiers/rewriters"
)
// BlockThirdPartyScripts returns a response modification that wraps HTML
// response bodies in a rewriter which blocks third-party JS referenced by
// <script> tags. Responses whose content type is not text/html are left
// untouched.
func BlockThirdPartyScripts() proxychain.ResponseModification {
	// TODO: monkey patch fetch and XMLHttpRequest to firewall 3P JS as well.
	return func(chain *proxychain.ProxyChain) error {
		// don't add the rewriter if the response is not even html;
		// media types are case-insensitive, so normalise before comparing
		ct := strings.ToLower(strings.TrimSpace(chain.Response.Header.Get("content-type")))
		if !strings.HasPrefix(ct, "text/html") {
			return nil
		}

		// proxyURL is the URL of the ladder itself, e.g. http://localhost:8080
		originalURI := chain.Context.Request().URI()
		proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())

		// replace the response body with a read closer that wraps the original
		// one and rewrites script tags while the html streams through
		rr := rewriters.NewBlockThirdPartyScriptsRewriter(chain.Request.URL, proxyURL)
		chain.Response.Body = rewriters.NewHTMLRewriter(chain.Response.Body, rr)

		return nil
	}
}

View File

@@ -1,34 +0,0 @@
package responsemodifiers
import (
_ "embed"
"io"
"strings"
"ladder/proxychain"
)
//go:embed vendor/patch_google_analytics.js
var gaPatch string
// PatchGoogleAnalytics replaces the response of any request to google analytics
// (matched by host, or by a path ending in "analytics.js") with a no-op stub.
// Some sites will not display content until GA is loaded, so we fake one instead.
// Credit to Raymond Hill @ github.com/gorhill/uBlock
func PatchGoogleAnalytics() proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		// preflight check
		host := chain.Request.URL.Host
		isGADomain := host == "www.google-analytics.com" || host == "google-analytics.com"
		isGAPath := strings.HasSuffix(chain.Request.URL.Path, "analytics.js")
		if !isGADomain && !isGAPath {
			return nil
		}

		// The upstream payload is discarded, so release it before swapping in
		// the stub; otherwise the original body is never closed.
		if chain.Response.Body != nil {
			// best effort: a close failure must not prevent serving the stub
			_ = chain.Response.Body.Close()
		}

		// send modified js payload to client containing
		// stub functions from patch_google_analytics.js
		chain.Response.Body = io.NopCloser(strings.NewReader(gaPatch))
		chain.Context.Set("content-type", "text/javascript")

		return nil
	}
}

View File

@@ -0,0 +1,69 @@
package rewriters
import (
_ "embed"
"fmt"
"log"
"net/url"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// BlockThirdPartyScriptsRewriter is an HTMLTokenRewriter that neutralises
// third-party JS in script tags by renaming their src attribute to "blocked".
type BlockThirdPartyScriptsRewriter struct {
	// baseURL is the URL of the page being proxied.
	baseURL *url.URL
	// proxyURL is the URL of the ladder itself, not of the proxied site.
	proxyURL string
}

// NewBlockThirdPartyScriptsRewriter builds a BlockThirdPartyScriptsRewriter for
// the page at baseURL served through the ladder instance at proxyURL.
// The resulting rewriter strips 3rd party JS URLs out of script tags.
func NewBlockThirdPartyScriptsRewriter(baseURL *url.URL, proxyURL string) *BlockThirdPartyScriptsRewriter {
	rewriter := new(BlockThirdPartyScriptsRewriter)
	rewriter.baseURL = baseURL
	rewriter.proxyURL = proxyURL
	return rewriter
}
// ShouldModify reports whether token is a <script> tag carrying at least one
// src attribute that points at a third-party origin. Inline scripts (no src)
// and first-party scripts are left alone.
func (r *BlockThirdPartyScriptsRewriter) ShouldModify(token *html.Token) bool {
	if token.DataAtom != atom.Script {
		return false
	}

	// check for 3p .js urls in html elements
	for i := range token.Attr {
		if token.Attr[i].Key == "src" && r.isThirdPartySrc(token.Attr[i].Val) {
			return true
		}
	}

	return false
}

// ModifyToken disables every third-party src attribute of token by renaming
// its key to "blocked", and logs each blocked URL. It never adds markup
// around the token, so both returned strings are empty.
func (r *BlockThirdPartyScriptsRewriter) ModifyToken(token *html.Token) (string, string) {
	for i := range token.Attr {
		attr := &token.Attr[i]
		if attr.Key != "src" || !r.isThirdPartySrc(attr.Val) {
			continue
		}

		log.Printf("INFO: blocked 3P js: '%s' on '%s'\n", attr.Val, r.baseURL.String())
		attr.Key = "blocked"
	}

	return "", ""
}

// isThirdPartySrc reports whether a script src value refers to an origin
// other than the proxied site (baseURL) or the ladder itself (proxyURL).
// Relative paths and non-http(s) schemes are treated as first-party.
func (r *BlockThirdPartyScriptsRewriter) isThirdPartySrc(src string) bool {
	src = strings.TrimSpace(src)

	// protocol-relative URLs ("//cdn.example.net/x.js") are absolute URLs
	// that inherit the scheme of the page, not site-relative paths
	if strings.HasPrefix(src, "//") {
		src = fmt.Sprintf("%s:%s", r.baseURL.Scheme, src)
	}

	lower := strings.ToLower(src)
	if !strings.HasPrefix(lower, "http://") && !strings.HasPrefix(lower, "https://") {
		return false
	}

	u, err := url.Parse(src)
	if err != nil {
		// absolute but malformed: it cannot be shown to be first-party
		return true
	}

	// compare hosts exactly rather than by string prefix, so that
	// "https://example.com.evil.net" is not mistaken for "https://example.com"
	if strings.EqualFold(u.Hostname(), r.baseURL.Hostname()) {
		return false
	}

	// scripts already served through the ladder are first-party as well
	if proxy, perr := url.Parse(r.proxyURL); perr == nil && proxy.Host != "" && strings.EqualFold(u.Host, proxy.Host) {
		return false
	}

	return true
}

View File

@@ -6,6 +6,8 @@ import (
"sort"
"strings"
"crypto/md5"
"encoding/hex"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
@@ -16,6 +18,7 @@ import (
type ScriptInjectorRewriter struct {
execTime ScriptExecTime
script string
scriptMD5 string
}
type ScriptExecTime int
@@ -37,20 +40,27 @@ var afterDomIdleScriptInjector string
// ModifyToken ignores the token it is given and returns an empty first string
// plus, as second string, a <script> element built from r.script. How the
// script is wrapped depends on r.execTime; an unknown exec time yields two
// empty strings.
func (r *ScriptInjectorRewriter) ModifyToken(_ *html.Token) (string, string) {
switch {
// emit the script verbatim inside the <script> element
case r.execTime == BeforeDOMContentLoaded:
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", r.script)
return "", fmt.Sprintf("\n<script id='%s'>\n%s\n</script>\n", r.scriptMD5, r.script)
// run the script from a DOMContentLoaded event listener
case r.execTime == AfterDOMContentLoaded:
return "", fmt.Sprintf("\n<script>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.script)
return "", fmt.Sprintf("\n<script id='%s'>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.scriptMD5, r.script)
// substitute the script into the afterDomIdleScriptInjector template
// (first occurrence of the placeholder only)
case r.execTime == AfterDOMIdle:
s := strings.Replace(afterDomIdleScriptInjector, `'{{AFTER_DOM_IDLE_SCRIPT}}'`, r.script, 1)
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", s)
return "", fmt.Sprintf("\n<script id='%s'>\n%s\n</script>\n", r.scriptMD5, s)
default:
return "", ""
}
}
// generateMD5Hash returns the MD5 digest of input as a lowercase
// hexadecimal string.
func generateMD5Hash(input string) string {
	digest := md5.Sum([]byte(input))
	return hex.EncodeToString(digest[:])
}
// applies parameters by string replacement of the template script
func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
// Sort the keys by length in descending order
@@ -71,9 +81,13 @@ func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
// NewScriptInjectorRewriter returns a ScriptInjectorRewriter (an HTML token
// rewriter) that injects the given JS into the page for execution at the
// time selected by execTime.
func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
// hex MD5 digest of the raw script
scriptMD5 := generateMD5Hash(script)
// guard wrapper: the script body runs only while no element with id
// "x-<md5>" exists, and afterwards the element with id "<md5>" is renamed
// to "x-<md5>"
executeOnceScript := fmt.Sprintf(`if (!document.getElementById("x-%s")) { %s; document.getElementById("%s").id = "x-%s" };`, scriptMD5, script, scriptMD5, scriptMD5)
return &ScriptInjectorRewriter{
execTime: execTime,
script: script,
script: executeOnceScript,
scriptMD5: scriptMD5,
}
}
@@ -83,10 +97,7 @@ func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptIn
// the params map represents the key-value pair of the params.
// the key will be string replaced with the value
func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter {
rr := &ScriptInjectorRewriter{
execTime: execTime,
script: script,
}
rr := NewScriptInjectorRewriter(script, execTime)
// substitute the params into the rewriter's script after construction
rr.applyParams(params)
return rr
}

View File

@@ -134,7 +134,7 @@
// monkey patch xmlhttprequest
const oldOpen = XMLHttpRequest.prototype.open;
XMLHttpRequest.prototype.open = function (
XMLHttpRequest.prototype.open = function(
method,
url,
async = true,
@@ -150,7 +150,7 @@
);
const oldSend = XMLHttpRequest.prototype.send;
XMLHttpRequest.prototype.send = function (method, url) {
XMLHttpRequest.prototype.send = function(method, url) {
return oldSend.call(this, method, rewriteURL(url));
};
hideMonkeyPatch(
@@ -160,6 +160,7 @@
);
// monkey patch service worker registration
/*
const oldRegister = ServiceWorkerContainer.prototype.register;
ServiceWorkerContainer.prototype.register = function (scriptURL, options) {
return oldRegister.call(this, rewriteURL(scriptURL), options);
@@ -169,10 +170,11 @@
"register",
"function register() { [native code] }",
);
*/
// monkey patch URL.toString() method
const oldToString = URL.prototype.toString;
URL.prototype.toString = function () {
URL.prototype.toString = function() {
let originalURL = oldToString.call(this);
return rewriteURL(originalURL);
};
@@ -184,7 +186,7 @@
// monkey patch URL.toJSON() method
// (capture and replace toJSON itself; toString is already patched separately,
// and patching it a second time here would leave toJSON untouched)
const oldToJson = URL.prototype.toJSON;
URL.prototype.toJSON = function() {
  let originalURL = oldToJson.call(this);
  return rewriteURL(originalURL);
};
@@ -200,11 +202,11 @@
"href",
);
Object.defineProperty(URL.prototype, "href", {
get: function () {
get: function() {
let originalHref = originalHrefDescriptor.get.call(this);
return rewriteURL(originalHref);
},
set: function (newValue) {
set: function(newValue) {
originalHrefDescriptor.set.call(this, rewriteURL(newValue));
},
});
@@ -283,7 +285,7 @@
// monkey-patching Element.setAttribute
const originalSetAttribute = Element.prototype.setAttribute;
Element.prototype.setAttribute = function (name, value) {
Element.prototype.setAttribute = function(name, value) {
const isMatchingElement = elements.some((element) => {
return this.tagName.toLowerCase() === element.tag &&
name.toLowerCase() === element.attribute;

View File

@@ -24,6 +24,10 @@ func init() {
return tx.BlockElementRemoval(params[0])
}
rsmModMap["BlockThirdPartyScripts"] = func(_ ...string) proxychain.ResponseModification {
return tx.BlockThirdPartyScripts()
}
rsmModMap["BypassCORS"] = func(_ ...string) proxychain.ResponseModification {
return tx.BypassCORS()
}
@@ -92,10 +96,6 @@ func init() {
return tx.PatchDynamicResourceURLs()
}
rsmModMap["PatchGoogleAnalytics"] = func(_ ...string) proxychain.ResponseModification {
return tx.PatchGoogleAnalytics()
}
rsmModMap["PatchTrackerScripts"] = func(_ ...string) proxychain.ResponseModification {
return tx.PatchTrackerScripts()
}