From bfd647e526b48b5fbcc005658765ec572c5a8312 Mon Sep 17 00:00:00 2001 From: Kevin Pham Date: Tue, 21 Nov 2023 23:25:34 -0600 Subject: [PATCH] url rewrite improvements --- handlers/proxy.go | 3 +- proxychain/proxychain.go | 2 +- .../rewriters/html_resource_url_rewriter.go | 76 ++++++---- .../rewriters/js_resource_url_rewriter.js | 136 ++++++++++++++++-- 4 files changed, 177 insertions(+), 40 deletions(-) diff --git a/handlers/proxy.go b/handlers/proxy.go index 866cdb2..54718ad 100644 --- a/handlers/proxy.go +++ b/handlers/proxy.go @@ -57,7 +57,8 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler { SetDebugLogging(opts.Verbose). SetRequestModifications( rx.DeleteOutgoingCookies(), - rx.SpoofReferrerFromTwitterPost(), + //rx.RequestArchiveIs(), + rx.MasqueradeAsGoogleBot(), ). AddResponseModifications( tx.DeleteIncomingCookies(), diff --git a/proxychain/proxychain.go b/proxychain/proxychain.go index b7f8883..2c71a4e 100644 --- a/proxychain/proxychain.go +++ b/proxychain/proxychain.go @@ -248,7 +248,7 @@ func reconstructUrlFromReferer(referer *url.URL, relativeUrl *url.URL) (*url.URL } if realUrl.Scheme == "" || realUrl.Host == "" { - return nil, fmt.Errorf("invalid referer URL: '%s' on request '%s", referer, relativeUrl) + return nil, fmt.Errorf("invalid referer URL: '%s' on request '%s", referer.String(), relativeUrl.String()) } log.Printf("rewrite relative URL using referer: '%s' -> '%s'\n", relativeUrl.String(), realUrl.String()) diff --git a/proxychain/responsemodifers/rewriters/html_resource_url_rewriter.go b/proxychain/responsemodifers/rewriters/html_resource_url_rewriter.go index 7579602..1997f67 100644 --- a/proxychain/responsemodifers/rewriters/html_resource_url_rewriter.go +++ b/proxychain/responsemodifers/rewriters/html_resource_url_rewriter.go @@ -146,8 +146,8 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) { isHeadToken := (r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken) && r.currentToken.Data == "head" if isHeadToken { params := map[string]string{ - "PROXY_ORIGIN_INJECT_FROM_GOLANG": r.proxyURL, - "ORIGIN_INJECT_FROM_GOLANG": fmt.Sprintf("%s://%s", r.baseURL.Scheme, r.baseURL.Host), + "R_PROXYURL": r.proxyURL, + "R_BASEURL": fmt.Sprintf("%s://%s", r.baseURL.Scheme, r.baseURL.Host), } injectScriptWithParams(r.tokenBuffer, rewriteJSResourceUrlsScript, params) } @@ -192,8 +192,10 @@ func modifyInlineScript(scriptContentBuffer *bytes.Buffer) string { // Root-relative URLs: These are relative to the root path and start with a "/". func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) { // doublecheck this is a valid relative URL + log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val) _, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val)) if err != nil { + log.Println(err) return } @@ -251,37 +253,53 @@ func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) { } func handleSrcSet(attr *html.Attribute, baseURL *url.URL) { - for i, src := range strings.Split(attr.Val, ",") { - src = strings.Trim(src, " ") - for j, s := range strings.Split(src, " ") { - s = strings.Trim(s, " ") - if j == 0 { - f := &html.Attribute{Val: s, Key: attr.Key} - switch { - case strings.HasPrefix(s, "//"): - handleProtocolRelativePath(f, baseURL) - case strings.HasPrefix(s, "/"): - handleRootRelativePath(f, baseURL) - case strings.HasPrefix(s, "https://") || strings.HasPrefix(s, "http://"): - handleAbsolutePath(f, baseURL) - default: - handleDocumentRelativePath(f, baseURL) - } - s = f.Val - } - if i == 0 && j == 0 { - attr.Val = s - continue - } - attr.Val = fmt.Sprintf("%s %s", attr.Val, s) - } - attr.Val = fmt.Sprintf("%s,", attr.Val) - } - attr.Val = strings.TrimSuffix(attr.Val, ",") + var srcSetBuilder strings.Builder + srcSetItems := strings.Split(attr.Val, ",") + for i, srcItem := range srcSetItems { + srcParts := strings.Fields(srcItem) // Fields splits around whitespace, trimming them + + if len(srcParts) == 0 { + continue // skip empty items + } + + // Process URL part + urlPart := processURLPart(srcParts[0], baseURL) + + // First srcset item without a descriptor + if i == 0 && (len(srcParts) == 1 || !strings.HasSuffix(srcParts[1], "x")) { + srcSetBuilder.WriteString(urlPart) + } else { + srcSetBuilder.WriteString(fmt.Sprintf("%s %s", urlPart, srcParts[1])) + } + + if i < len(srcSetItems)-1 { + srcSetBuilder.WriteString(",") // Add comma for all but last item + } + } + + attr.Val = srcSetBuilder.String() log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val) } +// only for srcset +func processURLPart(urlPart string, baseURL *url.URL) string { + f := &html.Attribute{Val: urlPart, Key: "src"} + + switch { + case strings.HasPrefix(urlPart, "//"): + handleProtocolRelativePath(f, baseURL) + case strings.HasPrefix(urlPart, "/"): + handleRootRelativePath(f, baseURL) + case strings.HasPrefix(urlPart, "https://"), strings.HasPrefix(urlPart, "http://"): + handleAbsolutePath(f, baseURL) + default: + handleDocumentRelativePath(f, baseURL) + } + + return f.Val +} + func isBlackedlistedScheme(url string) bool { spl := strings.Split(url, ":") if len(spl) == 0 { diff --git a/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js b/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js index 880e7ac..9c04281 100644 --- a/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js +++ b/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js @@ -20,21 +20,28 @@ if (!url) return url let isStr = (typeof url.startsWith === 'function') if (!isStr) return url - // don't rewrite invalid URIs - try { new URL(url) } catch { return url } // don't rewrite special URIs if (blacklistedSchemes.includes(url)) return url; - // don't double rewrite //const proxyOrigin = globalThis.window.location.origin; - const proxyOrigin = `PROXY_ORIGIN_INJECT_FROM_GOLANG`; + const proxyOrigin = "R_PROXYURL"; + //const origin = (new URL(decodeURIComponent(globalThis.window.location.pathname.substring(1)))).origin + const origin = "R_BASEURL"; + + // don't rewrite invalid URIs + try { new URL(url, origin) } catch { return url } + + // don't double rewrite if (url.startsWith(proxyOrigin)) return url; if (url.startsWith(`/${proxyOrigin}`)) return url; if (url.startsWith(`/${origin}`)) return url; + if (url.startsWith(`/http://`)) return url; + if (url.startsWith(`/https://`)) return url; + if (url.startsWith(`/http%3A%2F%2F`)) return url; + if (url.startsWith(`/https%3A%2F%2F`)) return url; + if (url.startsWith(`/%2Fhttp`)) return url; - //const origin = (new URL(decodeURIComponent(globalThis.window.location.pathname.substring(1)))).origin - const origin = `ORIGIN_INJECT_FROM_GOLANG`; //console.log(`proxychain: origin: ${origin} // proxyOrigin: ${proxyOrigin} // original: ${oldUrl}`) if (url.startsWith("//")) { @@ -50,27 +57,62 @@ return url; }; + // sometimes anti-bot protections like cloudflare or akamai bot manager check if JS is hooked + function hideMonkeyPatch(objectOrName, method, originalToString) { + let obj; + let isGlobalFunction = false; + + if (typeof objectOrName === 'string') { + obj = globalThis[objectOrName]; + isGlobalFunction = (typeof obj === 'function') && (method === objectOrName); + } else { + obj = objectOrName; + } + + if (isGlobalFunction) { + const originalFunction = obj; + globalThis[objectOrName] = function(...args) { + return originalFunction.apply(this, args); + }; + globalThis[objectOrName].toString = () => originalToString; + } else if (obj && typeof obj[method] === 'function') { + const originalMethod = obj[method]; + obj[method] = function(...args) { + return originalMethod.apply(this, args); + }; + obj[method].toString = () => originalToString; + } else { + console.warn(`proxychain: cannot hide monkey patch: ${method} is not a function on the provided object.`); + } + } + // monkey patch fetch - const oldFetch = globalThis.fetch; - globalThis.fetch = async (url, init) => { + const oldFetch = fetch; + fetch = async (url, init) => { return oldFetch(rewriteURL(url), init) } + hideMonkeyPatch('fetch', 'fetch', 'function fetch() { [native code] }') // monkey patch xmlhttprequest const oldOpen = XMLHttpRequest.prototype.open; XMLHttpRequest.prototype.open = function(method, url, async = true, user = null, password = null) { return oldOpen.call(this, method, rewriteURL(url), async, user, password); }; + hideMonkeyPatch(XMLHttpRequest.prototype, 'open', 'function(){if("function"==typeof eo)return eo.apply(this,arguments)}'); + const oldSend = XMLHttpRequest.prototype.send; XMLHttpRequest.prototype.send = function(method, url) { return oldSend.call(this, method, rewriteURL(url)); }; + hideMonkeyPatch(XMLHttpRequest.prototype, 'send', 'function(){if("function"==typeof eo)return eo.apply(this,arguments)}'); + // monkey patch service worker registration const oldRegister = ServiceWorkerContainer.prototype.register; ServiceWorkerContainer.prototype.register = function(scriptURL, options) { return oldRegister.call(this, rewriteURL(scriptURL), options) } + hideMonkeyPatch(ServiceWorkerContainer.prototype, 'register', 'function register() { [native code] }') // monkey patch URL.toString() method const oldToString = URL.prototype.toString @@ -78,6 +120,7 @@ let originalURL = oldToString.call(this) return rewriteURL(originalURL) } + hideMonkeyPatch(URL.prototype, 'toString', 'function toString() { [native code] }') // monkey patch URL.toJSON() method const oldToJson = URL.prototype.toString @@ -85,6 +128,7 @@ let originalURL = oldToJson.call(this) return rewriteURL(originalURL) } + hideMonkeyPatch(URL.prototype, 'toString', 'function toJSON() { [native code] }') // Monkey patch URL.href getter and setter const originalHrefDescriptor = Object.getOwnPropertyDescriptor(URL.prototype, 'href'); @@ -98,6 +142,9 @@ } }); + // TODO: do one more pass of this by manually traversing the DOM + // AFTER all the JS and page has loaded just in case + // Monkey patch setter const elements = [ { tag: 'a', attribute: 'href' }, @@ -166,4 +213,75 @@ }); } }); -})(); + + + // sometimes, libraries will set the Element.innerHTML or Element.outerHTML directly with a string instead of setters. + // in this case, we intercept it, create a fake DOM, parse it and then rewrite all attributes that could + // contain a URL. Then we return the replacement innerHTML/outerHTML with redirected links. + function rewriteInnerHTML(html, elements) { + const isRewritingHTMLKey = Symbol.for('isRewritingHTML'); + + // Check if already processing + if (document[isRewritingHTMLKey]) { + return html; + } + + const tempContainer = document.createElement('div'); + document[isRewritingHTMLKey] = true; + + try { + tempContainer.innerHTML = html; + + // Create a map for quick lookup + const elementsMap = new Map(elements.map(e => [e.tag, e.attribute])); + + // Loop-based DOM traversal + const nodes = [...tempContainer.querySelectorAll('*')]; + for (const node of nodes) { + const attribute = elementsMap.get(node.tagName.toLowerCase()); + if (attribute && node.hasAttribute(attribute)) { + const originalUrl = node.getAttribute(attribute); + const rewrittenUrl = rewriteURL(originalUrl); + node.setAttribute(attribute, rewrittenUrl); + } + } + + return tempContainer.innerHTML; + } finally { + // Clear the flag + document[isRewritingHTMLKey] = false; + } + } + + + // Store original setters +const originalSetters = {}; + + ['innerHTML', 'outerHTML'].forEach(property => { + const descriptor = Object.getOwnPropertyDescriptor(Element.prototype, property); + if (descriptor && descriptor.set) { + originalSetters[property] = descriptor.set; + + Object.defineProperty(Element.prototype, property, { + ...descriptor, + set(value) { + const isRewritingHTMLKey = Symbol.for('isRewritingHTML'); + if (!this[isRewritingHTMLKey]) { + this[isRewritingHTMLKey] = true; + try { + // Use custom logic + descriptor.set.call(this, rewriteInnerHTML(value, elements)); + } finally { + this[isRewritingHTMLKey] = false; + } + } else { + // Use original setter in recursive call + originalSetters[property].call(this, value); + } + } + }); + } + }); + + +})(); \ No newline at end of file