url rewrite improvements
This commit is contained in:
@@ -57,7 +57,8 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
|
||||
SetDebugLogging(opts.Verbose).
|
||||
SetRequestModifications(
|
||||
rx.DeleteOutgoingCookies(),
|
||||
rx.SpoofReferrerFromTwitterPost(),
|
||||
//rx.RequestArchiveIs(),
|
||||
rx.MasqueradeAsGoogleBot(),
|
||||
).
|
||||
AddResponseModifications(
|
||||
tx.DeleteIncomingCookies(),
|
||||
|
||||
@@ -248,7 +248,7 @@ func reconstructUrlFromReferer(referer *url.URL, relativeUrl *url.URL) (*url.URL
|
||||
}
|
||||
|
||||
if realUrl.Scheme == "" || realUrl.Host == "" {
|
||||
return nil, fmt.Errorf("invalid referer URL: '%s' on request '%s", referer, relativeUrl)
|
||||
return nil, fmt.Errorf("invalid referer URL: '%s' on request '%s", referer.String(), relativeUrl.String())
|
||||
}
|
||||
|
||||
log.Printf("rewrite relative URL using referer: '%s' -> '%s'\n", relativeUrl.String(), realUrl.String())
|
||||
|
||||
@@ -146,8 +146,8 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
|
||||
isHeadToken := (r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken) && r.currentToken.Data == "head"
|
||||
if isHeadToken {
|
||||
params := map[string]string{
|
||||
"PROXY_ORIGIN_INJECT_FROM_GOLANG": r.proxyURL,
|
||||
"ORIGIN_INJECT_FROM_GOLANG": fmt.Sprintf("%s://%s", r.baseURL.Scheme, r.baseURL.Host),
|
||||
"R_PROXYURL": r.proxyURL,
|
||||
"R_BASEURL": fmt.Sprintf("%s://%s", r.baseURL.Scheme, r.baseURL.Host),
|
||||
}
|
||||
injectScriptWithParams(r.tokenBuffer, rewriteJSResourceUrlsScript, params)
|
||||
}
|
||||
@@ -192,8 +192,10 @@ func modifyInlineScript(scriptContentBuffer *bytes.Buffer) string {
|
||||
// Root-relative URLs: These are relative to the root path and start with a "/".
|
||||
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
// doublecheck this is a valid relative URL
|
||||
log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
|
||||
_, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -251,37 +253,53 @@ func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
}
|
||||
|
||||
func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
|
||||
for i, src := range strings.Split(attr.Val, ",") {
|
||||
src = strings.Trim(src, " ")
|
||||
for j, s := range strings.Split(src, " ") {
|
||||
s = strings.Trim(s, " ")
|
||||
if j == 0 {
|
||||
f := &html.Attribute{Val: s, Key: attr.Key}
|
||||
switch {
|
||||
case strings.HasPrefix(s, "//"):
|
||||
handleProtocolRelativePath(f, baseURL)
|
||||
case strings.HasPrefix(s, "/"):
|
||||
handleRootRelativePath(f, baseURL)
|
||||
case strings.HasPrefix(s, "https://") || strings.HasPrefix(s, "http://"):
|
||||
handleAbsolutePath(f, baseURL)
|
||||
default:
|
||||
handleDocumentRelativePath(f, baseURL)
|
||||
}
|
||||
s = f.Val
|
||||
}
|
||||
if i == 0 && j == 0 {
|
||||
attr.Val = s
|
||||
continue
|
||||
}
|
||||
attr.Val = fmt.Sprintf("%s %s", attr.Val, s)
|
||||
}
|
||||
attr.Val = fmt.Sprintf("%s,", attr.Val)
|
||||
}
|
||||
attr.Val = strings.TrimSuffix(attr.Val, ",")
|
||||
var srcSetBuilder strings.Builder
|
||||
srcSetItems := strings.Split(attr.Val, ",")
|
||||
|
||||
for i, srcItem := range srcSetItems {
|
||||
srcParts := strings.Fields(srcItem) // Fields splits around whitespace, trimming them
|
||||
|
||||
if len(srcParts) == 0 {
|
||||
continue // skip empty items
|
||||
}
|
||||
|
||||
// Process URL part
|
||||
urlPart := processURLPart(srcParts[0], baseURL)
|
||||
|
||||
// First srcset item without a descriptor
|
||||
if i == 0 && (len(srcParts) == 1 || !strings.HasSuffix(srcParts[1], "x")) {
|
||||
srcSetBuilder.WriteString(urlPart)
|
||||
} else {
|
||||
srcSetBuilder.WriteString(fmt.Sprintf("%s %s", urlPart, srcParts[1]))
|
||||
}
|
||||
|
||||
if i < len(srcSetItems)-1 {
|
||||
srcSetBuilder.WriteString(",") // Add comma for all but last item
|
||||
}
|
||||
}
|
||||
|
||||
attr.Val = srcSetBuilder.String()
|
||||
log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// only for srcset
|
||||
func processURLPart(urlPart string, baseURL *url.URL) string {
|
||||
f := &html.Attribute{Val: urlPart, Key: "src"}
|
||||
|
||||
switch {
|
||||
case strings.HasPrefix(urlPart, "//"):
|
||||
handleProtocolRelativePath(f, baseURL)
|
||||
case strings.HasPrefix(urlPart, "/"):
|
||||
handleRootRelativePath(f, baseURL)
|
||||
case strings.HasPrefix(urlPart, "https://"), strings.HasPrefix(urlPart, "http://"):
|
||||
handleAbsolutePath(f, baseURL)
|
||||
default:
|
||||
handleDocumentRelativePath(f, baseURL)
|
||||
}
|
||||
|
||||
return f.Val
|
||||
}
|
||||
|
||||
func isBlackedlistedScheme(url string) bool {
|
||||
spl := strings.Split(url, ":")
|
||||
if len(spl) == 0 {
|
||||
|
||||
@@ -20,21 +20,28 @@
|
||||
if (!url) return url
|
||||
let isStr = (typeof url.startsWith === 'function')
|
||||
if (!isStr) return url
|
||||
// don't rewrite invalid URIs
|
||||
try { new URL(url) } catch { return url }
|
||||
|
||||
// don't rewrite special URIs
|
||||
if (blacklistedSchemes.includes(url)) return url;
|
||||
|
||||
// don't double rewrite
|
||||
//const proxyOrigin = globalThis.window.location.origin;
|
||||
const proxyOrigin = `PROXY_ORIGIN_INJECT_FROM_GOLANG`;
|
||||
const proxyOrigin = "R_PROXYURL";
|
||||
//const origin = (new URL(decodeURIComponent(globalThis.window.location.pathname.substring(1)))).origin
|
||||
const origin = "R_BASEURL";
|
||||
|
||||
// don't rewrite invalid URIs
|
||||
try { new URL(url, origin) } catch { return url }
|
||||
|
||||
// don't double rewrite
|
||||
if (url.startsWith(proxyOrigin)) return url;
|
||||
if (url.startsWith(`/${proxyOrigin}`)) return url;
|
||||
if (url.startsWith(`/${origin}`)) return url;
|
||||
if (url.startsWith(`/http://`)) return url;
|
||||
if (url.startsWith(`/https://`)) return url;
|
||||
if (url.startsWith(`/http%3A%2F%2F`)) return url;
|
||||
if (url.startsWith(`/https%3A%2F%2F`)) return url;
|
||||
if (url.startsWith(`/%2Fhttp`)) return url;
|
||||
|
||||
//const origin = (new URL(decodeURIComponent(globalThis.window.location.pathname.substring(1)))).origin
|
||||
const origin = `ORIGIN_INJECT_FROM_GOLANG`;
|
||||
//console.log(`proxychain: origin: ${origin} // proxyOrigin: ${proxyOrigin} // original: ${oldUrl}`)
|
||||
|
||||
if (url.startsWith("//")) {
|
||||
@@ -50,27 +57,62 @@
|
||||
return url;
|
||||
};
|
||||
|
||||
/**
 * Disguises an already monkey-patched function so that calling toString()
 * on it returns a caller-supplied string instead of the patch's source.
 * Some anti-bot protections (e.g. cloudflare or akamai bot manager) check
 * whether JS built-ins have been hooked.
 *
 * The current function is wrapped in a new forwarding function (preserving
 * `this` and all arguments) and the wrapper's own `toString` property is
 * replaced.
 *
 * @param {object|string} objectOrName - Either the object that owns the
 *   method, or the name of a property on globalThis. When a name is given and
 *   it equals `method` and refers to a function, the global function itself
 *   is wrapped.
 * @param {string} method - Name of the method to disguise.
 * @param {string} originalToString - The string toString() should report.
 */
function hideMonkeyPatch(objectOrName, method, originalToString) {
  const givenByName = typeof objectOrName === 'string';
  const obj = givenByName ? globalThis[objectOrName] : objectOrName;
  const isGlobalFunction = givenByName && typeof obj === 'function' && method === objectOrName;

  if (isGlobalFunction) {
    // Wrap the global function itself (e.g. fetch).
    const originalFunction = obj;
    // Assigned directly to the property so the wrapper stays anonymous.
    globalThis[objectOrName] = function(...args) {
      return originalFunction.apply(this, args);
    };
    globalThis[objectOrName].toString = () => originalToString;
    return;
  }

  if (obj && typeof obj[method] === 'function') {
    // Wrap a method living on an object or prototype.
    const originalMethod = obj[method];
    obj[method] = function(...args) {
      return originalMethod.apply(this, args);
    };
    obj[method].toString = () => originalToString;
    return;
  }

  console.warn(`proxychain: cannot hide monkey patch: ${method} is not a function on the provided object.`);
}
|
||||
|
||||
// monkey patch fetch: route every request URL through rewriteURL before
// handing it to the original implementation.
// The patch is installed on globalThis.fetch so that the by-name lookup in
// hideMonkeyPatch('fetch', 'fetch', ...) below finds and disguises it.
const oldFetch = globalThis.fetch;
globalThis.fetch = async (url, init) => {
  return oldFetch(rewriteURL(url), init)
}
hideMonkeyPatch('fetch', 'fetch', 'function fetch() { [native code] }')
|
||||
|
||||
// monkey patch XMLHttpRequest.prototype.open so the request URL is passed
// through rewriteURL; every other argument is forwarded untouched.
const oldOpen = XMLHttpRequest.prototype.open;
XMLHttpRequest.prototype.open = function(method, url, async = true, user = null, password = null) {
  const rewrittenUrl = rewriteURL(url);
  return oldOpen.call(this, method, rewrittenUrl, async, user, password);
};
// Disguise the patch's toString() output.
hideMonkeyPatch(XMLHttpRequest.prototype, 'open', 'function(){if("function"==typeof eo)return eo.apply(this,arguments)}');
|
||||
|
||||
// monkey patch XMLHttpRequest.prototype.send.
// send() takes only the request body (the URL was already rewritten in
// open()), so the body is forwarded as-is and is never run through
// rewriteURL. The default value keeps the wrapper's declared arity at 0.
const oldSend = XMLHttpRequest.prototype.send;
XMLHttpRequest.prototype.send = function(body = null) {
  return oldSend.call(this, body);
};
hideMonkeyPatch(XMLHttpRequest.prototype, 'send', 'function(){if("function"==typeof eo)return eo.apply(this,arguments)}');
|
||||
|
||||
|
||||
// monkey patch service worker registration so the worker script URL is
// passed through rewriteURL.
// ServiceWorkerContainer is only exposed in secure contexts; when the proxy
// is served over plain http the global does not exist, so it is probed with
// typeof to avoid a ReferenceError that would abort the rest of this script.
const oldRegister = (typeof ServiceWorkerContainer !== 'undefined')
  ? ServiceWorkerContainer.prototype.register
  : undefined;
if (typeof oldRegister === 'function') {
  ServiceWorkerContainer.prototype.register = function(scriptURL, options) {
    return oldRegister.call(this, rewriteURL(scriptURL), options)
  }
  hideMonkeyPatch(ServiceWorkerContainer.prototype, 'register', 'function register() { [native code] }')
}
|
||||
|
||||
// monkey patch URL.toString() method
|
||||
const oldToString = URL.prototype.toString
|
||||
@@ -78,6 +120,7 @@
|
||||
let originalURL = oldToString.call(this)
|
||||
return rewriteURL(originalURL)
|
||||
}
|
||||
hideMonkeyPatch(URL.prototype, 'toString', 'function toString() { [native code] }')
|
||||
|
||||
// monkey patch URL.toJSON() method
|
||||
const oldToJson = URL.prototype.toString
|
||||
@@ -85,6 +128,7 @@
|
||||
let originalURL = oldToJson.call(this)
|
||||
return rewriteURL(originalURL)
|
||||
}
|
||||
hideMonkeyPatch(URL.prototype, 'toString', 'function toJSON() { [native code] }')
|
||||
|
||||
// Monkey patch URL.href getter and setter
|
||||
const originalHrefDescriptor = Object.getOwnPropertyDescriptor(URL.prototype, 'href');
|
||||
@@ -98,6 +142,9 @@
|
||||
}
|
||||
});
|
||||
|
||||
// TODO: do one more pass of this by manually traversing the DOM
|
||||
// AFTER all the JS and page has loaded just in case
|
||||
|
||||
// Monkey patch setter
|
||||
const elements = [
|
||||
{ tag: 'a', attribute: 'href' },
|
||||
@@ -166,4 +213,75 @@
|
||||
});
|
||||
}
|
||||
});
|
||||
})();
|
||||
|
||||
|
||||
/**
 * Rewrites URL-bearing attributes inside an HTML string.
 *
 * Sometimes libraries set Element.innerHTML or Element.outerHTML directly
 * with a string instead of using attribute setters. In that case the string
 * is intercepted, parsed into a detached fragment, every attribute that could
 * contain a URL is passed through rewriteURL, and the re-serialized HTML is
 * returned.
 *
 * A <template> element is used as the parsing container: its content is
 * inert (no images or scripts are fetched before their URLs are rewritten)
 * and it accepts context-sensitive elements such as <tr>, <td> or <option>,
 * which would be dropped when parsed inside a <div>.
 *
 * @param {string} html - The markup about to be assigned.
 * @param {Array<{tag: string, attribute: string}>} elements - Tag/attribute
 *   pairs to rewrite. A tag may appear several times with different
 *   attributes; all of them are processed.
 * @returns {string} The markup with rewritten URLs, or `html` unchanged when
 *   a rewrite is already in progress.
 */
function rewriteInnerHTML(html, elements) {
  const isRewritingHTMLKey = Symbol.for('isRewritingHTML');

  // Re-entrancy guard: assigning innerHTML on the temporary container below
  // triggers the patched setter again; that nested call must pass through.
  if (document[isRewritingHTMLKey]) {
    return html;
  }

  const tempContainer = document.createElement('template');
  document[isRewritingHTMLKey] = true;

  try {
    tempContainer.innerHTML = html;

    // tag -> list of attributes, so several attributes per tag are supported
    const attributesByTag = new Map();
    for (const { tag, attribute } of elements) {
      if (!attributesByTag.has(tag)) {
        attributesByTag.set(tag, []);
      }
      attributesByTag.get(tag).push(attribute);
    }

    // Loop-based traversal of every parsed element
    for (const node of tempContainer.content.querySelectorAll('*')) {
      const attributes = attributesByTag.get(node.tagName.toLowerCase());
      if (!attributes) continue;

      for (const attribute of attributes) {
        if (node.hasAttribute(attribute)) {
          node.setAttribute(attribute, rewriteURL(node.getAttribute(attribute)));
        }
      }
    }

    return tempContainer.innerHTML;
  } finally {
    // Clear the flag even if parsing or rewriting throws
    document[isRewritingHTMLKey] = false;
  }
}
|
||||
|
||||
|
||||
// Original innerHTML/outerHTML setters, keyed by property name.
const originalSetters = {};

// Replace the innerHTML and outerHTML setters on Element.prototype so that
// assigned markup is first passed through rewriteInnerHTML.
for (const property of ['innerHTML', 'outerHTML']) {
  const descriptor = Object.getOwnPropertyDescriptor(Element.prototype, property);
  if (!descriptor || !descriptor.set) {
    continue; // nothing to patch for this property
  }

  originalSetters[property] = descriptor.set;

  Object.defineProperty(Element.prototype, property, {
    ...descriptor,
    set(value) {
      const isRewritingHTMLKey = Symbol.for('isRewritingHTML');

      // Recursive call on the same element: use the original setter as-is.
      if (this[isRewritingHTMLKey]) {
        originalSetters[property].call(this, value);
        return;
      }

      // Per-element flag, always cleared again even if the rewrite throws.
      this[isRewritingHTMLKey] = true;
      try {
        descriptor.set.call(this, rewriteInnerHTML(value, elements));
      } finally {
        this[isRewritingHTMLKey] = false;
      }
    }
  });
}
|
||||
|
||||
|
||||
})();
|
||||
Reference in New Issue
Block a user