url rewrite improvements
This commit is contained in:
@@ -57,7 +57,8 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
|
|||||||
SetDebugLogging(opts.Verbose).
|
SetDebugLogging(opts.Verbose).
|
||||||
SetRequestModifications(
|
SetRequestModifications(
|
||||||
rx.DeleteOutgoingCookies(),
|
rx.DeleteOutgoingCookies(),
|
||||||
rx.SpoofReferrerFromTwitterPost(),
|
//rx.RequestArchiveIs(),
|
||||||
|
rx.MasqueradeAsGoogleBot(),
|
||||||
).
|
).
|
||||||
AddResponseModifications(
|
AddResponseModifications(
|
||||||
tx.DeleteIncomingCookies(),
|
tx.DeleteIncomingCookies(),
|
||||||
|
|||||||
@@ -248,7 +248,7 @@ func reconstructUrlFromReferer(referer *url.URL, relativeUrl *url.URL) (*url.URL
|
|||||||
}
|
}
|
||||||
|
|
||||||
if realUrl.Scheme == "" || realUrl.Host == "" {
|
if realUrl.Scheme == "" || realUrl.Host == "" {
|
||||||
return nil, fmt.Errorf("invalid referer URL: '%s' on request '%s", referer, relativeUrl)
|
return nil, fmt.Errorf("invalid referer URL: '%s' on request '%s", referer.String(), relativeUrl.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("rewrite relative URL using referer: '%s' -> '%s'\n", relativeUrl.String(), realUrl.String())
|
log.Printf("rewrite relative URL using referer: '%s' -> '%s'\n", relativeUrl.String(), realUrl.String())
|
||||||
|
|||||||
@@ -146,8 +146,8 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
|
|||||||
isHeadToken := (r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken) && r.currentToken.Data == "head"
|
isHeadToken := (r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken) && r.currentToken.Data == "head"
|
||||||
if isHeadToken {
|
if isHeadToken {
|
||||||
params := map[string]string{
|
params := map[string]string{
|
||||||
"PROXY_ORIGIN_INJECT_FROM_GOLANG": r.proxyURL,
|
"R_PROXYURL": r.proxyURL,
|
||||||
"ORIGIN_INJECT_FROM_GOLANG": fmt.Sprintf("%s://%s", r.baseURL.Scheme, r.baseURL.Host),
|
"R_BASEURL": fmt.Sprintf("%s://%s", r.baseURL.Scheme, r.baseURL.Host),
|
||||||
}
|
}
|
||||||
injectScriptWithParams(r.tokenBuffer, rewriteJSResourceUrlsScript, params)
|
injectScriptWithParams(r.tokenBuffer, rewriteJSResourceUrlsScript, params)
|
||||||
}
|
}
|
||||||
@@ -192,8 +192,10 @@ func modifyInlineScript(scriptContentBuffer *bytes.Buffer) string {
|
|||||||
// Root-relative URLs: These are relative to the root path and start with a "/".
|
// Root-relative URLs: These are relative to the root path and start with a "/".
|
||||||
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||||
// doublecheck this is a valid relative URL
|
// doublecheck this is a valid relative URL
|
||||||
|
log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
|
||||||
_, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
|
_, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -251,37 +253,53 @@ func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
|
func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
|
||||||
for i, src := range strings.Split(attr.Val, ",") {
|
var srcSetBuilder strings.Builder
|
||||||
src = strings.Trim(src, " ")
|
srcSetItems := strings.Split(attr.Val, ",")
|
||||||
for j, s := range strings.Split(src, " ") {
|
|
||||||
s = strings.Trim(s, " ")
|
|
||||||
if j == 0 {
|
|
||||||
f := &html.Attribute{Val: s, Key: attr.Key}
|
|
||||||
switch {
|
|
||||||
case strings.HasPrefix(s, "//"):
|
|
||||||
handleProtocolRelativePath(f, baseURL)
|
|
||||||
case strings.HasPrefix(s, "/"):
|
|
||||||
handleRootRelativePath(f, baseURL)
|
|
||||||
case strings.HasPrefix(s, "https://") || strings.HasPrefix(s, "http://"):
|
|
||||||
handleAbsolutePath(f, baseURL)
|
|
||||||
default:
|
|
||||||
handleDocumentRelativePath(f, baseURL)
|
|
||||||
}
|
|
||||||
s = f.Val
|
|
||||||
}
|
|
||||||
if i == 0 && j == 0 {
|
|
||||||
attr.Val = s
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
attr.Val = fmt.Sprintf("%s %s", attr.Val, s)
|
|
||||||
}
|
|
||||||
attr.Val = fmt.Sprintf("%s,", attr.Val)
|
|
||||||
}
|
|
||||||
attr.Val = strings.TrimSuffix(attr.Val, ",")
|
|
||||||
|
|
||||||
|
for i, srcItem := range srcSetItems {
|
||||||
|
srcParts := strings.Fields(srcItem) // Fields splits around whitespace, trimming them
|
||||||
|
|
||||||
|
if len(srcParts) == 0 {
|
||||||
|
continue // skip empty items
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process URL part
|
||||||
|
urlPart := processURLPart(srcParts[0], baseURL)
|
||||||
|
|
||||||
|
// First srcset item without a descriptor
|
||||||
|
if i == 0 && (len(srcParts) == 1 || !strings.HasSuffix(srcParts[1], "x")) {
|
||||||
|
srcSetBuilder.WriteString(urlPart)
|
||||||
|
} else {
|
||||||
|
srcSetBuilder.WriteString(fmt.Sprintf("%s %s", urlPart, srcParts[1]))
|
||||||
|
}
|
||||||
|
|
||||||
|
if i < len(srcSetItems)-1 {
|
||||||
|
srcSetBuilder.WriteString(",") // Add comma for all but last item
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
attr.Val = srcSetBuilder.String()
|
||||||
log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// only for srcset
|
||||||
|
func processURLPart(urlPart string, baseURL *url.URL) string {
|
||||||
|
f := &html.Attribute{Val: urlPart, Key: "src"}
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case strings.HasPrefix(urlPart, "//"):
|
||||||
|
handleProtocolRelativePath(f, baseURL)
|
||||||
|
case strings.HasPrefix(urlPart, "/"):
|
||||||
|
handleRootRelativePath(f, baseURL)
|
||||||
|
case strings.HasPrefix(urlPart, "https://"), strings.HasPrefix(urlPart, "http://"):
|
||||||
|
handleAbsolutePath(f, baseURL)
|
||||||
|
default:
|
||||||
|
handleDocumentRelativePath(f, baseURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f.Val
|
||||||
|
}
|
||||||
|
|
||||||
func isBlackedlistedScheme(url string) bool {
|
func isBlackedlistedScheme(url string) bool {
|
||||||
spl := strings.Split(url, ":")
|
spl := strings.Split(url, ":")
|
||||||
if len(spl) == 0 {
|
if len(spl) == 0 {
|
||||||
|
|||||||
@@ -20,21 +20,28 @@
|
|||||||
if (!url) return url
|
if (!url) return url
|
||||||
let isStr = (typeof url.startsWith === 'function')
|
let isStr = (typeof url.startsWith === 'function')
|
||||||
if (!isStr) return url
|
if (!isStr) return url
|
||||||
// don't rewrite invalid URIs
|
|
||||||
try { new URL(url) } catch { return url }
|
|
||||||
|
|
||||||
// don't rewrite special URIs
|
// don't rewrite special URIs
|
||||||
if (blacklistedSchemes.includes(url)) return url;
|
if (blacklistedSchemes.includes(url)) return url;
|
||||||
|
|
||||||
// don't double rewrite
|
|
||||||
//const proxyOrigin = globalThis.window.location.origin;
|
//const proxyOrigin = globalThis.window.location.origin;
|
||||||
const proxyOrigin = `PROXY_ORIGIN_INJECT_FROM_GOLANG`;
|
const proxyOrigin = "R_PROXYURL";
|
||||||
|
//const origin = (new URL(decodeURIComponent(globalThis.window.location.pathname.substring(1)))).origin
|
||||||
|
const origin = "R_BASEURL";
|
||||||
|
|
||||||
|
// don't rewrite invalid URIs
|
||||||
|
try { new URL(url, origin) } catch { return url }
|
||||||
|
|
||||||
|
// don't double rewrite
|
||||||
if (url.startsWith(proxyOrigin)) return url;
|
if (url.startsWith(proxyOrigin)) return url;
|
||||||
if (url.startsWith(`/${proxyOrigin}`)) return url;
|
if (url.startsWith(`/${proxyOrigin}`)) return url;
|
||||||
if (url.startsWith(`/${origin}`)) return url;
|
if (url.startsWith(`/${origin}`)) return url;
|
||||||
|
if (url.startsWith(`/http://`)) return url;
|
||||||
|
if (url.startsWith(`/https://`)) return url;
|
||||||
|
if (url.startsWith(`/http%3A%2F%2F`)) return url;
|
||||||
|
if (url.startsWith(`/https%3A%2F%2F`)) return url;
|
||||||
|
if (url.startsWith(`/%2Fhttp`)) return url;
|
||||||
|
|
||||||
//const origin = (new URL(decodeURIComponent(globalThis.window.location.pathname.substring(1)))).origin
|
|
||||||
const origin = `ORIGIN_INJECT_FROM_GOLANG`;
|
|
||||||
//console.log(`proxychain: origin: ${origin} // proxyOrigin: ${proxyOrigin} // original: ${oldUrl}`)
|
//console.log(`proxychain: origin: ${origin} // proxyOrigin: ${proxyOrigin} // original: ${oldUrl}`)
|
||||||
|
|
||||||
if (url.startsWith("//")) {
|
if (url.startsWith("//")) {
|
||||||
@@ -50,27 +57,62 @@
|
|||||||
return url;
|
return url;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Sometimes anti-bot protections like Cloudflare or Akamai Bot Manager check
// whether JS built-ins have been hooked. hideMonkeyPatch wraps an already
// patched function once more and makes the wrapper report `originalToString`
// from its toString().
//
// objectOrName     - either the object that owns the function, or the name of
//                    a global; when the name refers to a global function and
//                    equals `method`, the global itself is wrapped.
// method           - property name of the function to wrap.
// originalToString - the string the wrapper's toString() should return.
//
// Logs a warning and changes nothing when no function is found.
function hideMonkeyPatch(objectOrName, method, originalToString) {
  let owner = objectOrName;
  let key = method;

  if (typeof objectOrName === 'string') {
    const resolved = globalThis[objectOrName];
    if (typeof resolved === 'function' && method === objectOrName) {
      // The global itself is the function: replace it on globalThis.
      owner = globalThis;
      key = objectOrName;
    } else {
      // Otherwise the named global is treated as the owning object.
      owner = resolved;
    }
  }

  if (!owner || typeof owner[key] !== 'function') {
    console.warn(`proxychain: cannot hide monkey patch: ${method} is not a function on the provided object.`);
    return;
  }

  const wrapped = owner[key];
  const wrapper = function(...args) {
    return wrapped.apply(this, args);
  };

  // Carry over name and length so the wrapper does not stand out from the
  // function it replaces.
  for (const prop of ['name', 'length']) {
    const descriptor = Object.getOwnPropertyDescriptor(wrapped, prop);
    if (descriptor) Object.defineProperty(wrapper, prop, descriptor);
  }

  // Non-enumerable, so the override does not show up in Object.keys(fn).
  Object.defineProperty(wrapper, 'toString', {
    value: () => originalToString,
    writable: true,
    configurable: true,
    enumerable: false,
  });

  owner[key] = wrapper;
}
|
||||||
|
|
||||||
// monkey patch fetch
// Keep a reference to the current fetch, then replace the global with a
// wrapper that passes its first argument through rewriteURL before delegating.
// The global is addressed through globalThis explicitly, which is the same
// property hideMonkeyPatch('fetch', ...) reads and replaces.
const oldFetch = globalThis.fetch;
globalThis.fetch = async (url, init) => {
  return oldFetch(rewriteURL(url), init)
}
hideMonkeyPatch('fetch', 'fetch', 'function fetch() { [native code] }')
|
||||||
|
|
||||||
// monkey patch xmlhttprequest
// open(): the request URL is passed through rewriteURL before the original
// open() is called; all other arguments are forwarded as given.
const oldOpen = XMLHttpRequest.prototype.open;
XMLHttpRequest.prototype.open = function(method, url, async = true, user = null, password = null) {
  return oldOpen.call(this, method, rewriteURL(url), async, user, password);
};
hideMonkeyPatch(XMLHttpRequest.prototype, 'open', 'function(){if("function"==typeof eo)return eo.apply(this,arguments)}');

// send(): takes only the request body, so there is no URL to rewrite here.
// The body is forwarded untouched.
const oldSend = XMLHttpRequest.prototype.send;
XMLHttpRequest.prototype.send = function(body) {
  return oldSend.call(this, body);
};
hideMonkeyPatch(XMLHttpRequest.prototype, 'send', 'function(){if("function"==typeof eo)return eo.apply(this,arguments)}');
|
||||||
|
|
||||||
|
|
||||||
// monkey patch service worker registration
// ServiceWorkerContainer is not defined in every context (it is a
// secure-context-only interface), so referencing it unguarded would throw a
// ReferenceError and abort the rest of this script. Only patch when present.
const oldRegister = (typeof ServiceWorkerContainer !== 'undefined')
  ? ServiceWorkerContainer.prototype.register
  : undefined;
if (typeof oldRegister === 'function') {
  ServiceWorkerContainer.prototype.register = function(scriptURL, options) {
    return oldRegister.call(this, rewriteURL(scriptURL), options)
  }
  hideMonkeyPatch(ServiceWorkerContainer.prototype, 'register', 'function register() { [native code] }')
}
|
||||||
|
|
||||||
// monkey patch URL.toString() method
|
// monkey patch URL.toString() method
|
||||||
const oldToString = URL.prototype.toString
|
const oldToString = URL.prototype.toString
|
||||||
@@ -78,6 +120,7 @@
|
|||||||
let originalURL = oldToString.call(this)
|
let originalURL = oldToString.call(this)
|
||||||
return rewriteURL(originalURL)
|
return rewriteURL(originalURL)
|
||||||
}
|
}
|
||||||
|
hideMonkeyPatch(URL.prototype, 'toString', 'function toString() { [native code] }')
|
||||||
|
|
||||||
// monkey patch URL.toJSON() method
|
// monkey patch URL.toJSON() method
|
||||||
const oldToJson = URL.prototype.toString
|
const oldToJson = URL.prototype.toString
|
||||||
@@ -85,6 +128,7 @@
|
|||||||
let originalURL = oldToJson.call(this)
|
let originalURL = oldToJson.call(this)
|
||||||
return rewriteURL(originalURL)
|
return rewriteURL(originalURL)
|
||||||
}
|
}
|
||||||
|
hideMonkeyPatch(URL.prototype, 'toString', 'function toJSON() { [native code] }')
|
||||||
|
|
||||||
// Monkey patch URL.href getter and setter
|
// Monkey patch URL.href getter and setter
|
||||||
const originalHrefDescriptor = Object.getOwnPropertyDescriptor(URL.prototype, 'href');
|
const originalHrefDescriptor = Object.getOwnPropertyDescriptor(URL.prototype, 'href');
|
||||||
@@ -98,6 +142,9 @@
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// TODO: do one more pass of this by manually traversing the DOM
|
||||||
|
// AFTER all the JS and page has loaded just in case
|
||||||
|
|
||||||
// Monkey patch setter
|
// Monkey patch setter
|
||||||
const elements = [
|
const elements = [
|
||||||
{ tag: 'a', attribute: 'href' },
|
{ tag: 'a', attribute: 'href' },
|
||||||
@@ -166,4 +213,75 @@
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
|
// Sometimes libraries set Element.innerHTML or Element.outerHTML directly with
// a string instead of going through attribute setters. In that case we
// intercept the string, parse it into a detached container, rewrite every
// attribute that could contain a URL, and return the serialized result.
//
// html     - the markup string being assigned.
// elements - list of { tag, attribute } pairs naming URL-bearing attributes.
//
// Returns the rewritten markup, or `html` unchanged when a rewrite is already
// in progress (tracked by a flag stored on `document`).
function rewriteInnerHTML(html, elements) {
  const isRewritingHTMLKey = Symbol.for('isRewritingHTML');

  // Check if already processing: assigning tempContainer.innerHTML below
  // re-enters the patched setter, which would otherwise recurse into here.
  if (document[isRewritingHTMLKey]) {
    return html;
  }

  const tempContainer = document.createElement('div');
  document[isRewritingHTMLKey] = true;

  try {
    tempContainer.innerHTML = html;

    // Group attributes by tag. A plain tag -> attribute Map would keep only
    // the last entry when the same tag is listed with several attributes.
    const attributesByTag = new Map();
    for (const { tag, attribute } of elements) {
      const known = attributesByTag.get(tag);
      if (known) {
        known.push(attribute);
      } else {
        attributesByTag.set(tag, [attribute]);
      }
    }

    // Loop-based DOM traversal
    const nodes = [...tempContainer.querySelectorAll('*')];
    for (const node of nodes) {
      const attributes = attributesByTag.get(node.tagName.toLowerCase());
      if (!attributes) continue;

      for (const attribute of attributes) {
        if (node.hasAttribute(attribute)) {
          const originalUrl = node.getAttribute(attribute);
          node.setAttribute(attribute, rewriteURL(originalUrl));
        }
      }
    }

    return tempContainer.innerHTML;
  } finally {
    // Clear the flag
    document[isRewritingHTMLKey] = false;
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
// Store original setters
const originalSetters = {};

// Replace the innerHTML / outerHTML setters on Element.prototype so assigned
// markup is passed through rewriteInnerHTML first. A per-element flag (a
// Symbol.for key) marks an assignment in progress; a nested assignment on the
// same element goes straight to the original setter.
for (const property of ['innerHTML', 'outerHTML']) {
  const descriptor = Object.getOwnPropertyDescriptor(Element.prototype, property);
  if (!descriptor || !descriptor.set) continue;

  originalSetters[property] = descriptor.set;

  Object.defineProperty(Element.prototype, property, {
    ...descriptor,
    set(value) {
      const guardKey = Symbol.for('isRewritingHTML');

      if (this[guardKey]) {
        // Use original setter in recursive call
        originalSetters[property].call(this, value);
        return;
      }

      this[guardKey] = true;
      try {
        // Use custom logic
        descriptor.set.call(this, rewriteInnerHTML(value, elements));
      } finally {
        this[guardKey] = false;
      }
    }
  });
}
|
||||||
|
|
||||||
|
|
||||||
})();
|
})();
|
||||||
Reference in New Issue
Block a user