url rewriter tweaks

This commit is contained in:
Kevin Pham
2023-11-21 14:10:37 -06:00
parent 543192afbe
commit dab77d786f
6 changed files with 178 additions and 23 deletions

View File

@@ -53,16 +53,18 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
proxychain := proxychain. proxychain := proxychain.
NewProxyChain(). NewProxyChain().
SetFiberCtx(c).
SetDebugLogging(opts.Verbose). SetDebugLogging(opts.Verbose).
SetRequestModifications( SetRequestModifications(
rx.DeleteOutgoingCookies(), rx.DeleteOutgoingCookies(),
//rx.RequestArchiveIs(),
). ).
AddResponseModifications( AddResponseModifications(
tx.DeleteIncomingCookies(), tx.DeleteIncomingCookies(),
tx.RewriteHTMLResourceURLs(), tx.RewriteHTMLResourceURLs(),
) ).
return proxychain.SetFiberCtx(c).Execute() Execute()
return proxychain
} }
} }

View File

@@ -251,7 +251,7 @@ func reconstructUrlFromReferer(referer *url.URL, relativeUrl *url.URL) (*url.URL
return nil, fmt.Errorf("invalid referer URL: '%s' on request '%s", referer, relativeUrl) return nil, fmt.Errorf("invalid referer URL: '%s' on request '%s", referer, relativeUrl)
} }
log.Printf("'%s' -> '%s'\n", relativeUrl.String(), realUrl.String()) log.Printf("rewrite relative URL using referer: '%s' -> '%s'\n", relativeUrl.String(), realUrl.String())
return &url.URL{ return &url.URL{
Scheme: referer.Scheme, Scheme: referer.Scheme,
@@ -264,10 +264,19 @@ func reconstructUrlFromReferer(referer *url.URL, relativeUrl *url.URL) (*url.URL
// extractUrl extracts a URL from the request ctx. If the URL in the request // extractUrl extracts a URL from the request ctx. If the URL in the request
// is a relative path, it reconstructs the full URL using the referer header. // is a relative path, it reconstructs the full URL using the referer header.
func (chain *ProxyChain) extractUrl() (*url.URL, error) { func (chain *ProxyChain) extractUrl() (*url.URL, error) {
// try to extract url-encoded reqUrl := chain.Context.Params("*")
reqUrl, err := url.QueryUnescape(chain.Context.Params("*"))
if err != nil { // sometimes client requests doubleroot '//'
reqUrl = chain.Context.Params("*") // fallback // there is a bug somewhere else, but this is a workaround until we find it
if strings.HasPrefix(reqUrl, "/") || strings.HasPrefix(reqUrl, `%2F`) {
reqUrl = strings.TrimPrefix(reqUrl, "/")
reqUrl = strings.TrimPrefix(reqUrl, `%2F`)
}
// unescape url query
uReqUrl, err := url.QueryUnescape(reqUrl)
if err == nil {
reqUrl = uReqUrl
} }
urlQuery, err := url.Parse(reqUrl) urlQuery, err := url.Parse(reqUrl)

View File

@@ -36,7 +36,7 @@ func resolveWithGoogleDoH(host string) (string, error) {
return "", fmt.Errorf("no DoH DNS record found for %s", host) return "", fmt.Errorf("no DoH DNS record found for %s", host)
} }
// ResolveWithGoogleDoH modifies a ProxyChain's client to make the request but resolve the URL // ResolveWithGoogleDoH modifies a ProxyChain's client to make the request by resolving the URL
// using Google's DNS over HTTPs service // using Google's DNS over HTTPs service
func ResolveWithGoogleDoH() proxychain.RequestModification { func ResolveWithGoogleDoH() proxychain.RequestModification {
return func(px *proxychain.ProxyChain) error { return func(px *proxychain.ProxyChain) error {

View File

@@ -13,11 +13,12 @@ import (
"golang.org/x/net/html" "golang.org/x/net/html"
) )
// Define list of HTML attributes to try to rewrite var attributesToRewrite map[string]bool
var AttributesToRewrite map[string]bool var schemeBlacklist map[string]bool
func init() { func init() {
AttributesToRewrite = map[string]bool{ // Define list of HTML attributes to try to rewrite
attributesToRewrite = map[string]bool{
"src": true, "src": true,
"href": true, "href": true,
"action": true, "action": true,
@@ -35,6 +36,23 @@ func init() {
"icon": true, "icon": true,
"pluginspage": true, "pluginspage": true,
} }
// define URIs to NOT rewrite
// for example: don't overwrite <img src="data:image/png;base64,iVBORw...">
schemeBlacklist = map[string]bool {
"data": true,
"tel": true,
"mailto": true,
"file": true,
"blob": true,
"javascript": true,
"about": true,
"magnet": true,
"ws": true,
"wss": true,
"ftp": true,
}
} }
// HTMLResourceURLRewriter is a struct that rewrites URLs within HTML resources to use a specified proxy URL. // HTMLResourceURLRewriter is a struct that rewrites URLs within HTML resources to use a specified proxy URL.
@@ -222,17 +240,54 @@ func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val) log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
} }
// isBlackedlistedScheme reports whether url begins with a URI scheme that is
// listed in schemeBlacklist (e.g. "data:", "mailto:", "javascript:").
//
// Only the text before the first ':' is treated as the scheme. A value that
// contains no ':' at all (such as the relative link "about" or "file") has no
// scheme and is never considered blacklisted. The scheme is compared
// case-insensitively and surrounding whitespace is ignored, so values such as
// " JavaScript:void(0)" are matched as well.
func isBlackedlistedScheme(url string) bool {
	// No colon (or a leading colon) means there is no scheme component.
	colon := strings.IndexByte(url, ':')
	if colon <= 0 {
		return false
	}
	// URI schemes are case-insensitive; the blacklist keys are lowercase.
	scheme := strings.ToLower(strings.TrimSpace(url[:colon]))
	return schemeBlacklist[scheme]
}
func patchResourceURL(token *html.Token, baseURL *url.URL, proxyURL string) { func patchResourceURL(token *html.Token, baseURL *url.URL, proxyURL string) {
for i := range token.Attr { for i := range token.Attr {
attr := &token.Attr[i] attr := &token.Attr[i]
switch { switch {
// don't touch attributes except for the ones we defined // don't touch attributes except for the ones we defined
case !AttributesToRewrite[attr.Key]: case !attributesToRewrite[attr.Key]:
continue
// don't rewrite special URIs that don't make network requests
case isBlackedlistedScheme(attr.Val):
continue continue
// don't double-overwrite the url // don't double-overwrite the url
case strings.HasPrefix(attr.Val, proxyURL): case strings.HasPrefix(attr.Val, proxyURL):
continue continue
// don't overwrite special URIs
case strings.HasPrefix(attr.Val, "data:"):
continue
case strings.HasPrefix(attr.Val, "ftp:"):
continue
case strings.HasPrefix(attr.Val, "tel:"):
continue
case strings.HasPrefix(attr.Val, "javascript:"):
continue
case strings.HasPrefix(attr.Val, "file:"):
continue
case strings.HasPrefix(attr.Val, "ftp:"):
continue
case strings.HasPrefix(attr.Val, "mailto:"):
continue
case strings.HasPrefix(attr.Val, "blob:"):
continue
case strings.HasPrefix(attr.Val, "about:"):
continue
case strings.HasPrefix(attr.Val, "magnet:"):
continue
case strings.HasPrefix(attr.Val, "ws:"):
continue
case strings.HasPrefix(attr.Val, "wss:"):
continue
case attr.Key == "srcset": case attr.Key == "srcset":
handleSrcSet(attr, baseURL) handleSrcSet(attr, baseURL)
continue continue

View File

@@ -2,22 +2,47 @@
// Also overrides the attribute setter prototype to modify the request URLs // Also overrides the attribute setter prototype to modify the request URLs
// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js") // fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
(() => { (() => {
// URI scheme prefixes (including the trailing ':') that must not be rewritten
// to go through the proxy. Kept in sync with the server-side scheme blacklist,
// which also lists "data".
const blacklistedSchemes = [
  "data:",
  "ftp:",
  "mailto:",
  "tel:",
  "file:",
  "blob:",
  "javascript:",
  "about:",
  "magnet:",
  "ws:",
  "wss:",
];
function rewriteURL(url) { function rewriteURL(url) {
oldUrl = url const oldUrl = url
if (!url) return url if (!url) return url
// don't rewrite invalid URIs
try { new URL(url) } catch { return url }
proxyOrigin = globalThis.window.location.origin // don't rewrite special URIs
if (url.startsWith(proxyOrigin)) return url if (blacklistedSchemes.includes(url)) return url;
// don't double rewrite
const proxyOrigin = globalThis.window.location.origin;
if (url.startsWith(proxyOrigin)) return url;
if (url.startsWith(`/${proxyOrigin}`)) return url;
if (url.startsWith(`/${origin}`)) return url;
const origin = (new URL(decodeURIComponent(globalThis.window.location.pathname.substring(1)))).origin
//console.log(`proxychain: origin: ${origin} // proxyOrigin: ${proxyOrigin} // original: ${oldUrl}`)
const origin = (new URL(decodeURI(globalThis.window.location.pathname.substring(1)))).origin
if (url.startsWith("//")) { if (url.startsWith("//")) {
url = `/${origin}/${encodeURIComponent(url.substring(2))}`; url = `/${origin}/${encodeURIComponent(url.substring(2))}`;
} else if (url.startsWith("/")) { } else if (url.startsWith("/")) {
url = `/${origin}/${encodeURIComponent(url.substring(1))}`; url = `/${origin}/${encodeURIComponent(url.substring(1))}`;
} else if (url.startsWith(origin)) {
url = `/${encodeURIComponent(url)}`
} else if (url.startsWith("http://") || url.startsWith("https://")) { } else if (url.startsWith("http://") || url.startsWith("https://")) {
url = `/${origin}/${encodeURIComponent(url)}`; url = `/${proxyOrigin}/${encodeURIComponent(url)}`;
} }
console.log(`rewrite JS URL: ${oldUrl} -> ${url}`) console.log(`proxychain: rewrite JS URL: ${oldUrl} -> ${url}`)
return url; return url;
}; };
@@ -43,21 +68,62 @@
return oldRegister.call(this, rewriteURL(scriptURL), options) return oldRegister.call(this, rewriteURL(scriptURL), options)
} }
// Monkey patch setter methods // monkey patch URL.toString() method
// Wrap URL.prototype.toString so the serialized URL is passed through rewriteURL.
const oldToString = URL.prototype.toString
URL.prototype.toString = function() {
  let originalURL = oldToString.call(this)
  return rewriteURL(originalURL)
}

// monkey patch URL.toJSON() method
// (wraps toJSON itself; re-wrapping toString here would leave toJSON
// unpatched and apply rewriteURL twice on toString)
const oldToJson = URL.prototype.toJSON
URL.prototype.toJSON = function() {
  let originalURL = oldToJson.call(this)
  return rewriteURL(originalURL)
}
// Replace the URL.prototype.href accessor with a wrapper around the native
// one: reads return the native href passed through rewriteURL, and writes
// pass the new value through rewriteURL before handing it to the native setter.
const originalHrefDescriptor = Object.getOwnPropertyDescriptor(URL.prototype, 'href');
Object.defineProperty(URL.prototype, 'href', {
  get() {
    return rewriteURL(originalHrefDescriptor.get.call(this));
  },
  set(newValue) {
    const rewritten = rewriteURL(newValue);
    originalHrefDescriptor.set.call(this, rewritten);
  },
});
// Monkey patch setter
const elements = [ const elements = [
{ tag: 'a', attribute: 'href' }, { tag: 'a', attribute: 'href' },
{ tag: 'img', attribute: 'src' }, { tag: 'img', attribute: 'src' },
// { tag: 'img', attribute: 'srcset' }, // TODO: handle srcset
{ tag: 'script', attribute: 'src' }, { tag: 'script', attribute: 'src' },
{ tag: 'link', attribute: 'href' }, { tag: 'link', attribute: 'href' },
{ tag: 'link', attribute: 'icon' },
{ tag: 'iframe', attribute: 'src' }, { tag: 'iframe', attribute: 'src' },
{ tag: 'audio', attribute: 'src' }, { tag: 'audio', attribute: 'src' },
{ tag: 'video', attribute: 'src' }, { tag: 'video', attribute: 'src' },
{ tag: 'source', attribute: 'src' }, { tag: 'source', attribute: 'src' },
// { tag: 'source', attribute: 'srcset' }, // TODO: handle srcset
{ tag: 'embed', attribute: 'src' }, { tag: 'embed', attribute: 'src' },
{ tag: 'embed', attribute: 'pluginspage' },
{ tag: 'html', attribute: 'manifest' },
{ tag: 'object', attribute: 'src' }, { tag: 'object', attribute: 'src' },
{ tag: 'input', attribute: 'src' }, { tag: 'input', attribute: 'src' },
{ tag: 'track', attribute: 'src' }, { tag: 'track', attribute: 'src' },
{ tag: 'form', attribute: 'action' }, { tag: 'form', attribute: 'action' },
{ tag: 'area', attribute: 'href' },
{ tag: 'base', attribute: 'href' },
{ tag: 'blockquote', attribute: 'cite' },
{ tag: 'del', attribute: 'cite' },
{ tag: 'ins', attribute: 'cite' },
{ tag: 'q', attribute: 'cite' },
{ tag: 'button', attribute: 'formaction' },
{ tag: 'input', attribute: 'formaction' },
{ tag: 'meta', attribute: 'content' },
{ tag: 'object', attribute: 'data' },
]; ];
elements.forEach(({ tag, attribute }) => { elements.forEach(({ tag, attribute }) => {
@@ -67,10 +133,33 @@
Object.defineProperty(proto, attribute, { Object.defineProperty(proto, attribute, {
...descriptor, ...descriptor,
set(value) { set(value) {
return descriptor.set.call(this, rewriteURL(value)); // calling rewriteURL will end up calling a setter for href,
// leading to a recursive loop and a Maximum call stack size exceeded
// error, so we guard against this with a local semaphore flag
const isRewritingSetKey = Symbol.for('isRewritingSet');
if (!this[isRewritingSetKey]) {
this[isRewritingSetKey] = true;
descriptor.set.call(this, rewriteURL(value));
//descriptor.set.call(this, value);
this[isRewritingSetKey] = false;
} else {
// Directly set the value without rewriting
descriptor.set.call(this, value);
}
},
get() {
const isRewritingGetKey = Symbol.for('isRewritingGet');
if (!this[isRewritingGetKey]) {
this[isRewritingGetKey] = true;
let oldURL = descriptor.get.call(this);
let newURL = rewriteURL(oldURL);
this[isRewritingGetKey] = false;
return newURL
} else {
return descriptor.get.call(this);
}
} }
}); });
} }
}); });
})(); })();