handle srcset resource URL rewrites; monkey patch JS for URL rewrites

This commit is contained in:
Kevin Pham
2023-11-20 23:42:50 -06:00
parent 6222476684
commit 79a229f28c
4 changed files with 195 additions and 82 deletions

View File

@@ -56,6 +56,7 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
SetDebugLogging(opts.Verbose).
SetRequestModifications(
rx.DeleteOutgoingCookies(),
//rx.RequestArchiveIs(),
).
AddResponseModifications(
tx.DeleteIncomingCookies(),

View File

@@ -26,6 +26,7 @@ func masqueradeAsTrustedBot(botUA string, botIP string) proxychain.RequestModifi
SpoofUserAgent(botUA),
SpoofXForwardedFor(botIP),
SpoofReferrer(""),
SpoofOrigin(""),
)
return nil
}

View File

@@ -0,0 +1,62 @@
// Overrides the global fetch and XMLHttpRequest open methods to modify the request URLs.
// Also overrides the attribute setter prototype to modify the request URLs
// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
(() => {
function rewriteURL(url) {
if (!url) return url
if (url.startsWith(window.location.origin)) return url
if (url.startsWith("//")) {
url = `${window.location.origin}/${encodeURIComponent(url.substring(2))}`;
} else if (url.startsWith("/")) {
url = `${window.location.origin}/${encodeURIComponent(url.substring(1))}`;
} else if (url.startsWith("http://") || url.startsWith("https://")) {
url = `${window.location.origin}/${encodeURIComponent(url)}`;
}
return url;
};
// monkey patch fetch
const oldFetch = globalThis.fetch ;
globalThis.fetch = async (url, init) => {
return oldFetch(rewriteURL(url), init)
}
// monkey patch xmlhttprequest
const oldOpen = XMLHttpRequest.prototype.open;
XMLHttpRequest.prototype.open = function(method, url, async = true, user = null, password = null) {
return oldOpen.call(this, method, rewriteURL(url), async, user, password);
};
// Monkey patch setter methods
const elements = [
{ tag: 'a', attribute: 'href' },
{ tag: 'img', attribute: 'src' },
{ tag: 'script', attribute: 'src' },
{ tag: 'link', attribute: 'href' },
{ tag: 'iframe', attribute: 'src' },
{ tag: 'audio', attribute: 'src' },
{ tag: 'video', attribute: 'src' },
{ tag: 'source', attribute: 'src' },
{ tag: 'embed', attribute: 'src' },
{ tag: 'object', attribute: 'src' },
{ tag: 'input', attribute: 'src' },
{ tag: 'track', attribute: 'src' },
{ tag: 'form', attribute: 'action' },
];
elements.forEach(({ tag, attribute }) => {
const proto = document.createElement(tag).constructor.prototype;
const descriptor = Object.getOwnPropertyDescriptor(proto, attribute);
if (descriptor && descriptor.set) {
Object.defineProperty(proto, attribute, {
...descriptor,
set(value) {
return descriptor.set.call(this, rewriteURL(value));
}
});
}
});
})();

View File

@@ -5,7 +5,6 @@ import (
"fmt"
"io"
"ladder/proxychain"
"log"
"net/url"
"strings"
@@ -17,24 +16,22 @@ var AttributesToRewrite map[string]bool
func init() {
AttributesToRewrite = map[string]bool{
"src": true,
"href": true,
/*
"action": true,
"srcset": true,
"poster": true,
"data": true,
"cite": true,
"formaction": true,
"background": true,
"usemap": true,
"longdesc": true,
"manifest": true,
"archive": true,
"codebase": true,
"icon": true,
"pluginspage": true,
*/
"src": true,
"href": true,
"action": true,
"srcset": true, // TODO: fix
"poster": true,
"data": true,
"cite": true,
"formaction": true,
"background": true,
"usemap": true,
"longdesc": true,
"manifest": true,
"archive": true,
"codebase": true,
"icon": true,
"pluginspage": true,
}
}
@@ -42,7 +39,7 @@ func init() {
// It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes.
// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
type HTMLResourceURLRewriter struct {
baseURL string // eg: https://proxiedsite.com (note, no trailing '/')
baseURL *url.URL
tokenizer *html.Tokenizer
currentToken html.Token
tokenBuffer *bytes.Buffer
@@ -52,7 +49,7 @@ type HTMLResourceURLRewriter struct {
// NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter.
// It initializes the tokenizer with the provided source and sets the proxy URL.
func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL string) *HTMLResourceURLRewriter {
func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL) *HTMLResourceURLRewriter {
return &HTMLResourceURLRewriter{
tokenizer: html.NewTokenizer(src),
currentToken: html.Token{},
@@ -110,64 +107,124 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
return n, err
}
func patchResourceURL(token *html.Token, baseURL string) {
// Root-relative URLs: These are relative to the root path and start with a "/".
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
// doublecheck this is a valid relative URL
_, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
if err != nil {
return
}
//log.Printf("BASEURL patch: %s\n", baseURL)
attr.Val = fmt.Sprintf(
"/%s://%s/%s",
baseURL.Scheme,
baseURL.Host,
strings.TrimPrefix(attr.Val, "/"),
)
attr.Val = url.QueryEscape(attr.Val)
attr.Val = fmt.Sprintf("/%s", attr.Val)
//log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
attr.Val = fmt.Sprintf(
"%s://%s/%s%s",
baseURL.Scheme,
strings.Trim(baseURL.Host, "/"),
strings.Trim(baseURL.RawPath, "/"),
strings.Trim(attr.Val, "/"),
)
attr.Val = url.QueryEscape(attr.Val)
attr.Val = fmt.Sprintf("/%s", attr.Val)
//log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
attr.Val = strings.TrimPrefix(attr.Val, "/")
handleRootRelativePath(attr, baseURL)
//log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
// check if valid URL
u, err := url.Parse(attr.Val)
if err != nil {
return
}
if !(u.Scheme == "http" || u.Scheme == "https") {
return
}
attr.Val = fmt.Sprintf(
"/%s",
url.QueryEscape(
strings.TrimPrefix(attr.Val, "/"),
),
)
//log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
for i, src := range strings.Split(attr.Val, ",") {
src = strings.Trim(src, " ")
for j, s := range strings.Split(src, " ") {
s = strings.Trim(s, " ")
if j == 0 {
f := &html.Attribute{Val: s, Key: attr.Key}
switch {
case strings.HasPrefix(s, "//"):
handleProtocolRelativePath(f, baseURL)
case strings.HasPrefix(s, "/"):
handleRootRelativePath(f, baseURL)
case strings.HasPrefix(s, "https://") || strings.HasPrefix(s, "http://"):
handleAbsolutePath(f, baseURL)
default:
handleDocumentRelativePath(f, baseURL)
}
s = f.Val
}
if i == 0 && j == 0 {
attr.Val = s
continue
}
attr.Val = fmt.Sprintf("%s %s", attr.Val, s)
}
attr.Val = fmt.Sprintf("%s,", attr.Val)
}
attr.Val = strings.TrimSuffix(attr.Val, ",")
}
// TODO: figure out how to handle these
// srcset
func patchResourceURL(token *html.Token, baseURL *url.URL) {
for i := range token.Attr {
attr := &token.Attr[i]
switch {
// dont touch attributes except for the ones we defined
_, exists := AttributesToRewrite[attr.Key]
if !exists {
case !AttributesToRewrite[attr.Key]:
continue
case attr.Key == "srcset":
handleSrcSet(attr, baseURL)
continue
case strings.HasPrefix(attr.Val, "//"):
handleProtocolRelativePath(attr, baseURL)
continue
case strings.HasPrefix(attr.Val, "/"):
handleRootRelativePath(attr, baseURL)
continue
case strings.HasPrefix(attr.Val, "https://") || strings.HasPrefix(attr.Val, "http://"):
handleAbsolutePath(attr, baseURL)
continue
default:
handleDocumentRelativePath(attr, baseURL)
continue
}
isRelativePath := strings.HasPrefix(attr.Val, "/")
//log.Printf("PRE '%s'='%s'", attr.Key, attr.Val)
// double check if attribute is valid http URL before modifying
if isRelativePath {
_, err := url.Parse(fmt.Sprintf("http://localhost%s", attr.Val))
if err != nil {
return
}
} else {
u, err := url.Parse(attr.Val)
if err != nil {
return
}
if !(u.Scheme == "http" || u.Scheme == "https") {
return
}
}
// patch relative paths
// <img src="/favicon.png"> -> <img src="/http://images.cdn.proxiedsite.com/favicon.png">
if isRelativePath {
log.Printf("BASEURL patch: %s\n", baseURL)
attr.Val = fmt.Sprintf(
"/%s/%s",
baseURL,
//url.QueryEscape(
strings.TrimPrefix(attr.Val, "/"),
//),
)
log.Printf("url rewritten-> '%s'='%s'", attr.Key, attr.Val)
continue
}
// patch absolute paths to relative path pointing to ladder proxy
// <img src="http://images.cdn.proxiedsite.com/favicon.png"> -> <img src="/http://images.cdn.proxiedsite.com/favicon.png">
//log.Printf("abolute patch: %s\n", attr.Val)
attr.Val = fmt.Sprintf(
"/%s",
//url.QueryEscape(attr.Val),
//url.QueryEscape(
strings.TrimPrefix(attr.Val, "/"),
//),
//attr.Val,
)
log.Printf("url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
}
@@ -199,15 +256,7 @@ func RewriteHTMLResourceURLs() proxychain.ResponseModification {
return nil
}
// should be site being requested to proxy
baseUrl := fmt.Sprintf("%s://%s", chain.Request.URL.Scheme, chain.Request.URL.Host)
/*
log.Println("--------------------")
log.Println(baseUrl)
log.Println("--------------------")
*/
chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, baseUrl)
chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, chain.Request.URL)
return nil
}
}