handle srcset resource URL rewrites; monkey patch JS for URL rewrites

This commit is contained in:
Kevin Pham
2023-11-20 23:42:50 -06:00
parent 6222476684
commit 79a229f28c
4 changed files with 195 additions and 82 deletions

View File

@@ -56,6 +56,7 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
SetDebugLogging(opts.Verbose). SetDebugLogging(opts.Verbose).
SetRequestModifications( SetRequestModifications(
rx.DeleteOutgoingCookies(), rx.DeleteOutgoingCookies(),
//rx.RequestArchiveIs(),
). ).
AddResponseModifications( AddResponseModifications(
tx.DeleteIncomingCookies(), tx.DeleteIncomingCookies(),

View File

@@ -26,6 +26,7 @@ func masqueradeAsTrustedBot(botUA string, botIP string) proxychain.RequestModifi
SpoofUserAgent(botUA), SpoofUserAgent(botUA),
SpoofXForwardedFor(botIP), SpoofXForwardedFor(botIP),
SpoofReferrer(""), SpoofReferrer(""),
SpoofOrigin(""),
) )
return nil return nil
} }

View File

@@ -0,0 +1,62 @@
// Overrides the global fetch and XMLHttpRequest open methods to modify the request URLs.
// Also overrides the URL-bearing property setters on element prototypes so that
// URLs assigned from script (e.g. img.src = "...") are rewritten the same way.
// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
(() => {
  // Rewrites a URL so that the request is sent to the current (proxy) origin.
  //   - falsy values and URLs already on the proxy origin are returned unchanged
  //   - "//host/path"        -> "<origin>/" + encodeURIComponent("host/path")
  //   - "/path"              -> "<origin>/" + encodeURIComponent("path")
  //   - "http(s)://host/..." -> "<origin>/" + encodeURIComponent(full URL)
  //   - anything else (document-relative paths, data:, blob:, ...) is unchanged
  // URL objects are converted to their string form first. Any other non-string
  // value (e.g. a Request passed to fetch) is returned untouched instead of
  // throwing, because only strings have startsWith().
  function rewriteURL(url) {
    if (!url) return url;
    if (url instanceof URL) url = url.href;
    if (typeof url !== "string") return url;

    const origin = window.location.origin;
    if (url.startsWith(origin)) return url;

    if (url.startsWith("//")) {
      return `${origin}/${encodeURIComponent(url.substring(2))}`;
    }
    if (url.startsWith("/")) {
      return `${origin}/${encodeURIComponent(url.substring(1))}`;
    }
    if (url.startsWith("http://") || url.startsWith("https://")) {
      return `${origin}/${encodeURIComponent(url)}`;
    }
    return url;
  }

  // monkey patch fetch
  const oldFetch = globalThis.fetch;
  globalThis.fetch = async (url, init) => {
    return oldFetch(rewriteURL(url), init);
  };

  // monkey patch xmlhttprequest
  // The remaining arguments are forwarded exactly as the caller supplied them,
  // so the native method sees the same argument count it would have seen
  // without the patch (open(method, url) vs open(method, url, async, ...)).
  const oldOpen = XMLHttpRequest.prototype.open;
  XMLHttpRequest.prototype.open = function (method, url, ...rest) {
    return oldOpen.call(this, method, rewriteURL(url), ...rest);
  };

  // Monkey patch setter methods
  const elements = [
    { tag: 'a', attribute: 'href' },
    { tag: 'img', attribute: 'src' },
    { tag: 'script', attribute: 'src' },
    { tag: 'link', attribute: 'href' },
    { tag: 'iframe', attribute: 'src' },
    { tag: 'audio', attribute: 'src' },
    { tag: 'video', attribute: 'src' },
    { tag: 'source', attribute: 'src' },
    { tag: 'embed', attribute: 'src' },
    // <object> carries its resource URL in "data"; it has no "src" property.
    { tag: 'object', attribute: 'data' },
    { tag: 'input', attribute: 'src' },
    { tag: 'track', attribute: 'src' },
    { tag: 'form', attribute: 'action' },
  ];

  // Remembers which (prototype, attribute) pairs are already wrapped, since
  // several tags can share the prototype that owns the accessor.
  const patched = new Map();

  elements.forEach(({ tag, attribute }) => {
    // The accessor is not always an own property of the element's direct
    // prototype (audio/video inherit "src" from a shared media prototype),
    // so walk up the chain until the owner of the descriptor is found.
    let proto = Object.getPrototypeOf(document.createElement(tag));
    let descriptor;
    while (proto) {
      descriptor = Object.getOwnPropertyDescriptor(proto, attribute);
      if (descriptor) break;
      proto = Object.getPrototypeOf(proto);
    }
    if (!proto || !descriptor || !descriptor.set) return;

    let attrs = patched.get(proto);
    if (!attrs) {
      attrs = new Set();
      patched.set(proto, attrs);
    }
    if (attrs.has(attribute)) return;
    attrs.add(attribute);

    Object.defineProperty(proto, attribute, {
      ...descriptor,
      set(value) {
        return descriptor.set.call(this, rewriteURL(value));
      }
    });
  });
})();

View File

@@ -5,7 +5,6 @@ import (
"fmt" "fmt"
"io" "io"
"ladder/proxychain" "ladder/proxychain"
"log"
"net/url" "net/url"
"strings" "strings"
@@ -19,9 +18,8 @@ func init() {
AttributesToRewrite = map[string]bool{ AttributesToRewrite = map[string]bool{
"src": true, "src": true,
"href": true, "href": true,
/*
"action": true, "action": true,
"srcset": true, "srcset": true, // TODO: fix
"poster": true, "poster": true,
"data": true, "data": true,
"cite": true, "cite": true,
@@ -34,7 +32,6 @@ func init() {
"codebase": true, "codebase": true,
"icon": true, "icon": true,
"pluginspage": true, "pluginspage": true,
*/
} }
} }
@@ -42,7 +39,7 @@ func init() {
// It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes. // It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes.
// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'> // <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
type HTMLResourceURLRewriter struct { type HTMLResourceURLRewriter struct {
baseURL string // eg: https://proxiedsite.com (note, no trailing '/') baseURL *url.URL
tokenizer *html.Tokenizer tokenizer *html.Tokenizer
currentToken html.Token currentToken html.Token
tokenBuffer *bytes.Buffer tokenBuffer *bytes.Buffer
@@ -52,7 +49,7 @@ type HTMLResourceURLRewriter struct {
// NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter. // NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter.
// It initializes the tokenizer with the provided source and sets the proxy URL. // It initializes the tokenizer with the provided source and sets the proxy URL.
func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL string) *HTMLResourceURLRewriter { func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL) *HTMLResourceURLRewriter {
return &HTMLResourceURLRewriter{ return &HTMLResourceURLRewriter{
tokenizer: html.NewTokenizer(src), tokenizer: html.NewTokenizer(src),
currentToken: html.Token{}, currentToken: html.Token{},
@@ -110,25 +107,51 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
return n, err return n, err
} }
func patchResourceURL(token *html.Token, baseURL string) { // Root-relative URLs: These are relative to the root path and start with a "/".
for i := range token.Attr { func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
attr := &token.Attr[i] // doublecheck this is a valid relative URL
// dont touch attributes except for the ones we defined _, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
_, exists := AttributesToRewrite[attr.Key]
if !exists {
continue
}
isRelativePath := strings.HasPrefix(attr.Val, "/")
//log.Printf("PRE '%s'='%s'", attr.Key, attr.Val)
// double check if attribute is valid http URL before modifying
if isRelativePath {
_, err := url.Parse(fmt.Sprintf("http://localhost%s", attr.Val))
if err != nil { if err != nil {
return return
} }
} else {
//log.Printf("BASEURL patch: %s\n", baseURL)
attr.Val = fmt.Sprintf(
"/%s://%s/%s",
baseURL.Scheme,
baseURL.Host,
strings.TrimPrefix(attr.Val, "/"),
)
attr.Val = url.QueryEscape(attr.Val)
attr.Val = fmt.Sprintf("/%s", attr.Val)
//log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
attr.Val = fmt.Sprintf(
"%s://%s/%s%s",
baseURL.Scheme,
strings.Trim(baseURL.Host, "/"),
strings.Trim(baseURL.RawPath, "/"),
strings.Trim(attr.Val, "/"),
)
attr.Val = url.QueryEscape(attr.Val)
attr.Val = fmt.Sprintf("/%s", attr.Val)
//log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
attr.Val = strings.TrimPrefix(attr.Val, "/")
handleRootRelativePath(attr, baseURL)
//log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
// check if valid URL
u, err := url.Parse(attr.Val) u, err := url.Parse(attr.Val)
if err != nil { if err != nil {
return return
@@ -136,38 +159,72 @@ func patchResourceURL(token *html.Token, baseURL string) {
if !(u.Scheme == "http" || u.Scheme == "https") { if !(u.Scheme == "http" || u.Scheme == "https") {
return return
} }
attr.Val = fmt.Sprintf(
"/%s",
url.QueryEscape(
strings.TrimPrefix(attr.Val, "/"),
),
)
//log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
} }
// patch relative paths func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
// <img src="/favicon.png"> -> <img src="/http://images.cdn.proxiedsite.com/favicon.png"> for i, src := range strings.Split(attr.Val, ",") {
if isRelativePath { src = strings.Trim(src, " ")
log.Printf("BASEURL patch: %s\n", baseURL) for j, s := range strings.Split(src, " ") {
s = strings.Trim(s, " ")
if j == 0 {
f := &html.Attribute{Val: s, Key: attr.Key}
switch {
case strings.HasPrefix(s, "//"):
handleProtocolRelativePath(f, baseURL)
case strings.HasPrefix(s, "/"):
handleRootRelativePath(f, baseURL)
case strings.HasPrefix(s, "https://") || strings.HasPrefix(s, "http://"):
handleAbsolutePath(f, baseURL)
default:
handleDocumentRelativePath(f, baseURL)
}
s = f.Val
}
if i == 0 && j == 0 {
attr.Val = s
continue
}
attr.Val = fmt.Sprintf("%s %s", attr.Val, s)
}
attr.Val = fmt.Sprintf("%s,", attr.Val)
}
attr.Val = strings.TrimSuffix(attr.Val, ",")
}
attr.Val = fmt.Sprintf( // TODO: figure out how to handle these
"/%s/%s", // srcset
baseURL, func patchResourceURL(token *html.Token, baseURL *url.URL) {
//url.QueryEscape( for i := range token.Attr {
strings.TrimPrefix(attr.Val, "/"), attr := &token.Attr[i]
//),
)
log.Printf("url rewritten-> '%s'='%s'", attr.Key, attr.Val) switch {
// dont touch attributes except for the ones we defined
case !AttributesToRewrite[attr.Key]:
continue
case attr.Key == "srcset":
handleSrcSet(attr, baseURL)
continue
case strings.HasPrefix(attr.Val, "//"):
handleProtocolRelativePath(attr, baseURL)
continue
case strings.HasPrefix(attr.Val, "/"):
handleRootRelativePath(attr, baseURL)
continue
case strings.HasPrefix(attr.Val, "https://") || strings.HasPrefix(attr.Val, "http://"):
handleAbsolutePath(attr, baseURL)
continue
default:
handleDocumentRelativePath(attr, baseURL)
continue continue
} }
// patch absolute paths to relative path pointing to ladder proxy
// <img src="http://images.cdn.proxiedsite.com/favicon.png"> -> <img src="/http://images.cdn.proxiedsite.com/favicon.png">
//log.Printf("abolute patch: %s\n", attr.Val)
attr.Val = fmt.Sprintf(
"/%s",
//url.QueryEscape(attr.Val),
//url.QueryEscape(
strings.TrimPrefix(attr.Val, "/"),
//),
//attr.Val,
)
log.Printf("url rewritten-> '%s'='%s'", attr.Key, attr.Val)
} }
} }
@@ -199,15 +256,7 @@ func RewriteHTMLResourceURLs() proxychain.ResponseModification {
return nil return nil
} }
// should be site being requested to proxy chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, chain.Request.URL)
baseUrl := fmt.Sprintf("%s://%s", chain.Request.URL.Scheme, chain.Request.URL.Host)
/*
log.Println("--------------------")
log.Println(baseUrl)
log.Println("--------------------")
*/
chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, baseUrl)
return nil return nil
} }
} }