support js URL rewriting; support post req

This commit is contained in:
Kevin Pham
2023-11-21 10:45:29 -06:00
parent 79a229f28c
commit 543192afbe
3 changed files with 60 additions and 18 deletions

View File

@@ -87,7 +87,7 @@ func main() {
app := fiber.New(
fiber.Config{
Prefork: *prefork,
GETOnly: true,
GETOnly: false,
},
)
@@ -138,5 +138,6 @@ func main() {
}
app.Get("/*", handlers.NewProxySiteHandler(proxyOpts))
app.Post("/*", handlers.NewProxySiteHandler(proxyOpts))
log.Fatal(app.Listen(":" + *port))
}

View File

@@ -2,9 +2,11 @@ package responsemodifers
import (
"bytes"
_ "embed"
"fmt"
"io"
"ladder/proxychain"
"log"
"net/url"
"strings"
@@ -19,7 +21,7 @@ func init() {
"src": true,
"href": true,
"action": true,
"srcset": true, // TODO: fix
"srcset": true,
"poster": true,
"data": true,
"cite": true,
@@ -45,17 +47,19 @@ type HTMLResourceURLRewriter struct {
tokenBuffer *bytes.Buffer
currentTokenIndex int
currentTokenProcessed bool
proxyURL string // ladder URL, not proxied site URL
}
// NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter.
// It wraps src in an HTML tokenizer and starts with an empty token, a zero
// token index and a freshly allocated token buffer.
//
// baseURL is stored on the rewriter and handed to the URL-patching step for
// every start / self-closing tag. proxyURL is the ladder (proxy) URL, not the
// proxied site's URL; it is stored alongside baseURL and handed to the same step.
func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL) *HTMLResourceURLRewriter {
func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL, proxyURL string) *HTMLResourceURLRewriter {
return &HTMLResourceURLRewriter{
tokenizer: html.NewTokenizer(src),
currentToken: html.Token{},
currentTokenIndex: 0,
tokenBuffer: new(bytes.Buffer),
baseURL: baseURL,
proxyURL: proxyURL,
}
}
@@ -90,11 +94,18 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
// patch tokens with URLs
isTokenWithAttribute := r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken
if isTokenWithAttribute {
patchResourceURL(&r.currentToken, r.baseURL)
patchResourceURL(&r.currentToken, r.baseURL, r.proxyURL)
}
r.tokenBuffer.Reset()
r.tokenBuffer.WriteString(r.currentToken.String())
// inject <script> right after <head>
isHeadToken := (r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken) && r.currentToken.Data == "head"
if isHeadToken {
injectScript(r.tokenBuffer, rewriteJSResourceUrlsScript)
}
r.currentTokenProcessed = false
r.currentTokenIndex = 0
}
@@ -107,6 +118,17 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
return n, err
}
// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
//
//go:embed rewrite_js_resource_urls.js
var rewriteJSResourceUrlsScript string
// injectScript appends script to tokenBuffer wrapped in an inline <script>
// element. A newline is written before the opening tag, around the script
// text, and after the closing tag. The script text is written verbatim.
func injectScript(tokenBuffer *bytes.Buffer, script string) {
	// Format straight into the buffer; writes to a bytes.Buffer do not fail,
	// so the returned count and error are intentionally discarded.
	_, _ = fmt.Fprintf(tokenBuffer, "\n<script>\n%s\n</script>\n", script)
}
// Root-relative URLs: These are relative to the root path and start with a "/".
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
// doublecheck this is a valid relative URL
@@ -126,7 +148,7 @@ func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
attr.Val = url.QueryEscape(attr.Val)
attr.Val = fmt.Sprintf("/%s", attr.Val)
//log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
@@ -140,14 +162,14 @@ func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
)
attr.Val = url.QueryEscape(attr.Val)
attr.Val = fmt.Sprintf("/%s", attr.Val)
//log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
//
// handleProtocolRelativePath strips a single leading "/" from attr.Val and then
// delegates to handleRootRelativePath, so the final attr.Val is whatever that
// function produces. The rewritten key/value pair is logged afterwards.
//
// NOTE(review): after the trim, a value such as "//host/path" becomes
// "/host/path"; confirm handleRootRelativePath does not resolve "host" as a
// path segment under baseURL.
func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
attr.Val = strings.TrimPrefix(attr.Val, "/")
handleRootRelativePath(attr, baseURL)
//log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
@@ -165,7 +187,7 @@ func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
strings.TrimPrefix(attr.Val, "/"),
),
)
//log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
@@ -196,18 +218,21 @@ func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
attr.Val = fmt.Sprintf("%s,", attr.Val)
}
attr.Val = strings.TrimSuffix(attr.Val, ",")
log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
// TODO: figure out how to handle these
// srcset
func patchResourceURL(token *html.Token, baseURL *url.URL) {
func patchResourceURL(token *html.Token, baseURL *url.URL, proxyURL string) {
for i := range token.Attr {
attr := &token.Attr[i]
switch {
// dont touch attributes except for the ones we defined
// don't touch attributes except for the ones we defined
case !AttributesToRewrite[attr.Key]:
continue
// don't double-overwrite the url
case strings.HasPrefix(attr.Val, proxyURL):
continue
case attr.Key == "srcset":
handleSrcSet(attr, baseURL)
continue
@@ -255,8 +280,10 @@ func RewriteHTMLResourceURLs() proxychain.ResponseModification {
if !strings.HasPrefix(ct, "text/html") {
return nil
}
originalURI := chain.Context.Request().URI()
proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())
chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, chain.Request.URL)
chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, chain.Request.URL, proxyURL)
return nil
}
}

View File

@@ -3,21 +3,26 @@
// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
(() => {
// rewriteURL takes a URL requested by page script and returns the URL to request instead.
// Falsy input, and input that already starts with the current page's origin, is returned unchanged.
// Input that matches none of the prefixes below (e.g. a document-relative path) is also returned unchanged.
// NOTE(review): url is used with startsWith/substring, so a non-string argument
// (for example a URL or Request object) would throw here — confirm callers only pass strings.
function rewriteURL(url) {
// Remember the input for the log line at the end.
// NOTE(review): no declaration of oldUrl is visible in this function — looks like an assignment to an undeclared name; confirm.
oldUrl = url
if (!url) return url
if (url.startsWith(window.location.origin)) return url
// NOTE(review): proxyOrigin likewise has no visible declaration — confirm.
proxyOrigin = globalThis.window.location.origin
if (url.startsWith(proxyOrigin)) return url
// The page path minus its leading "/" is decoded and parsed as a URL; only its origin is kept.
// NOTE(review): new URL(...) throws if that path is not a parseable absolute URL — confirm this always holds for proxied pages.
const origin = (new URL(decodeURI(globalThis.window.location.pathname.substring(1)))).origin
// Input starting with "//": the two leading slashes are dropped before encoding.
if (url.startsWith("//")) {
url = `${window.location.origin}/${encodeURIComponent(url.substring(2))}`;
url = `/${origin}/${encodeURIComponent(url.substring(2))}`;
// Input starting with a single "/": the leading slash is dropped before encoding.
} else if (url.startsWith("/")) {
url = `${window.location.origin}/${encodeURIComponent(url.substring(1))}`;
url = `/${origin}/${encodeURIComponent(url.substring(1))}`;
// Input starting with http:// or https://: the whole URL is encoded.
} else if (url.startsWith("http://") || url.startsWith("https://")) {
url = `${window.location.origin}/${encodeURIComponent(url)}`;
url = `/${origin}/${encodeURIComponent(url)}`;
}
console.log(`rewrite JS URL: ${oldUrl} -> ${url}`)
return url;
};
// monkey patch fetch
const oldFetch = globalThis.fetch ;
const oldFetch = globalThis.fetch;
// Route fetch() through rewriteURL.
// fetch() accepts a string, a URL object, or a Request object as its first
// argument, but rewriteURL relies on string methods. Strings are rewritten
// directly and URL objects via their href; any other input (e.g. a Request)
// is forwarded to the original fetch untouched instead of failing with a
// TypeError inside rewriteURL.
globalThis.fetch = async (url, init) => {
    let target = url;
    if (typeof url === "string") {
        target = rewriteURL(url);
    } else if (url instanceof URL) {
        target = rewriteURL(url.href);
    }
    return oldFetch(target, init)
}
@@ -27,7 +32,16 @@
// Patched XMLHttpRequest.open: forwards to the saved original (oldOpen, defined
// outside this excerpt) with the request URL passed through rewriteURL.
// async defaults to true and user/password default to null; all five arguments
// are always forwarded explicitly.
XMLHttpRequest.prototype.open = function(method, url, async = true, user = null, password = null) {
return oldOpen.call(this, method, rewriteURL(url), async, user, password);
};
// XMLHttpRequest.send() takes a single optional request body, not
// (method, url): the request URL is supplied to open(), which is patched
// separately. The wrapper therefore just forwards the body; there is no URL
// to rewrite at this point.
const oldSend = XMLHttpRequest.prototype.send;
XMLHttpRequest.prototype.send = function(body) {
    return oldSend.call(this, body);
};
// monkey patch service worker registration
// ServiceWorkerContainer is only exposed in secure contexts. On a page served
// over plain HTTP the bare identifier does not exist, and referencing it would
// throw a ReferenceError that aborts every patch installed after this point.
// Look it up defensively and only install the wrapper when it is available.
const oldRegister = typeof ServiceWorkerContainer !== "undefined"
    ? ServiceWorkerContainer.prototype.register
    : undefined;
if (oldRegister) {
    // Register the worker script through the rewritten URL.
    ServiceWorkerContainer.prototype.register = function(scriptURL, options) {
        return oldRegister.call(this, rewriteURL(scriptURL), options)
    }
}
// Monkey patch setter methods
const elements = [