organize rewriters
This commit is contained in:
@@ -1,343 +1,13 @@
|
|||||||
package responsemodifers
|
package responsemodifers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
_ "embed"
|
_ "embed"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"ladder/proxychain"
|
"ladder/proxychain"
|
||||||
"log"
|
"ladder/proxychain/responsemodifers/rewriters"
|
||||||
"net/url"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"golang.org/x/net/html"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var attributesToRewrite map[string]bool
|
|
||||||
var schemeBlacklist map[string]bool
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
// Define list of HTML attributes to try to rewrite
|
|
||||||
attributesToRewrite = map[string]bool{
|
|
||||||
"src": true,
|
|
||||||
"href": true,
|
|
||||||
"action": true,
|
|
||||||
"srcset": true,
|
|
||||||
"poster": true,
|
|
||||||
"data": true,
|
|
||||||
"cite": true,
|
|
||||||
"formaction": true,
|
|
||||||
"background": true,
|
|
||||||
"usemap": true,
|
|
||||||
"longdesc": true,
|
|
||||||
"manifest": true,
|
|
||||||
"archive": true,
|
|
||||||
"codebase": true,
|
|
||||||
"icon": true,
|
|
||||||
"pluginspage": true,
|
|
||||||
}
|
|
||||||
|
|
||||||
// define URIs to NOT rewrite
|
|
||||||
// for example: don't overwrite <img src="data:image/png;base64;iVBORw...">"
|
|
||||||
schemeBlacklist = map[string]bool{
|
|
||||||
"data": true,
|
|
||||||
"tel": true,
|
|
||||||
"mailto": true,
|
|
||||||
"file": true,
|
|
||||||
"blob": true,
|
|
||||||
"javascript": true,
|
|
||||||
"about": true,
|
|
||||||
"magnet": true,
|
|
||||||
"ws": true,
|
|
||||||
"wss": true,
|
|
||||||
"ftp": true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// HTMLResourceURLRewriter is a struct that rewrites URLs within HTML resources to use a specified proxy URL.
|
|
||||||
// It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes.
|
|
||||||
// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
|
|
||||||
type HTMLResourceURLRewriter struct {
|
|
||||||
baseURL *url.URL
|
|
||||||
tokenizer *html.Tokenizer
|
|
||||||
currentToken html.Token
|
|
||||||
tokenBuffer *bytes.Buffer
|
|
||||||
scriptContentBuffer *bytes.Buffer
|
|
||||||
insideScript bool
|
|
||||||
currentTokenIndex int
|
|
||||||
currentTokenProcessed bool
|
|
||||||
proxyURL string // ladder URL, not proxied site URL
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter.
|
|
||||||
// It initializes the tokenizer with the provided source and sets the proxy URL.
|
|
||||||
func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL, proxyURL string) *HTMLResourceURLRewriter {
|
|
||||||
return &HTMLResourceURLRewriter{
|
|
||||||
tokenizer: html.NewTokenizer(src),
|
|
||||||
currentToken: html.Token{},
|
|
||||||
currentTokenIndex: 0,
|
|
||||||
tokenBuffer: new(bytes.Buffer),
|
|
||||||
scriptContentBuffer: new(bytes.Buffer),
|
|
||||||
insideScript: false,
|
|
||||||
baseURL: baseURL,
|
|
||||||
proxyURL: proxyURL,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close resets the internal state of HTMLResourceURLRewriter, clearing buffers and token data.
|
|
||||||
func (r *HTMLResourceURLRewriter) Close() error {
|
|
||||||
r.tokenBuffer.Reset()
|
|
||||||
r.currentToken = html.Token{}
|
|
||||||
r.currentTokenIndex = 0
|
|
||||||
r.currentTokenProcessed = false
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read processes the HTML content, rewriting URLs and managing the state of tokens.
|
|
||||||
// It reads HTML content, token by token, rewriting URLs to route through the specified proxy.
|
|
||||||
func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
|
|
||||||
|
|
||||||
if r.currentToken.Data == "" || r.currentTokenProcessed {
|
|
||||||
tokenType := r.tokenizer.Next()
|
|
||||||
|
|
||||||
// done reading html, close out reader
|
|
||||||
if tokenType == html.ErrorToken {
|
|
||||||
if r.tokenizer.Err() == io.EOF {
|
|
||||||
return 0, io.EOF
|
|
||||||
}
|
|
||||||
return 0, r.tokenizer.Err()
|
|
||||||
}
|
|
||||||
|
|
||||||
// flush the current token into an internal buffer
|
|
||||||
// to handle fragmented tokens
|
|
||||||
r.currentToken = r.tokenizer.Token()
|
|
||||||
|
|
||||||
// patch tokens with URLs
|
|
||||||
isTokenWithAttribute := r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken
|
|
||||||
if isTokenWithAttribute {
|
|
||||||
patchResourceURL(&r.currentToken, r.baseURL, r.proxyURL)
|
|
||||||
}
|
|
||||||
|
|
||||||
r.tokenBuffer.Reset()
|
|
||||||
|
|
||||||
// unescape script contents, not sure why tokenizer will escape things
|
|
||||||
switch tokenType {
|
|
||||||
case html.StartTagToken:
|
|
||||||
if r.currentToken.Data == "script" {
|
|
||||||
r.insideScript = true
|
|
||||||
r.scriptContentBuffer.Reset() // Reset buffer for new script contents
|
|
||||||
}
|
|
||||||
r.tokenBuffer.WriteString(r.currentToken.String()) // Write the start tag
|
|
||||||
case html.EndTagToken:
|
|
||||||
if r.currentToken.Data == "script" {
|
|
||||||
r.insideScript = false
|
|
||||||
modScript := modifyInlineScript(r.scriptContentBuffer)
|
|
||||||
r.tokenBuffer.WriteString(modScript)
|
|
||||||
}
|
|
||||||
r.tokenBuffer.WriteString(r.currentToken.String())
|
|
||||||
default:
|
|
||||||
if r.insideScript {
|
|
||||||
r.scriptContentBuffer.WriteString(r.currentToken.String())
|
|
||||||
} else {
|
|
||||||
r.tokenBuffer.WriteString(r.currentToken.String())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// inject <script> right after <head>
|
|
||||||
isHeadToken := (r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken) && r.currentToken.Data == "head"
|
|
||||||
if isHeadToken {
|
|
||||||
injectScript(r.tokenBuffer, rewriteJSResourceUrlsScript)
|
|
||||||
}
|
|
||||||
|
|
||||||
r.currentTokenProcessed = false
|
|
||||||
r.currentTokenIndex = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
n, err := r.tokenBuffer.Read(p)
|
|
||||||
if err == io.EOF || r.tokenBuffer.Len() == 0 {
|
|
||||||
r.currentTokenProcessed = true
|
|
||||||
err = nil // EOF in this context is expected and not an actual error
|
|
||||||
}
|
|
||||||
return n, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
|
|
||||||
//
|
|
||||||
//go:embed rewrite_js_resource_urls.js
|
|
||||||
var rewriteJSResourceUrlsScript string
|
|
||||||
|
|
||||||
func injectScript(tokenBuffer *bytes.Buffer, script string) {
|
|
||||||
tokenBuffer.WriteString(
|
|
||||||
fmt.Sprintf("\n<script>\n%s\n</script>\n", script),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// possible ad-blocking / bypassing opportunity here
|
|
||||||
func modifyInlineScript(scriptContentBuffer *bytes.Buffer) string {
|
|
||||||
return html.UnescapeString(scriptContentBuffer.String())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Root-relative URLs: These are relative to the root path and start with a "/".
|
|
||||||
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
|
||||||
// doublecheck this is a valid relative URL
|
|
||||||
_, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
//log.Printf("BASEURL patch: %s\n", baseURL)
|
|
||||||
|
|
||||||
attr.Val = fmt.Sprintf(
|
|
||||||
"/%s://%s/%s",
|
|
||||||
baseURL.Scheme,
|
|
||||||
baseURL.Host,
|
|
||||||
strings.TrimPrefix(attr.Val, "/"),
|
|
||||||
)
|
|
||||||
attr.Val = url.QueryEscape(attr.Val)
|
|
||||||
attr.Val = fmt.Sprintf("/%s", attr.Val)
|
|
||||||
|
|
||||||
log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
|
|
||||||
func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
|
||||||
attr.Val = fmt.Sprintf(
|
|
||||||
"%s://%s/%s%s",
|
|
||||||
baseURL.Scheme,
|
|
||||||
strings.Trim(baseURL.Host, "/"),
|
|
||||||
strings.Trim(baseURL.RawPath, "/"),
|
|
||||||
strings.Trim(attr.Val, "/"),
|
|
||||||
)
|
|
||||||
attr.Val = url.QueryEscape(attr.Val)
|
|
||||||
attr.Val = fmt.Sprintf("/%s", attr.Val)
|
|
||||||
log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
|
|
||||||
func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
|
||||||
attr.Val = strings.TrimPrefix(attr.Val, "/")
|
|
||||||
handleRootRelativePath(attr, baseURL)
|
|
||||||
log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
|
||||||
}
|
|
||||||
|
|
||||||
func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
|
|
||||||
// check if valid URL
|
|
||||||
u, err := url.Parse(attr.Val)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if !(u.Scheme == "http" || u.Scheme == "https") {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
attr.Val = fmt.Sprintf(
|
|
||||||
"/%s",
|
|
||||||
url.QueryEscape(
|
|
||||||
strings.TrimPrefix(attr.Val, "/"),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
|
||||||
}
|
|
||||||
|
|
||||||
func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
|
|
||||||
for i, src := range strings.Split(attr.Val, ",") {
|
|
||||||
src = strings.Trim(src, " ")
|
|
||||||
for j, s := range strings.Split(src, " ") {
|
|
||||||
s = strings.Trim(s, " ")
|
|
||||||
if j == 0 {
|
|
||||||
f := &html.Attribute{Val: s, Key: attr.Key}
|
|
||||||
switch {
|
|
||||||
case strings.HasPrefix(s, "//"):
|
|
||||||
handleProtocolRelativePath(f, baseURL)
|
|
||||||
case strings.HasPrefix(s, "/"):
|
|
||||||
handleRootRelativePath(f, baseURL)
|
|
||||||
case strings.HasPrefix(s, "https://") || strings.HasPrefix(s, "http://"):
|
|
||||||
handleAbsolutePath(f, baseURL)
|
|
||||||
default:
|
|
||||||
handleDocumentRelativePath(f, baseURL)
|
|
||||||
}
|
|
||||||
s = f.Val
|
|
||||||
}
|
|
||||||
if i == 0 && j == 0 {
|
|
||||||
attr.Val = s
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
attr.Val = fmt.Sprintf("%s %s", attr.Val, s)
|
|
||||||
}
|
|
||||||
attr.Val = fmt.Sprintf("%s,", attr.Val)
|
|
||||||
}
|
|
||||||
attr.Val = strings.TrimSuffix(attr.Val, ",")
|
|
||||||
|
|
||||||
log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
|
||||||
}
|
|
||||||
|
|
||||||
func isBlackedlistedScheme(url string) bool {
|
|
||||||
spl := strings.Split(url, ":")
|
|
||||||
if len(spl) == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
scheme := spl[0]
|
|
||||||
return schemeBlacklist[scheme]
|
|
||||||
}
|
|
||||||
|
|
||||||
func patchResourceURL(token *html.Token, baseURL *url.URL, proxyURL string) {
|
|
||||||
for i := range token.Attr {
|
|
||||||
attr := &token.Attr[i]
|
|
||||||
|
|
||||||
switch {
|
|
||||||
// don't touch attributes except for the ones we defined
|
|
||||||
case !attributesToRewrite[attr.Key]:
|
|
||||||
continue
|
|
||||||
// don't rewrite special URIs that don't make network requests
|
|
||||||
case isBlackedlistedScheme(attr.Val):
|
|
||||||
continue
|
|
||||||
// don't double-overwrite the url
|
|
||||||
case strings.HasPrefix(attr.Val, proxyURL):
|
|
||||||
continue
|
|
||||||
// don't overwrite special URIs
|
|
||||||
case strings.HasPrefix(attr.Val, "data:"):
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "ftp:"):
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "tel:"):
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "javascript:"):
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "file:"):
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "ftp:"):
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "mailto:"):
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "blob:"):
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "about:"):
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "magnet:"):
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "ws:"):
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "wss:"):
|
|
||||||
continue
|
|
||||||
case attr.Key == "srcset":
|
|
||||||
handleSrcSet(attr, baseURL)
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "//"):
|
|
||||||
handleProtocolRelativePath(attr, baseURL)
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "/"):
|
|
||||||
handleRootRelativePath(attr, baseURL)
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(attr.Val, "https://") || strings.HasPrefix(attr.Val, "http://"):
|
|
||||||
handleAbsolutePath(attr, baseURL)
|
|
||||||
continue
|
|
||||||
default:
|
|
||||||
handleDocumentRelativePath(attr, baseURL)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RewriteHTMLResourceURLs modifies HTTP responses
|
// RewriteHTMLResourceURLs modifies HTTP responses
|
||||||
// to rewrite URLs attributes in HTML content (such as src, href)
|
// to rewrite URLs attributes in HTML content (such as src, href)
|
||||||
// - `<img src='/relative_path'>` -> `<img src='/https://proxiedsite.com/relative_path'>`
|
// - `<img src='/relative_path'>` -> `<img src='/https://proxiedsite.com/relative_path'>`
|
||||||
@@ -365,10 +35,18 @@ func RewriteHTMLResourceURLs() proxychain.ResponseModification {
|
|||||||
if !strings.HasPrefix(ct, "text/html") {
|
if !strings.HasPrefix(ct, "text/html") {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// proxyURL is the URL of the ladder: http://localhost:8080 (ladder)
|
||||||
originalURI := chain.Context.Request().URI()
|
originalURI := chain.Context.Request().URI()
|
||||||
proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())
|
proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())
|
||||||
|
|
||||||
chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, chain.Request.URL, proxyURL)
|
chain.Response.Body = rewriters.
|
||||||
|
NewHTMLResourceURLRewriter(
|
||||||
|
chain.Response.Body,
|
||||||
|
chain.Request.URL,
|
||||||
|
proxyURL,
|
||||||
|
)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
package rewriters
|
||||||
|
|
||||||
|
// todo: implement
|
||||||
@@ -0,0 +1,313 @@
|
|||||||
|
package rewriters
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
_ "embed"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/net/html"
|
||||||
|
)
|
||||||
|
|
||||||
|
var attributesToRewrite map[string]bool
|
||||||
|
var schemeBlacklist map[string]bool
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
// Define list of HTML attributes to try to rewrite
|
||||||
|
attributesToRewrite = map[string]bool{
|
||||||
|
"src": true,
|
||||||
|
"href": true,
|
||||||
|
"action": true,
|
||||||
|
"srcset": true,
|
||||||
|
"poster": true,
|
||||||
|
"data": true,
|
||||||
|
"cite": true,
|
||||||
|
"formaction": true,
|
||||||
|
"background": true,
|
||||||
|
"usemap": true,
|
||||||
|
"longdesc": true,
|
||||||
|
"manifest": true,
|
||||||
|
"archive": true,
|
||||||
|
"codebase": true,
|
||||||
|
"icon": true,
|
||||||
|
"pluginspage": true,
|
||||||
|
}
|
||||||
|
|
||||||
|
// define URIs to NOT rewrite
|
||||||
|
// for example: don't overwrite <img src="data:image/png;base64;iVBORw...">"
|
||||||
|
schemeBlacklist = map[string]bool{
|
||||||
|
"data": true,
|
||||||
|
"tel": true,
|
||||||
|
"mailto": true,
|
||||||
|
"file": true,
|
||||||
|
"blob": true,
|
||||||
|
"javascript": true,
|
||||||
|
"about": true,
|
||||||
|
"magnet": true,
|
||||||
|
"ws": true,
|
||||||
|
"wss": true,
|
||||||
|
"ftp": true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// HTMLResourceURLRewriter is a struct that rewrites URLs within HTML resources to use a specified proxy URL.
|
||||||
|
// It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes.
|
||||||
|
// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
|
||||||
|
type HTMLResourceURLRewriter struct {
|
||||||
|
baseURL *url.URL
|
||||||
|
tokenizer *html.Tokenizer
|
||||||
|
currentToken html.Token
|
||||||
|
tokenBuffer *bytes.Buffer
|
||||||
|
scriptContentBuffer *bytes.Buffer
|
||||||
|
insideScript bool
|
||||||
|
currentTokenIndex int
|
||||||
|
currentTokenProcessed bool
|
||||||
|
proxyURL string // ladder URL, not proxied site URL
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter.
|
||||||
|
// It initializes the tokenizer with the provided source and sets the proxy URL.
|
||||||
|
func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL, proxyURL string) *HTMLResourceURLRewriter {
|
||||||
|
return &HTMLResourceURLRewriter{
|
||||||
|
tokenizer: html.NewTokenizer(src),
|
||||||
|
currentToken: html.Token{},
|
||||||
|
currentTokenIndex: 0,
|
||||||
|
tokenBuffer: new(bytes.Buffer),
|
||||||
|
scriptContentBuffer: new(bytes.Buffer),
|
||||||
|
insideScript: false,
|
||||||
|
baseURL: baseURL,
|
||||||
|
proxyURL: proxyURL,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close resets the internal state of HTMLResourceURLRewriter, clearing buffers and token data.
|
||||||
|
func (r *HTMLResourceURLRewriter) Close() error {
|
||||||
|
r.tokenBuffer.Reset()
|
||||||
|
r.currentToken = html.Token{}
|
||||||
|
r.currentTokenIndex = 0
|
||||||
|
r.currentTokenProcessed = false
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read processes the HTML content, rewriting URLs and managing the state of tokens.
|
||||||
|
// It reads HTML content, token by token, rewriting URLs to route through the specified proxy.
|
||||||
|
func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
|
||||||
|
|
||||||
|
if r.currentToken.Data == "" || r.currentTokenProcessed {
|
||||||
|
tokenType := r.tokenizer.Next()
|
||||||
|
|
||||||
|
// done reading html, close out reader
|
||||||
|
if tokenType == html.ErrorToken {
|
||||||
|
if r.tokenizer.Err() == io.EOF {
|
||||||
|
return 0, io.EOF
|
||||||
|
}
|
||||||
|
return 0, r.tokenizer.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
// flush the current token into an internal buffer
|
||||||
|
// to handle fragmented tokens
|
||||||
|
r.currentToken = r.tokenizer.Token()
|
||||||
|
|
||||||
|
// patch tokens with URLs
|
||||||
|
isTokenWithAttribute := r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken
|
||||||
|
if isTokenWithAttribute {
|
||||||
|
patchResourceURL(&r.currentToken, r.baseURL, r.proxyURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
r.tokenBuffer.Reset()
|
||||||
|
|
||||||
|
// unescape script contents, not sure why tokenizer will escape things
|
||||||
|
switch tokenType {
|
||||||
|
case html.StartTagToken:
|
||||||
|
if r.currentToken.Data == "script" {
|
||||||
|
r.insideScript = true
|
||||||
|
r.scriptContentBuffer.Reset() // Reset buffer for new script contents
|
||||||
|
}
|
||||||
|
r.tokenBuffer.WriteString(r.currentToken.String()) // Write the start tag
|
||||||
|
case html.EndTagToken:
|
||||||
|
if r.currentToken.Data == "script" {
|
||||||
|
r.insideScript = false
|
||||||
|
modScript := modifyInlineScript(r.scriptContentBuffer)
|
||||||
|
r.tokenBuffer.WriteString(modScript)
|
||||||
|
}
|
||||||
|
r.tokenBuffer.WriteString(r.currentToken.String())
|
||||||
|
default:
|
||||||
|
if r.insideScript {
|
||||||
|
r.scriptContentBuffer.WriteString(r.currentToken.String())
|
||||||
|
} else {
|
||||||
|
r.tokenBuffer.WriteString(r.currentToken.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// inject <script> right after <head>
|
||||||
|
isHeadToken := (r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken) && r.currentToken.Data == "head"
|
||||||
|
if isHeadToken {
|
||||||
|
injectScript(r.tokenBuffer, rewriteJSResourceUrlsScript)
|
||||||
|
}
|
||||||
|
|
||||||
|
r.currentTokenProcessed = false
|
||||||
|
r.currentTokenIndex = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
n, err := r.tokenBuffer.Read(p)
|
||||||
|
if err == io.EOF || r.tokenBuffer.Len() == 0 {
|
||||||
|
r.currentTokenProcessed = true
|
||||||
|
err = nil // EOF in this context is expected and not an actual error
|
||||||
|
}
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
|
||||||
|
//
|
||||||
|
//go:embed js_resource_url_rewriter.js
|
||||||
|
var rewriteJSResourceUrlsScript string
|
||||||
|
|
||||||
|
func injectScript(tokenBuffer *bytes.Buffer, script string) {
|
||||||
|
tokenBuffer.WriteString(
|
||||||
|
fmt.Sprintf("\n<script>\n%s\n</script>\n", script),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// possible ad-blocking / bypassing opportunity here
|
||||||
|
func modifyInlineScript(scriptContentBuffer *bytes.Buffer) string {
|
||||||
|
return html.UnescapeString(scriptContentBuffer.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Root-relative URLs: These are relative to the root path and start with a "/".
|
||||||
|
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||||
|
// doublecheck this is a valid relative URL
|
||||||
|
_, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
//log.Printf("BASEURL patch: %s\n", baseURL)
|
||||||
|
|
||||||
|
attr.Val = fmt.Sprintf(
|
||||||
|
"/%s://%s/%s",
|
||||||
|
baseURL.Scheme,
|
||||||
|
baseURL.Host,
|
||||||
|
strings.TrimPrefix(attr.Val, "/"),
|
||||||
|
)
|
||||||
|
attr.Val = url.QueryEscape(attr.Val)
|
||||||
|
attr.Val = fmt.Sprintf("/%s", attr.Val)
|
||||||
|
|
||||||
|
log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
|
||||||
|
func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||||
|
attr.Val = fmt.Sprintf(
|
||||||
|
"%s://%s/%s%s",
|
||||||
|
baseURL.Scheme,
|
||||||
|
strings.Trim(baseURL.Host, "/"),
|
||||||
|
strings.Trim(baseURL.RawPath, "/"),
|
||||||
|
strings.Trim(attr.Val, "/"),
|
||||||
|
)
|
||||||
|
attr.Val = url.QueryEscape(attr.Val)
|
||||||
|
attr.Val = fmt.Sprintf("/%s", attr.Val)
|
||||||
|
log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
|
||||||
|
func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||||
|
attr.Val = strings.TrimPrefix(attr.Val, "/")
|
||||||
|
handleRootRelativePath(attr, baseURL)
|
||||||
|
log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||||
|
}
|
||||||
|
|
||||||
|
func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
|
||||||
|
// check if valid URL
|
||||||
|
u, err := url.Parse(attr.Val)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if !(u.Scheme == "http" || u.Scheme == "https") {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
attr.Val = fmt.Sprintf(
|
||||||
|
"/%s",
|
||||||
|
url.QueryEscape(
|
||||||
|
strings.TrimPrefix(attr.Val, "/"),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||||
|
}
|
||||||
|
|
||||||
|
func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
|
||||||
|
for i, src := range strings.Split(attr.Val, ",") {
|
||||||
|
src = strings.Trim(src, " ")
|
||||||
|
for j, s := range strings.Split(src, " ") {
|
||||||
|
s = strings.Trim(s, " ")
|
||||||
|
if j == 0 {
|
||||||
|
f := &html.Attribute{Val: s, Key: attr.Key}
|
||||||
|
switch {
|
||||||
|
case strings.HasPrefix(s, "//"):
|
||||||
|
handleProtocolRelativePath(f, baseURL)
|
||||||
|
case strings.HasPrefix(s, "/"):
|
||||||
|
handleRootRelativePath(f, baseURL)
|
||||||
|
case strings.HasPrefix(s, "https://") || strings.HasPrefix(s, "http://"):
|
||||||
|
handleAbsolutePath(f, baseURL)
|
||||||
|
default:
|
||||||
|
handleDocumentRelativePath(f, baseURL)
|
||||||
|
}
|
||||||
|
s = f.Val
|
||||||
|
}
|
||||||
|
if i == 0 && j == 0 {
|
||||||
|
attr.Val = s
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
attr.Val = fmt.Sprintf("%s %s", attr.Val, s)
|
||||||
|
}
|
||||||
|
attr.Val = fmt.Sprintf("%s,", attr.Val)
|
||||||
|
}
|
||||||
|
attr.Val = strings.TrimSuffix(attr.Val, ",")
|
||||||
|
|
||||||
|
log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||||
|
}
|
||||||
|
|
||||||
|
func isBlackedlistedScheme(url string) bool {
|
||||||
|
spl := strings.Split(url, ":")
|
||||||
|
if len(spl) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
scheme := spl[0]
|
||||||
|
return schemeBlacklist[scheme]
|
||||||
|
}
|
||||||
|
|
||||||
|
func patchResourceURL(token *html.Token, baseURL *url.URL, proxyURL string) {
|
||||||
|
for i := range token.Attr {
|
||||||
|
attr := &token.Attr[i]
|
||||||
|
|
||||||
|
switch {
|
||||||
|
// don't touch attributes except for the ones we defined
|
||||||
|
case !attributesToRewrite[attr.Key]:
|
||||||
|
continue
|
||||||
|
// don't rewrite special URIs that don't make network requests
|
||||||
|
case isBlackedlistedScheme(attr.Val):
|
||||||
|
continue
|
||||||
|
// don't double-overwrite the url
|
||||||
|
case strings.HasPrefix(attr.Val, proxyURL):
|
||||||
|
continue
|
||||||
|
case attr.Key == "srcset":
|
||||||
|
handleSrcSet(attr, baseURL)
|
||||||
|
continue
|
||||||
|
case strings.HasPrefix(attr.Val, "//"):
|
||||||
|
handleProtocolRelativePath(attr, baseURL)
|
||||||
|
continue
|
||||||
|
case strings.HasPrefix(attr.Val, "/"):
|
||||||
|
handleRootRelativePath(attr, baseURL)
|
||||||
|
continue
|
||||||
|
case strings.HasPrefix(attr.Val, "https://") || strings.HasPrefix(attr.Val, "http://"):
|
||||||
|
handleAbsolutePath(attr, baseURL)
|
||||||
|
continue
|
||||||
|
default:
|
||||||
|
handleDocumentRelativePath(attr, baseURL)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,167 @@
|
|||||||
|
// Overrides the global fetch and XMLHttpRequest open methods to modify the request URLs.
|
||||||
|
// Also overrides the attribute setter prototype to modify the request URLs
|
||||||
|
// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
|
||||||
|
(() => {
|
||||||
|
const blacklistedSchemes = [
|
||||||
|
"ftp:",
|
||||||
|
"mailto:",
|
||||||
|
"tel:",
|
||||||
|
"file:",
|
||||||
|
"blob:",
|
||||||
|
"javascript:",
|
||||||
|
"about:",
|
||||||
|
"magnet:",
|
||||||
|
"ws:",
|
||||||
|
"wss:",
|
||||||
|
];
|
||||||
|
|
||||||
|
function rewriteURL(url) {
|
||||||
|
const oldUrl = url
|
||||||
|
if (!url) return url
|
||||||
|
let isStr = (typeof url.startsWith === 'function')
|
||||||
|
if (!isStr) return url
|
||||||
|
// don't rewrite invalid URIs
|
||||||
|
try { new URL(url) } catch { return url }
|
||||||
|
|
||||||
|
// don't rewrite special URIs
|
||||||
|
if (blacklistedSchemes.includes(url)) return url;
|
||||||
|
|
||||||
|
// don't double rewrite
|
||||||
|
const proxyOrigin = globalThis.window.location.origin;
|
||||||
|
if (url.startsWith(proxyOrigin)) return url;
|
||||||
|
if (url.startsWith(`/${proxyOrigin}`)) return url;
|
||||||
|
if (url.startsWith(`/${origin}`)) return url;
|
||||||
|
|
||||||
|
const origin = (new URL(decodeURIComponent(globalThis.window.location.pathname.substring(1)))).origin
|
||||||
|
//console.log(`proxychain: origin: ${origin} // proxyOrigin: ${proxyOrigin} // original: ${oldUrl}`)
|
||||||
|
|
||||||
|
if (url.startsWith("//")) {
|
||||||
|
url = `/${origin}/${encodeURIComponent(url.substring(2))}`;
|
||||||
|
} else if (url.startsWith("/")) {
|
||||||
|
url = `/${origin}/${encodeURIComponent(url.substring(1))}`;
|
||||||
|
} else if (url.startsWith(origin)) {
|
||||||
|
url = `/${encodeURIComponent(url)}`
|
||||||
|
} else if (url.startsWith("http://") || url.startsWith("https://")) {
|
||||||
|
url = `/${proxyOrigin}/${encodeURIComponent(url)}`;
|
||||||
|
}
|
||||||
|
console.log(`proxychain: rewrite JS URL: ${oldUrl} -> ${url}`)
|
||||||
|
return url;
|
||||||
|
};
|
||||||
|
|
||||||
|
// monkey patch fetch
|
||||||
|
const oldFetch = globalThis.fetch;
|
||||||
|
globalThis.fetch = async (url, init) => {
|
||||||
|
return oldFetch(rewriteURL(url), init)
|
||||||
|
}
|
||||||
|
|
||||||
|
// monkey patch xmlhttprequest
|
||||||
|
const oldOpen = XMLHttpRequest.prototype.open;
|
||||||
|
XMLHttpRequest.prototype.open = function(method, url, async = true, user = null, password = null) {
|
||||||
|
return oldOpen.call(this, method, rewriteURL(url), async, user, password);
|
||||||
|
};
|
||||||
|
const oldSend = XMLHttpRequest.prototype.send;
|
||||||
|
XMLHttpRequest.prototype.send = function(method, url) {
|
||||||
|
return oldSend.call(this, method, rewriteURL(url));
|
||||||
|
};
|
||||||
|
|
||||||
|
// monkey patch service worker registration
|
||||||
|
const oldRegister = ServiceWorkerContainer.prototype.register;
|
||||||
|
ServiceWorkerContainer.prototype.register = function(scriptURL, options) {
|
||||||
|
return oldRegister.call(this, rewriteURL(scriptURL), options)
|
||||||
|
}
|
||||||
|
|
||||||
|
// monkey patch URL.toString() method
|
||||||
|
const oldToString = URL.prototype.toString
|
||||||
|
URL.prototype.toString = function() {
|
||||||
|
let originalURL = oldToString.call(this)
|
||||||
|
return rewriteURL(originalURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
// monkey patch URL.toJSON() method
|
||||||
|
const oldToJson = URL.prototype.toString
|
||||||
|
URL.prototype.toString = function() {
|
||||||
|
let originalURL = oldToJson.call(this)
|
||||||
|
return rewriteURL(originalURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Monkey patch URL.href getter and setter
|
||||||
|
const originalHrefDescriptor = Object.getOwnPropertyDescriptor(URL.prototype, 'href');
|
||||||
|
Object.defineProperty(URL.prototype, 'href', {
|
||||||
|
get: function() {
|
||||||
|
let originalHref = originalHrefDescriptor.get.call(this);
|
||||||
|
return rewriteURL(originalHref)
|
||||||
|
},
|
||||||
|
set: function(newValue) {
|
||||||
|
originalHrefDescriptor.set.call(this, rewriteURL(newValue));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Monkey patch setter
|
||||||
|
const elements = [
|
||||||
|
{ tag: 'a', attribute: 'href' },
|
||||||
|
{ tag: 'img', attribute: 'src' },
|
||||||
|
// { tag: 'img', attribute: 'srcset' }, // TODO: handle srcset
|
||||||
|
{ tag: 'script', attribute: 'src' },
|
||||||
|
{ tag: 'link', attribute: 'href' },
|
||||||
|
{ tag: 'link', attribute: 'icon' },
|
||||||
|
{ tag: 'iframe', attribute: 'src' },
|
||||||
|
{ tag: 'audio', attribute: 'src' },
|
||||||
|
{ tag: 'video', attribute: 'src' },
|
||||||
|
{ tag: 'source', attribute: 'src' },
|
||||||
|
// { tag: 'source', attribute: 'srcset' }, // TODO: handle srcset
|
||||||
|
{ tag: 'embed', attribute: 'src' },
|
||||||
|
{ tag: 'embed', attribute: 'pluginspage' },
|
||||||
|
{ tag: 'html', attribute: 'manifest' },
|
||||||
|
{ tag: 'object', attribute: 'src' },
|
||||||
|
{ tag: 'input', attribute: 'src' },
|
||||||
|
{ tag: 'track', attribute: 'src' },
|
||||||
|
{ tag: 'form', attribute: 'action' },
|
||||||
|
{ tag: 'area', attribute: 'href' },
|
||||||
|
{ tag: 'base', attribute: 'href' },
|
||||||
|
{ tag: 'blockquote', attribute: 'cite' },
|
||||||
|
{ tag: 'del', attribute: 'cite' },
|
||||||
|
{ tag: 'ins', attribute: 'cite' },
|
||||||
|
{ tag: 'q', attribute: 'cite' },
|
||||||
|
{ tag: 'button', attribute: 'formaction' },
|
||||||
|
{ tag: 'input', attribute: 'formaction' },
|
||||||
|
{ tag: 'meta', attribute: 'content' },
|
||||||
|
{ tag: 'object', attribute: 'data' },
|
||||||
|
];
|
||||||
|
|
||||||
|
elements.forEach(({ tag, attribute }) => {
|
||||||
|
const proto = document.createElement(tag).constructor.prototype;
|
||||||
|
const descriptor = Object.getOwnPropertyDescriptor(proto, attribute);
|
||||||
|
if (descriptor && descriptor.set) {
|
||||||
|
Object.defineProperty(proto, attribute, {
|
||||||
|
...descriptor,
|
||||||
|
set(value) {
|
||||||
|
// calling rewriteURL will end up calling a setter for href,
|
||||||
|
// leading to a recusive loop and a Maximum call stack size exceeded
|
||||||
|
// error, so we guard against this with a local semaphore flag
|
||||||
|
const isRewritingSetKey = Symbol.for('isRewritingSet');
|
||||||
|
if (!this[isRewritingSetKey]) {
|
||||||
|
this[isRewritingSetKey] = true;
|
||||||
|
descriptor.set.call(this, rewriteURL(value));
|
||||||
|
//descriptor.set.call(this, value);
|
||||||
|
this[isRewritingSetKey] = false;
|
||||||
|
} else {
|
||||||
|
// Directly set the value without rewriting
|
||||||
|
descriptor.set.call(this, value);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
get() {
|
||||||
|
const isRewritingGetKey = Symbol.for('isRewritingGet');
|
||||||
|
if (!this[isRewritingGetKey]) {
|
||||||
|
this[isRewritingGetKey] = true;
|
||||||
|
let oldURL = descriptor.get.call(this);
|
||||||
|
let newURL = rewriteURL(oldURL);
|
||||||
|
this[isRewritingGetKey] = false;
|
||||||
|
return newURL
|
||||||
|
} else {
|
||||||
|
return descriptor.get.call(this);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
})();
|
||||||
Reference in New Issue
Block a user