refactor rewriters to modify html in single pass with multiple rewriters; improve html rewriter edge case handling

2023-11-22 23:51:52 -06:00
parent 7668713b1a
commit 5d55a2f3f0
11 changed files with 639 additions and 435 deletions
--- a/proxychain/proxychain.go
+++ b/proxychain/proxychain.go
@@ -10,6 +10,7 @@ import (
 	"strings"

 	"ladder/pkg/ruleset"
+	rr "ladder/proxychain/responsemodifers/rewriters"

 	"github.com/gofiber/fiber/v2"
 )
@@ -35,6 +36,7 @@ import (

 	rx "ladder/pkg/proxychain/requestmodifers"
 	tx "ladder/pkg/proxychain/responsemodifers"
+	"ladder/pkg/proxychain/responsemodifers/rewriters"
 	"ladder/internal/proxychain"

 )
@@ -87,6 +89,7 @@ type ProxyChain struct {
 	Response             *http.Response
 	requestModifications []RequestModification
 	resultModifications  []ResponseModification
+	htmlTokenRewriters   []rr.IHTMLTokenRewriter
 	Ruleset              *ruleset.RuleSet
 	debugMode            bool
 	abortErr             error
@@ -169,75 +172,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
 	return req, nil
 }

-// _execute sends the request for the ProxyChain and returns the raw body only
-// the caller is responsible for returning a response back to the requestor
-// the caller is also responsible for calling chain._reset() when they are done with the body
-func (chain *ProxyChain) _execute() (io.Reader, error) {
-	if chain.validateCtxIsSet() != nil || chain.abortErr != nil {
-		return nil, chain.abortErr
-	}
-	if chain.Request == nil {
-		return nil, errors.New("proxychain request not yet initialized")
-	}
-	if chain.Request.URL.Scheme == "" {
-		return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues")
-	}
-
-	// Apply requestModifications to proxychain
-	for _, applyRequestModificationsTo := range chain.requestModifications {
-		err := applyRequestModificationsTo(chain)
-		if err != nil {
-			return nil, chain.abort(err)
-		}
-	}
-
-	// Send Request Upstream
-	resp, err := chain.Client.Do(chain.Request)
-	if err != nil {
-		return nil, chain.abort(err)
-	}
-	chain.Response = resp
-
-	//defer resp.Body.Close()
-
-	/* todo: move to rsm
-	for k, v := range resp.Header {
-		chain.Context.Set(k, resp.Header.Get(k))
-	}
-	*/
-
-	// Apply ResponseModifiers to proxychain
-	for _, applyResultModificationsTo := range chain.resultModifications {
-		err := applyResultModificationsTo(chain)
-		if err != nil {
-			return nil, chain.abort(err)
-		}
-	}
-
-	return chain.Response.Body, nil
-}
-
-// Execute sends the request for the ProxyChain and returns the request to the sender
-// and resets the fields so that the ProxyChain can be reused.
-// if any step in the ProxyChain fails, the request will abort and a 500 error will
-// be returned to the client
-func (chain *ProxyChain) Execute() error {
-	defer chain._reset()
-	body, err := chain._execute()
-	if err != nil {
-		log.Println(err)
-		return err
-	}
-	if chain.Context == nil {
-		return errors.New("no context set")
-	}
-	// Return request back to client
-	chain.Context.Set("content-type", chain.Response.Header.Get("content-type"))
-	return chain.Context.SendStream(body)
-
-	//return chain.Context.SendStream(body)
-}
-
 // reconstructUrlFromReferer reconstructs the URL using the referer's scheme, host, and the relative path / queries
 func reconstructUrlFromReferer(referer *url.URL, relativeUrl *url.URL) (*url.URL, error) {

@@ -322,6 +256,13 @@ func (chain *ProxyChain) extractUrl() (*url.URL, error) {
 	return reconstructUrlFromReferer(referer, relativePath)
 }

+// AddHTMLTokenRewriter queues an IHTMLTokenRewriter on the chain and returns the chain so calls can be chained.
+// Queued rewriters are handed to the HTML rewriter in _execute when the response content-type is text/html.
+func (chain *ProxyChain) AddHTMLTokenRewriter(rewriter rr.IHTMLTokenRewriter) *ProxyChain {
+	chain.htmlTokenRewriters = append(chain.htmlTokenRewriters, rewriter)
+	return chain
+}
+
 // SetFiberCtx takes the request ctx from the client
 // for the modifiers and execute function to use.
 // it must be set everytime a new request comes through
@@ -398,3 +339,86 @@ func NewProxyChain() *ProxyChain {
 	chain.Client = http.DefaultClient
 	return chain
 }
+
+/// ========================================================================================================
+
+// _execute applies the request modifications, sends the request upstream,
+// applies the response modifications and returns the response body as a stream.
+// When HTML token rewriters are queued and the response content-type starts with
+// "text/html", the body is wrapped in an rr.HTMLRewriter; otherwise the upstream
+// body is returned untouched.
+// The caller is responsible for returning a response back to the requestor
+// and for calling chain._reset() when they are done with the body.
+func (chain *ProxyChain) _execute() (io.Reader, error) {
+	// return the validation error itself, so a failed validation can never
+	// produce (nil, nil) regardless of whether abortErr was populated
+	if err := chain.validateCtxIsSet(); err != nil {
+		return nil, err
+	}
+	if chain.abortErr != nil {
+		return nil, chain.abortErr
+	}
+	if chain.Request == nil {
+		return nil, errors.New("proxychain request not yet initialized")
+	}
+	if chain.Request.URL.Scheme == "" {
+		return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues")
+	}
+
+	// Apply requestModifications to proxychain
+	for _, applyRequestModificationsTo := range chain.requestModifications {
+		err := applyRequestModificationsTo(chain)
+		if err != nil {
+			return nil, chain.abort(err)
+		}
+	}
+
+	// Send Request Upstream
+	resp, err := chain.Client.Do(chain.Request)
+	if err != nil {
+		return nil, chain.abort(err)
+	}
+	chain.Response = resp
+
+	/* todo: move to rsm
+	for k, v := range resp.Header {
+		chain.Context.Set(k, resp.Header.Get(k))
+	}
+	*/
+
+	// Apply ResponseModifiers to proxychain
+	for _, applyResultModificationsTo := range chain.resultModifications {
+		err := applyResultModificationsTo(chain)
+		if err != nil {
+			return nil, chain.abort(err)
+		}
+	}
+
+	// stream the body back untouched unless rewriters are queued and the response is HTML
+	ct := chain.Response.Header.Get("content-type")
+	if len(chain.htmlTokenRewriters) == 0 || !strings.HasPrefix(ct, "text/html") {
+		return chain.Response.Body, nil
+	}
+
+	// rewrite the HTML token by token while the client streams the body
+	return rr.NewHTMLRewriter(chain.Response.Body, chain.htmlTokenRewriters), nil
+}
+
+// Execute runs the whole chain for the current request and streams the
+// (possibly rewritten) upstream body back to the client through the fiber context.
+// The chain is reset on return so that the ProxyChain can be reused.
+// If any step in the ProxyChain fails, the error is logged and returned to the caller.
+func (chain *ProxyChain) Execute() error {
+	defer chain._reset()
+
+	stream, execErr := chain._execute()
+	switch {
+	case execErr != nil:
+		log.Println(execErr)
+		return execErr
+	case chain.Context == nil:
+		return errors.New("no context set")
+	}
+
+	// mirror the upstream content-type, then stream the body to the client
+	contentType := chain.Response.Header.Get("content-type")
+	chain.Context.Set("content-type", contentType)
+	return chain.Context.SendStream(stream)
+}
--- a/proxychain/responsemodifers/bypass_csp.go
+++ b/proxychain/responsemodifers/bypass_csp.go
@@ -4,6 +4,9 @@ import (
 	"ladder/proxychain"
 )

+// TODO: handle edge case where CSP is specified in meta tag:
+// <meta http-equiv="Content-Security-Policy" content="default-src 'self'">
+
 // BypassContentSecurityPolicy modifies response headers to prevent the browser
 // from enforcing any CSP restrictions. This should run at the end of the chain.
 func BypassContentSecurityPolicy() proxychain.ResponseModification {
--- a/proxychain/responsemodifers/inject_script.go
+++ b/proxychain/responsemodifers/inject_script.go
@@ -0,0 +1,27 @@
+package responsemodifers
+
+import (
+	_ "embed"
+	"ladder/proxychain"
+	"ladder/proxychain/responsemodifers/rewriters"
+	"strings"
+)
+
+// InjectScript modifies HTTP responses
+// to execute javascript at a particular time.
+func InjectScript(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
+	modify := func(chain *proxychain.ProxyChain) error {
+		contentType := chain.Response.Header.Get("content-type")
+		if isHTML := strings.HasPrefix(contentType, "text/html"); isHTML {
+			// the rewriter is only queued here; the rewriting itself happens while
+			// chain.Execute() streams the response body back to the client
+			chain.AddHTMLTokenRewriter(rewriters.NewScriptInjectorRewriter(js, execTime))
+		}
+		// non-HTML responses are left alone
+		return nil
+	}
+	return modify
+}
--- a/proxychain/responsemodifers/rewrite_http_resource_urls.go
+++ b/proxychain/responsemodifers/rewrite_http_resource_urls.go
@@ -13,24 +13,9 @@ import (
 //   - `<img src='/relative_path'>` -> `<img src='/https://proxiedsite.com/relative_path'>`
 //   - This function is designed to allow the proxified page
 //     to still be browsible by routing all resource URLs through the proxy.
-//
-// ---
-//
-//   - It works by replacing the io.ReadCloser of the http.Response.Body
-//     with another io.ReaderCloser (HTMLResourceRewriter) that wraps the first one.
-//
-//   - This process can be done multiple times, so that the response will
-//     be streamed and modified through each pass without buffering the entire response in memory.
-//
-//   - HTMLResourceRewriter reads the http.Response.Body stream,
-//     parsing each HTML token one at a time and replacing attribute tags.
-//
-//   - When ProxyChain.Execute() is called, the response body will be read from the server
-//     and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
-//     without ever buffering the entire HTTP response in memory.
 func RewriteHTMLResourceURLs() proxychain.ResponseModification {
 	return func(chain *proxychain.ProxyChain) error {
-		// return early if it's not HTML
+		// don't add rewriter if it's not even html
 		ct := chain.Response.Header.Get("content-type")
 		if !strings.HasPrefix(ct, "text/html") {
 			return nil
@@ -40,12 +25,10 @@ func RewriteHTMLResourceURLs() proxychain.ResponseModification {
 		originalURI := chain.Context.Request().URI()
 		proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())

-		chain.Response.Body = rewriters.
-			NewHTMLResourceURLRewriter(
-				chain.Response.Body,
-				chain.Request.URL,
-				proxyURL,
-			)
+		// the rewriting actually happens in chain.Execute() as the client is streaming the response body back
+		rr := rewriters.NewHTMLTokenURLRewriter(chain.Request.URL, proxyURL)
+		// we just queue it up here
+		chain.AddHTMLTokenRewriter(rr)

 		return nil
 	}
--- a/proxychain/responsemodifers/rewriters/after_dom_idle_script_injector.js
+++ b/proxychain/responsemodifers/rewriters/after_dom_idle_script_injector.js
@@ -0,0 +1,27 @@
+(() => {
+    document.addEventListener('DOMContentLoaded', (event) => {
+        initIdleMutationObserver();
+    });
+    
+    function initIdleMutationObserver() {
+        let debounceTimer;
+        const debounceDelay = 500; // adjust the delay as needed
+    
+        const observer = new MutationObserver((mutations) => {
+            // Clear the previous timer and set a new one
+            clearTimeout(debounceTimer);
+            debounceTimer = setTimeout(() => {
+                execute();
+                observer.disconnect(); // Disconnect after first execution
+            }, debounceDelay);
+        });
+    
+        const config = { attributes: false, childList: true, subtree: true };
+        observer.observe(document.body, config);
+    }
+    
+    function execute() {
+        'SCRIPT_CONTENT_PARAM'
+        //console.log('DOM is now idle. Executing...');
+    }
+})();
--- a/proxychain/responsemodifers/rewriters/css_resource_url_rewriter.go
+++ b/proxychain/responsemodifers/rewriters/css_resource_url_rewriter.go
--- a/proxychain/responsemodifers/rewriters/html_resource_url_rewriter.go
+++ b/proxychain/responsemodifers/rewriters/html_resource_url_rewriter.go
@@ -1,344 +0,0 @@
-package rewriters
-
-import (
-	"bytes"
-	_ "embed"
-	"fmt"
-	"io"
-	"log"
-	"net/url"
-	"strings"
-
-	"golang.org/x/net/html"
-)
-
-var attributesToRewrite map[string]bool
-var schemeBlacklist map[string]bool
-
-func init() {
-	// Define list of HTML attributes to try to rewrite
-	attributesToRewrite = map[string]bool{
-		"src":         true,
-		"href":        true,
-		"action":      true,
-		"srcset":      true,
-		"poster":      true,
-		"data":        true,
-		"cite":        true,
-		"formaction":  true,
-		"background":  true,
-		"usemap":      true,
-		"longdesc":    true,
-		"manifest":    true,
-		"archive":     true,
-		"codebase":    true,
-		"icon":        true,
-		"pluginspage": true,
-	}
-
-	// define URIs to NOT rewrite
-	// for example: don't overwrite <img src="data:image/png;base64;iVBORw...">"
-	schemeBlacklist = map[string]bool{
-		"data":       true,
-		"tel":        true,
-		"mailto":     true,
-		"file":       true,
-		"blob":       true,
-		"javascript": true,
-		"about":      true,
-		"magnet":     true,
-		"ws":         true,
-		"wss":        true,
-		"ftp":        true,
-	}
-}
-
-// HTMLResourceURLRewriter is a struct that rewrites URLs within HTML resources to use a specified proxy URL.
-// It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes.
-// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
-type HTMLResourceURLRewriter struct {
-	baseURL               *url.URL
-	tokenizer             *html.Tokenizer
-	currentToken          html.Token
-	tokenBuffer           *bytes.Buffer
-	scriptContentBuffer   *bytes.Buffer
-	insideScript          bool
-	currentTokenIndex     int
-	currentTokenProcessed bool
-	proxyURL              string // ladder URL, not proxied site URL
-}
-
-// NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter.
-// It initializes the tokenizer with the provided source and sets the proxy URL.
-func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL, proxyURL string) *HTMLResourceURLRewriter {
-	return &HTMLResourceURLRewriter{
-		tokenizer:           html.NewTokenizer(src),
-		currentToken:        html.Token{},
-		currentTokenIndex:   0,
-		tokenBuffer:         new(bytes.Buffer),
-		scriptContentBuffer: new(bytes.Buffer),
-		insideScript:        false,
-		baseURL:             baseURL,
-		proxyURL:            proxyURL,
-	}
-}
-
-// Close resets the internal state of HTMLResourceURLRewriter, clearing buffers and token data.
-func (r *HTMLResourceURLRewriter) Close() error {
-	r.tokenBuffer.Reset()
-	r.currentToken = html.Token{}
-	r.currentTokenIndex = 0
-	r.currentTokenProcessed = false
-	return nil
-}
-
-// Read processes the HTML content, rewriting URLs and managing the state of tokens.
-// It reads HTML content, token by token, rewriting URLs to route through the specified proxy.
-func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
-
-	if r.currentToken.Data == "" || r.currentTokenProcessed {
-		tokenType := r.tokenizer.Next()
-
-		// done reading html, close out reader
-		if tokenType == html.ErrorToken {
-			if r.tokenizer.Err() == io.EOF {
-				return 0, io.EOF
-			}
-			return 0, r.tokenizer.Err()
-		}
-
-		// flush the current token into an internal buffer
-		// to handle fragmented tokens
-		r.currentToken = r.tokenizer.Token()
-
-		// patch tokens with URLs
-		isTokenWithAttribute := r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken
-		if isTokenWithAttribute {
-			patchResourceURL(&r.currentToken, r.baseURL, r.proxyURL)
-		}
-
-		r.tokenBuffer.Reset()
-
-		// unescape script contents, not sure why tokenizer will escape things
-		switch tokenType {
-		case html.StartTagToken:
-			if r.currentToken.Data == "script" {
-				r.insideScript = true
-				r.scriptContentBuffer.Reset() // Reset buffer for new script contents
-			}
-			r.tokenBuffer.WriteString(r.currentToken.String()) // Write the start tag
-		case html.EndTagToken:
-			if r.currentToken.Data == "script" {
-				r.insideScript = false
-				modScript := modifyInlineScript(r.scriptContentBuffer)
-				r.tokenBuffer.WriteString(modScript)
-			}
-			r.tokenBuffer.WriteString(r.currentToken.String())
-		default:
-			if r.insideScript {
-				r.scriptContentBuffer.WriteString(r.currentToken.String())
-			} else {
-				r.tokenBuffer.WriteString(r.currentToken.String())
-			}
-		}
-
-		// inject <script> right after <head>
-		isHeadToken := (r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken) && r.currentToken.Data == "head"
-		if isHeadToken {
-			params := map[string]string{
-				"R_PROXYURL": r.proxyURL,
-				"R_BASEURL":  fmt.Sprintf("%s://%s", r.baseURL.Scheme, r.baseURL.Host),
-			}
-			injectScriptWithParams(r.tokenBuffer, rewriteJSResourceUrlsScript, params)
-		}
-
-		r.currentTokenProcessed = false
-		r.currentTokenIndex = 0
-	}
-
-	n, err := r.tokenBuffer.Read(p)
-	if err == io.EOF || r.tokenBuffer.Len() == 0 {
-		r.currentTokenProcessed = true
-		err = nil // EOF in this context is expected and not an actual error
-	}
-	return n, err
-}
-
-// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
-//
-//go:embed js_resource_url_rewriter.js
-var rewriteJSResourceUrlsScript string
-
-func injectScript(tokenBuffer *bytes.Buffer, script string) {
-	tokenBuffer.WriteString(
-		fmt.Sprintf("\n<script>\n%s\n</script>\n", script),
-	)
-}
-
-func injectScriptWithParams(tokenBuffer *bytes.Buffer, script string, params map[string]string) {
-	for old, new := range params {
-		script = strings.ReplaceAll(script, old, new)
-	}
-	tokenBuffer.WriteString(
-		fmt.Sprintf("\n<script>\n%s\n</script>\n", script),
-	)
-}
-
-// possible ad-blocking / bypassing opportunity here
-func modifyInlineScript(scriptContentBuffer *bytes.Buffer) string {
-	return html.UnescapeString(scriptContentBuffer.String())
-}
-
-// Root-relative URLs: These are relative to the root path and start with a "/".
-func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
-	// doublecheck this is a valid relative URL
-	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
-	_, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
-	if err != nil {
-		log.Println(err)
-		return
-	}
-
-	//log.Printf("BASEURL patch:  %s\n", baseURL)
-
-	attr.Val = fmt.Sprintf(
-		"/%s://%s/%s",
-		baseURL.Scheme,
-		baseURL.Host,
-		strings.TrimPrefix(attr.Val, "/"),
-	)
-	attr.Val = url.QueryEscape(attr.Val)
-	attr.Val = fmt.Sprintf("/%s", attr.Val)
-
-	log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
-func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
-	attr.Val = fmt.Sprintf(
-		"%s://%s/%s%s",
-		baseURL.Scheme,
-		strings.Trim(baseURL.Host, "/"),
-		strings.Trim(baseURL.RawPath, "/"),
-		strings.Trim(attr.Val, "/"),
-	)
-	attr.Val = url.QueryEscape(attr.Val)
-	attr.Val = fmt.Sprintf("/%s", attr.Val)
-	log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
-func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
-	attr.Val = strings.TrimPrefix(attr.Val, "/")
-	handleRootRelativePath(attr, baseURL)
-	log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
-	// check if valid URL
-	u, err := url.Parse(attr.Val)
-	if err != nil {
-		return
-	}
-	if !(u.Scheme == "http" || u.Scheme == "https") {
-		return
-	}
-	attr.Val = fmt.Sprintf(
-		"/%s",
-		url.QueryEscape(
-			strings.TrimPrefix(attr.Val, "/"),
-		),
-	)
-	log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
-	var srcSetBuilder strings.Builder
-	srcSetItems := strings.Split(attr.Val, ",")
-
-	for i, srcItem := range srcSetItems {
-		srcParts := strings.Fields(srcItem) // Fields splits around whitespace, trimming them
-
-		if len(srcParts) == 0 {
-			continue // skip empty items
-		}
-
-		// Process URL part
-		urlPart := processURLPart(srcParts[0], baseURL)
-
-		// First srcset item without a descriptor
-		if i == 0 && (len(srcParts) == 1 || !strings.HasSuffix(srcParts[1], "x")) {
-			srcSetBuilder.WriteString(urlPart)
-		} else {
-			srcSetBuilder.WriteString(fmt.Sprintf("%s %s", urlPart, srcParts[1]))
-		}
-
-		if i < len(srcSetItems)-1 {
-			srcSetBuilder.WriteString(",") // Add comma for all but last item
-		}
-	}
-
-	attr.Val = srcSetBuilder.String()
-	log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-// only for srcset
-func processURLPart(urlPart string, baseURL *url.URL) string {
-	f := &html.Attribute{Val: urlPart, Key: "src"}
-
-	switch {
-	case strings.HasPrefix(urlPart, "//"):
-		handleProtocolRelativePath(f, baseURL)
-	case strings.HasPrefix(urlPart, "/"):
-		handleRootRelativePath(f, baseURL)
-	case strings.HasPrefix(urlPart, "https://"), strings.HasPrefix(urlPart, "http://"):
-		handleAbsolutePath(f, baseURL)
-	default:
-		handleDocumentRelativePath(f, baseURL)
-	}
-
-	return f.Val
-}
-
-func isBlackedlistedScheme(url string) bool {
-	spl := strings.Split(url, ":")
-	if len(spl) == 0 {
-		return false
-	}
-	scheme := spl[0]
-	return schemeBlacklist[scheme]
-}
-
-func patchResourceURL(token *html.Token, baseURL *url.URL, proxyURL string) {
-	for i := range token.Attr {
-		attr := &token.Attr[i]
-
-		switch {
-		// don't touch attributes except for the ones we defined
-		case !attributesToRewrite[attr.Key]:
-			continue
-		// don't rewrite special URIs that don't make network requests
-		case isBlackedlistedScheme(attr.Val):
-			continue
-		// don't double-overwrite the url
-		case strings.HasPrefix(attr.Val, proxyURL):
-			continue
-		case attr.Key == "srcset":
-			handleSrcSet(attr, baseURL)
-			continue
-		case strings.HasPrefix(attr.Val, "//"):
-			handleProtocolRelativePath(attr, baseURL)
-			continue
-		case strings.HasPrefix(attr.Val, "/"):
-			handleRootRelativePath(attr, baseURL)
-			continue
-		case strings.HasPrefix(attr.Val, "https://") || strings.HasPrefix(attr.Val, "http://"):
-			handleAbsolutePath(attr, baseURL)
-			continue
-		default:
-			handleDocumentRelativePath(attr, baseURL)
-			continue
-		}
-
-	}
-}
--- a/proxychain/responsemodifers/rewriters/html_rewriter.go
+++ b/proxychain/responsemodifers/rewriters/html_rewriter.go
@@ -0,0 +1,131 @@
+package rewriters
+
+import (
+	"bytes"
+	"io"
+
+	"golang.org/x/net/html"
+)
+
+// IHTMLTokenRewriter defines an interface for modifying HTML tokens.
+// HTMLRewriter.Read offers every token it reads to each registered rewriter,
+// in registration order.
+type IHTMLTokenRewriter interface {
+	// ShouldModify determines whether a given HTML token requires modification.
+	// ModifyToken is only called for tokens where this returns true.
+	ShouldModify(*html.Token) bool
+
+	// ModifyToken applies modifications to a given HTML token in place.
+	// It returns strings representing content to be written before ('prepend')
+	// and after ('append') the serialized token. If no extra content is required
+	// or if an error occurs, it returns empty strings for both.
+	// Note: The original token is not modified if an error occurs.
+	ModifyToken(*html.Token) (prepend, append string)
+}
+
+// HTMLRewriter takes multiple IHTMLTokenRewriters and processes all HTML tokens
+// from a source (typically http.Response.Body) in a single pass of the tokenizer.
+//
+//   - HTMLRewriter reads the source stream one HTML token at a time and lets
+//     every registered IHTMLTokenRewriter modify the token before it is emitted.
+//
+//   - Only the serialized form of the current token is buffered, so the
+//     response is never held in memory as a whole.
+//
+//   - It implements io.ReadCloser; Close also closes the wrapped source.
+type HTMLRewriter struct {
+	// src is the wrapped source; kept so that Close can release it.
+	src                   io.Closer
+	tokenizer             *html.Tokenizer
+	currentToken          *html.Token
+	tokenBuffer           *bytes.Buffer
+	currentTokenProcessed bool
+	rewriters             []IHTMLTokenRewriter
+}
+
+// NewHTMLRewriter creates a new HTMLRewriter instance.
+// The HTMLRewriter reads HTML tokens from 'src', passes each one through the
+// given rewriters and exposes the modified document through Read, so it can be
+// used to stream the modified content back to the client.
+//
+// Parameters:
+//   - src: An io.ReadCloser representing the source of the HTML content, such as http.Response.Body.
+//   - rewriters: A slice of IHTMLTokenRewriters that define the modifications to be applied to the HTML tokens.
+//
+// Returns:
+//   - A pointer to an HTMLRewriter, which implements io.ReadCloser and takes
+//     ownership of closing 'src'.
+func NewHTMLRewriter(src io.ReadCloser, rewriters []IHTMLTokenRewriter) *HTMLRewriter {
+	return &HTMLRewriter{
+		src:                   src,
+		tokenizer:             html.NewTokenizer(src),
+		currentToken:          nil,
+		tokenBuffer:           new(bytes.Buffer),
+		currentTokenProcessed: false,
+		rewriters:             rewriters,
+	}
+}
+
+// Close resets the internal state of HTMLRewriter, clearing buffers and token data,
+// and closes the wrapped source so the upstream body is not leaked.
+// It returns the error reported by the source's Close, if any.
+func (r *HTMLRewriter) Close() error {
+	r.tokenBuffer.Reset()
+	r.currentToken = nil
+	r.currentTokenProcessed = false
+	if r.src == nil {
+		return nil
+	}
+	return r.src.Close()
+}
+
+// Read processes the HTML content, rewriting URLs and managing the state of tokens.
+func (r *HTMLRewriter) Read(p []byte) (int, error) {
+
+	if r.currentToken == nil || r.currentToken.Data == "" || r.currentTokenProcessed {
+		tokenType := r.tokenizer.Next()
+
+		// done reading html, close out reader
+		if tokenType == html.ErrorToken {
+			if r.tokenizer.Err() == io.EOF {
+				return 0, io.EOF
+			}
+			return 0, r.tokenizer.Err()
+		}
+
+		// get the next token; reset buffer
+		t := r.tokenizer.Token()
+		r.currentToken = &t
+		r.tokenBuffer.Reset()
+
+		// buffer += "<prepends> <token> <appends>"
+		// process token through all registered rewriters
+		// rewriters will modify the token, and optionally
+		// return a <prepend> or <append> string token
+		appends := make([]string, 0, len(r.rewriters))
+		for _, rewriter := range r.rewriters {
+			if !rewriter.ShouldModify(r.currentToken) {
+				continue
+			}
+			prepend, a := rewriter.ModifyToken(r.currentToken)
+			appends = append(appends, a)
+			// add <prepends> to buffer
+			r.tokenBuffer.WriteString(prepend)
+		}
+
+		// add <token> to buffer
+		if tokenType == html.TextToken {
+			// don't unescape textTokens (such as inline scripts).
+			// Token.String() by default will escape the inputs, but
+			// we don't want to modify the original source
+			r.tokenBuffer.WriteString(r.currentToken.Data)
+		} else {
+			r.tokenBuffer.WriteString(r.currentToken.String())
+		}
+
+		// add <appends> to buffer
+		for _, a := range appends {
+			r.tokenBuffer.WriteString(a)
+		}
+
+		r.currentTokenProcessed = false
+	}
+
+	n, err := r.tokenBuffer.Read(p)
+	if err == io.EOF || r.tokenBuffer.Len() == 0 {
+		r.currentTokenProcessed = true
+		err = nil // EOF in this context is expected and not an actual error
+	}
+	return n, err
+}
--- a/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go
+++ b/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go
@@ -0,0 +1,263 @@
+package rewriters
+
+import (
+	_ "embed"
+	"fmt"
+	"log"
+	"net/url"
+	"regexp"
+	"strings"
+
+	"golang.org/x/net/html"
+)
+
+// rewriteAttrs maps a tag name to the set of its attributes that may hold a URL.
+// ModifyToken looks it up as rewriteAttrs[tag][attribute].
+var rewriteAttrs map[string]map[string]bool
+
+// specialRewriteAttrs marks the tag/attribute pairs whose value is not a single
+// URL and is therefore routed to handleSpecialAttr by ModifyToken.
+var specialRewriteAttrs map[string]map[string]bool
+
+// schemeBlacklist is the set of URI schemes that must not be rewritten.
+var schemeBlacklist map[string]bool
+
+// init populates the lookup tables above.
+func init() {
+	// define all tag/attributes which might contain URLs
+	// to attempt to rewrite to point to proxy instead
+	rewriteAttrs = map[string]map[string]bool{
+		"img":        {"src": true, "srcset": true, "longdesc": true, "usemap": true},
+		"a":          {"href": true},
+		"form":       {"action": true},
+		"link":       {"href": true, "manifest": true, "icon": true},
+		"script":     {"src": true},
+		"video":      {"src": true, "poster": true},
+		"audio":      {"src": true},
+		"iframe":     {"src": true, "longdesc": true},
+		"embed":      {"src": true},
+		"object":     {"data": true, "codebase": true},
+		"source":     {"src": true, "srcset": true},
+		"track":      {"src": true},
+		"area":       {"href": true},
+		"base":       {"href": true},
+		"blockquote": {"cite": true},
+		"del":        {"cite": true},
+		"ins":        {"cite": true},
+		"q":          {"cite": true},
+		"body":       {"background": true},
+		"button":     {"formaction": true},
+		"input":      {"src": true, "formaction": true},
+		"meta":       {"content": true},
+	}
+
+	// might contain URL but requires special handling
+	// (every pair listed here also appears in rewriteAttrs above)
+	specialRewriteAttrs = map[string]map[string]bool{
+		"img":    {"srcset": true},
+		"source": {"srcset": true},
+		"meta":   {"content": true},
+	}
+
+	// define URIs to NOT rewrite
+	// for example: don't overwrite <img src="data:image/png;base64;iVBORw...">"
+	schemeBlacklist = map[string]bool{
+		"data":       true,
+		"tel":        true,
+		"mailto":     true,
+		"file":       true,
+		"blob":       true,
+		"javascript": true,
+		"about":      true,
+		"magnet":     true,
+		"ws":         true,
+		"wss":        true,
+		"ftp":        true,
+	}
+
+}
+
+// HTMLTokenURLRewriter implements IHTMLTokenRewriter.
+// It rewrites URLs within HTML resources to use a specified proxy URL.
+// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
+type HTMLTokenURLRewriter struct {
+	// baseURL supplies the scheme and host used when rewriting relative URLs.
+	baseURL  *url.URL
+	proxyURL string // ladder URL, not proxied site URL
+}
+
+// NewHTMLTokenURLRewriter creates a new instance of HTMLTokenURLRewriter.
+// baseURL is the URL that relative references are rewritten against and
+// proxyURL is the ladder URL (not the proxied site URL).
+func NewHTMLTokenURLRewriter(baseURL *url.URL, proxyURL string) *HTMLTokenURLRewriter {
+	rewriter := new(HTMLTokenURLRewriter)
+	rewriter.baseURL = baseURL
+	rewriter.proxyURL = proxyURL
+	return rewriter
+}
+
+// ShouldModify reports whether token is a start tag or self-closing tag
+// that carries at least one attribute.
+func (r *HTMLTokenURLRewriter) ShouldModify(token *html.Token) bool {
+	isTag := token.Type == html.StartTagToken || token.Type == html.SelfClosingTagToken
+	return isTag && len(token.Attr) > 0
+}
+
+// ModifyToken rewrites, in place, every URL-bearing attribute of token so that
+// it points at the proxy. It never adds content around the token, so both
+// returned strings are always empty.
+func (r *HTMLTokenURLRewriter) ModifyToken(token *html.Token) (string, string) {
+	for i := range token.Attr {
+		attr := &token.Attr[i]
+
+		// The scheme is whatever precedes the first ':' of the attribute VALUE.
+		// (Looking at the attribute key instead would never match a scheme and
+		// would wrongly skip every attribute that is itself named "data".)
+		// Schemes are case-insensitive, hence the lower-casing.
+		scheme := strings.ToLower(strings.TrimSpace(strings.SplitN(attr.Val, ":", 2)[0]))
+
+		switch {
+		// don't touch tag/attributes that don't contain URIs
+		case !rewriteAttrs[token.Data][attr.Key]:
+			continue
+		// don't touch attributes with special URIs (like data:)
+		case schemeBlacklist[scheme]:
+			continue
+		// don't double-overwrite the url
+		case strings.HasPrefix(attr.Val, r.proxyURL):
+			continue
+		case strings.HasPrefix(attr.Val, "/http://"):
+			continue
+		case strings.HasPrefix(attr.Val, "/https://"):
+			continue
+		// handle special rewrites
+		case specialRewriteAttrs[token.Data][attr.Key]:
+			r.handleSpecialAttr(token, attr, r.baseURL)
+			continue
+		default:
+			// rewrite url
+			handleURLPart(attr, r.baseURL)
+		}
+	}
+	return "", ""
+}
+
+// handleURLPart dispatches attr to the matching rewrite handler based on the
+// form of the URL held in the attribute value:
+// protocol-relative ("//host/..."), root-relative ("/..."), absolute
+// ("http://" or "https://"), or document-relative (everything else).
+func handleURLPart(attr *html.Attribute, baseURL *url.URL) {
+	// the URL lives in attr.Val; attr.Key is only the attribute name (src, href, ...)
+	switch {
+	// must be tested before the single-slash case, which it would also match
+	case strings.HasPrefix(attr.Val, "//"):
+		handleProtocolRelativePath(attr, baseURL)
+	case strings.HasPrefix(attr.Val, "/"):
+		handleRootRelativePath(attr, baseURL)
+	case strings.HasPrefix(attr.Val, "https://"):
+		handleAbsolutePath(attr, baseURL)
+	case strings.HasPrefix(attr.Val, "http://"):
+		handleAbsolutePath(attr, baseURL)
+	default:
+		handleDocumentRelativePath(attr, baseURL)
+	}
+}
+
+// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
+// The host named in the URL is kept: "//cdn.site.com/lib.js" on an https page
+// is rewritten as the absolute URL "https://cdn.site.com/lib.js", not as a
+// path on the proxied page's own host.
+func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
+	if baseURL.Scheme == "" {
+		// no protocol to inherit; leave the attribute as it was
+		return
+	}
+	// "//host/path" -> "<scheme>://host/path", then treat it like any absolute URL
+	attr.Val = baseURL.Scheme + ":" + attr.Val
+	handleAbsolutePath(attr, baseURL)
+	log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// Root-relative URLs: These are relative to the root path and start with a "/".
+// The value is re-rooted on baseURL's scheme and host, passed through escape
+// and prefixed with "/". Values that do not parse as a URL path are logged
+// and left unchanged.
+func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
+	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+
+	// sanity check: the value must parse when appended to a placeholder origin
+	if _, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val)); err != nil {
+		log.Println(err)
+		return
+	}
+
+	rel := strings.TrimPrefix(attr.Val, "/")
+	rerooted := fmt.Sprintf("/%s://%s/%s", baseURL.Scheme, baseURL.Host, rel)
+	attr.Val = fmt.Sprintf("/%s", escape(rerooted))
+
+	log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
+//
+// handleDocumentRelativePath rewrites such a value to
+// "/<base scheme>://<base host>/<base directory>/<value>", where the base
+// directory is baseURL.Path up to its last "/" (the document's own file name
+// is dropped). Empty and fragment-only ("#...") values are left unchanged
+// because they refer to the document that is already being displayed.
+func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
+	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+
+	if attr.Val == "" || strings.HasPrefix(attr.Val, "#") {
+		return
+	}
+
+	// Directory part of the base document. baseURL.Path is used rather than
+	// RawPath: RawPath is only populated when the path needs a non-default
+	// encoding, and escape() below does the encoding anyway.
+	dir := ""
+	if i := strings.LastIndex(baseURL.Path, "/"); i > 0 {
+		if trimmed := strings.Trim(baseURL.Path[:i], "/"); trimmed != "" {
+			dir = trimmed + "/"
+		}
+	}
+
+	attr.Val = fmt.Sprintf(
+		"%s://%s/%s%s",
+		baseURL.Scheme,
+		strings.Trim(baseURL.Host, "/"),
+		dir,
+		strings.Trim(attr.Val, "/"),
+	)
+	attr.Val = escape(attr.Val)
+	attr.Val = fmt.Sprintf("/%s", attr.Val)
+	log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// handleAbsolutePath rewrites a full "http://" or "https://" URL by escaping
+// it and prefixing it with "/". Values that fail to parse, or whose scheme is
+// neither http nor https, are left unchanged. baseURL is not consulted.
+func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
+	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+
+	// only well-formed http(s) URLs are rewritten
+	parsed, err := url.Parse(attr.Val)
+	if err != nil {
+		return
+	}
+	switch parsed.Scheme {
+	case "http", "https":
+	default:
+		return
+	}
+
+	upstream := strings.TrimPrefix(attr.Val, "/")
+	attr.Val = fmt.Sprintf("/%s", escape(upstream))
+	log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// handle edge cases for special attributes
+func (r *HTMLTokenURLRewriter) handleSpecialAttr(token *html.Token, attr *html.Attribute, baseURL *url.URL) {
+	switch {
+	// srcset attribute doesn't contain a single URL but a comma-separated list of URLs, each potentially followed by a space and a descriptor (like a width, pixel density, or other conditions).
+	case token.Data == "img" && attr.Key == "srcset":
+		handleSrcSet(attr, baseURL)
+	case token.Data == "source" && attr.Key == "srcset":
+		handleSrcSet(attr, baseURL)
+	// meta with http-equiv="refresh": The content attribute of a meta tag, when used for a refresh directive, contains a time interval followed by a URL, like content="5;url=http://example.com/".
+	case token.Data == "meta" && attr.Key == "content" && regexp.MustCompile(`^\d+;url=`).MatchString(attr.Val):
+		handleMetaRefresh(attr, baseURL)
+	default:
+		break
+	}
+}
+
+func handleMetaRefresh(attr *html.Attribute, baseURL *url.URL) {
+	sec := strings.Split(attr.Val, ";url=")[0]
+	url := strings.Split(attr.Val, ";url=")[1]
+	f := &html.Attribute{Val: url, Key: "src"}
+	handleURLPart(f, baseURL)
+	attr.Val = fmt.Sprintf("%s;url=%s", sec, url)
+}
+
+func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
+	var srcSetBuilder strings.Builder
+	srcSetItems := strings.Split(attr.Val, ",")
+
+	for i, srcItem := range srcSetItems {
+		srcParts := strings.Fields(srcItem) // Fields splits around whitespace, trimming them
+
+		if len(srcParts) == 0 {
+			continue // skip empty items
+		}
+
+		// rewrite each URL part by passing in fake attribute
+		f := &html.Attribute{Val: srcParts[0], Key: "src"}
+		handleURLPart(f, baseURL)
+		urlPart := f.Key
+
+		// First srcset item without a descriptor
+		if i == 0 && (len(srcParts) == 1 || !strings.HasSuffix(srcParts[1], "x")) {
+			srcSetBuilder.WriteString(urlPart)
+		} else {
+			srcSetBuilder.WriteString(fmt.Sprintf("%s %s", urlPart, srcParts[1]))
+		}
+
+		if i < len(srcSetItems)-1 {
+			srcSetBuilder.WriteString(",") // Add comma for all but last item
+		}
+	}
+
+	attr.Val = srcSetBuilder.String()
+	log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// escape percent-encodes str with url.PathEscape and then turns every "%2F"
+// back into a literal "/", so path separators survive the encoding.
+func escape(str string) string {
+	encoded := url.PathEscape(str)
+	return strings.ReplaceAll(encoded, "%2F", "/")
+}
--- a/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js
+++ b/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js
@@ -284,4 +284,34 @@ const originalSetters = {};
    });


+})();
+
+
+
+(() => {
+    document.addEventListener('DOMContentLoaded', (event) => {
+        initIdleMutationObserver();
+    });
+    
+    function initIdleMutationObserver() {
+        let debounceTimer;
+        const debounceDelay = 500; // adjust the delay as needed
+    
+        const observer = new MutationObserver((mutations) => {
+            // Clear the previous timer and set a new one
+            clearTimeout(debounceTimer);
+            debounceTimer = setTimeout(() => {
+                execute();
+                observer.disconnect(); // Disconnect after first execution
+            }, debounceDelay);
+        });
+    
+        const config = { attributes: false, childList: true, subtree: true };
+        observer.observe(document.body, config);
+    }
+    
+    function execute() {
+        console.log('DOM is now idle. Executing...');
+    }
+
 })();
--- a/proxychain/responsemodifers/rewriters/script_injector_rewriter.go
+++ b/proxychain/responsemodifers/rewriters/script_injector_rewriter.go
@@ -0,0 +1,60 @@
+package rewriters
+
+import (
+	_ "embed"
+	"fmt"
+	"strings"
+
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
+)
+
+// ScriptInjectorRewriter is an HTML token rewriter (see IHTMLTokenRewriter) that injects JS into the page.
+// Its ShouldModify method selects the opening <head> tag, and its ModifyToken method
+// returns the configured script wrapped in a <script> element according to execTime.
+type ScriptInjectorRewriter struct {
+	execTime ScriptExecTime // selects how ModifyToken wraps the script
+	script   string // JavaScript source that is injected
+}
+
+type ScriptExecTime int // ScriptExecTime selects when an injected script is meant to run.
+
+const (
+	BeforeDOMContentLoaded ScriptExecTime = iota // script is emitted as a plain <script> element
+	AfterDOMContentLoaded // script is wrapped in a DOMContentLoaded event listener
+	AfterDOMIdle // script is spliced into the embedded after_dom_idle_script_injector.js template
+)
+
+// ShouldModify reports whether token is an opening <head> tag, the only token
+// this rewriter acts on.
+func (r *ScriptInjectorRewriter) ShouldModify(token *html.Token) bool {
+	if token.Type != html.StartTagToken {
+		return false
+	}
+	return token.DataAtom == atom.Head
+}
+
+//go:embed after_dom_idle_script_injector.js
+var afterDomIdleScriptInjector string // embedded JS template; ModifyToken replaces its first 'SCRIPT_CONTENT_PARAM' with the script
+
+func (r *ScriptInjectorRewriter) ModifyToken(token *html.Token) (string, string) {
+	switch {
+	case r.execTime == BeforeDOMContentLoaded:
+		return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", r.script)
+
+	case r.execTime == AfterDOMContentLoaded:
+		return "", fmt.Sprintf("\n<script>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.script)
+
+	case r.execTime == AfterDOMIdle:
+		s := strings.Replace(afterDomIdleScriptInjector, `'SCRIPT_CONTENT_PARAM'`, r.script, 1)
+		return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", s)
+
+	default:
+		return "", ""
+	}
+}
+
+// NewScriptInjectorRewriter returns a ScriptInjectorRewriter that injects
+// script into the page, wrapped so that it runs at the moment selected by
+// execTime.
+func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
+	injector := new(ScriptInjectorRewriter)
+	injector.script = script
+	injector.execTime = execTime
+	return injector
+}