fix "modifer" -> "modifier" typo everywhere

2023-12-03 17:04:30 -06:00
parent 117ded5653
commit 6c0721dcb8
67 changed files with 73 additions and 77 deletions
--- a/proxychain/responsemodifiers/rewriters/after_dom_idle_script_injector.js
+++ b/proxychain/responsemodifiers/rewriters/after_dom_idle_script_injector.js
@@ -0,0 +1,28 @@
+(() => {
+    document.addEventListener("DOMContentLoaded", (event) => {
+        initIdleMutationObserver();
+    });
+
+    function initIdleMutationObserver() {
+        let debounceTimer;
+        const debounceDelay = 500; // adjust the delay as needed
+
+        const observer = new MutationObserver((mutations) => {
+            // Clear the previous timer and set a new one
+            clearTimeout(debounceTimer);
+            debounceTimer = setTimeout(() => {
+                execute();
+                observer.disconnect(); // Disconnect after first execution
+            }, debounceDelay);
+        });
+
+        const config = { attributes: false, childList: true, subtree: true };
+        observer.observe(document.body, config);
+    }
+
+    function execute() {
+        "{{AFTER_DOM_IDLE_SCRIPT}}";
+        //console.log('DOM is now idle. Executing...');
+    }
+})();
+
--- a/proxychain/responsemodifiers/rewriters/css_rewriter.go
+++ b/proxychain/responsemodifiers/rewriters/css_rewriter.go
@@ -0,0 +1,3 @@
+package rewriters
+
+// todo: implement
--- a/proxychain/responsemodifiers/rewriters/html_rewriter.go
+++ b/proxychain/responsemodifiers/rewriters/html_rewriter.go
@@ -0,0 +1,133 @@
+package rewriters
+
+import (
+	"bytes"
+	"io"
+
+	"golang.org/x/net/html"
+)
+
+// IHTMLTokenRewriter defines an interface for modifying HTML tokens.
+type IHTMLTokenRewriter interface {
+	// ShouldModify reports whether the given HTML token requires modification.
+	ShouldModify(*html.Token) bool
+
+	// ModifyToken applies modifications to the given HTML token in place.
+	// It returns strings representing content to be emitted immediately before
+	// ('prepend') and after ('append') the token. If no modifications are required
+	// or if an error occurs, it returns empty strings for both.
+	// Note: The original token is not modified if an error occurs.
+	ModifyToken(*html.Token) (prepend, append string)
+}
+
+// HTMLRewriter is a struct that can take multiple IHTMLTokenRewriters and process all
+// HTML tokens from http.Response.Body in a single pass, making changes and returning a new io.ReadCloser
+//
+//   - HTMLRewriter reads the http.Response.Body stream,
+//     parsing each HTML token one at a time and making modifications (defined by implementations of IHTMLTokenRewriter)
+//
+//   - When ProxyChain.Execute() is called, the response body will be read from the server
+//     and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
+//     without ever buffering the entire HTTP response in memory.
+type HTMLRewriter struct {
+	tokenizer             *html.Tokenizer      // streaming tokenizer over the source body
+	currentToken          *html.Token          // token most recently pulled from the tokenizer; nil until one has been read
+	tokenBuffer           *bytes.Buffer        // rewritten bytes of the current token not yet handed to the caller
+	currentTokenProcessed bool                 // true once tokenBuffer has been drained for currentToken
+	rewriters             []IHTMLTokenRewriter // applied to every token, in slice order
+}
+
+// NewHTMLRewriter builds an HTMLRewriter that tokenizes 'src' and pipes every
+// token through the supplied rewriters, in the order given.
+//
+// Parameters:
+//   - src: the HTML source to read from, such as http.Response.Body.
+//   - rewriters: the IHTMLTokenRewriters applied to each token.
+//
+// Returns:
+//   - A pointer to an HTMLRewriter whose Read method yields the rewritten HTML
+//     and which can therefore be streamed back to the client.
+func NewHTMLRewriter(src io.ReadCloser, rewriters ...IHTMLTokenRewriter) *HTMLRewriter {
+	rw := new(HTMLRewriter)
+	rw.tokenizer = html.NewTokenizer(src)
+	rw.tokenBuffer = &bytes.Buffer{}
+	rw.rewriters = rewriters
+	// currentToken (nil) and currentTokenProcessed (false) keep their zero values:
+	// no token has been read yet.
+	return rw
+}
+
+// Close drops any buffered output and forgets the current token, returning the
+// rewriter to its initial token state. It always returns nil.
+// It only touches the rewriter's own fields; the source reader is not referenced here.
+func (r *HTMLRewriter) Close() error {
+	r.currentToken, r.currentTokenProcessed = nil, false
+	r.tokenBuffer.Reset()
+
+	return nil
+}
+
+// Read implements io.Reader. It hands out the rewritten HTML one token at a
+// time: whenever the internal buffer is empty the next token is pulled from the
+// tokenizer, run through the rewriters and serialized into the buffer, and then
+// as much of the buffer as fits is copied into p.
+//
+// The buffer is refilled only once it is completely drained, so a token whose
+// serialized form is larger than p is delivered across several calls without
+// losing bytes. Read returns io.EOF at the end of the document, or the
+// tokenizer's error if tokenization fails.
+func (r *HTMLRewriter) Read(p []byte) (int, error) {
+	if len(p) == 0 {
+		return 0, nil
+	}
+
+	// Loop rather than branch: a token that serializes to nothing must not
+	// surface to the caller as a (0, nil) read.
+	for r.tokenBuffer.Len() == 0 {
+		if err := r.bufferNextToken(); err != nil {
+			return 0, err
+		}
+	}
+
+	n, err := r.tokenBuffer.Read(p)
+	r.currentTokenProcessed = r.tokenBuffer.Len() == 0
+
+	return n, err
+}
+
+// bufferNextToken advances the tokenizer by one token and fills tokenBuffer with
+// "<prepends><token><appends>". It returns io.EOF when the input is exhausted.
+func (r *HTMLRewriter) bufferNextToken() error {
+	tokenType := r.tokenizer.Next()
+
+	// done reading html (io.EOF) or tokenizer failure
+	if tokenType == html.ErrorToken {
+		return r.tokenizer.Err()
+	}
+
+	// get the next token; reset buffer
+	t := r.tokenizer.Token()
+	r.currentToken = &t
+	r.currentTokenProcessed = false
+	r.tokenBuffer.Reset()
+
+	// process token through all registered rewriters
+	// rewriters will modify the token, and optionally
+	// return a <prepend> or <append> string
+	appends := make([]string, 0, len(r.rewriters))
+	for _, rewriter := range r.rewriters {
+		if !rewriter.ShouldModify(r.currentToken) {
+			continue
+		}
+
+		prepend, a := rewriter.ModifyToken(r.currentToken)
+		// add <prepends> to buffer
+		r.tokenBuffer.WriteString(prepend)
+		appends = append(appends, a)
+	}
+
+	// add <token> to buffer
+	if tokenType == html.TextToken {
+		// Text tokens (such as inline scripts) are written as-is, because
+		// Token.String() would escape them.
+		// NOTE(review): for text outside raw-text elements the tokenizer has
+		// presumably already decoded entities in Data - confirm this cannot
+		// turn "&lt;" back into markup.
+		r.tokenBuffer.WriteString(r.currentToken.Data)
+	} else {
+		r.tokenBuffer.WriteString(r.currentToken.String())
+	}
+
+	// add <appends> to buffer
+	for _, a := range appends {
+		r.tokenBuffer.WriteString(a)
+	}
+
+	return nil
+}
--- a/proxychain/responsemodifiers/rewriters/html_token_url_rewriter.go
+++ b/proxychain/responsemodifiers/rewriters/html_token_url_rewriter.go
@@ -0,0 +1,288 @@
+package rewriters
+
+import (
+	_ "embed"
+	"fmt"
+	"log"
+	"net/url"
+	"path"
+	"regexp"
+	"strings"
+
+	"golang.org/x/net/html/atom"
+
+	"golang.org/x/net/html"
+)
+
+var (
+	// rewriteAttrs maps a tag name to the set of its attributes which might
+	// contain URLs; those are rewritten to point at the proxy instead.
+	rewriteAttrs = map[string]map[string]bool{
+		"img":        {"src": true, "srcset": true, "longdesc": true, "usemap": true},
+		"a":          {"href": true},
+		"form":       {"action": true},
+		"link":       {"href": true, "manifest": true, "icon": true},
+		"script":     {"src": true},
+		"video":      {"src": true, "poster": true},
+		"audio":      {"src": true},
+		"iframe":     {"src": true, "longdesc": true},
+		"embed":      {"src": true},
+		"object":     {"data": true, "codebase": true},
+		"source":     {"src": true, "srcset": true},
+		"track":      {"src": true},
+		"area":       {"href": true},
+		"base":       {"href": true},
+		"blockquote": {"cite": true},
+		"del":        {"cite": true},
+		"ins":        {"cite": true},
+		"q":          {"cite": true},
+		"body":       {"background": true},
+		"button":     {"formaction": true},
+		"input":      {"src": true, "formaction": true},
+		"meta":       {"content": true},
+	}
+
+	// specialRewriteAttrs is the subset of tag/attribute pairs that might
+	// contain a URL but require special handling.
+	specialRewriteAttrs = map[string]map[string]bool{
+		"img":    {"srcset": true},
+		"source": {"srcset": true},
+		"meta":   {"content": true},
+	}
+
+	// schemeBlacklist holds the URI schemes that are NOT rewritten,
+	// for example: don't overwrite <img src="data:image/png;base64;iVBORw...">
+	schemeBlacklist = map[string]bool{
+		"data":       true,
+		"tel":        true,
+		"mailto":     true,
+		"file":       true,
+		"blob":       true,
+		"javascript": true,
+		"about":      true,
+		"magnet":     true,
+		"ws":         true,
+		"wss":        true,
+		"ftp":        true,
+	}
+)
+
+// HTMLTokenURLRewriter implements IHTMLTokenRewriter.
+// It rewrites URLs within HTML tag attributes so they are routed through the proxy.
+// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
+type HTMLTokenURLRewriter struct {
+	baseURL  *url.URL // URL of the proxied page; supplies scheme and host for relative URLs
+	proxyURL string   // ladder URL, not proxied site URL
+}
+
+// NewHTMLTokenURLRewriter returns an HTMLTokenURLRewriter that resolves URLs
+// against baseURL and recognises proxyURL as the proxy's own address.
+// baseURL might be https://medium.com/foobar
+// proxyURL is http://localhost:8080
+func NewHTMLTokenURLRewriter(baseURL *url.URL, proxyURL string) *HTMLTokenURLRewriter {
+	rw := new(HTMLTokenURLRewriter)
+	rw.baseURL = baseURL
+	rw.proxyURL = proxyURL
+
+	return rw
+}
+
+// ShouldModify reports whether token is a start tag or self-closing tag that
+// carries at least one attribute; only those tokens can hold URLs to rewrite.
+func (r *HTMLTokenURLRewriter) ShouldModify(token *html.Token) bool {
+	if len(token.Attr) == 0 {
+		return false
+	}
+
+	switch token.Type {
+	case html.StartTagToken, html.SelfClosingTagToken:
+		return true
+	default:
+		return false
+	}
+}
+
+// ModifyToken rewrites, in place, every URL-bearing attribute of token so that
+// it points at the proxy. It never emits prepend/append content and therefore
+// always returns two empty strings.
+//
+// An attribute is left untouched when:
+//   - the tag/attribute pair is not listed in rewriteAttrs,
+//   - its value uses a blacklisted scheme (data:, mailto:, javascript:, ...),
+//   - its value already points at the proxy.
+func (r *HTMLTokenURLRewriter) ModifyToken(token *html.Token) (string, string) {
+	for i := range token.Attr {
+		attr := &token.Attr[i]
+
+		switch {
+		// don't touch tag/attributes that don't contain URIs
+		case !rewriteAttrs[token.Data][attr.Key]:
+		// don't touch attributes with special URIs (like data:)
+		case schemeBlacklist[attrScheme(attr.Val)]:
+		// don't double-overwrite the url; an empty proxyURL would be a prefix
+		// of every value, so it is ignored rather than disabling all rewrites
+		case r.proxyURL != "" && strings.HasPrefix(attr.Val, r.proxyURL):
+		case strings.HasPrefix(attr.Val, "/http://"), strings.HasPrefix(attr.Val, "/https://"):
+		// handle special rewrites
+		case specialRewriteAttrs[token.Data][attr.Key]:
+			r.handleSpecialAttr(token, attr, r.baseURL)
+		default:
+			// rewrite url
+			handleURLPart(attr, r.baseURL)
+		}
+	}
+
+	return "", ""
+}
+
+// attrScheme returns the lower-cased scheme of val (the text before the first
+// ':'), or "" when val contains no ':'. Leading/trailing whitespace is ignored
+// and the result is lower-cased because URI schemes are case-insensitive, so
+// "JavaScript:..." is recognised while a relative link named "about" is not
+// mistaken for the about: scheme.
+func attrScheme(val string) string {
+	val = strings.TrimSpace(val)
+
+	i := strings.IndexByte(val, ':')
+	if i < 0 {
+		return ""
+	}
+
+	return strings.ToLower(val[:i])
+}
+
+// handleURLPart picks the rewrite routine matching the kind of URI held in
+// attr.Val (protocol-relative, root-relative, absolute or document-relative)
+// and applies it to attr.
+func handleURLPart(attr *html.Attribute, baseURL *url.URL) {
+	val := attr.Val
+
+	// "//" must be tested before "/" since it shares that prefix
+	if strings.HasPrefix(val, "//") {
+		handleProtocolRelativePath(attr, baseURL)
+		return
+	}
+
+	if strings.HasPrefix(val, "/") {
+		handleRootRelativePath(attr, baseURL)
+		return
+	}
+
+	if strings.HasPrefix(val, "https://") || strings.HasPrefix(val, "http://") {
+		handleAbsolutePath(attr, baseURL)
+		return
+	}
+
+	handleDocumentRelativePath(attr, baseURL)
+}
+
+// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
+//
+// The value names its own host ("//cdn.example.com/app.js"), so only the scheme
+// is taken from baseURL; the host and path in attr.Val are kept as they are:
+// "//cdn.example.com/app.js" -> "/https://cdn.example.com/app.js".
+func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
+	absolute := fmt.Sprintf("%s:%s", baseURL.Scheme, attr.Val)
+	attr.Val = fmt.Sprintf("/%s", escape(absolute))
+
+	log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// handleRootRelativePath rewrites a root-relative URL (one starting with "/")
+// to "/<scheme>://<host>/<path>", using the scheme and host of baseURL.
+// Values already in that format, and values that do not parse as a URL path,
+// are left unchanged.
+func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
+	// nothing to do when the value is already in the proxied format
+	for _, proxied := range []string{"/http://", "/https://"} {
+		if strings.HasPrefix(attr.Val, proxied) {
+			return
+		}
+	}
+
+	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+
+	// sanity check: the value must form a parseable URL when hung off a dummy host
+	if _, err := url.Parse("http://localhost.com" + attr.Val); err != nil {
+		log.Println(err)
+		return
+	}
+
+	target := baseURL.Scheme + "://" + baseURL.Host + "/" + strings.TrimPrefix(attr.Val, "/")
+	attr.Val = "/" + escape(target)
+
+	log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
+//
+// The reference is resolved against the directory of baseURL's path (everything
+// up to and including its last "/"), so with a base of
+// https://example.com/blog/post, "img/a.png" becomes
+// "/https://example.com/blog/img/a.png". Any "?query" or "#fragment" suffix is
+// carried over untouched, and a trailing "/" on the reference is preserved.
+// Empty values and pure fragments ("#top") are left unchanged.
+func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
+	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+
+	if attr.Val == "" || strings.HasPrefix(attr.Val, "#") {
+		return
+	}
+
+	// split off ?query / #fragment so that path cleaning never touches them
+	ref, suffix := attr.Val, ""
+	if i := strings.IndexAny(ref, "?#"); i >= 0 {
+		ref, suffix = ref[:i], ref[i:]
+	}
+
+	basePath := baseURL.EscapedPath()
+	resolved := basePath // a query-only reference targets the current document
+	if ref != "" {
+		// directory of the current document; "" when the base has no path
+		dir := basePath[:strings.LastIndex(basePath, "/")+1]
+		// rooting the join at "/" keeps ".." segments from climbing above the host
+		resolved = path.Join("/", dir, ref)
+		// path.Join drops a trailing slash, which is significant in URLs
+		if strings.HasSuffix(ref, "/") && !strings.HasSuffix(resolved, "/") {
+			resolved += "/"
+		}
+	}
+
+	attr.Val = fmt.Sprintf(
+		"%s://%s/%s",
+		baseURL.Scheme,
+		strings.Trim(baseURL.Host, "/"),
+		strings.TrimPrefix(resolved, "/")+suffix,
+	)
+	attr.Val = escape(attr.Val)
+	attr.Val = fmt.Sprintf("/%s", attr.Val)
+
+	log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// handleAbsolutePath rewrites a full http(s) URI to "/<escaped original URI>".
+// Values that fail to parse, or whose scheme is neither http nor https, are
+// left unchanged. The base URL parameter is unused.
+func handleAbsolutePath(attr *html.Attribute, _ *url.URL) {
+	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+
+	// only well-formed http/https URLs are rewritten
+	parsed, err := url.Parse(attr.Val)
+	if err != nil {
+		return
+	}
+
+	switch parsed.Scheme {
+	case "http", "https":
+	default:
+		return
+	}
+
+	attr.Val = "/" + escape(strings.TrimPrefix(attr.Val, "/"))
+
+	log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// handle edge cases for special attributes
+func (r *HTMLTokenURLRewriter) handleSpecialAttr(token *html.Token, attr *html.Attribute, baseURL *url.URL) {
+	switch {
+	// srcset attribute doesn't contain a single URL but a comma-separated list of URLs, each potentially followed by a space and a descriptor (like a width, pixel density, or other conditions).
+	case token.DataAtom == atom.Img && attr.Key == "srcset":
+		handleSrcSet(attr, baseURL)
+	case token.DataAtom == atom.Source && attr.Key == "srcset":
+		handleSrcSet(attr, baseURL)
+	// meta with http-equiv="refresh": The content attribute of a meta tag, when used for a refresh directive, contains a time interval followed by a URL, like content="5;url=http://example.com/".
+	case token.DataAtom == atom.Meta && attr.Key == "content" && regexp.MustCompile(`^\d+;url=`).MatchString(attr.Val):
+		handleMetaRefresh(attr, baseURL)
+	default:
+		break
+	}
+}
+
+func handleMetaRefresh(attr *html.Attribute, baseURL *url.URL) {
+	sec := strings.Split(attr.Val, ";url=")[0]
+	url := strings.Split(attr.Val, ";url=")[1]
+	f := &html.Attribute{Val: url, Key: "src"}
+	handleURLPart(f, baseURL)
+	attr.Val = fmt.Sprintf("%s;url=%s", sec, f.Val)
+}
+
+func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
+	var srcSetBuilder strings.Builder
+	srcSetItems := strings.Split(attr.Val, ",")
+
+	for i, srcItem := range srcSetItems {
+		srcParts := strings.Fields(srcItem)
+
+		if len(srcParts) == 0 {
+			continue
+		}
+
+		f := &html.Attribute{Val: srcParts[0], Key: "src"}
+		handleURLPart(f, baseURL)
+
+		if i > 0 {
+			srcSetBuilder.WriteString(", ")
+		}
+
+		srcSetBuilder.WriteString(f.Val)
+		if len(srcParts) > 1 {
+			srcSetBuilder.WriteString(" ")
+			srcSetBuilder.WriteString(strings.Join(srcParts[1:], " "))
+		}
+	}
+
+	attr.Val = srcSetBuilder.String()
+}
+
+// escape path-escapes str but keeps "/" literal, so an entire URL can be
+// embedded as the path of a proxy URL.
+func escape(str string) string {
+	encoded := url.PathEscape(str)
+
+	return strings.Replace(encoded, "%2F", "/", -1)
+}
--- a/proxychain/responsemodifiers/rewriters/script_injector_rewriter.go
+++ b/proxychain/responsemodifiers/rewriters/script_injector_rewriter.go
@@ -0,0 +1,92 @@
+package rewriters
+
+import (
+	_ "embed"
+	"fmt"
+	"sort"
+	"strings"
+
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
+)
+
+// ScriptInjectorRewriter implements IHTMLTokenRewriter.
+// ScriptInjectorRewriter is a struct that injects JS into the page.
+// The JS is returned as content to append to the opening <head> tag, wrapped according to execTime.
+type ScriptInjectorRewriter struct {
+	execTime ScriptExecTime // when the injected script should run
+	script   string         // JS source to inject (after any parameter substitution)
+}
+
+type ScriptExecTime int // selects when an injected script is executed
+
+const (
+	BeforeDOMContentLoaded ScriptExecTime = iota // script is placed directly inside a <script> tag
+	AfterDOMContentLoaded                        // script is wrapped in a DOMContentLoaded event listener
+	AfterDOMIdle                                 // script is placed into the embedded after_dom_idle_script_injector.js template
+)
+
+// ShouldModify reports whether token is the opening <head> tag, the only
+// token this rewriter attaches its script to.
+func (r *ScriptInjectorRewriter) ShouldModify(token *html.Token) bool {
+	if token.Type != html.StartTagToken {
+		return false
+	}
+
+	return token.DataAtom == atom.Head
+}
+
+// afterDomIdleScriptInjector holds the JS template used for AfterDOMIdle; it
+// contains a quoted {{AFTER_DOM_IDLE_SCRIPT}} placeholder statement that is
+// replaced with the script to run.
+//
+//go:embed after_dom_idle_script_injector.js
+var afterDomIdleScriptInjector string
+
+// ModifyToken returns the <script> element to append after the token; nothing
+// is ever prepended and the token itself is not changed. How the script is
+// wrapped depends on r.execTime:
+//   - BeforeDOMContentLoaded: the script is emitted as-is.
+//   - AfterDOMContentLoaded: the script runs inside a DOMContentLoaded listener.
+//   - AfterDOMIdle: the script is substituted into the embedded idle template.
+//
+// Any other execTime yields two empty strings.
+func (r *ScriptInjectorRewriter) ModifyToken(_ *html.Token) (string, string) {
+	switch r.execTime {
+	case BeforeDOMContentLoaded:
+		return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", r.script)
+
+	case AfterDOMContentLoaded:
+		return "", fmt.Sprintf("\n<script>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.script)
+
+	case AfterDOMIdle:
+		s := afterDomIdleScriptInjector
+		// The template spells the placeholder with double quotes; the
+		// single-quoted form is accepted too so either quoting style works.
+		for _, placeholder := range []string{`"{{AFTER_DOM_IDLE_SCRIPT}}"`, `'{{AFTER_DOM_IDLE_SCRIPT}}'`} {
+			if strings.Contains(s, placeholder) {
+				s = strings.Replace(s, placeholder, r.script, 1)
+				break
+			}
+		}
+
+		return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", s)
+
+	default:
+		return "", ""
+	}
+}
+
+// applyParams applies parameters by string replacement of the template script:
+// every occurrence of a key in r.script is replaced with its value.
+//
+// Keys are applied longest first so that a key which is a prefix of another
+// (e.g. "{{A}}" vs "{{A}}B") cannot clobber the longer one; keys of equal
+// length are applied in lexical order so the result does not depend on map
+// iteration order. Empty keys are skipped, because replacing "" would insert
+// the value between every character of the script.
+func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
+	keys := make([]string, 0, len(params))
+	for key := range params {
+		if key == "" {
+			continue
+		}
+		keys = append(keys, key)
+	}
+
+	sort.Slice(keys, func(i, j int) bool {
+		if len(keys[i]) != len(keys[j]) {
+			return len(keys[i]) > len(keys[j])
+		}
+		return keys[i] < keys[j]
+	})
+
+	for _, key := range keys {
+		r.script = strings.ReplaceAll(r.script, key, params[key])
+	}
+}
+
+// NewScriptInjectorRewriter returns a ScriptInjectorRewriter that injects
+// script into the page for execution at the time selected by execTime.
+func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
+	rr := new(ScriptInjectorRewriter)
+	rr.script = script
+	rr.execTime = execTime
+
+	return rr
+}
+
+// NewScriptInjectorRewriterWithParams returns a ScriptInjectorRewriter that
+// injects script into the page for execution at the time selected by execTime,
+// after substituting parameters into the script.
+// params maps placeholder text to its replacement: each key found in the
+// script is string-replaced with the corresponding value.
+func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter {
+	rr := NewScriptInjectorRewriter(script, execTime)
+	rr.applyParams(params)
+
+	return rr
+}