diff --git a/proxychain/proxychain.go b/proxychain/proxychain.go
index 7e30bff..545e35f 100644
--- a/proxychain/proxychain.go
+++ b/proxychain/proxychain.go
@@ -10,6 +10,7 @@ import (
"strings"
"ladder/pkg/ruleset"
+ rr "ladder/proxychain/responsemodifers/rewriters"
"github.com/gofiber/fiber/v2"
)
@@ -35,6 +36,7 @@ import (
rx "ladder/pkg/proxychain/requestmodifers"
tx "ladder/pkg/proxychain/responsemodifers"
+ "ladder/pkg/proxychain/responsemodifers/rewriters"
"ladder/internal/proxychain"
)
@@ -87,6 +89,7 @@ type ProxyChain struct {
Response *http.Response
requestModifications []RequestModification
resultModifications []ResponseModification
+ htmlTokenRewriters []rr.IHTMLTokenRewriter
Ruleset *ruleset.RuleSet
debugMode bool
abortErr error
@@ -169,75 +172,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
return req, nil
}
-// _execute sends the request for the ProxyChain and returns the raw body only
-// the caller is responsible for returning a response back to the requestor
-// the caller is also responsible for calling chain._reset() when they are done with the body
-func (chain *ProxyChain) _execute() (io.Reader, error) {
- if chain.validateCtxIsSet() != nil || chain.abortErr != nil {
- return nil, chain.abortErr
- }
- if chain.Request == nil {
- return nil, errors.New("proxychain request not yet initialized")
- }
- if chain.Request.URL.Scheme == "" {
- return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues")
- }
-
- // Apply requestModifications to proxychain
- for _, applyRequestModificationsTo := range chain.requestModifications {
- err := applyRequestModificationsTo(chain)
- if err != nil {
- return nil, chain.abort(err)
- }
- }
-
- // Send Request Upstream
- resp, err := chain.Client.Do(chain.Request)
- if err != nil {
- return nil, chain.abort(err)
- }
- chain.Response = resp
-
- //defer resp.Body.Close()
-
- /* todo: move to rsm
- for k, v := range resp.Header {
- chain.Context.Set(k, resp.Header.Get(k))
- }
- */
-
- // Apply ResponseModifiers to proxychain
- for _, applyResultModificationsTo := range chain.resultModifications {
- err := applyResultModificationsTo(chain)
- if err != nil {
- return nil, chain.abort(err)
- }
- }
-
- return chain.Response.Body, nil
-}
-
-// Execute sends the request for the ProxyChain and returns the request to the sender
-// and resets the fields so that the ProxyChain can be reused.
-// if any step in the ProxyChain fails, the request will abort and a 500 error will
-// be returned to the client
-func (chain *ProxyChain) Execute() error {
- defer chain._reset()
- body, err := chain._execute()
- if err != nil {
- log.Println(err)
- return err
- }
- if chain.Context == nil {
- return errors.New("no context set")
- }
- // Return request back to client
- chain.Context.Set("content-type", chain.Response.Header.Get("content-type"))
- return chain.Context.SendStream(body)
-
- //return chain.Context.SendStream(body)
-}
-
// reconstructUrlFromReferer reconstructs the URL using the referer's scheme, host, and the relative path / queries
func reconstructUrlFromReferer(referer *url.URL, relativeUrl *url.URL) (*url.URL, error) {
@@ -322,6 +256,13 @@ func (chain *ProxyChain) extractUrl() (*url.URL, error) {
return reconstructUrlFromReferer(referer, relativePath)
}
+// AddHTMLTokenRewriter adds an IHTMLTokenRewriter to the chain.
+// HTMLTokenRewriters modify the response body by rewriting HTML tokens as they are streamed back to the client.
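+//
+// Example (illustrative; js is any script string):
+//
+//	chain.AddHTMLTokenRewriter(rr.NewScriptInjectorRewriter(js, rr.AfterDOMIdle))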
+func (chain *ProxyChain) AddHTMLTokenRewriter(rewriter rr.IHTMLTokenRewriter) *ProxyChain {
+	chain.htmlTokenRewriters = append(chain.htmlTokenRewriters, rewriter)
+ return chain
+}
+
// SetFiberCtx takes the request ctx from the client
// for the modifiers and execute function to use.
// it must be set everytime a new request comes through
@@ -398,3 +339,86 @@ func NewProxyChain() *ProxyChain {
chain.Client = http.DefaultClient
return chain
}
+
+/// ========================================================================================================
+
+// _execute sends the request for the ProxyChain and returns the raw body only.
+// The caller is responsible for returning a response back to the requestor,
+// and for calling chain._reset() when they are done with the body.
+func (chain *ProxyChain) _execute() (io.Reader, error) {
+ if chain.validateCtxIsSet() != nil || chain.abortErr != nil {
+ return nil, chain.abortErr
+ }
+ if chain.Request == nil {
+ return nil, errors.New("proxychain request not yet initialized")
+ }
+ if chain.Request.URL.Scheme == "" {
+ return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues")
+ }
+
+ // Apply requestModifications to proxychain
+ for _, applyRequestModificationsTo := range chain.requestModifications {
+ err := applyRequestModificationsTo(chain)
+ if err != nil {
+ return nil, chain.abort(err)
+ }
+ }
+
+ // Send Request Upstream
+ resp, err := chain.Client.Do(chain.Request)
+ if err != nil {
+ return nil, chain.abort(err)
+ }
+ chain.Response = resp
+
+ /* todo: move to rsm
+ for k, v := range resp.Header {
+ chain.Context.Set(k, resp.Header.Get(k))
+ }
+ */
+
+ // Apply ResponseModifiers to proxychain
+ for _, applyResultModificationsTo := range chain.resultModifications {
+ err := applyResultModificationsTo(chain)
+ if err != nil {
+ return nil, chain.abort(err)
+ }
+ }
+
+	// stream the response back to the client, possibly rewriting the body
+ if len(chain.htmlTokenRewriters) == 0 {
+ return chain.Response.Body, nil
+ }
+
+ ct := chain.Response.Header.Get("content-type")
+ switch {
+ case strings.HasPrefix(ct, "text/html"):
+ return rr.NewHTMLRewriter(chain.Response.Body, chain.htmlTokenRewriters), nil
+ default:
+ return chain.Response.Body, nil
+ }
+
+}
+
+// Execute sends the request for the ProxyChain, streams the response back to the client,
+// and resets the fields so that the ProxyChain can be reused.
+// If any step in the ProxyChain fails, the request will abort and a 500 error will
+// be returned to the client.
+func (chain *ProxyChain) Execute() error {
+ defer chain._reset()
+ body, err := chain._execute()
+ if err != nil {
+ log.Println(err)
+ return err
+ }
+ if chain.Context == nil {
+ return errors.New("no context set")
+ }
+
+	// Return response back to client
+ chain.Context.Set("content-type", chain.Response.Header.Get("content-type"))
+ return chain.Context.SendStream(body)
+}
diff --git a/proxychain/responsemodifers/bypass_csp.go b/proxychain/responsemodifers/bypass_csp.go
index 71d4d73..07912c8 100644
--- a/proxychain/responsemodifers/bypass_csp.go
+++ b/proxychain/responsemodifers/bypass_csp.go
@@ -4,6 +4,9 @@ import (
"ladder/proxychain"
)
+// TODO: handle edge case where CSP is specified in a meta tag, for example:
+// <meta http-equiv="Content-Security-Policy" content="default-src 'self'">
+
// BypassContentSecurityPolicy modifies response headers to prevent the browser
// from enforcing any CSP restrictions. This should run at the end of the chain.
func BypassContentSecurityPolicy() proxychain.ResponseModification {
diff --git a/proxychain/responsemodifers/inject_script.go b/proxychain/responsemodifers/inject_script.go
new file mode 100644
index 0000000..1d8812d
--- /dev/null
+++ b/proxychain/responsemodifers/inject_script.go
@@ -0,0 +1,27 @@
+package responsemodifers
+
+import (
+ _ "embed"
+ "ladder/proxychain"
+ "ladder/proxychain/responsemodifers/rewriters"
+ "strings"
+)
+
+// InjectScript modifies HTTP responses to inject JavaScript
+// that executes at a particular time in the page load lifecycle.
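+//
+// Example (illustrative): queue a script that runs once the DOM has gone idle:
+//
+//	mod := InjectScript("console.log('hello')", rewriters.AfterDOMIdle)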
+func InjectScript(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
+ return func(chain *proxychain.ProxyChain) error {
+ // don't add rewriter if it's not even html
+ ct := chain.Response.Header.Get("content-type")
+ if !strings.HasPrefix(ct, "text/html") {
+ return nil
+ }
+
+ // the rewriting actually happens in chain.Execute() as the client is streaming the response body back
+ rr := rewriters.NewScriptInjectorRewriter(js, execTime)
+ // we just queue it up here
+ chain.AddHTMLTokenRewriter(rr)
+
+ return nil
+ }
+}
diff --git a/proxychain/responsemodifers/rewrite_http_resource_urls.go b/proxychain/responsemodifers/rewrite_http_resource_urls.go
index 7d72c7d..7cdbcc8 100644
--- a/proxychain/responsemodifers/rewrite_http_resource_urls.go
+++ b/proxychain/responsemodifers/rewrite_http_resource_urls.go
@@ -13,24 +13,9 @@ import (
// - `<img src="https://example.com/img.png">` -> `<img src="/https://example.com/img.png">`
// - This function is designed to allow the proxified page
// to still be browsable by routing all resource URLs through the proxy.
-//
-// ---
-//
-// - It works by replacing the io.ReadCloser of the http.Response.Body
-// with another io.ReaderCloser (HTMLResourceRewriter) that wraps the first one.
-//
-// - This process can be done multiple times, so that the response will
-// be streamed and modified through each pass without buffering the entire response in memory.
-//
-// - HTMLResourceRewriter reads the http.Response.Body stream,
-// parsing each HTML token one at a time and replacing attribute tags.
-//
-// - When ProxyChain.Execute() is called, the response body will be read from the server
-// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
-// without ever buffering the entire HTTP response in memory.
func RewriteHTMLResourceURLs() proxychain.ResponseModification {
return func(chain *proxychain.ProxyChain) error {
- // return early if it's not HTML
+ // don't add rewriter if it's not even html
ct := chain.Response.Header.Get("content-type")
if !strings.HasPrefix(ct, "text/html") {
return nil
@@ -40,12 +25,10 @@ func RewriteHTMLResourceURLs() proxychain.ResponseModification {
originalURI := chain.Context.Request().URI()
proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())
- chain.Response.Body = rewriters.
- NewHTMLResourceURLRewriter(
- chain.Response.Body,
- chain.Request.URL,
- proxyURL,
- )
+ // the rewriting actually happens in chain.Execute() as the client is streaming the response body back
+ rr := rewriters.NewHTMLTokenURLRewriter(chain.Request.URL, proxyURL)
+ // we just queue it up here
+ chain.AddHTMLTokenRewriter(rr)
return nil
}
diff --git a/proxychain/responsemodifers/rewriters/after_dom_idle_script_injector.js b/proxychain/responsemodifers/rewriters/after_dom_idle_script_injector.js
new file mode 100644
index 0000000..47aa44f
--- /dev/null
+++ b/proxychain/responsemodifers/rewriters/after_dom_idle_script_injector.js
@@ -0,0 +1,27 @@
+(() => {
+ document.addEventListener('DOMContentLoaded', (event) => {
+ initIdleMutationObserver();
+ });
+
+ function initIdleMutationObserver() {
+ let debounceTimer;
+ const debounceDelay = 500; // adjust the delay as needed
+
+ const observer = new MutationObserver((mutations) => {
+ // Clear the previous timer and set a new one
+ clearTimeout(debounceTimer);
+ debounceTimer = setTimeout(() => {
+ execute();
+ observer.disconnect(); // Disconnect after first execution
+ }, debounceDelay);
+ });
+
+ const config = { attributes: false, childList: true, subtree: true };
+ observer.observe(document.body, config);
+ }
+
+ function execute() {
+ 'SCRIPT_CONTENT_PARAM'
+ //console.log('DOM is now idle. Executing...');
+ }
+})();
\ No newline at end of file
diff --git a/proxychain/responsemodifers/rewriters/css_resource_url_rewriter.go b/proxychain/responsemodifers/rewriters/css_rewriter.go
similarity index 100%
rename from proxychain/responsemodifers/rewriters/css_resource_url_rewriter.go
rename to proxychain/responsemodifers/rewriters/css_rewriter.go
diff --git a/proxychain/responsemodifers/rewriters/html_resource_url_rewriter.go b/proxychain/responsemodifers/rewriters/html_resource_url_rewriter.go
deleted file mode 100644
index 1997f67..0000000
--- a/proxychain/responsemodifers/rewriters/html_resource_url_rewriter.go
+++ /dev/null
@@ -1,344 +0,0 @@
-package rewriters
-
-import (
- "bytes"
- _ "embed"
- "fmt"
- "io"
- "log"
- "net/url"
- "strings"
-
- "golang.org/x/net/html"
-)
-
-var attributesToRewrite map[string]bool
-var schemeBlacklist map[string]bool
-
-func init() {
- // Define list of HTML attributes to try to rewrite
- attributesToRewrite = map[string]bool{
- "src": true,
- "href": true,
- "action": true,
- "srcset": true,
- "poster": true,
- "data": true,
- "cite": true,
- "formaction": true,
- "background": true,
- "usemap": true,
- "longdesc": true,
- "manifest": true,
- "archive": true,
- "codebase": true,
- "icon": true,
- "pluginspage": true,
- }
-
- // define URIs to NOT rewrite
-	// for example: don't overwrite <a href="data:image/png;base64,...">
- schemeBlacklist = map[string]bool{
- "data": true,
- "tel": true,
- "mailto": true,
- "file": true,
- "blob": true,
- "javascript": true,
- "about": true,
- "magnet": true,
- "ws": true,
- "wss": true,
- "ftp": true,
- }
-}
-
-// HTMLResourceURLRewriter is a struct that rewrites URLs within HTML resources to use a specified proxy URL.
-// It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes.
-//
-// e.g. <img src="https://example.com/img.png"> -> <img src="/https%3A%2F%2Fexample.com%2Fimg.png">
-type HTMLResourceURLRewriter struct {
- baseURL *url.URL
- tokenizer *html.Tokenizer
- currentToken html.Token
- tokenBuffer *bytes.Buffer
- scriptContentBuffer *bytes.Buffer
- insideScript bool
- currentTokenIndex int
- currentTokenProcessed bool
- proxyURL string // ladder URL, not proxied site URL
-}
-
-// NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter.
-// It initializes the tokenizer with the provided source and sets the proxy URL.
-func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL, proxyURL string) *HTMLResourceURLRewriter {
- return &HTMLResourceURLRewriter{
- tokenizer: html.NewTokenizer(src),
- currentToken: html.Token{},
- currentTokenIndex: 0,
- tokenBuffer: new(bytes.Buffer),
- scriptContentBuffer: new(bytes.Buffer),
- insideScript: false,
- baseURL: baseURL,
- proxyURL: proxyURL,
- }
-}
-
-// Close resets the internal state of HTMLResourceURLRewriter, clearing buffers and token data.
-func (r *HTMLResourceURLRewriter) Close() error {
- r.tokenBuffer.Reset()
- r.currentToken = html.Token{}
- r.currentTokenIndex = 0
- r.currentTokenProcessed = false
- return nil
-}
-
-// Read processes the HTML content, rewriting URLs and managing the state of tokens.
-// It reads HTML content, token by token, rewriting URLs to route through the specified proxy.
-func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
-
- if r.currentToken.Data == "" || r.currentTokenProcessed {
- tokenType := r.tokenizer.Next()
-
- // done reading html, close out reader
- if tokenType == html.ErrorToken {
- if r.tokenizer.Err() == io.EOF {
- return 0, io.EOF
- }
- return 0, r.tokenizer.Err()
- }
-
- // flush the current token into an internal buffer
- // to handle fragmented tokens
- r.currentToken = r.tokenizer.Token()
-
- // patch tokens with URLs
- isTokenWithAttribute := r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken
- if isTokenWithAttribute {
- patchResourceURL(&r.currentToken, r.baseURL, r.proxyURL)
- }
-
- r.tokenBuffer.Reset()
-
- // unescape script contents, not sure why tokenizer will escape things
- switch tokenType {
- case html.StartTagToken:
- if r.currentToken.Data == "script" {
- r.insideScript = true
- r.scriptContentBuffer.Reset() // Reset buffer for new script contents
- }
- r.tokenBuffer.WriteString(r.currentToken.String()) // Write the start tag
- case html.EndTagToken:
- if r.currentToken.Data == "script" {
- r.insideScript = false
- modScript := modifyInlineScript(r.scriptContentBuffer)
- r.tokenBuffer.WriteString(modScript)
- }
- r.tokenBuffer.WriteString(r.currentToken.String())
- default:
- if r.insideScript {
- r.scriptContentBuffer.WriteString(r.currentToken.String())
- } else {
- r.tokenBuffer.WriteString(r.currentToken.String())
- }
- }
-
-		r.currentTokenProcessed = false
-	}
-
-	n, err := r.tokenBuffer.Read(p)
-	if err == io.EOF || r.tokenBuffer.Len() == 0 {
-		r.currentTokenProcessed = true
-		err = nil // EOF in this context is expected and not an actual error
-	}
-	return n, err
-}
-
-// inject a <script> element containing the given script into the token buffer
-func injectScript(tokenBuffer *bytes.Buffer, script string) {
-	tokenBuffer.WriteString(
-		fmt.Sprintf("\n<script>%s</script>\n", script),
-	)
-}
-
-func injectScriptWithParams(tokenBuffer *bytes.Buffer, script string, params map[string]string) {
- for old, new := range params {
- script = strings.ReplaceAll(script, old, new)
- }
- tokenBuffer.WriteString(
-		fmt.Sprintf("\n<script>%s</script>\n", script),
- )
-}
-
-// possible ad-blocking / bypassing opportunity here
-func modifyInlineScript(scriptContentBuffer *bytes.Buffer) string {
- return html.UnescapeString(scriptContentBuffer.String())
-}
-
-// Root-relative URLs: These are relative to the root path and start with a "/".
-func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
- // doublecheck this is a valid relative URL
- log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
- _, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
- if err != nil {
- log.Println(err)
- return
- }
-
- //log.Printf("BASEURL patch: %s\n", baseURL)
-
- attr.Val = fmt.Sprintf(
- "/%s://%s/%s",
- baseURL.Scheme,
- baseURL.Host,
- strings.TrimPrefix(attr.Val, "/"),
- )
- attr.Val = url.QueryEscape(attr.Val)
- attr.Val = fmt.Sprintf("/%s", attr.Val)
-
- log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
-func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
- attr.Val = fmt.Sprintf(
- "%s://%s/%s%s",
- baseURL.Scheme,
- strings.Trim(baseURL.Host, "/"),
- strings.Trim(baseURL.RawPath, "/"),
- strings.Trim(attr.Val, "/"),
- )
- attr.Val = url.QueryEscape(attr.Val)
- attr.Val = fmt.Sprintf("/%s", attr.Val)
- log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
-func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
- attr.Val = strings.TrimPrefix(attr.Val, "/")
- handleRootRelativePath(attr, baseURL)
- log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
- // check if valid URL
- u, err := url.Parse(attr.Val)
- if err != nil {
- return
- }
- if !(u.Scheme == "http" || u.Scheme == "https") {
- return
- }
- attr.Val = fmt.Sprintf(
- "/%s",
- url.QueryEscape(
- strings.TrimPrefix(attr.Val, "/"),
- ),
- )
- log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
- var srcSetBuilder strings.Builder
- srcSetItems := strings.Split(attr.Val, ",")
-
- for i, srcItem := range srcSetItems {
- srcParts := strings.Fields(srcItem) // Fields splits around whitespace, trimming them
-
- if len(srcParts) == 0 {
- continue // skip empty items
- }
-
- // Process URL part
- urlPart := processURLPart(srcParts[0], baseURL)
-
- // First srcset item without a descriptor
- if i == 0 && (len(srcParts) == 1 || !strings.HasSuffix(srcParts[1], "x")) {
- srcSetBuilder.WriteString(urlPart)
- } else {
- srcSetBuilder.WriteString(fmt.Sprintf("%s %s", urlPart, srcParts[1]))
- }
-
- if i < len(srcSetItems)-1 {
- srcSetBuilder.WriteString(",") // Add comma for all but last item
- }
- }
-
- attr.Val = srcSetBuilder.String()
- log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-// only for srcset
-func processURLPart(urlPart string, baseURL *url.URL) string {
- f := &html.Attribute{Val: urlPart, Key: "src"}
-
- switch {
- case strings.HasPrefix(urlPart, "//"):
- handleProtocolRelativePath(f, baseURL)
- case strings.HasPrefix(urlPart, "/"):
- handleRootRelativePath(f, baseURL)
- case strings.HasPrefix(urlPart, "https://"), strings.HasPrefix(urlPart, "http://"):
- handleAbsolutePath(f, baseURL)
- default:
- handleDocumentRelativePath(f, baseURL)
- }
-
- return f.Val
-}
-
-func isBlackedlistedScheme(url string) bool {
- spl := strings.Split(url, ":")
- if len(spl) == 0 {
- return false
- }
- scheme := spl[0]
- return schemeBlacklist[scheme]
-}
-
-func patchResourceURL(token *html.Token, baseURL *url.URL, proxyURL string) {
- for i := range token.Attr {
- attr := &token.Attr[i]
-
- switch {
- // don't touch attributes except for the ones we defined
- case !attributesToRewrite[attr.Key]:
- continue
- // don't rewrite special URIs that don't make network requests
- case isBlackedlistedScheme(attr.Val):
- continue
- // don't double-overwrite the url
- case strings.HasPrefix(attr.Val, proxyURL):
- continue
- case attr.Key == "srcset":
- handleSrcSet(attr, baseURL)
- continue
- case strings.HasPrefix(attr.Val, "//"):
- handleProtocolRelativePath(attr, baseURL)
- continue
- case strings.HasPrefix(attr.Val, "/"):
- handleRootRelativePath(attr, baseURL)
- continue
- case strings.HasPrefix(attr.Val, "https://") || strings.HasPrefix(attr.Val, "http://"):
- handleAbsolutePath(attr, baseURL)
- continue
- default:
- handleDocumentRelativePath(attr, baseURL)
- continue
- }
-
- }
-}
diff --git a/proxychain/responsemodifers/rewriters/html_rewriter.go b/proxychain/responsemodifers/rewriters/html_rewriter.go
new file mode 100644
index 0000000..12767e2
--- /dev/null
+++ b/proxychain/responsemodifers/rewriters/html_rewriter.go
@@ -0,0 +1,131 @@
+package rewriters
+
+import (
+ "bytes"
+ "io"
+
+ "golang.org/x/net/html"
+)
+
+// IHTMLTokenRewriter defines an interface for modifying HTML tokens.
+type IHTMLTokenRewriter interface {
+ // ShouldModify determines whether a given HTML token requires modification.
+ ShouldModify(*html.Token) bool
+
+ // ModifyToken applies modifications to a given HTML token.
+ // It returns strings representing content to be prepended and
+ // appended to the token. If no modifications are required or if an error occurs,
+ // it returns empty strings for both 'prepend' and 'append'.
+ // Note: The original token is not modified if an error occurs.
+ ModifyToken(*html.Token) (prepend, append string)
+}
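+
+// A minimal no-op implementation (illustrative sketch):
+//
+//	type noopRewriter struct{}
+//
+//	func (noopRewriter) ShouldModify(*html.Token) bool            { return false }
+//	func (noopRewriter) ModifyToken(*html.Token) (string, string) { return "", "" }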
+
+// HTMLRewriter is a struct that can take multiple TokenHandlers and process all
+// HTML tokens from http.Response.Body in a single pass, making changes and returning a new io.ReadCloser
+//
+// - HTMLRewriter reads the http.Response.Body stream,
+// parsing each HTML token one at a time and making modifications (defined by implementations of IHTMLTokenRewriter)
+// in a single pass of the tokenizer.
+//
+// - When ProxyChain.Execute() is called, the response body will be read from the server
+// and streamed through the HTMLRewriter, which applies every queued IHTMLTokenRewriter
+// in one pass without ever buffering the entire HTTP response in memory.
+type HTMLRewriter struct {
+ tokenizer *html.Tokenizer
+ currentToken *html.Token
+ tokenBuffer *bytes.Buffer
+ currentTokenProcessed bool
+ rewriters []IHTMLTokenRewriter
+}
+
+// NewHTMLRewriter creates a new HTMLRewriter instance.
+// It processes HTML tokens from an io.ReadCloser source (typically http.Response.Body)
+// using a series of HTMLTokenRewriters. Each HTMLTokenRewriter in the 'rewriters' slice
+// applies its specific modifications to the HTML tokens.
+// The HTMLRewriter reads from the provided 'src', applies the modifications,
+// and returns the processed content as a new io.ReadCloser.
+// This new io.ReadCloser can be used to stream the modified content back to the client.
+//
+// Parameters:
+// - src: An io.ReadCloser representing the source of the HTML content, such as http.Response.Body.
+// - rewriters: A slice of HTMLTokenRewriters that define the modifications to be applied to the HTML tokens.
+//
+// Returns:
+// - A pointer to an HTMLRewriter, which implements io.ReadCloser, containing the modified HTML content.
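+//
+// Example (illustrative; resp is an *http.Response):
+//
+//	resp.Body = NewHTMLRewriter(resp.Body, []IHTMLTokenRewriter{
+//		NewScriptInjectorRewriter("console.log('hi')", AfterDOMIdle),
+//	})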
+func NewHTMLRewriter(src io.ReadCloser, rewriters []IHTMLTokenRewriter) *HTMLRewriter {
+ return &HTMLRewriter{
+ tokenizer: html.NewTokenizer(src),
+ currentToken: nil,
+ tokenBuffer: new(bytes.Buffer),
+ currentTokenProcessed: false,
+ rewriters: rewriters,
+ }
+}
+
+// Close resets the internal state of HTMLRewriter, clearing buffers and token data.
+func (r *HTMLRewriter) Close() error {
+ r.tokenBuffer.Reset()
+ r.currentToken = nil
+ r.currentTokenProcessed = false
+ return nil
+}
+
+// Read processes the HTML content, rewriting URLs and managing the state of tokens.
+func (r *HTMLRewriter) Read(p []byte) (int, error) {
+
+ if r.currentToken == nil || r.currentToken.Data == "" || r.currentTokenProcessed {
+ tokenType := r.tokenizer.Next()
+
+ // done reading html, close out reader
+ if tokenType == html.ErrorToken {
+ if r.tokenizer.Err() == io.EOF {
+ return 0, io.EOF
+ }
+ return 0, r.tokenizer.Err()
+ }
+
+ // get the next token; reset buffer
+ t := r.tokenizer.Token()
+ r.currentToken = &t
+ r.tokenBuffer.Reset()
+
+		// buffer += "<prepend> <token> <append>"
+		// process token through all registered rewriters
+		// rewriters will modify the token in place, and optionally
+		// return a <prepend> or <append> string to be emitted around it
+ appends := make([]string, 0, len(r.rewriters))
+ for _, rewriter := range r.rewriters {
+ if !rewriter.ShouldModify(r.currentToken) {
+ continue
+ }
+ prepend, a := rewriter.ModifyToken(r.currentToken)
+ appends = append(appends, a)
+ // add to buffer
+ r.tokenBuffer.WriteString(prepend)
+ }
+
+ // add to buffer
+ if tokenType == html.TextToken {
+ // don't unescape textTokens (such as inline scripts).
+ // Token.String() by default will escape the inputs, but
+ // we don't want to modify the original source
+ r.tokenBuffer.WriteString(r.currentToken.Data)
+ } else {
+ r.tokenBuffer.WriteString(r.currentToken.String())
+ }
+
+ // add to buffer
+ for _, a := range appends {
+ r.tokenBuffer.WriteString(a)
+ }
+
+ r.currentTokenProcessed = false
+ }
+
+ n, err := r.tokenBuffer.Read(p)
+ if err == io.EOF || r.tokenBuffer.Len() == 0 {
+ r.currentTokenProcessed = true
+ err = nil // EOF in this context is expected and not an actual error
+ }
+ return n, err
+}
diff --git a/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go b/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go
new file mode 100644
index 0000000..d2ebc04
--- /dev/null
+++ b/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go
@@ -0,0 +1,263 @@
+package rewriters
+
+import (
+ _ "embed"
+ "fmt"
+ "log"
+ "net/url"
+ "regexp"
+ "strings"
+
+ "golang.org/x/net/html"
+)
+
+var rewriteAttrs map[string]map[string]bool
+var specialRewriteAttrs map[string]map[string]bool
+var schemeBlacklist map[string]bool
+
+func init() {
+ // define all tag/attributes which might contain URLs
+ // to attempt to rewrite to point to proxy instead
+ rewriteAttrs = map[string]map[string]bool{
+ "img": {"src": true, "srcset": true, "longdesc": true, "usemap": true},
+ "a": {"href": true},
+ "form": {"action": true},
+ "link": {"href": true, "manifest": true, "icon": true},
+ "script": {"src": true},
+ "video": {"src": true, "poster": true},
+ "audio": {"src": true},
+ "iframe": {"src": true, "longdesc": true},
+ "embed": {"src": true},
+ "object": {"data": true, "codebase": true},
+ "source": {"src": true, "srcset": true},
+ "track": {"src": true},
+ "area": {"href": true},
+ "base": {"href": true},
+ "blockquote": {"cite": true},
+ "del": {"cite": true},
+ "ins": {"cite": true},
+ "q": {"cite": true},
+ "body": {"background": true},
+ "button": {"formaction": true},
+ "input": {"src": true, "formaction": true},
+ "meta": {"content": true},
+ }
+
+ // might contain URL but requires special handling
+ specialRewriteAttrs = map[string]map[string]bool{
+ "img": {"srcset": true},
+ "source": {"srcset": true},
+ "meta": {"content": true},
+ }
+
+ // define URIs to NOT rewrite
+	// for example: don't overwrite <a href="data:image/png;base64,...">
+ schemeBlacklist = map[string]bool{
+ "data": true,
+ "tel": true,
+ "mailto": true,
+ "file": true,
+ "blob": true,
+ "javascript": true,
+ "about": true,
+ "magnet": true,
+ "ws": true,
+ "wss": true,
+ "ftp": true,
+ }
+
+}
+
+// HTMLTokenURLRewriter implements HTMLTokenRewriter
+// it rewrites URLs within HTML resources to use a specified proxy URL.
+//
+// e.g. <img src="https://example.com/img.png"> -> <img src="/https://example.com/img.png">
+type HTMLTokenURLRewriter struct {
+ baseURL *url.URL
+ proxyURL string // ladder URL, not proxied site URL
+}
+
+// NewHTMLTokenURLRewriter creates a new instance of HTMLTokenURLRewriter.
+// It sets the base URL of the proxied site and the proxy URL to rewrite against.
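+//
+// Example (illustrative; upstreamURL is the proxied site's *url.URL):
+//
+//	rw := NewHTMLTokenURLRewriter(upstreamURL, "https://my-ladder.example.com")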
+func NewHTMLTokenURLRewriter(baseURL *url.URL, proxyURL string) *HTMLTokenURLRewriter {
+ return &HTMLTokenURLRewriter{
+ baseURL: baseURL,
+ proxyURL: proxyURL,
+ }
+}
+
+func (r *HTMLTokenURLRewriter) ShouldModify(token *html.Token) bool {
+ attrLen := len(token.Attr)
+ if attrLen == 0 {
+ return false
+ }
+ if !(token.Type == html.StartTagToken || token.Type == html.SelfClosingTagToken) {
+ return false
+ }
+ return true
+}
+
+func (r *HTMLTokenURLRewriter) ModifyToken(token *html.Token) (string, string) {
+ for i := range token.Attr {
+ attr := &token.Attr[i]
+ switch {
+ // don't touch tag/attributes that don't contain URIs
+ case !rewriteAttrs[token.Data][attr.Key]:
+ continue
+ // don't touch attributes with special URIs (like data:)
+		case schemeBlacklist[strings.Split(attr.Val, ":")[0]]:
+ continue
+ // don't double-overwrite the url
+ case strings.HasPrefix(attr.Val, r.proxyURL):
+ continue
+ case strings.HasPrefix(attr.Val, "/http://"):
+ continue
+ case strings.HasPrefix(attr.Val, "/https://"):
+ continue
+ // handle special rewrites
+ case specialRewriteAttrs[token.Data][attr.Key]:
+ r.handleSpecialAttr(token, attr, r.baseURL)
+ continue
+ default:
+ // rewrite url
+ handleURLPart(attr, r.baseURL)
+ }
+ }
+ return "", ""
+}
+
+// dispatcher for URL rewriting based on URI type
+func handleURLPart(attr *html.Attribute, baseURL *url.URL) {
+	switch {
+	case strings.HasPrefix(attr.Val, "//"):
+		handleProtocolRelativePath(attr, baseURL)
+	case strings.HasPrefix(attr.Val, "/"):
+		handleRootRelativePath(attr, baseURL)
+	case strings.HasPrefix(attr.Val, "https://"), strings.HasPrefix(attr.Val, "http://"):
+		handleAbsolutePath(attr, baseURL)
+	default:
+		handleDocumentRelativePath(attr, baseURL)
+	}
+}
+
+// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
+func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
+ attr.Val = strings.TrimPrefix(attr.Val, "/")
+ handleRootRelativePath(attr, baseURL)
+ log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// Root-relative URLs: These are relative to the root path and start with a "/".
+func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
+ // doublecheck this is a valid relative URL
+ log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+ _, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
+ if err != nil {
+ log.Println(err)
+ return
+ }
+
+ //log.Printf("BASEURL patch: %s\n", baseURL)
+
+ attr.Val = fmt.Sprintf(
+		"%s://%s/%s",
+ baseURL.Scheme,
+ baseURL.Host,
+ strings.TrimPrefix(attr.Val, "/"),
+ )
+ attr.Val = escape(attr.Val)
+ attr.Val = fmt.Sprintf("/%s", attr.Val)
+
+ log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
+func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
+ log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+ attr.Val = fmt.Sprintf(
+ "%s://%s/%s%s",
+ baseURL.Scheme,
+ strings.Trim(baseURL.Host, "/"),
+ strings.Trim(baseURL.RawPath, "/"),
+ strings.Trim(attr.Val, "/"),
+ )
+ attr.Val = escape(attr.Val)
+ attr.Val = fmt.Sprintf("/%s", attr.Val)
+ log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// full URIs beginning with https?://proxiedsite.com
+func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
+ // check if valid URL
+ log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+ u, err := url.Parse(attr.Val)
+ if err != nil {
+ return
+ }
+ if !(u.Scheme == "http" || u.Scheme == "https") {
+ return
+ }
+ attr.Val = fmt.Sprintf("/%s", escape(strings.TrimPrefix(attr.Val, "/")))
+ log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// handle edge cases for special attributes
+func (r *HTMLTokenURLRewriter) handleSpecialAttr(token *html.Token, attr *html.Attribute, baseURL *url.URL) {
+ switch {
+ // srcset attribute doesn't contain a single URL but a comma-separated list of URLs, each potentially followed by a space and a descriptor (like a width, pixel density, or other conditions).
+ case token.Data == "img" && attr.Key == "srcset":
+ handleSrcSet(attr, baseURL)
+ case token.Data == "source" && attr.Key == "srcset":
+ handleSrcSet(attr, baseURL)
+ // meta with http-equiv="refresh": The content attribute of a meta tag, when used for a refresh directive, contains a time interval followed by a URL, like content="5;url=http://example.com/".
+ case token.Data == "meta" && attr.Key == "content" && regexp.MustCompile(`^\d+;url=`).MatchString(attr.Val):
+ handleMetaRefresh(attr, baseURL)
+ default:
+ break
+ }
+}
+
+func handleMetaRefresh(attr *html.Attribute, baseURL *url.URL) {
+	parts := strings.SplitN(attr.Val, ";url=", 2)
+	if len(parts) != 2 {
+		return
+	}
+	// rewrite the URL portion via a fake attribute, then reassemble the directive
+	f := &html.Attribute{Val: parts[1], Key: "src"}
+	handleURLPart(f, baseURL)
+	attr.Val = fmt.Sprintf("%s;url=%s", parts[0], f.Val)
+}
+
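+// handleSrcSet rewrites each URL in a comma-separated srcset value
+// (e.g. `srcset="/a.png 1x, /b.png 2x"`) while preserving any width/density descriptors.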
+func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
+ var srcSetBuilder strings.Builder
+ srcSetItems := strings.Split(attr.Val, ",")
+
+ for i, srcItem := range srcSetItems {
+ srcParts := strings.Fields(srcItem) // Fields splits around whitespace, trimming them
+
+ if len(srcParts) == 0 {
+ continue // skip empty items
+ }
+
+		// rewrite each URL part by passing in a fake attribute
+		f := &html.Attribute{Val: srcParts[0], Key: "src"}
+		handleURLPart(f, baseURL)
+		urlPart := f.Val
+
+		// keep the descriptor (e.g. "2x" or "480w") when one is present
+		if len(srcParts) == 1 {
+			srcSetBuilder.WriteString(urlPart)
+		} else {
+			srcSetBuilder.WriteString(fmt.Sprintf("%s %s", urlPart, srcParts[1]))
+		}
+
+ if i < len(srcSetItems)-1 {
+ srcSetBuilder.WriteString(",") // Add comma for all but last item
+ }
+ }
+
+ attr.Val = srcSetBuilder.String()
+ log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
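+// escape path-escapes a URL while keeping "/" separators intact, e.g. (illustrative)
+// escape("https://a.com/some path/img.png") -> "https://a.com/some%20path/img.png".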
+func escape(str string) string {
+ return strings.ReplaceAll(url.PathEscape(str), "%2F", "/")
+}
diff --git a/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js b/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js
index 9c04281..a6f09e1 100644
--- a/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js
+++ b/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js
@@ -284,4 +284,34 @@ const originalSetters = {};
});
+})();
+
+
+
+(() => {
+ document.addEventListener('DOMContentLoaded', (event) => {
+ initIdleMutationObserver();
+ });
+
+ function initIdleMutationObserver() {
+ let debounceTimer;
+ const debounceDelay = 500; // adjust the delay as needed
+
+ const observer = new MutationObserver((mutations) => {
+ // Clear the previous timer and set a new one
+ clearTimeout(debounceTimer);
+ debounceTimer = setTimeout(() => {
+ execute();
+ observer.disconnect(); // Disconnect after first execution
+ }, debounceDelay);
+ });
+
+ const config = { attributes: false, childList: true, subtree: true };
+ observer.observe(document.body, config);
+ }
+
+ function execute() {
+ console.log('DOM is now idle. Executing...');
+ }
+
})();
\ No newline at end of file
diff --git a/proxychain/responsemodifers/rewriters/script_injector_rewriter.go b/proxychain/responsemodifers/rewriters/script_injector_rewriter.go
new file mode 100644
index 0000000..b08498b
--- /dev/null
+++ b/proxychain/responsemodifers/rewriters/script_injector_rewriter.go
@@ -0,0 +1,60 @@
+package rewriters
+
+import (
+ _ "embed"
+ "fmt"
+ "strings"
+
+ "golang.org/x/net/html"
+ "golang.org/x/net/html/atom"
+)
+
+// ScriptInjectorRewriter implements IHTMLTokenRewriter.
+// It injects JS into the page by appending a <script> element
+// immediately after the opening <head> tag, wrapped so that it executes at a specified time.
+type ScriptInjectorRewriter struct {
+ execTime ScriptExecTime
+ script string
+}
+
+// ScriptExecTime determines when the injected script executes in the page load lifecycle.
+type ScriptExecTime int
+
+const (
+	// BeforeDOMContentLoaded executes the script as soon as it is parsed, before DOMContentLoaded fires.
+	BeforeDOMContentLoaded ScriptExecTime = iota
+	// AfterDOMContentLoaded defers execution until the DOMContentLoaded event fires.
+	AfterDOMContentLoaded
+	// AfterDOMIdle defers execution until DOM mutations have settled (see after_dom_idle_script_injector.js).
+	AfterDOMIdle
+)
+
+func (r *ScriptInjectorRewriter) ShouldModify(token *html.Token) bool {
+	// modify if token == <head>
+ return token.DataAtom == atom.Head && token.Type == html.StartTagToken
+}
+
+//go:embed after_dom_idle_script_injector.js
+var afterDomIdleScriptInjector string
+
+func (r *ScriptInjectorRewriter) ModifyToken(token *html.Token) (string, string) {
+	switch {
+	case r.execTime == BeforeDOMContentLoaded:
+		// a plain <script> in <head> runs as soon as it is parsed
+		return "", fmt.Sprintf("\n<script>%s</script>\n", r.script)
+
+	case r.execTime == AfterDOMContentLoaded:
+		// defer execution until the DOMContentLoaded event fires
+		return "", fmt.Sprintf("\n<script>document.addEventListener('DOMContentLoaded', () => {\n%s\n});</script>\n", r.script)
+
+	case r.execTime == AfterDOMIdle:
+		// embed the script into the idle-detection shim, which runs it once DOM mutations settle
+		s := strings.Replace(afterDomIdleScriptInjector, `'SCRIPT_CONTENT_PARAM'`, r.script, 1)
+		return "", fmt.Sprintf("\n<script>%s</script>\n", s)
+
+	default:
+		return "", ""
+	}
+}
+
+// NewScriptInjectorRewriter creates a ScriptInjectorRewriter (an IHTMLTokenRewriter)
+// that injects JS into the page for execution at a particular time.
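+//
+// Example (illustrative):
+//
+//	rw := NewScriptInjectorRewriter("console.log('injected')", AfterDOMIdle)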
+func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
+ return &ScriptInjectorRewriter{
+ execTime: execTime,
+ script: script,
+ }
+}