diff --git a/proxychain/proxychain.go b/proxychain/proxychain.go index 7e30bff..545e35f 100644 --- a/proxychain/proxychain.go +++ b/proxychain/proxychain.go @@ -10,6 +10,7 @@ import ( "strings" "ladder/pkg/ruleset" + rr "ladder/proxychain/responsemodifers/rewriters" "github.com/gofiber/fiber/v2" ) @@ -35,6 +36,7 @@ import ( rx "ladder/pkg/proxychain/requestmodifers" tx "ladder/pkg/proxychain/responsemodifers" + "ladder/pkg/proxychain/responsemodifers/rewriters" "ladder/internal/proxychain" ) @@ -87,6 +89,7 @@ type ProxyChain struct { Response *http.Response requestModifications []RequestModification resultModifications []ResponseModification + htmlTokenRewriters []rr.IHTMLTokenRewriter Ruleset *ruleset.RuleSet debugMode bool abortErr error @@ -169,75 +172,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) { return req, nil } -// _execute sends the request for the ProxyChain and returns the raw body only -// the caller is responsible for returning a response back to the requestor -// the caller is also responsible for calling chain._reset() when they are done with the body -func (chain *ProxyChain) _execute() (io.Reader, error) { - if chain.validateCtxIsSet() != nil || chain.abortErr != nil { - return nil, chain.abortErr - } - if chain.Request == nil { - return nil, errors.New("proxychain request not yet initialized") - } - if chain.Request.URL.Scheme == "" { - return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues") - } - - // Apply requestModifications to proxychain - for _, applyRequestModificationsTo := range chain.requestModifications { - err := applyRequestModificationsTo(chain) - if err != nil { - return nil, chain.abort(err) - } - } - - // Send Request Upstream - resp, err := chain.Client.Do(chain.Request) - if err != nil { - return nil, chain.abort(err) - } - chain.Response = resp - - //defer resp.Body.Close() - - /* todo: move to rsm - for k, v := range resp.Header { - chain.Context.Set(k, resp.Header.Get(k)) - } - */ - - // Apply ResponseModifiers to proxychain - for _, applyResultModificationsTo := range chain.resultModifications { - err := applyResultModificationsTo(chain) - if err != nil { - return nil, chain.abort(err) - } - } - - return chain.Response.Body, nil -} - -// Execute sends the request for the ProxyChain and returns the request to the sender -// and resets the fields so that the ProxyChain can be reused. 
-// if any step in the ProxyChain fails, the request will abort and a 500 error will
-// be returned to the client
-func (chain *ProxyChain) Execute() error {
-    defer chain._reset()
-    body, err := chain._execute()
-    if err != nil {
-        log.Println(err)
-        return err
-    }
-    if chain.Context == nil {
-        return errors.New("no context set")
-    }
-    // Return request back to client
-    chain.Context.Set("content-type", chain.Response.Header.Get("content-type"))
-    return chain.Context.SendStream(body)
-
-    //return chain.Context.SendStream(body)
-}
-
 // reconstructUrlFromReferer reconstructs the URL using the referer's scheme, host, and the relative path / queries
 func reconstructUrlFromReferer(referer *url.URL, relativeUrl *url.URL) (*url.URL, error) {
@@ -322,6 +256,13 @@ func (chain *ProxyChain) extractUrl() (*url.URL, error) {
 	return reconstructUrlFromReferer(referer, relativePath)
 }
 
+// AddHTMLTokenRewriter adds an IHTMLTokenRewriter to the chain.
+// HTMLTokenRewriters modify the response body by parsing its HTML token stream.
+func (chain *ProxyChain) AddHTMLTokenRewriter(rewriter rr.IHTMLTokenRewriter) *ProxyChain {
+    chain.htmlTokenRewriters = append(chain.htmlTokenRewriters, rewriter)
+    return chain
+}
+
 // SetFiberCtx takes the request ctx from the client
 // for the modifiers and execute function to use.
 // it must be set everytime a new request comes through
@@ -398,3 +339,86 @@ func NewProxyChain() *ProxyChain {
 	chain.Client = http.DefaultClient
 	return chain
 }
+
+/// ========================================================================================================
+
+// _execute sends the request for the ProxyChain and returns the raw body only.
+// The caller is responsible for returning a response back to the requestor.
+// The caller is also responsible for calling chain._reset() when they are done with the body.
+func (chain *ProxyChain) _execute() (io.Reader, error) {
+    if chain.validateCtxIsSet() != nil || chain.abortErr != nil {
+        return nil, chain.abortErr
+    }
+    if chain.Request == nil {
+        return nil, errors.New("proxychain request not yet initialized")
+    }
+    if chain.Request.URL.Scheme == "" {
+        return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues")
+    }
+
+    // Apply requestModifications to proxychain
+    for _, applyRequestModificationsTo := range chain.requestModifications {
+        err := applyRequestModificationsTo(chain)
+        if err != nil {
+            return nil, chain.abort(err)
+        }
+    }
+
+    // Send Request Upstream
+    resp, err := chain.Client.Do(chain.Request)
+    if err != nil {
+        return nil, chain.abort(err)
+    }
+    chain.Response = resp
+
+    /* todo: move to rsm
+    for k, v := range resp.Header {
+        chain.Context.Set(k, resp.Header.Get(k))
+    }
+    */
+
+    // Apply ResponseModifiers to proxychain
+    for _, applyResultModificationsTo := range chain.resultModifications {
+        err := applyResultModificationsTo(chain)
+        if err != nil {
+            return nil, chain.abort(err)
+        }
+    }
+
+    // stream request back to client, possibly rewriting the body
+    if len(chain.htmlTokenRewriters) == 0 {
+        return chain.Response.Body, nil
+    }
+
+    ct := chain.Response.Header.Get("content-type")
+    switch {
+    case strings.HasPrefix(ct, "text/html"):
+        return rr.NewHTMLRewriter(chain.Response.Body, chain.htmlTokenRewriters), nil
+    default:
+        return chain.Response.Body, nil
+    }
+
+}
+
+// Execute sends the request for the ProxyChain and returns the request to the sender
+// and resets the fields so that the ProxyChain can be reused.
+// if any step in the ProxyChain fails, the request will abort and a 500 error will +// be returned to the client +func (chain *ProxyChain) Execute() error { + defer chain._reset() + body, err := chain._execute() + if err != nil { + log.Println(err) + return err + } + if chain.Context == nil { + return errors.New("no context set") + } + + // Return request back to client + chain.Context.Set("content-type", chain.Response.Header.Get("content-type")) + return chain.Context.SendStream(body) + + //return chain.Context.SendStream(body) +} diff --git a/proxychain/responsemodifers/bypass_csp.go b/proxychain/responsemodifers/bypass_csp.go index 71d4d73..07912c8 100644 --- a/proxychain/responsemodifers/bypass_csp.go +++ b/proxychain/responsemodifers/bypass_csp.go @@ -4,6 +4,9 @@ import ( "ladder/proxychain" ) +// TODO: handle edge case where CSP is specified in meta tag: +// + // BypassContentSecurityPolicy modifies response headers to prevent the browser // from enforcing any CSP restrictions. This should run at the end of the chain. func BypassContentSecurityPolicy() proxychain.ResponseModification { diff --git a/proxychain/responsemodifers/inject_script.go b/proxychain/responsemodifers/inject_script.go new file mode 100644 index 0000000..1d8812d --- /dev/null +++ b/proxychain/responsemodifers/inject_script.go @@ -0,0 +1,27 @@ +package responsemodifers + +import ( + _ "embed" + "ladder/proxychain" + "ladder/proxychain/responsemodifers/rewriters" + "strings" +) + +// InjectScript modifies HTTP responses +// to execute javascript at a particular time. +func InjectScript(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification { + return func(chain *proxychain.ProxyChain) error { + // don't add rewriter if it's not even html + ct := chain.Response.Header.Get("content-type") + if !strings.HasPrefix(ct, "text/html") { + return nil + } + + // the rewriting actually happens in chain.Execute() as the client is streaming the response body back + rr := rewriters.NewScriptInjectorRewriter(js, execTime) + // we just queue it up here + chain.AddHTMLTokenRewriter(rr) + + return nil + } +} diff --git a/proxychain/responsemodifers/rewrite_http_resource_urls.go b/proxychain/responsemodifers/rewrite_http_resource_urls.go index 7d72c7d..7cdbcc8 100644 --- a/proxychain/responsemodifers/rewrite_http_resource_urls.go +++ b/proxychain/responsemodifers/rewrite_http_resource_urls.go @@ -13,24 +13,9 @@ import ( // - `` -> `` // - This function is designed to allow the proxified page // to still be browsible by routing all resource URLs through the proxy. -// -// --- -// -// - It works by replacing the io.ReadCloser of the http.Response.Body -// with another io.ReaderCloser (HTMLResourceRewriter) that wraps the first one. -// -// - This process can be done multiple times, so that the response will -// be streamed and modified through each pass without buffering the entire response in memory. -// -// - HTMLResourceRewriter reads the http.Response.Body stream, -// parsing each HTML token one at a time and replacing attribute tags. -// -// - When ProxyChain.Execute() is called, the response body will be read from the server -// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body -// without ever buffering the entire HTTP response in memory. 
func RewriteHTMLResourceURLs() proxychain.ResponseModification { return func(chain *proxychain.ProxyChain) error { - // return early if it's not HTML + // don't add rewriter if it's not even html ct := chain.Response.Header.Get("content-type") if !strings.HasPrefix(ct, "text/html") { return nil @@ -40,12 +25,10 @@ func RewriteHTMLResourceURLs() proxychain.ResponseModification { originalURI := chain.Context.Request().URI() proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host()) - chain.Response.Body = rewriters. - NewHTMLResourceURLRewriter( - chain.Response.Body, - chain.Request.URL, - proxyURL, - ) + // the rewriting actually happens in chain.Execute() as the client is streaming the response body back + rr := rewriters.NewHTMLTokenURLRewriter(chain.Request.URL, proxyURL) + // we just queue it up here + chain.AddHTMLTokenRewriter(rr) return nil } diff --git a/proxychain/responsemodifers/rewriters/after_dom_idle_script_injector.js b/proxychain/responsemodifers/rewriters/after_dom_idle_script_injector.js new file mode 100644 index 0000000..47aa44f --- /dev/null +++ b/proxychain/responsemodifers/rewriters/after_dom_idle_script_injector.js @@ -0,0 +1,27 @@ +(() => { + document.addEventListener('DOMContentLoaded', (event) => { + initIdleMutationObserver(); + }); + + function initIdleMutationObserver() { + let debounceTimer; + const debounceDelay = 500; // adjust the delay as needed + + const observer = new MutationObserver((mutations) => { + // Clear the previous timer and set a new one + clearTimeout(debounceTimer); + debounceTimer = setTimeout(() => { + execute(); + observer.disconnect(); // Disconnect after first execution + }, debounceDelay); + }); + + const config = { attributes: false, childList: true, subtree: true }; + observer.observe(document.body, config); + } + + function execute() { + 'SCRIPT_CONTENT_PARAM' + //console.log('DOM is now idle. 
Executing...'); + } +})(); \ No newline at end of file diff --git a/proxychain/responsemodifers/rewriters/css_resource_url_rewriter.go b/proxychain/responsemodifers/rewriters/css_rewriter.go similarity index 100% rename from proxychain/responsemodifers/rewriters/css_resource_url_rewriter.go rename to proxychain/responsemodifers/rewriters/css_rewriter.go diff --git a/proxychain/responsemodifers/rewriters/html_resource_url_rewriter.go b/proxychain/responsemodifers/rewriters/html_resource_url_rewriter.go deleted file mode 100644 index 1997f67..0000000 --- a/proxychain/responsemodifers/rewriters/html_resource_url_rewriter.go +++ /dev/null @@ -1,344 +0,0 @@ -package rewriters - -import ( - "bytes" - _ "embed" - "fmt" - "io" - "log" - "net/url" - "strings" - - "golang.org/x/net/html" -) - -var attributesToRewrite map[string]bool -var schemeBlacklist map[string]bool - -func init() { - // Define list of HTML attributes to try to rewrite - attributesToRewrite = map[string]bool{ - "src": true, - "href": true, - "action": true, - "srcset": true, - "poster": true, - "data": true, - "cite": true, - "formaction": true, - "background": true, - "usemap": true, - "longdesc": true, - "manifest": true, - "archive": true, - "codebase": true, - "icon": true, - "pluginspage": true, - } - - // define URIs to NOT rewrite - // for example: don't overwrite " - schemeBlacklist = map[string]bool{ - "data": true, - "tel": true, - "mailto": true, - "file": true, - "blob": true, - "javascript": true, - "about": true, - "magnet": true, - "ws": true, - "wss": true, - "ftp": true, - } -} - -// HTMLResourceURLRewriter is a struct that rewrites URLs within HTML resources to use a specified proxy URL. -// It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes. -// -> -type HTMLResourceURLRewriter struct { - baseURL *url.URL - tokenizer *html.Tokenizer - currentToken html.Token - tokenBuffer *bytes.Buffer - scriptContentBuffer *bytes.Buffer - insideScript bool - currentTokenIndex int - currentTokenProcessed bool - proxyURL string // ladder URL, not proxied site URL -} - -// NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter. -// It initializes the tokenizer with the provided source and sets the proxy URL. -func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL, proxyURL string) *HTMLResourceURLRewriter { - return &HTMLResourceURLRewriter{ - tokenizer: html.NewTokenizer(src), - currentToken: html.Token{}, - currentTokenIndex: 0, - tokenBuffer: new(bytes.Buffer), - scriptContentBuffer: new(bytes.Buffer), - insideScript: false, - baseURL: baseURL, - proxyURL: proxyURL, - } -} - -// Close resets the internal state of HTMLResourceURLRewriter, clearing buffers and token data. -func (r *HTMLResourceURLRewriter) Close() error { - r.tokenBuffer.Reset() - r.currentToken = html.Token{} - r.currentTokenIndex = 0 - r.currentTokenProcessed = false - return nil -} - -// Read processes the HTML content, rewriting URLs and managing the state of tokens. -// It reads HTML content, token by token, rewriting URLs to route through the specified proxy. 
-func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) { - - if r.currentToken.Data == "" || r.currentTokenProcessed { - tokenType := r.tokenizer.Next() - - // done reading html, close out reader - if tokenType == html.ErrorToken { - if r.tokenizer.Err() == io.EOF { - return 0, io.EOF - } - return 0, r.tokenizer.Err() - } - - // flush the current token into an internal buffer - // to handle fragmented tokens - r.currentToken = r.tokenizer.Token() - - // patch tokens with URLs - isTokenWithAttribute := r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken - if isTokenWithAttribute { - patchResourceURL(&r.currentToken, r.baseURL, r.proxyURL) - } - - r.tokenBuffer.Reset() - - // unescape script contents, not sure why tokenizer will escape things - switch tokenType { - case html.StartTagToken: - if r.currentToken.Data == "script" { - r.insideScript = true - r.scriptContentBuffer.Reset() // Reset buffer for new script contents - } - r.tokenBuffer.WriteString(r.currentToken.String()) // Write the start tag - case html.EndTagToken: - if r.currentToken.Data == "script" { - r.insideScript = false - modScript := modifyInlineScript(r.scriptContentBuffer) - r.tokenBuffer.WriteString(modScript) - } - r.tokenBuffer.WriteString(r.currentToken.String()) - default: - if r.insideScript { - r.scriptContentBuffer.WriteString(r.currentToken.String()) - } else { - r.tokenBuffer.WriteString(r.currentToken.String()) - } - } - - // inject \n", script), - ) -} - -func injectScriptWithParams(tokenBuffer *bytes.Buffer, script string, params map[string]string) { - for old, new := range params { - script = strings.ReplaceAll(script, old, new) - } - tokenBuffer.WriteString( - fmt.Sprintf("\n\n", script), - ) -} - -// possible ad-blocking / bypassing opportunity here -func modifyInlineScript(scriptContentBuffer *bytes.Buffer) string { - return html.UnescapeString(scriptContentBuffer.String()) -} - -// Root-relative URLs: These are relative to the root path and start with a "/". -func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) { - // doublecheck this is a valid relative URL - log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val) - _, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val)) - if err != nil { - log.Println(err) - return - } - - //log.Printf("BASEURL patch: %s\n", baseURL) - - attr.Val = fmt.Sprintf( - "/%s://%s/%s", - baseURL.Scheme, - baseURL.Host, - strings.TrimPrefix(attr.Val, "/"), - ) - attr.Val = url.QueryEscape(attr.Val) - attr.Val = fmt.Sprintf("/%s", attr.Val) - - log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val) -} - -// Document-relative URLs: These are relative to the current document's path and don't start with a "/". -func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) { - attr.Val = fmt.Sprintf( - "%s://%s/%s%s", - baseURL.Scheme, - strings.Trim(baseURL.Host, "/"), - strings.Trim(baseURL.RawPath, "/"), - strings.Trim(attr.Val, "/"), - ) - attr.Val = url.QueryEscape(attr.Val) - attr.Val = fmt.Sprintf("/%s", attr.Val) - log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val) -} - -// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page. 
-func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) { - attr.Val = strings.TrimPrefix(attr.Val, "/") - handleRootRelativePath(attr, baseURL) - log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val) -} - -func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) { - // check if valid URL - u, err := url.Parse(attr.Val) - if err != nil { - return - } - if !(u.Scheme == "http" || u.Scheme == "https") { - return - } - attr.Val = fmt.Sprintf( - "/%s", - url.QueryEscape( - strings.TrimPrefix(attr.Val, "/"), - ), - ) - log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val) -} - -func handleSrcSet(attr *html.Attribute, baseURL *url.URL) { - var srcSetBuilder strings.Builder - srcSetItems := strings.Split(attr.Val, ",") - - for i, srcItem := range srcSetItems { - srcParts := strings.Fields(srcItem) // Fields splits around whitespace, trimming them - - if len(srcParts) == 0 { - continue // skip empty items - } - - // Process URL part - urlPart := processURLPart(srcParts[0], baseURL) - - // First srcset item without a descriptor - if i == 0 && (len(srcParts) == 1 || !strings.HasSuffix(srcParts[1], "x")) { - srcSetBuilder.WriteString(urlPart) - } else { - srcSetBuilder.WriteString(fmt.Sprintf("%s %s", urlPart, srcParts[1])) - } - - if i < len(srcSetItems)-1 { - srcSetBuilder.WriteString(",") // Add comma for all but last item - } - } - - attr.Val = srcSetBuilder.String() - log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val) -} - -// only for srcset -func processURLPart(urlPart string, baseURL *url.URL) string { - f := &html.Attribute{Val: urlPart, Key: "src"} - - switch { - case strings.HasPrefix(urlPart, "//"): - handleProtocolRelativePath(f, baseURL) - case strings.HasPrefix(urlPart, "/"): - handleRootRelativePath(f, baseURL) - case strings.HasPrefix(urlPart, "https://"), strings.HasPrefix(urlPart, "http://"): - handleAbsolutePath(f, baseURL) - default: - handleDocumentRelativePath(f, baseURL) - } - - return f.Val -} - -func isBlackedlistedScheme(url string) bool { - spl := strings.Split(url, ":") - if len(spl) == 0 { - return false - } - scheme := spl[0] - return schemeBlacklist[scheme] -} - -func patchResourceURL(token *html.Token, baseURL *url.URL, proxyURL string) { - for i := range token.Attr { - attr := &token.Attr[i] - - switch { - // don't touch attributes except for the ones we defined - case !attributesToRewrite[attr.Key]: - continue - // don't rewrite special URIs that don't make network requests - case isBlackedlistedScheme(attr.Val): - continue - // don't double-overwrite the url - case strings.HasPrefix(attr.Val, proxyURL): - continue - case attr.Key == "srcset": - handleSrcSet(attr, baseURL) - continue - case strings.HasPrefix(attr.Val, "//"): - handleProtocolRelativePath(attr, baseURL) - continue - case strings.HasPrefix(attr.Val, "/"): - handleRootRelativePath(attr, baseURL) - continue - case strings.HasPrefix(attr.Val, "https://") || strings.HasPrefix(attr.Val, "http://"): - handleAbsolutePath(attr, baseURL) - continue - default: - handleDocumentRelativePath(attr, baseURL) - continue - } - - } -} diff --git a/proxychain/responsemodifers/rewriters/html_rewriter.go b/proxychain/responsemodifers/rewriters/html_rewriter.go new file mode 100644 index 0000000..12767e2 --- /dev/null +++ b/proxychain/responsemodifers/rewriters/html_rewriter.go @@ -0,0 +1,131 @@ +package rewriters + +import ( + "bytes" + "io" + + "golang.org/x/net/html" +) + +// IHTMLTokenRewriter defines an interface for modifying HTML tokens. 
+type IHTMLTokenRewriter interface { + // ShouldModify determines whether a given HTML token requires modification. + ShouldModify(*html.Token) bool + + // ModifyToken applies modifications to a given HTML token. + // It returns strings representing content to be prepended and + // appended to the token. If no modifications are required or if an error occurs, + // it returns empty strings for both 'prepend' and 'append'. + // Note: The original token is not modified if an error occurs. + ModifyToken(*html.Token) (prepend, append string) +} + +// HTMLRewriter is a struct that can take multiple TokenHandlers and process all +// HTML tokens from http.Response.Body in a single pass, making changes and returning a new io.ReadCloser +// +// - HTMLRewriter reads the http.Response.Body stream, +// parsing each HTML token one at a time and making modifications (defined by implementations of IHTMLTokenRewriter) +// in a single pass of the tokenizer. +// +// - When ProxyChain.Execute() is called, the response body will be read from the server +// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body +// without ever buffering the entire HTTP response in memory. +type HTMLRewriter struct { + tokenizer *html.Tokenizer + currentToken *html.Token + tokenBuffer *bytes.Buffer + currentTokenProcessed bool + rewriters []IHTMLTokenRewriter +} + +// NewHTMLRewriter creates a new HTMLRewriter instance. +// It processes HTML tokens from an io.ReadCloser source (typically http.Response.Body) +// using a series of HTMLTokenRewriters. Each HTMLTokenRewriter in the 'rewriters' slice +// applies its specific modifications to the HTML tokens. +// The HTMLRewriter reads from the provided 'src', applies the modifications, +// and returns the processed content as a new io.ReadCloser. +// This new io.ReadCloser can be used to stream the modified content back to the client. +// +// Parameters: +// - src: An io.ReadCloser representing the source of the HTML content, such as http.Response.Body. +// - rewriters: A slice of HTMLTokenRewriters that define the modifications to be applied to the HTML tokens. +// +// Returns: +// - A pointer to an HTMLRewriter, which implements io.ReadCloser, containing the modified HTML content. +func NewHTMLRewriter(src io.ReadCloser, rewriters []IHTMLTokenRewriter) *HTMLRewriter { + return &HTMLRewriter{ + tokenizer: html.NewTokenizer(src), + currentToken: nil, + tokenBuffer: new(bytes.Buffer), + currentTokenProcessed: false, + rewriters: rewriters, + } +} + +// Close resets the internal state of HTMLRewriter, clearing buffers and token data. +func (r *HTMLRewriter) Close() error { + r.tokenBuffer.Reset() + r.currentToken = nil + r.currentTokenProcessed = false + return nil +} + +// Read processes the HTML content, rewriting URLs and managing the state of tokens. 
+func (r *HTMLRewriter) Read(p []byte) (int, error) { + + if r.currentToken == nil || r.currentToken.Data == "" || r.currentTokenProcessed { + tokenType := r.tokenizer.Next() + + // done reading html, close out reader + if tokenType == html.ErrorToken { + if r.tokenizer.Err() == io.EOF { + return 0, io.EOF + } + return 0, r.tokenizer.Err() + } + + // get the next token; reset buffer + t := r.tokenizer.Token() + r.currentToken = &t + r.tokenBuffer.Reset() + + // buffer += " " + // process token through all registered rewriters + // rewriters will modify the token, and optionally + // return a or string token + appends := make([]string, 0, len(r.rewriters)) + for _, rewriter := range r.rewriters { + if !rewriter.ShouldModify(r.currentToken) { + continue + } + prepend, a := rewriter.ModifyToken(r.currentToken) + appends = append(appends, a) + // add to buffer + r.tokenBuffer.WriteString(prepend) + } + + // add to buffer + if tokenType == html.TextToken { + // don't unescape textTokens (such as inline scripts). + // Token.String() by default will escape the inputs, but + // we don't want to modify the original source + r.tokenBuffer.WriteString(r.currentToken.Data) + } else { + r.tokenBuffer.WriteString(r.currentToken.String()) + } + + // add to buffer + for _, a := range appends { + r.tokenBuffer.WriteString(a) + } + + r.currentTokenProcessed = false + } + + n, err := r.tokenBuffer.Read(p) + if err == io.EOF || r.tokenBuffer.Len() == 0 { + r.currentTokenProcessed = true + err = nil // EOF in this context is expected and not an actual error + } + return n, err +} diff --git a/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go b/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go new file mode 100644 index 0000000..d2ebc04 --- /dev/null +++ b/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go @@ -0,0 +1,263 @@ +package rewriters + +import ( + _ "embed" + "fmt" + "log" + "net/url" + "regexp" + "strings" + + "golang.org/x/net/html" +) + +var rewriteAttrs map[string]map[string]bool +var specialRewriteAttrs map[string]map[string]bool +var schemeBlacklist map[string]bool + +func init() { + // define all tag/attributes which might contain URLs + // to attempt to rewrite to point to proxy instead + rewriteAttrs = map[string]map[string]bool{ + "img": {"src": true, "srcset": true, "longdesc": true, "usemap": true}, + "a": {"href": true}, + "form": {"action": true}, + "link": {"href": true, "manifest": true, "icon": true}, + "script": {"src": true}, + "video": {"src": true, "poster": true}, + "audio": {"src": true}, + "iframe": {"src": true, "longdesc": true}, + "embed": {"src": true}, + "object": {"data": true, "codebase": true}, + "source": {"src": true, "srcset": true}, + "track": {"src": true}, + "area": {"href": true}, + "base": {"href": true}, + "blockquote": {"cite": true}, + "del": {"cite": true}, + "ins": {"cite": true}, + "q": {"cite": true}, + "body": {"background": true}, + "button": {"formaction": true}, + "input": {"src": true, "formaction": true}, + "meta": {"content": true}, + } + + // might contain URL but requires special handling + specialRewriteAttrs = map[string]map[string]bool{ + "img": {"srcset": true}, + "source": {"srcset": true}, + "meta": {"content": true}, + } + + // define URIs to NOT rewrite + // for example: don't overwrite " + schemeBlacklist = map[string]bool{ + "data": true, + "tel": true, + "mailto": true, + "file": true, + "blob": true, + "javascript": true, + "about": true, + "magnet": true, + "ws": true, + "wss": true, + 
"ftp": true, + } + +} + +// HTMLTokenURLRewriter implements HTMLTokenRewriter +// it rewrites URLs within HTML resources to use a specified proxy URL. +// -> +type HTMLTokenURLRewriter struct { + baseURL *url.URL + proxyURL string // ladder URL, not proxied site URL +} + +// NewHTMLTokenURLRewriter creates a new instance of HTMLResourceURLRewriter. +// It initializes the tokenizer with the provided source and sets the proxy URL. +func NewHTMLTokenURLRewriter(baseURL *url.URL, proxyURL string) *HTMLTokenURLRewriter { + return &HTMLTokenURLRewriter{ + baseURL: baseURL, + proxyURL: proxyURL, + } +} + +func (r *HTMLTokenURLRewriter) ShouldModify(token *html.Token) bool { + attrLen := len(token.Attr) + if attrLen == 0 { + return false + } + if !(token.Type == html.StartTagToken || token.Type == html.SelfClosingTagToken) { + return false + } + return true +} + +func (r *HTMLTokenURLRewriter) ModifyToken(token *html.Token) (string, string) { + for i := range token.Attr { + attr := &token.Attr[i] + switch { + // don't touch tag/attributes that don't contain URIs + case !rewriteAttrs[token.Data][attr.Key]: + continue + // don't touch attributes with special URIs (like data:) + case schemeBlacklist[strings.Split(attr.Key, ":")[0]]: + continue + // don't double-overwrite the url + case strings.HasPrefix(attr.Val, r.proxyURL): + continue + case strings.HasPrefix(attr.Val, "/http://"): + continue + case strings.HasPrefix(attr.Val, "/https://"): + continue + // handle special rewrites + case specialRewriteAttrs[token.Data][attr.Key]: + r.handleSpecialAttr(token, attr, r.baseURL) + continue + default: + // rewrite url + handleURLPart(attr, r.baseURL) + } + } + return "", "" +} + +// dispatcher for ModifyURL based on URI type +func handleURLPart(attr *html.Attribute, baseURL *url.URL) { + switch { + case strings.HasPrefix(attr.Key, "//"): + handleProtocolRelativePath(attr, baseURL) + case strings.HasPrefix(attr.Key, "/"): + handleRootRelativePath(attr, baseURL) + case strings.HasPrefix(attr.Key, "https://"): + handleAbsolutePath(attr, baseURL) + case strings.HasPrefix(attr.Key, "http://"): + handleAbsolutePath(attr, baseURL) + default: + handleDocumentRelativePath(attr, baseURL) + } +} + +// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page. +func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) { + attr.Val = strings.TrimPrefix(attr.Val, "/") + handleRootRelativePath(attr, baseURL) + log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val) +} + +// Root-relative URLs: These are relative to the root path and start with a "/". +func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) { + // doublecheck this is a valid relative URL + log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val) + _, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val)) + if err != nil { + log.Println(err) + return + } + + //log.Printf("BASEURL patch: %s\n", baseURL) + + attr.Val = fmt.Sprintf( + "/%s://%s/%s", + baseURL.Scheme, + baseURL.Host, + strings.TrimPrefix(attr.Val, "/"), + ) + attr.Val = escape(attr.Val) + attr.Val = fmt.Sprintf("/%s", attr.Val) + + log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val) +} + +// Document-relative URLs: These are relative to the current document's path and don't start with a "/". 
+func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
+    log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+    attr.Val = fmt.Sprintf(
+        "%s://%s/%s%s",
+        baseURL.Scheme,
+        strings.Trim(baseURL.Host, "/"),
+        strings.Trim(baseURL.RawPath, "/"),
+        strings.Trim(attr.Val, "/"),
+    )
+    attr.Val = escape(attr.Val)
+    attr.Val = fmt.Sprintf("/%s", attr.Val)
+    log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// full URIs beginning with https?://proxiedsite.com
+func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
+    // check if valid URL
+    log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+    u, err := url.Parse(attr.Val)
+    if err != nil {
+        return
+    }
+    if !(u.Scheme == "http" || u.Scheme == "https") {
+        return
+    }
+    attr.Val = fmt.Sprintf("/%s", escape(strings.TrimPrefix(attr.Val, "/")))
+    log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// handle edge cases for special attributes
+func (r *HTMLTokenURLRewriter) handleSpecialAttr(token *html.Token, attr *html.Attribute, baseURL *url.URL) {
+    switch {
+    // srcset doesn't contain a single URL but a comma-separated list of URLs, each potentially followed by a space and a descriptor (like a width or pixel density).
+    case token.Data == "img" && attr.Key == "srcset":
+        handleSrcSet(attr, baseURL)
+    case token.Data == "source" && attr.Key == "srcset":
+        handleSrcSet(attr, baseURL)
+    // meta with http-equiv="refresh": the content attribute, when used for a refresh directive, contains a time interval followed by a URL, like content="5;url=http://example.com/".
+    case token.Data == "meta" && attr.Key == "content" && regexp.MustCompile(`^\d+;url=`).MatchString(attr.Val):
+        handleMetaRefresh(attr, baseURL)
+    default:
+        break
+    }
+}
+
+func handleMetaRefresh(attr *html.Attribute, baseURL *url.URL) {
+    sec := strings.Split(attr.Val, ";url=")[0]
+    target := strings.Split(attr.Val, ";url=")[1]
+    f := &html.Attribute{Val: target, Key: "src"}
+    handleURLPart(f, baseURL)
+    attr.Val = fmt.Sprintf("%s;url=%s", sec, f.Val)
+}
+
+func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
+    var srcSetBuilder strings.Builder
+    srcSetItems := strings.Split(attr.Val, ",")
+
+    for i, srcItem := range srcSetItems {
+        srcParts := strings.Fields(srcItem) // Fields splits around whitespace, trimming them
+
+        if len(srcParts) == 0 {
+            continue // skip empty items
+        }
+
+        // rewrite the URL part by passing in a fake attribute
+        f := &html.Attribute{Val: srcParts[0], Key: "src"}
+        handleURLPart(f, baseURL)
+        urlPart := f.Val
+
+        // re-attach the descriptor (e.g. "2x" or "480w") if one was present
+        if len(srcParts) >= 2 {
+            srcSetBuilder.WriteString(fmt.Sprintf("%s %s", urlPart, srcParts[1]))
+        } else {
+            srcSetBuilder.WriteString(urlPart)
+        }
+
+        if i < len(srcSetItems)-1 {
+            srcSetBuilder.WriteString(",") // Add comma for all but last item
+        }
+    }
+
+    attr.Val = srcSetBuilder.String()
+    log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+func escape(str string) string {
+    return strings.ReplaceAll(url.PathEscape(str), "%2F", "/")
+}
diff --git a/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js b/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js
index 9c04281..a6f09e1 100644
--- a/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js
+++ b/proxychain/responsemodifers/rewriters/js_resource_url_rewriter.js
@@ -284,4 +284,34 @@ const originalSetters = {};
 });
+})();
+
+
+
+(() => {
+  document.addEventListener('DOMContentLoaded', (event) => {
+    initIdleMutationObserver();
+  });
+
+  function initIdleMutationObserver() {
+    let debounceTimer;
+    const debounceDelay = 500; // adjust the delay as needed
+
+    const observer = new MutationObserver((mutations) => {
+      // Clear the previous timer and set a new one
+      clearTimeout(debounceTimer);
+      debounceTimer = setTimeout(() => {
+        execute();
+        observer.disconnect(); // Disconnect after first execution
+      }, debounceDelay);
+    });
+
+    const config = { attributes: false, childList: true, subtree: true };
+    observer.observe(document.body, config);
+  }
+
+  function execute() {
+    console.log('DOM is now idle. Executing...');
+  }
+})();
\ No newline at end of file
diff --git a/proxychain/responsemodifers/rewriters/script_injector_rewriter.go b/proxychain/responsemodifers/rewriters/script_injector_rewriter.go
new file mode 100644
index 0000000..b08498b
--- /dev/null
+++ b/proxychain/responsemodifers/rewriters/script_injector_rewriter.go
@@ -0,0 +1,60 @@
+package rewriters
+
+import (
+    _ "embed"
+    "fmt"
+    "strings"
+
+    "golang.org/x/net/html"
+    "golang.org/x/net/html/atom"
+)
+
+// ScriptInjectorRewriter implements IHTMLTokenRewriter.
+// It injects JS into the page by modifying the HTML token stream,
+// wrapping the script so that it runs at a specified execution time.
+type ScriptInjectorRewriter struct {
+    execTime ScriptExecTime
+    script   string
+}
+
+type ScriptExecTime int
+
+const (
+    BeforeDOMContentLoaded ScriptExecTime = iota
+    AfterDOMContentLoaded
+    AfterDOMIdle
+)
+
+func (r *ScriptInjectorRewriter) ShouldModify(token *html.Token) bool {
+    // modify only if token is the opening <head> tag
+    return token.DataAtom == atom.Head && token.Type == html.StartTagToken
+}
+
+//go:embed after_dom_idle_script_injector.js
+var afterDomIdleScriptInjector string
+
+func (r *ScriptInjectorRewriter) ModifyToken(token *html.Token) (string, string) {
+    switch {
+    case r.execTime == BeforeDOMContentLoaded:
+        return "", fmt.Sprintf("\n<script>%s</script>\n", r.script)
+
+    case r.execTime == AfterDOMContentLoaded:
+        return "", fmt.Sprintf("<script>document.addEventListener('DOMContentLoaded', () => {%s});</script>\n", r.script)
+
+    case r.execTime == AfterDOMIdle:
+        s := strings.Replace(afterDomIdleScriptInjector, `'SCRIPT_CONTENT_PARAM'`, r.script, 1)
+        return "", fmt.Sprintf("\n<script>%s</script>\n", s)
+
+    default:
+        return "", ""
+    }
+}
+
+// NewScriptInjectorRewriter returns a ScriptInjectorRewriter that injects JS
+// into the page for execution at a particular time.
+func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
+    return &ScriptInjectorRewriter{
+        execTime: execTime,
+        script:   script,
+    }
+}
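Usage sketch (not part of the patch): the snippet below shows how another rewriter could plug into the new single-pass pipeline — implement IHTMLTokenRewriter, then queue it from a ResponseModification so that chain.Execute() applies it while streaming the body back. The names lazyImageRewriter and LazyLoadImages are hypothetical; only proxychain.ResponseModification, ProxyChain.AddHTMLTokenRewriter, and the golang.org/x/net/html types come from this diff.

package responsemodifers

import (
    "strings"

    "ladder/proxychain"

    "golang.org/x/net/html"
    "golang.org/x/net/html/atom"
)

// lazyImageRewriter is a hypothetical IHTMLTokenRewriter that marks every
// <img> tag as lazy-loading. It only edits the token's attributes in place.
type lazyImageRewriter struct{}

func (r *lazyImageRewriter) ShouldModify(token *html.Token) bool {
    return token.DataAtom == atom.Img &&
        (token.Type == html.StartTagToken || token.Type == html.SelfClosingTagToken)
}

func (r *lazyImageRewriter) ModifyToken(token *html.Token) (string, string) {
    for i := range token.Attr {
        if token.Attr[i].Key == "loading" {
            return "", "" // attribute already present; leave it alone
        }
    }
    token.Attr = append(token.Attr, html.Attribute{Key: "loading", Val: "lazy"})
    return "", "" // nothing to prepend or append around the tag itself
}

// LazyLoadImages queues the rewriter; the actual rewriting happens inside
// chain.Execute() while the response body is streamed back to the client.
func LazyLoadImages() proxychain.ResponseModification {
    return func(chain *proxychain.ProxyChain) error {
        ct := chain.Response.Header.Get("content-type")
        if !strings.HasPrefix(ct, "text/html") {
            return nil
        }
        chain.AddHTMLTokenRewriter(&lazyImageRewriter{})
        return nil
    }
}

Because every queued rewriter runs inside the single tokenizer pass in HTMLRewriter.Read, adding another rewriter does not add another full parse or buffer of the response body.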