simplify rewriters api usage

This commit is contained in:
Kevin Pham
2023-11-26 22:51:00 -06:00
parent ae48429da7
commit 98d6b65057
9 changed files with 38 additions and 117 deletions

View File

@@ -7,9 +7,9 @@ import (
"strings"
)
// InjectScript modifies HTTP responses
// injectScript modifies HTTP responses
// to execute javascript at a particular time.
func InjectScript(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
func injectScript(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
return func(chain *proxychain.ProxyChain) error {
// don't add rewriter if it's not even html
ct := chain.Response.Header.Get("content-type")
@@ -17,11 +17,24 @@ func InjectScript(js string, execTime rewriters.ScriptExecTime) proxychain.Respo
return nil
}
// the rewriting actually happens in chain.Execute() as the client is streaming the response body back
rr := rewriters.NewScriptInjectorRewriter(js, execTime)
// we just queue it up here
chain.AddHTMLTokenRewriter(rr)
htmlRewriter := rewriters.NewHTMLRewriter(chain.Response.Body, rr)
chain.Response.Body = htmlRewriter
return nil
}
}
// InjectScriptBeforeDOMContentLoaded returns a ResponseModification that injects js
// into the response so that it runs before DOM content is loaded (script tag in head).
// It delegates to injectScript with rewriters.BeforeDOMContentLoaded.
//
// NOTE(review): the execTime parameter is never read here; the execution time is
// always rewriters.BeforeDOMContentLoaded regardless of what the caller passes.
// Consider dropping the parameter once callers are updated.
func InjectScriptBeforeDOMContentLoaded(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
return injectScript(js, rewriters.BeforeDOMContentLoaded)
}
// InjectScriptAfterDOMContentLoaded returns a ResponseModification that injects js
// into the response so that it runs after DOM content is loaded.
// It delegates to injectScript with rewriters.AfterDOMContentLoaded.
//
// NOTE(review): the previous comment said "(script tag in head)", which looks copied
// from the BeforeDOMContentLoaded variant — confirm the actual placement against the
// script injector rewriter.
//
// NOTE(review): the execTime parameter is never read here; the execution time is
// always rewriters.AfterDOMContentLoaded regardless of what the caller passes.
func InjectScriptAfterDOMContentLoaded(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
return injectScript(js, rewriters.AfterDOMContentLoaded)
}
// InjectScriptAfterDOMIdle returns a ResponseModification that injects js into the
// response so that it runs after the DOM is idle (i.e. after a JS framework has loaded).
// It delegates to injectScript with rewriters.AfterDOMIdle.
//
// NOTE(review): the execTime parameter is never read here; the execution time is
// always rewriters.AfterDOMIdle regardless of what the caller passes.
func InjectScriptAfterDOMIdle(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
return injectScript(js, rewriters.AfterDOMIdle)
}

View File

@@ -41,14 +41,14 @@ func PatchDynamicResourceURLs() proxychain.ResponseModification {
"{{ORIGIN}}": fmt.Sprintf("%s://%s", reqURL.Scheme, reqURL.Host),
}
// the rewriting actually happens in chain.Execute() as the client is streaming the response body back
rr := rewriters.NewScriptInjectorRewriterWithParams(
patchDynamicResourceURLsScript,
rewriters.BeforeDOMContentLoaded,
params,
)
// we just queue it up here
chain.AddHTMLTokenRewriter(rr)
htmlRewriter := rewriters.NewHTMLRewriter(chain.Response.Body, rr)
chain.Response.Body = htmlRewriter
return nil
}

View File

@@ -25,10 +25,10 @@ func RewriteHTMLResourceURLs() proxychain.ResponseModification {
originalURI := chain.Context.Request().URI()
proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())
// the rewriting actually happens in chain.Execute() as the client is streaming the response body back
// replace http.Response.Body with a readcloser that wraps the original, modifying the html attributes
rr := rewriters.NewHTMLTokenURLRewriter(chain.Request.URL, proxyURL)
// we just queue it up here
chain.AddHTMLTokenRewriter(rr)
htmlRewriter := rewriters.NewHTMLRewriter(chain.Response.Body, rr)
chain.Response.Body = htmlRewriter
return nil
}

View File

@@ -25,7 +25,6 @@ type IHTMLTokenRewriter interface {
//
// - HTMLRewriter reads the http.Response.Body stream,
// parsing each HTML token one at a time and making modifications (defined by implementations of IHTMLTokenRewriter)
// in a single pass of the tokenizer.
//
// - When ProxyChain.Execute() is called, the response body will be read from the server
// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
@@ -52,7 +51,7 @@ type HTMLRewriter struct {
//
// Returns:
// - A pointer to an HTMLRewriter, which implements io.ReadCloser, containing the modified HTML content.
func NewHTMLRewriter(src io.ReadCloser, rewriters []IHTMLTokenRewriter) *HTMLRewriter {
func NewHTMLRewriter(src io.ReadCloser, rewriters ...IHTMLTokenRewriter) *HTMLRewriter {
return &HTMLRewriter{
tokenizer: html.NewTokenizer(src),
currentToken: nil,

View File

@@ -3,6 +3,7 @@ package rewriters
import (
_ "embed"
"fmt"
"golang.org/x/net/html/atom"
"log"
"net/url"
"regexp"
@@ -128,13 +129,13 @@ func (r *HTMLTokenURLRewriter) ModifyToken(token *html.Token) (string, string) {
// dispatcher for ModifyURL based on URI type
func handleURLPart(attr *html.Attribute, baseURL *url.URL) {
switch {
case strings.HasPrefix(attr.Key, "//"):
case strings.HasPrefix(attr.Val, "//"):
handleProtocolRelativePath(attr, baseURL)
case strings.HasPrefix(attr.Key, "/"):
case strings.HasPrefix(attr.Val, "/"):
handleRootRelativePath(attr, baseURL)
case strings.HasPrefix(attr.Key, "https://"):
case strings.HasPrefix(attr.Val, "https://"):
handleAbsolutePath(attr, baseURL)
case strings.HasPrefix(attr.Key, "http://"):
case strings.HasPrefix(attr.Val, "http://"):
handleAbsolutePath(attr, baseURL)
default:
handleDocumentRelativePath(attr, baseURL)
@@ -206,12 +207,12 @@ func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
func (r *HTMLTokenURLRewriter) handleSpecialAttr(token *html.Token, attr *html.Attribute, baseURL *url.URL) {
switch {
// srcset attribute doesn't contain a single URL but a comma-separated list of URLs, each potentially followed by a space and a descriptor (like a width, pixel density, or other conditions).
case token.Data == "img" && attr.Key == "srcset":
case token.DataAtom == atom.Img && attr.Key == "srcset":
handleSrcSet(attr, baseURL)
case token.Data == "source" && attr.Key == "srcset":
case token.DataAtom == atom.Source && attr.Key == "srcset":
handleSrcSet(attr, baseURL)
// meta with http-equiv="refresh": The content attribute of a meta tag, when used for a refresh directive, contains a time interval followed by a URL, like content="5;url=http://example.com/".
case token.Data == "meta" && attr.Key == "content" && regexp.MustCompile(`^\d+;url=`).MatchString(attr.Val):
case token.DataAtom == atom.Meta && attr.Key == "content" && regexp.MustCompile(`^\d+;url=`).MatchString(attr.Val):
handleMetaRefresh(attr, baseURL)
default:
break