simplify rewriters api usage

This commit is contained in:
Kevin Pham
2023-11-26 22:51:00 -06:00
parent ae48429da7
commit 98d6b65057
9 changed files with 38 additions and 117 deletions

View File

@@ -2,7 +2,6 @@ package handlers
import (
_ "embed"
"log"
"github.com/gofiber/fiber/v2"
)
@@ -12,48 +11,5 @@ import (
var version string
func Api(c *fiber.Ctx) error {
// Get the url from the URL
urlQuery := c.Params("*")
queries := c.Queries()
body, req, resp, err := fetchSite(urlQuery, queries)
if err != nil {
log.Println("ERROR:", err)
c.SendStatus(500)
return c.SendString(err.Error())
}
response := Response{
Version: version,
Body: body,
}
response.Request.Headers = make([]any, 0, len(req.Header))
for k, v := range req.Header {
response.Request.Headers = append(response.Request.Headers, map[string]string{
"key": k,
"value": v[0],
})
}
response.Response.Headers = make([]any, 0, len(resp.Header))
for k, v := range resp.Header {
response.Response.Headers = append(response.Response.Headers, map[string]string{
"key": k,
"value": v[0],
})
}
return c.JSON(response)
}
type Response struct {
Version string `json:"version"`
Body string `json:"body"`
Request struct {
Headers []interface{} `json:"headers"`
} `json:"request"`
Response struct {
Headers []interface{} `json:"headers"`
} `json:"response"`
return nil
}

View File

@@ -1,21 +1,9 @@
package handlers
import (
"log"
"github.com/gofiber/fiber/v2"
)
func Raw(c *fiber.Ctx) error {
// Get the url from the URL
urlQuery := c.Params("*")
queries := c.Queries()
body, _, _, err := fetchSite(urlQuery, queries)
if err != nil {
log.Println("ERROR:", err)
c.SendStatus(500)
return c.SendString(err.Error())
}
return c.SendString(body)
return nil
}

View File

@@ -1,23 +1,9 @@
package handlers
import (
"os"
"github.com/gofiber/fiber/v2"
"gopkg.in/yaml.v3"
)
func Ruleset(c *fiber.Ctx) error {
if os.Getenv("EXPOSE_RULESET") == "false" {
c.SendStatus(fiber.StatusForbidden)
return c.SendString("Rules Disabled")
}
body, err := yaml.Marshal(rulesSet)
if err != nil {
c.SendStatus(fiber.StatusInternalServerError)
return c.SendString(err.Error())
}
return c.SendString(string(body))
return nil
}

View File

@@ -256,16 +256,6 @@ func (chain *ProxyChain) extractUrl() (*url.URL, error) {
return reconstructUrlFromReferer(referer, relativePath)
}
// AddHTMLTokenRewriter appends an IHTMLTokenRewriter to the chain's list of
// queued HTML token rewriters and returns the chain so calls can be chained.
//   - HTMLTokenRewriters modify the response body by parsing the HTML
//     and making changes to the DOM as it streams to the client.
//   - In most cases, you don't need to use this method. It's usually called by
//     a ResponseModification to batch queue changes for performance reasons.
//
// Note: the parameter is named rr, which shadows the imported rr package
// inside the function body.
func (chain *ProxyChain) AddHTMLTokenRewriter(rr rr.IHTMLTokenRewriter) *ProxyChain {
chain.htmlTokenRewriters = append(chain.htmlTokenRewriters, rr)
return chain
}
// SetFiberCtx takes the request ctx from the client
// for the modifiers and execute function to use.
// it must be set every time a new request comes through
@@ -388,19 +378,7 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
}
}
// stream request back to client, possibly rewriting the body
if len(chain.htmlTokenRewriters) == 0 {
return chain.Response.Body, nil
}
ct := chain.Response.Header.Get("content-type")
switch {
case strings.HasPrefix(ct, "text/html"):
fmt.Println("fooox")
return rr.NewHTMLRewriter(chain.Response.Body, chain.htmlTokenRewriters), nil
default:
return chain.Response.Body, nil
}
}

View File

@@ -7,9 +7,9 @@ import (
"strings"
)
// InjectScript modifies HTTP responses
// injectScript modifies HTTP responses
// to execute javascript at a particular time.
func InjectScript(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
func injectScript(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
return func(chain *proxychain.ProxyChain) error {
// don't add rewriter if it's not even html
ct := chain.Response.Header.Get("content-type")
@@ -17,11 +17,24 @@ func InjectScript(js string, execTime rewriters.ScriptExecTime) proxychain.Respo
return nil
}
// the rewriting actually happens in chain.Execute() as the client is streaming the response body back
rr := rewriters.NewScriptInjectorRewriter(js, execTime)
// we just queue it up here
chain.AddHTMLTokenRewriter(rr)
htmlRewriter := rewriters.NewHTMLRewriter(chain.Response.Body, rr)
chain.Response.Body = htmlRewriter
return nil
}
}
// InjectScriptBeforeDOMContentLoaded modifies HTTP responses to inject the given
// JavaScript before DOM content is loaded (script tag in head).
// It delegates to injectScript with rewriters.BeforeDOMContentLoaded.
//
// NOTE(review): execTime is accepted but never read; the execution time is
// fixed by the constant passed to injectScript below.
func InjectScriptBeforeDOMContentLoaded(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
return injectScript(js, rewriters.BeforeDOMContentLoaded)
}
// InjectScriptAfterDOMContentLoaded modifies HTTP responses to inject the given
// JavaScript after DOM content is loaded (script tag in head).
// It delegates to injectScript with rewriters.AfterDOMContentLoaded.
//
// NOTE(review): execTime is accepted but never read; the execution time is
// fixed by the constant passed to injectScript below.
// NOTE(review): the "(script tag in head)" wording is identical to the
// BeforeDOMContentLoaded variant — confirm it also applies here.
func InjectScriptAfterDOMContentLoaded(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
return injectScript(js, rewriters.AfterDOMContentLoaded)
}
// InjectScriptAfterDOMIdle modifies HTTP responses to inject the given
// JavaScript after the DOM is idle (i.e. a JS framework has loaded).
// It delegates to injectScript with rewriters.AfterDOMIdle.
//
// NOTE(review): execTime is accepted but never read; the execution time is
// fixed by the constant passed to injectScript below.
func InjectScriptAfterDOMIdle(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
return injectScript(js, rewriters.AfterDOMIdle)
}

View File

@@ -41,14 +41,14 @@ func PatchDynamicResourceURLs() proxychain.ResponseModification {
"{{ORIGIN}}": fmt.Sprintf("%s://%s", reqURL.Scheme, reqURL.Host),
}
// the rewriting actually happens in chain.Execute() as the client is streaming the response body back
rr := rewriters.NewScriptInjectorRewriterWithParams(
patchDynamicResourceURLsScript,
rewriters.BeforeDOMContentLoaded,
params,
)
// we just queue it up here
chain.AddHTMLTokenRewriter(rr)
htmlRewriter := rewriters.NewHTMLRewriter(chain.Response.Body, rr)
chain.Response.Body = htmlRewriter
return nil
}

View File

@@ -25,10 +25,10 @@ func RewriteHTMLResourceURLs() proxychain.ResponseModification {
originalURI := chain.Context.Request().URI()
proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())
// the rewriting actually happens in chain.Execute() as the client is streaming the response body back
// replace http.Response.Body with a readcloser that wraps the original, modifying the html attributes
rr := rewriters.NewHTMLTokenURLRewriter(chain.Request.URL, proxyURL)
// we just queue it up here
chain.AddHTMLTokenRewriter(rr)
htmlRewriter := rewriters.NewHTMLRewriter(chain.Response.Body, rr)
chain.Response.Body = htmlRewriter
return nil
}

View File

@@ -25,7 +25,6 @@ type IHTMLTokenRewriter interface {
//
// - HTMLRewriter reads the http.Response.Body stream,
// parsing each HTML token one at a time and making modifications (defined by implementations of IHTMLTokenRewriter)
// in a single pass of the tokenizer.
//
// - When ProxyChain.Execute() is called, the response body will be read from the server
// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
@@ -52,7 +51,7 @@ type HTMLRewriter struct {
//
// Returns:
// - A pointer to an HTMLRewriter, which implements io.ReadCloser, containing the modified HTML content.
func NewHTMLRewriter(src io.ReadCloser, rewriters []IHTMLTokenRewriter) *HTMLRewriter {
func NewHTMLRewriter(src io.ReadCloser, rewriters ...IHTMLTokenRewriter) *HTMLRewriter {
return &HTMLRewriter{
tokenizer: html.NewTokenizer(src),
currentToken: nil,

View File

@@ -3,6 +3,7 @@ package rewriters
import (
_ "embed"
"fmt"
"golang.org/x/net/html/atom"
"log"
"net/url"
"regexp"
@@ -128,13 +129,13 @@ func (r *HTMLTokenURLRewriter) ModifyToken(token *html.Token) (string, string) {
// dispatcher for ModifyURL based on URI type
func handleURLPart(attr *html.Attribute, baseURL *url.URL) {
switch {
case strings.HasPrefix(attr.Key, "//"):
case strings.HasPrefix(attr.Val, "//"):
handleProtocolRelativePath(attr, baseURL)
case strings.HasPrefix(attr.Key, "/"):
case strings.HasPrefix(attr.Val, "/"):
handleRootRelativePath(attr, baseURL)
case strings.HasPrefix(attr.Key, "https://"):
case strings.HasPrefix(attr.Val, "https://"):
handleAbsolutePath(attr, baseURL)
case strings.HasPrefix(attr.Key, "http://"):
case strings.HasPrefix(attr.Val, "http://"):
handleAbsolutePath(attr, baseURL)
default:
handleDocumentRelativePath(attr, baseURL)
@@ -206,12 +207,12 @@ func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
func (r *HTMLTokenURLRewriter) handleSpecialAttr(token *html.Token, attr *html.Attribute, baseURL *url.URL) {
switch {
// srcset attribute doesn't contain a single URL but a comma-separated list of URLs, each potentially followed by a space and a descriptor (like a width, pixel density, or other conditions).
case token.Data == "img" && attr.Key == "srcset":
case token.DataAtom == atom.Img && attr.Key == "srcset":
handleSrcSet(attr, baseURL)
case token.Data == "source" && attr.Key == "srcset":
case token.DataAtom == atom.Source && attr.Key == "srcset":
handleSrcSet(attr, baseURL)
// meta with http-equiv="refresh": The content attribute of a meta tag, when used for a refresh directive, contains a time interval followed by a URL, like content="5;url=http://example.com/".
case token.Data == "meta" && attr.Key == "content" && regexp.MustCompile(`^\d+;url=`).MatchString(attr.Val):
case token.DataAtom == atom.Meta && attr.Key == "content" && regexp.MustCompile(`^\d+;url=`).MatchString(attr.Val):
handleMetaRefresh(attr, baseURL)
default:
break