refactor wip

This commit is contained in:
Kevin Pham
2023-11-19 15:03:11 -06:00
parent 98fa53287b
commit ee9066dedb
27 changed files with 377 additions and 210 deletions

View File

@@ -0,0 +1,114 @@
package responsemodifers
import (
"bytes"
"io"
"ladder/proxychain"
"net/url"
"strings"
"golang.org/x/net/html"
)
type HTMLResourceURLRewriter struct {
src io.Reader
buffer *bytes.Buffer // buffer to temporarily hold rewritten output for the reader
proxyURL *url.URL // proxyURL is the URL of the proxy, not the upstream URL
}
func NewHTMLResourceURLRewriter(src io.Reader, proxyURL *url.URL) *HTMLResourceURLRewriter {
return &HTMLResourceURLRewriter{
src: src,
buffer: new(bytes.Buffer),
proxyURL: proxyURL,
}
}
func rewriteToken(token *html.Token, baseURL *url.URL) {
attrsToRewrite := map[string]bool{"href": true, "src": true, "action": true, "srcset": true}
for i := range token.Attr {
attr := &token.Attr[i]
if attrsToRewrite[attr.Key] && strings.HasPrefix(attr.Val, "/") {
// Make URL absolute
attr.Val = "/https://" + baseURL.Host + attr.Val
}
}
}
func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
if r.buffer.Len() != 0 {
return r.buffer.Read(p)
}
tokenizer := html.NewTokenizer(r.src)
for {
tokenType := tokenizer.Next()
if tokenType == html.ErrorToken {
err := tokenizer.Err()
if err == io.EOF {
return 0, io.EOF // End of document
}
return 0, err // Actual error
}
token := tokenizer.Token()
if tokenType == html.StartTagToken || tokenType == html.SelfClosingTagToken {
rewriteToken(&token, r.url)
}
r.buffer.WriteString(token.String())
if r.buffer.Len() > 0 {
break
}
}
}
}
// RewriteHTMLResourceURLs returns a response modification that rewrites the
// href, src, action and srcset attributes of an HTML response body so that
// root-relative and protocol-relative URLs are requested through the proxy.
// Responses whose Content-Type is not text/html are left untouched. The
// whole body is read and replaced by an in-memory copy.
func RewriteHTMLResourceURLs() proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		// The header usually carries parameters ("text/html; charset=utf-8"),
		// so match on the media-type prefix rather than the full value.
		ct := strings.ToLower(strings.TrimSpace(chain.Response.Header.Get("content-type")))
		if !strings.HasPrefix(ct, "text/html") {
			return nil
		}

		// NOTE(review): the original referenced an undefined variable `u`.
		// This assumes chain.Response is a *net/http.Response and takes the
		// URL of the request that produced it — confirm this is the intended
		// base URL. Without one nothing can be rewritten, so the body is
		// left as it is.
		if chain.Response.Request == nil || chain.Response.Request.URL == nil {
			return nil
		}
		upstream := chain.Response.Request.URL

		tokenizer := html.NewTokenizer(chain.Body)
		var buffer bytes.Buffer

		for {
			switch tokenizer.Next() {
			case html.ErrorToken:
				if err := tokenizer.Err(); err != io.EOF {
					return err // actual read/parse failure, keep the old body
				}
				// End of the document, set the new body.
				// NOTE(review): io.NopCloser satisfies both io.Reader and
				// io.ReadCloser; if chain.Body owns a Closer it is not closed
				// here — verify who is responsible for that.
				chain.Body = io.NopCloser(&buffer)
				return nil
			case html.StartTagToken, html.SelfClosingTagToken:
				token := rewriteTokenAttrs(tokenizer.Token(), upstream)
				buffer.WriteString(token.String())
			default:
				// Copy the raw bytes: Token.String() would HTML-escape text,
				// which corrupts the contents of <script> and <style>.
				buffer.Write(tokenizer.Raw())
			}
		}
	}
}

// rewriteTokenAttrs returns token with its href, src, action and srcset
// attributes rewritten to the "/https://<host>/<path>" form, using u.Host for
// root-relative URLs. The attribute slice is modified in place. It carries
// its own name because rewriteToken(*html.Token, *url.URL) is already
// declared earlier in this file and Go forbids redeclaring a function.
func rewriteTokenAttrs(token html.Token, u *url.URL) html.Token {
	if u == nil {
		return token
	}

	// proxify prefixes a single URL reference; anything that is neither
	// protocol-relative nor root-relative is returned untouched.
	proxify := func(ref string) string {
		switch {
		case strings.HasPrefix(ref, "//"):
			// Protocol-relative: the reference already names its own host.
			return "/https:" + ref
		case strings.HasPrefix(ref, "/"):
			return "/https://" + u.Host + ref
		}
		return ref
	}

	for i := range token.Attr {
		attr := &token.Attr[i]
		switch attr.Key {
		case "href", "src", "action":
			attr.Val = proxify(attr.Val)
		case "srcset":
			// srcset is a comma-separated candidate list; rewrite each URL
			// and keep its leading whitespace and trailing descriptor.
			candidates := strings.Split(attr.Val, ",")
			for j, c := range candidates {
				ref := strings.TrimLeft(c, " \t\r\n\f")
				candidates[j] = c[:len(c)-len(ref)] + proxify(ref)
			}
			attr.Val = strings.Join(candidates, ",")
		}
	}
	return token
}