This commit is contained in:
Kevin Pham
2023-11-20 15:37:07 -06:00
parent 1d88f14de2
commit 5d46adc486
5 changed files with 142 additions and 74 deletions

View File

View File

@@ -22,7 +22,8 @@ applying request and response modifications along the way.
request modifiers (ReqMods) and response modifiers (ResMods) before passing the
upstream response back to the client.
- ProxyChains can be reused to avoid memory allocations.
- ProxyChains can be reused to avoid memory allocations. However, they are not concurrency-safe,
so a ProxyChainPool guarded by mutexes should be used to avoid data races.
---
@@ -48,6 +49,7 @@ proxychain.NewProxyChain().
).
SetResultModifications(
tx.BlockIncomingCookies(),
tx.RewriteHTMLResourceURLs(),
).
Execute()
@@ -130,7 +132,6 @@ func (chain *ProxyChain) AddRuleset(rs *ruleset.RuleSet) *ProxyChain {
}
func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
log.Println("ir 1")
if chain.Context == nil {
chain.abortErr = chain.abort(errors.New("no context set"))
return nil, chain.abortErr
@@ -140,7 +141,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
if err != nil {
return nil, err
}
log.Println("ir 2")
chain.Request = req
switch chain.Context.Method() {
case "GET":
@@ -157,7 +157,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
return nil, fmt.Errorf("unsupported request method from client: '%s'", chain.Context.Method())
}
log.Println("ir 3")
/*
// copy client request headers to upstream request headers
forwardHeaders := func(key []byte, val []byte) {
@@ -166,7 +165,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
clientHeaders := &chain.Context.Request().Header
clientHeaders.VisitAll(forwardHeaders)
*/
log.Println("ir 4")
return req, nil
}
@@ -184,18 +182,14 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
if chain.Request.URL.Scheme == "" {
return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues")
}
log.Println("A")
// Apply requestModifications to proxychain
for _, applyRequestModificationsTo := range chain.requestModifications {
log.Println("AA")
log.Println(applyRequestModificationsTo)
err := applyRequestModificationsTo(chain)
if err != nil {
return nil, chain.abort(err)
}
}
log.Println("B")
// Send Request Upstream
resp, err := chain.Client.Do(chain.Request)
@@ -203,7 +197,6 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
return nil, chain.abort(err)
}
chain.Response = resp
log.Println("C")
//defer resp.Body.Close()
@@ -220,7 +213,6 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
return nil, chain.abort(err)
}
}
log.Println("D")
return chain.Response.Body, nil
}
@@ -231,15 +223,11 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
// be returned to the client
func (chain *ProxyChain) Execute() error {
defer chain._reset()
log.Println("1")
body, err := chain._execute()
log.Println("2")
if err != nil {
log.Println(err)
return err
}
log.Println("3")
log.Println(chain)
if chain.Context == nil {
return errors.New("no context set")
}
@@ -281,13 +269,11 @@ func (chain *ProxyChain) extractUrl() (*url.URL, error) {
if err != nil {
reqUrl = chain.Context.Params("*") // fallback
}
fmt.Println(reqUrl)
urlQuery, err := url.Parse(reqUrl)
if err != nil {
return nil, fmt.Errorf("error parsing request URL '%s': %v", reqUrl, err)
}
fmt.Println(urlQuery)
// Handle standard paths
// eg: https://localhost:8080/https://realsite.com/images/foobar.jpg -> https://realsite.com/images/foobar.jpg
@@ -327,6 +313,7 @@ func (chain *ProxyChain) SetFiberCtx(ctx *fiber.Ctx) *ProxyChain {
chain.abortErr = chain.abort(err)
}
chain.Request.URL = url
fmt.Printf("extracted URL: %s\n", chain.Request.URL)
return chain
}

View File

@@ -1,19 +0,0 @@
package proxychain
import "time"
// Cache provides an interface for caching mechanisms.
// It supports operations to get, set, and invalidate cache entries.
// Implementations should ensure thread safety and efficiency.
type Cache interface {
	// Get retrieves a cached value by its key. The boolean result reports
	// whether the key was present in the cache.
	Get(key string) (value interface{}, found bool)
	// Set stores a value associated with a key in the cache for a specified
	// time-to-live (ttl). If ttl is zero, the cache item has no expiration.
	Set(key string, value interface{}, ttl time.Duration)
	// Invalidate removes a value from the cache by its key. If the key does
	// not exist, it should perform a no-op or return a suitable error.
	Invalidate(key string) error
}

View File

@@ -2,6 +2,7 @@ package responsemodifers
import (
"bytes"
"fmt"
"io"
"ladder/proxychain"
"log"
@@ -11,8 +12,37 @@ import (
"golang.org/x/net/html"
)
// AttributesToRewrite is the set of HTML attribute names whose URL values
// are rewritten to route through the proxy. Only "src" and "href" are
// currently enabled; the commented entries are candidates for future support.
//
// Initialized with a composite literal rather than an init() func: the value
// is a compile-time constant set, so there is no need for init-time side
// effects. Treat it as read-only; it is not safe to mutate concurrently
// with HTML rewriting.
var AttributesToRewrite = map[string]bool{
	"src":  true,
	"href": true,
	/*
		"action":     true,
		"srcset":     true,
		"poster":     true,
		"data":       true,
		"cite":       true,
		"formaction": true,
		"background": true,
		"usemap":     true,
		"longdesc":   true,
		"manifest":   true,
		"archive":    true,
		"codebase":   true,
		"icon":       true,
		"pluginspage": true,
	*/
}
// HTMLResourceURLRewriter is a struct that rewrites URLs within HTML resources to use a specified proxy URL.
// It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes.
// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
type HTMLResourceURLRewriter struct {
proxyURL *url.URL // proxyURL is the URL of the proxy, not the upstream URL; TODO: implement
baseURL string // eg: https://proxiedsite.com (note, no trailing '/')
tokenizer *html.Tokenizer
currentToken html.Token
tokenBuffer *bytes.Buffer
@@ -20,17 +50,19 @@ type HTMLResourceURLRewriter struct {
currentTokenProcessed bool
}
func NewHTMLResourceURLRewriter(src io.ReadCloser, proxyURL *url.URL) *HTMLResourceURLRewriter {
log.Println("tokenize")
// NewHTMLResourceURLRewriter constructs an HTMLResourceURLRewriter reading
// HTML from src and rewriting resource URLs against baseURL
// (eg: "https://proxiedsite.com", no trailing '/').
// The zero-value token fields are set explicitly for clarity.
func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL string) *HTMLResourceURLRewriter {
	r := &HTMLResourceURLRewriter{}
	r.baseURL = baseURL
	r.tokenizer = html.NewTokenizer(src)
	r.tokenBuffer = new(bytes.Buffer)
	r.currentToken = html.Token{}
	r.currentTokenIndex = 0
	return r
}
// Close resets the internal state of HTMLResourceURLRewriter, clearing buffers and token data.
func (r *HTMLResourceURLRewriter) Close() error {
r.tokenBuffer.Reset()
r.currentToken = html.Token{}
@@ -39,6 +71,8 @@ func (r *HTMLResourceURLRewriter) Close() error {
return nil
}
// Read processes the HTML content, rewriting URLs and managing the state of tokens.
// It reads HTML content, token by token, rewriting URLs to route through the specified proxy.
func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
if r.currentToken.Data == "" || r.currentTokenProcessed {
@@ -55,6 +89,13 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
// flush the current token into an internal buffer
// to handle fragmented tokens
r.currentToken = r.tokenizer.Token()
// patch tokens with URLs
isTokenWithAttribute := r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken
if isTokenWithAttribute {
patchResourceURL(&r.currentToken, r.baseURL)
}
r.tokenBuffer.Reset()
r.tokenBuffer.WriteString(r.currentToken.String())
r.currentTokenProcessed = false
@@ -62,44 +103,111 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
}
n, err := r.tokenBuffer.Read(p)
if err == io.EOF || r.tokenBuffer.Len() == 0 {
r.currentTokenProcessed = true
err = nil // Reset error to nil because EOF in this context is expected and not an actual error
err = nil // EOF in this context is expected and not an actual error
}
return n, err
}
// RewriteHTMLResourceURLs updates src/href attributes in HTML content to route through the proxy.
// patchResourceURL rewrites a single token's resource attributes (those
// listed in AttributesToRewrite) so the proxified page keeps routing its
// resources through the proxy:
//
//   - relative path:  <img src="/favicon.png">
//     -> <img src="/https://proxiedsite.com/favicon.png">
//   - absolute URL:   <img src="http://images.cdn.proxiedsite.com/favicon.png">
//     -> <img src="/http://images.cdn.proxiedsite.com/favicon.png">
//
// baseURL is the upstream origin, eg: "https://proxiedsite.com" (no trailing '/').
// Attributes whose values are not parseable, or whose scheme is not
// http/https (mailto:, javascript:, data:, ...), are left untouched.
//
// Fix over the previous revision: an invalid value used to `return`,
// silently skipping every remaining attribute on the same token; it now
// `continue`s so the other attributes are still processed. Leftover debug
// log.Printf calls were also removed.
func patchResourceURL(token *html.Token, baseURL string) {
	for i := range token.Attr {
		attr := &token.Attr[i]

		// Only rewrite the attributes we explicitly allow (src, href, ...).
		if !AttributesToRewrite[attr.Key] {
			continue
		}

		isRelativePath := strings.HasPrefix(attr.Val, "/")

		// Double-check the attribute holds a valid URL before modifying it.
		if isRelativePath {
			if _, err := url.Parse(fmt.Sprintf("http://localhost%s", attr.Val)); err != nil {
				continue // malformed path: leave this attribute alone
			}
		} else {
			u, err := url.Parse(attr.Val)
			if err != nil {
				continue // malformed URL: leave this attribute alone
			}
			if u.Scheme != "http" && u.Scheme != "https" {
				continue // non-web scheme (mailto:, javascript:, ...): skip
			}
		}

		// patch relative paths
		// <img src="/favicon.png"> -> <img src="/https://proxiedsite.com/favicon.png">
		if isRelativePath {
			attr.Val = fmt.Sprintf("/%s/%s", baseURL, strings.TrimPrefix(attr.Val, "/"))
			continue
		}

		// patch absolute URLs to a relative path pointing at the ladder proxy
		// <img src="http://cdn.site.com/x.png"> -> <img src="/http://cdn.site.com/x.png">
		// (TrimPrefix is a no-op for absolute URLs; kept for parity with the
		// original behavior.)
		attr.Val = fmt.Sprintf("/%s", strings.TrimPrefix(attr.Val, "/"))
	}
}
// RewriteHTMLResourceURLs modifies HTTP responses
// to rewrite URL attributes in HTML content (such as src, href)
// - `<img src='/relative_path'>` -> `<img src='/https://proxiedsite.com/relative_path'>`
// - This function is designed to allow the proxified page
// to still be browsable by routing all resource URLs through the proxy.
//
// ---
//
// - It works by replacing the io.ReadCloser of the http.Response.Body
// with another io.ReaderCloser (HTMLResourceRewriter) that wraps the first one.
//
// - This process can be done multiple times, so that the response will
// be streamed and modified through each pass without buffering the entire response in memory.
//
// - HTMLResourceRewriter reads the http.Response.Body stream,
// parsing each HTML token one at a time and replacing attribute tags.
//
// - When ProxyChain.Execute() is called, the response body will be read from the server
// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
// without ever buffering the entire HTTP response in memory.
//
// NOTE(review): the body below wraps chain.Response.Body twice — once with
// chain.Request.URL (a *url.URL) and once with the derived baseUrl string.
// The first wrap looks like a stale line from a previous revision (the
// current NewHTMLResourceURLRewriter takes a string base URL) — confirm and
// remove. The bare log.Println calls also look like leftover debug output.
func RewriteHTMLResourceURLs() proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		log.Println("rhru") // debug output — candidate for removal
		// return early if it's not HTML
		ct := chain.Response.Header.Get("content-type")
		log.Println(ct) // debug output — candidate for removal
		if !strings.HasPrefix(ct, "text/html") {
			return nil
		}
		log.Println("rhru2") // debug output — candidate for removal
		// chain.Response.Body is an unread http.Response.Body
		chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, chain.Request.URL)
		// should be site being requested to proxy
		// eg: https://proxiedsite.com (scheme + host, no trailing '/')
		baseUrl := fmt.Sprintf("%s://%s", chain.Request.URL.Scheme, chain.Request.URL.Host)
		/*
			log.Println("--------------------")
			log.Println(baseUrl)
			log.Println("--------------------")
		*/
		chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, baseUrl)
		return nil
	}
}
// rewriteToken prefixes the values of known resource attributes
// (href, src, action, srcset) with "/" so they resolve against the
// proxy root.
//
// NOTE(review): baseURL is currently unused (the absolute-URL rewrite that
// used baseURL.Host was commented out); this helper appears superseded by
// patchResourceURL — confirm before relying on it.
//
// Fixes over the previous revision: removed the debug
// log.Println(token.String()) that fired for every token, and replaced the
// map allocated on every call with a constant switch for the membership test.
func rewriteToken(token *html.Token, baseURL *url.URL) {
	for i := range token.Attr {
		attr := &token.Attr[i]
		switch attr.Key {
		case "href", "src", "action", "srcset":
			attr.Val = "/" + attr.Val
		}
	}
}

View File

@@ -1,8 +0,0 @@
package strategy
/*
var Cloudflare = proxy.Strategy{
tactic.NoCookie(),
// ... other tactics ...
}
*/