diff --git a/proxychain/cache/memcache.go b/proxychain/cache/memcache.go
deleted file mode 100644
index e69de29..0000000
diff --git a/proxychain/proxychain.go b/proxychain/proxychain.go
index a6f5948..319213b 100644
--- a/proxychain/proxychain.go
+++ b/proxychain/proxychain.go
@@ -22,7 +22,8 @@ applying request and response modifications along the way.
request modifiers (ReqMods) and response modifiers (ResMods) before passing the
upstream response back to the client.
- - ProxyChains can be reused to avoid memory allocations.
+ - ProxyChains can be reused to avoid memory allocations. However, they are not safe for
+ concurrent use, so share them through a ProxyChainPool guarded by a mutex to avoid data races.
---
@@ -48,6 +49,7 @@ proxychain.NewProxyChain().
).
SetResultModifications(
tx.BlockIncomingCookies(),
+ tx.RewriteHTMLResourceURLs(),
).
Execute()
@@ -130,7 +132,6 @@ func (chain *ProxyChain) AddRuleset(rs *ruleset.RuleSet) *ProxyChain {
}
func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
- log.Println("ir 1")
if chain.Context == nil {
chain.abortErr = chain.abort(errors.New("no context set"))
return nil, chain.abortErr
@@ -140,7 +141,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
if err != nil {
return nil, err
}
- log.Println("ir 2")
chain.Request = req
switch chain.Context.Method() {
case "GET":
@@ -157,7 +157,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
return nil, fmt.Errorf("unsupported request method from client: '%s'", chain.Context.Method())
}
- log.Println("ir 3")
/*
// copy client request headers to upstream request headers
forwardHeaders := func(key []byte, val []byte) {
@@ -166,7 +165,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
clientHeaders := &chain.Context.Request().Header
clientHeaders.VisitAll(forwardHeaders)
*/
- log.Println("ir 4")
return req, nil
}
@@ -184,18 +182,14 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
if chain.Request.URL.Scheme == "" {
return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues")
}
- log.Println("A")
// Apply requestModifications to proxychain
for _, applyRequestModificationsTo := range chain.requestModifications {
- log.Println("AA")
- log.Println(applyRequestModificationsTo)
err := applyRequestModificationsTo(chain)
if err != nil {
return nil, chain.abort(err)
}
}
- log.Println("B")
// Send Request Upstream
resp, err := chain.Client.Do(chain.Request)
@@ -203,7 +197,6 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
return nil, chain.abort(err)
}
chain.Response = resp
- log.Println("C")
//defer resp.Body.Close()
@@ -220,7 +213,6 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
return nil, chain.abort(err)
}
}
- log.Println("D")
return chain.Response.Body, nil
}
@@ -231,15 +223,11 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
// be returned to the client
func (chain *ProxyChain) Execute() error {
defer chain._reset()
- log.Println("1")
body, err := chain._execute()
- log.Println("2")
if err != nil {
log.Println(err)
return err
}
- log.Println("3")
- log.Println(chain)
if chain.Context == nil {
return errors.New("no context set")
}
@@ -281,13 +269,11 @@ func (chain *ProxyChain) extractUrl() (*url.URL, error) {
if err != nil {
reqUrl = chain.Context.Params("*") // fallback
}
- fmt.Println(reqUrl)
urlQuery, err := url.Parse(reqUrl)
if err != nil {
return nil, fmt.Errorf("error parsing request URL '%s': %v", reqUrl, err)
}
- fmt.Println(urlQuery)
// Handle standard paths
// eg: https://localhost:8080/https://realsite.com/images/foobar.jpg -> https://realsite.com/images/foobar.jpg
@@ -327,6 +313,7 @@ func (chain *ProxyChain) SetFiberCtx(ctx *fiber.Ctx) *ProxyChain {
chain.abortErr = chain.abort(err)
}
chain.Request.URL = url
+ fmt.Printf("extracted URL: %s\n", chain.Request.URL)
return chain
}
diff --git a/proxychain/proxychain_cache.go b/proxychain/proxychain_cache.go
deleted file mode 100644
index 44289af..0000000
--- a/proxychain/proxychain_cache.go
+++ /dev/null
@@ -1,19 +0,0 @@
-package proxychain
-
-import "time"
-
-// Cache provides an interface for caching mechanisms.
-// It supports operations to get, set, and invalidate cache entries.
-// Implementations should ensure thread safety, efficiency
-type Cache interface {
- // Get Retrieves a cached value by its key. Returns the value and a boolean indicating
- Get(key string) (value interface{}, found bool)
-
- // Set - Stores a value associated with a key in the cache for a specified time-to-live (ttl).
- // If ttl is zero, the cache item has no expiration.
- Set(key string, value interface{}, ttl time.Duration)
-
- // Invalidate - Removes a value from the cache by its key. If the key does not exist,
- // it should perform a no-op or return a suitable error.
- Invalidate(key string) error
-}
diff --git a/proxychain/responsemodifers/rewrite_http_resource_urls.go b/proxychain/responsemodifers/rewrite_http_resource_urls.go
index 1e051a2..cdc92b1 100644
--- a/proxychain/responsemodifers/rewrite_http_resource_urls.go
+++ b/proxychain/responsemodifers/rewrite_http_resource_urls.go
@@ -2,6 +2,7 @@ package responsemodifers
import (
"bytes"
+ "fmt"
"io"
"ladder/proxychain"
"log"
@@ -11,8 +12,37 @@ import (
"golang.org/x/net/html"
)
+// AttributesToRewrite is the set of HTML attribute names whose URL values
+// should be rewritten to route through the proxy. Keys are attribute names.
+//
+// The commented-out entries are other URL-bearing attributes that are
+// currently not rewritten.
+var AttributesToRewrite = map[string]bool{
+	"src":  true,
+	"href": true,
+	/*
+		"action":      true,
+		"srcset":      true,
+		"poster":      true,
+		"data":        true,
+		"cite":        true,
+		"formaction":  true,
+		"background":  true,
+		"usemap":      true,
+		"longdesc":    true,
+		"manifest":    true,
+		"archive":     true,
+		"codebase":    true,
+		"icon":        true,
+		"pluginspage": true,
+	*/
+}
+
+
+// HTMLResourceURLRewriter is a struct that rewrites URLs within HTML resources so that they are routed through the proxy.
+// It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes.
+//
+// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
type HTMLResourceURLRewriter struct {
- proxyURL *url.URL // proxyURL is the URL of the proxy, not the upstream URL; TODO: implement
+ baseURL string // eg: https://proxiedsite.com (note, no trailing '/')
tokenizer *html.Tokenizer
currentToken html.Token
tokenBuffer *bytes.Buffer
@@ -20,17 +50,19 @@ type HTMLResourceURLRewriter struct {
currentTokenProcessed bool
}
-func NewHTMLResourceURLRewriter(src io.ReadCloser, proxyURL *url.URL) *HTMLResourceURLRewriter {
- log.Println("tokenize")
+// NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter.
+// It initializes the tokenizer with the provided source and sets the base URL of the
+// proxied site (eg: https://proxiedsite.com, without a trailing '/').
+func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL string) *HTMLResourceURLRewriter {
return &HTMLResourceURLRewriter{
tokenizer: html.NewTokenizer(src),
currentToken: html.Token{},
currentTokenIndex: 0,
tokenBuffer: new(bytes.Buffer),
- proxyURL: proxyURL,
+ baseURL: baseURL,
}
}
+// Close resets the internal state of HTMLResourceURLRewriter, clearing buffers and token data.
func (r *HTMLResourceURLRewriter) Close() error {
r.tokenBuffer.Reset()
r.currentToken = html.Token{}
@@ -39,6 +71,8 @@ func (r *HTMLResourceURLRewriter) Close() error {
return nil
}
+// Read processes the HTML content, rewriting URLs and managing the state of tokens.
+// It reads HTML content, token by token, rewriting URLs to route through the specified proxy.
func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
if r.currentToken.Data == "" || r.currentTokenProcessed {
@@ -55,6 +89,13 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
// flush the current token into an internal buffer
// to handle fragmented tokens
r.currentToken = r.tokenizer.Token()
+
+ // patch tokens with URLs
+ isTokenWithAttribute := r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken
+ if isTokenWithAttribute {
+ patchResourceURL(&r.currentToken, r.baseURL)
+ }
+
r.tokenBuffer.Reset()
r.tokenBuffer.WriteString(r.currentToken.String())
r.currentTokenProcessed = false
@@ -62,44 +103,111 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
}
n, err := r.tokenBuffer.Read(p)
-
if err == io.EOF || r.tokenBuffer.Len() == 0 {
r.currentTokenProcessed = true
- err = nil // Reset error to nil because EOF in this context is expected and not an actual error
+ err = nil // EOF in this context is expected and not an actual error
}
return n, err
-
}
-// RewriteHTMLResourceURLs updates src/href attributes in HTML content to route through the proxy.
+// patchResourceURL rewrites, in place, the URL-bearing attributes of token so
+// that they point at the ladder proxy instead of the upstream site.
+//
+// Only attributes enabled in AttributesToRewrite are considered:
+//   - root-relative paths:   <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
+//   - absolute http(s) URLs: <img src='https://cdn.com/a.png'> -> <img src='/https://cdn.com/a.png'>
+//
+// Every other value (unparsable URLs, other schemes such as mailto: or data:,
+// fragments, document-relative paths) is left untouched, and the remaining
+// attributes of the token are still processed.
+//
+// baseURL is the scheme and host of the proxied site, eg: https://proxiedsite.com
+// (no trailing '/').
+func patchResourceURL(token *html.Token, baseURL string) {
+	for i := range token.Attr {
+		attr := &token.Attr[i]
+
+		// don't touch attributes except for the ones we enabled
+		if !AttributesToRewrite[attr.Key] {
+			continue
+		}
+
+		// NOTE(review): protocol-relative URLs ("//cdn.com/a.js") also start with
+		// "/" and are therefore handled as root-relative paths - confirm intended.
+		if strings.HasPrefix(attr.Val, "/") {
+			// double check that the path forms a valid http URL before modifying;
+			// skip only this attribute (not the whole token) when it does not
+			if _, err := url.Parse("http://localhost" + attr.Val); err != nil {
+				continue
+			}
+
+			// patch relative paths so they are prefixed with the proxied site
+			log.Printf("BASEURL patch: %s\n", baseURL)
+			attr.Val = fmt.Sprintf("/%s/%s", baseURL, strings.TrimPrefix(attr.Val, "/"))
+			log.Printf("url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+			continue
+		}
+
+		// only absolute http(s) URLs are rewritten; anything else is skipped
+		u, err := url.Parse(attr.Val)
+		if err != nil || (u.Scheme != "http" && u.Scheme != "https") {
+			continue
+		}
+
+		// patch absolute URLs to a relative path pointing to the ladder proxy
+		attr.Val = "/" + attr.Val
+		log.Printf("url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+	}
+}
+
+
+// RewriteHTMLResourceURLs modifies HTTP responses
+// to rewrite URLs attributes in HTML content (such as src, href)
+// - `<img src='/relative_path'>` -> `<img src='/https://proxiedsite.com/relative_path'>`
+// - This function is designed to allow the proxified page
+// to still be browsable by routing all resource URLs through the proxy.
+//
+// ---
+//
+// - It works by replacing the io.ReadCloser of the http.Response.Body
+// with another io.ReadCloser (HTMLResourceURLRewriter) that wraps the first one.
+//
+// - This process can be done multiple times, so that the response will
+// be streamed and modified through each pass without buffering the entire response in memory.
+//
+// - HTMLResourceURLRewriter reads the http.Response.Body stream,
+// parsing each HTML token one at a time and rewriting URL attribute values.
+//
+// - When ProxyChain.Execute() is called, the response body will be read from the server
+// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
+// without ever buffering the entire HTTP response in memory.
func RewriteHTMLResourceURLs() proxychain.ResponseModification {
return func(chain *proxychain.ProxyChain) error {
- log.Println("rhru")
+ // return early if it's not HTML
ct := chain.Response.Header.Get("content-type")
- log.Println(ct)
if !strings.HasPrefix(ct, "text/html") {
return nil
}
- log.Println("rhru2")
- // chain.Response.Body is an unread http.Response.Body
- chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, chain.Request.URL)
+
+ // should be site being requested to proxy
+ baseUrl := fmt.Sprintf("%s://%s", chain.Request.URL.Scheme, chain.Request.URL.Host)
+ /*
+ log.Println("--------------------")
+ log.Println(baseUrl)
+ log.Println("--------------------")
+ */
+
+ chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, baseUrl)
return nil
}
}
-
-func rewriteToken(token *html.Token, baseURL *url.URL) {
- log.Println(token.String())
- attrsToRewrite := map[string]bool{"href": true, "src": true, "action": true, "srcset": true}
- for i := range token.Attr {
- attr := &token.Attr[i]
- if attrsToRewrite[attr.Key] {
- attr.Val = "/" + attr.Val
- }
- /*
- if attrsToRewrite[attr.Key] && strings.HasPrefix(attr.Val, "/") {
- // Make URL absolute
- attr.Val = "/https://" + baseURL.Host + attr.Val
- }
- */
- }
-}
diff --git a/proxychain/strategy/cloudflare.go b/proxychain/strategy/cloudflare.go
deleted file mode 100644
index 6387736..0000000
--- a/proxychain/strategy/cloudflare.go
+++ /dev/null
@@ -1,8 +0,0 @@
-package strategy
-
-/*
-var Cloudflare = proxy.Strategy{
- tactic.NoCookie(),
- // ... other tactics ...
-}
-*/