diff --git a/proxychain/cache/memcache.go b/proxychain/cache/memcache.go deleted file mode 100644 index e69de29..0000000 diff --git a/proxychain/proxychain.go b/proxychain/proxychain.go index a6f5948..319213b 100644 --- a/proxychain/proxychain.go +++ b/proxychain/proxychain.go @@ -22,7 +22,8 @@ applying request and response modifications along the way. request modifiers (ReqMods) and response modifiers (ResMods) before passing the upstream response back to the client. - - ProxyChains can be reused to avoid memory allocations. + - ProxyChains can be reused to avoid memory allocations. However, they are not concurrent-safe + so a ProxyChainPool should be used with mutexes to avoid memory errors. --- @@ -48,6 +49,7 @@ proxychain.NewProxyChain(). ). SetResultModifications( tx.BlockIncomingCookies(), + tx.RewriteHTMLResourceURLs() ). Execute() @@ -130,7 +132,6 @@ func (chain *ProxyChain) AddRuleset(rs *ruleset.RuleSet) *ProxyChain { } func (chain *ProxyChain) _initialize_request() (*http.Request, error) { - log.Println("ir 1") if chain.Context == nil { chain.abortErr = chain.abort(errors.New("no context set")) return nil, chain.abortErr @@ -140,7 +141,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) { if err != nil { return nil, err } - log.Println("ir 2") chain.Request = req switch chain.Context.Method() { case "GET": @@ -157,7 +157,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) { return nil, fmt.Errorf("unsupported request method from client: '%s'", chain.Context.Method()) } - log.Println("ir 3") /* // copy client request headers to upstream request headers forwardHeaders := func(key []byte, val []byte) { @@ -166,7 +165,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) { clientHeaders := &chain.Context.Request().Header clientHeaders.VisitAll(forwardHeaders) */ - log.Println("ir 4") return req, nil } @@ -184,18 +182,14 @@ func (chain *ProxyChain) _execute() (io.Reader, error) { if 
chain.Request.URL.Scheme == "" { return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues") } - log.Println("A") // Apply requestModifications to proxychain for _, applyRequestModificationsTo := range chain.requestModifications { - log.Println("AA") - log.Println(applyRequestModificationsTo) err := applyRequestModificationsTo(chain) if err != nil { return nil, chain.abort(err) } } - log.Println("B") // Send Request Upstream resp, err := chain.Client.Do(chain.Request) @@ -203,7 +197,6 @@ func (chain *ProxyChain) _execute() (io.Reader, error) { return nil, chain.abort(err) } chain.Response = resp - log.Println("C") //defer resp.Body.Close() @@ -220,7 +213,6 @@ func (chain *ProxyChain) _execute() (io.Reader, error) { return nil, chain.abort(err) } } - log.Println("D") return chain.Response.Body, nil } @@ -231,15 +223,11 @@ func (chain *ProxyChain) _execute() (io.Reader, error) { // be returned to the client func (chain *ProxyChain) Execute() error { defer chain._reset() - log.Println("1") body, err := chain._execute() - log.Println("2") if err != nil { log.Println(err) return err } - log.Println("3") - log.Println(chain) if chain.Context == nil { return errors.New("no context set") } @@ -281,13 +269,11 @@ func (chain *ProxyChain) extractUrl() (*url.URL, error) { if err != nil { reqUrl = chain.Context.Params("*") // fallback } - fmt.Println(reqUrl) urlQuery, err := url.Parse(reqUrl) if err != nil { return nil, fmt.Errorf("error parsing request URL '%s': %v", reqUrl, err) } - fmt.Println(urlQuery) // Handle standard paths // eg: https://localhost:8080/https://realsite.com/images/foobar.jpg -> https://realsite.com/images/foobar.jpg @@ -327,6 +313,7 @@ func (chain *ProxyChain) SetFiberCtx(ctx *fiber.Ctx) *ProxyChain { chain.abortErr = chain.abort(err) } chain.Request.URL = url + fmt.Printf("extracted URL: %s\n", chain.Request.URL) return chain } diff --git a/proxychain/proxychain_cache.go b/proxychain/proxychain_cache.go deleted file 
mode 100644 index 44289af..0000000 --- a/proxychain/proxychain_cache.go +++ /dev/null @@ -1,19 +0,0 @@ -package proxychain - -import "time" - -// Cache provides an interface for caching mechanisms. -// It supports operations to get, set, and invalidate cache entries. -// Implementations should ensure thread safety, efficiency -type Cache interface { - // Get Retrieves a cached value by its key. Returns the value and a boolean indicating - Get(key string) (value interface{}, found bool) - - // Set - Stores a value associated with a key in the cache for a specified time-to-live (ttl). - // If ttl is zero, the cache item has no expiration. - Set(key string, value interface{}, ttl time.Duration) - - // Invalidate - Removes a value from the cache by its key. If the key does not exist, - // it should perform a no-op or return a suitable error. - Invalidate(key string) error -} diff --git a/proxychain/responsemodifers/rewrite_http_resource_urls.go b/proxychain/responsemodifers/rewrite_http_resource_urls.go index 1e051a2..cdc92b1 100644 --- a/proxychain/responsemodifers/rewrite_http_resource_urls.go +++ b/proxychain/responsemodifers/rewrite_http_resource_urls.go @@ -2,6 +2,7 @@ package responsemodifers import ( "bytes" + "fmt" "io" "ladder/proxychain" "log" @@ -11,8 +12,37 @@ import ( "golang.org/x/net/html" ) +// Define list of HTML attributes to try to rewrite +var AttributesToRewrite map[string]bool + +func init() { + AttributesToRewrite = map[string]bool{ + "src": true, + "href": true, + /* + "action": true, + "srcset": true, + "poster": true, + "data": true, + "cite": true, + "formaction": true, + "background": true, + "usemap": true, + "longdesc": true, + "manifest": true, + "archive": true, + "codebase": true, + "icon": true, + "pluginspage": true, + */ + } +} + +// HTMLResourceURLRewriter is a struct that rewrites URLs within HTML resources to use a specified proxy URL. +// It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes. 
+// -> type HTMLResourceURLRewriter struct { - proxyURL *url.URL // proxyURL is the URL of the proxy, not the upstream URL; TODO: implement + baseURL string // eg: https://proxiedsite.com (note, no trailing '/') tokenizer *html.Tokenizer currentToken html.Token tokenBuffer *bytes.Buffer @@ -20,17 +50,19 @@ type HTMLResourceURLRewriter struct { currentTokenProcessed bool } -func NewHTMLResourceURLRewriter(src io.ReadCloser, proxyURL *url.URL) *HTMLResourceURLRewriter { - log.Println("tokenize") +// NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter. +// It initializes the tokenizer with the provided source and sets the proxy URL. +func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL string) *HTMLResourceURLRewriter { return &HTMLResourceURLRewriter{ tokenizer: html.NewTokenizer(src), currentToken: html.Token{}, currentTokenIndex: 0, tokenBuffer: new(bytes.Buffer), - proxyURL: proxyURL, + baseURL: baseURL, } } +// Close resets the internal state of HTMLResourceURLRewriter, clearing buffers and token data. func (r *HTMLResourceURLRewriter) Close() error { r.tokenBuffer.Reset() r.currentToken = html.Token{} @@ -39,6 +71,8 @@ func (r *HTMLResourceURLRewriter) Close() error { return nil } +// Read processes the HTML content, rewriting URLs and managing the state of tokens. +// It reads HTML content, token by token, rewriting URLs to route through the specified proxy. 
func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) { if r.currentToken.Data == "" || r.currentTokenProcessed { @@ -55,6 +89,13 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) { // flush the current token into an internal buffer // to handle fragmented tokens r.currentToken = r.tokenizer.Token() + + // patch tokens with URLs + isTokenWithAttribute := r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken + if isTokenWithAttribute { + patchResourceURL(&r.currentToken, r.baseURL) + } + r.tokenBuffer.Reset() r.tokenBuffer.WriteString(r.currentToken.String()) r.currentTokenProcessed = false @@ -62,44 +103,111 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) { } n, err := r.tokenBuffer.Read(p) - if err == io.EOF || r.tokenBuffer.Len() == 0 { r.currentTokenProcessed = true - err = nil // Reset error to nil because EOF in this context is expected and not an actual error + err = nil // EOF in this context is expected and not an actual error } return n, err - } -// RewriteHTMLResourceURLs updates src/href attributes in HTML content to route through the proxy. 
+// patchResourceURL rewrites, in place, the URL-bearing attributes of token so
+// that the resources they reference are fetched through the proxy.
+//
+// Only attributes whose key is listed in AttributesToRewrite are considered:
+//   - a value starting with "/" is treated as a path on the proxied site and
+//     becomes "/<baseURL>/<path>";
+//   - a value that parses as an absolute http or https URL becomes "/<url>";
+//   - any other value (unparsable, or a different scheme) is left untouched.
+//
+// baseURL is the origin of the proxied site, eg: https://proxiedsite.com
+// (no trailing '/').
+func patchResourceURL(token *html.Token, baseURL string) {
+	for i := range token.Attr {
+		attr := &token.Attr[i]
+
+		// don't touch attributes except for the ones we defined
+		if _, exists := AttributesToRewrite[attr.Key]; !exists {
+			continue
+		}
+
+		// NOTE(review): protocol-relative values ("//host/path") also start
+		// with "/" and therefore take the relative-path branch below; confirm
+		// whether they should be treated as absolute URLs instead.
+		isRelativePath := strings.HasPrefix(attr.Val, "/")
+
+		// Double check that the attribute is a valid http(s) URL before
+		// modifying it. An unusable value must only skip this attribute
+		// (continue), not abandon the remaining attributes of the token.
+		if isRelativePath {
+			if _, err := url.Parse(fmt.Sprintf("http://localhost%s", attr.Val)); err != nil {
+				continue
+			}
+		} else {
+			u, err := url.Parse(attr.Val)
+			if err != nil || (u.Scheme != "http" && u.Scheme != "https") {
+				continue
+			}
+		}
+
+		if isRelativePath {
+			// patch relative paths
+			// /relative_path -> /https://proxiedsite.com/relative_path
+			log.Printf("BASEURL patch: %s\n", baseURL)
+			attr.Val = fmt.Sprintf("/%s/%s", baseURL, strings.TrimPrefix(attr.Val, "/"))
+		} else {
+			// patch absolute URLs to a relative path pointing to ladder proxy
+			// https://external.com/path -> /https://external.com/path
+			attr.Val = fmt.Sprintf("/%s", attr.Val)
+		}
+		log.Printf("url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+	}
+}
+
+// RewriteHTMLResourceURLs modifies HTTP responses
+// to rewrite URLs attributes in HTML content (such as src, href)
+// - `<img src='/relative_path'>` -> `<img src='/https://proxiedsite.com/relative_path'>`
+// - This function is designed to allow the proxified page
+// to still be browsable by routing all resource URLs through the proxy.
+//
+// ---
+//
+// - It works by replacing the io.ReadCloser of the http.Response.Body
+// with another io.ReadCloser (HTMLResourceURLRewriter) that wraps the first one.
+//
+// - This process can be done multiple times, so that the response will
+// be streamed and modified through each pass without buffering the entire response in memory.
+// +// - HTMLResourceRewriter reads the http.Response.Body stream, +// parsing each HTML token one at a time and replacing attribute tags. +// +// - When ProxyChain.Execute() is called, the response body will be read from the server +// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body +// without ever buffering the entire HTTP response in memory. func RewriteHTMLResourceURLs() proxychain.ResponseModification { return func(chain *proxychain.ProxyChain) error { - log.Println("rhru") + // return early if it's not HTML ct := chain.Response.Header.Get("content-type") - log.Println(ct) if !strings.HasPrefix(ct, "text/html") { return nil } - log.Println("rhru2") - // chain.Response.Body is an unread http.Response.Body - chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, chain.Request.URL) + + // should be site being requested to proxy + baseUrl := fmt.Sprintf("%s://%s", chain.Request.URL.Scheme, chain.Request.URL.Host) + /* + log.Println("--------------------") + log.Println(baseUrl) + log.Println("--------------------") + */ + + chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, baseUrl) return nil } } - -func rewriteToken(token *html.Token, baseURL *url.URL) { - log.Println(token.String()) - attrsToRewrite := map[string]bool{"href": true, "src": true, "action": true, "srcset": true} - for i := range token.Attr { - attr := &token.Attr[i] - if attrsToRewrite[attr.Key] { - attr.Val = "/" + attr.Val - } - /* - if attrsToRewrite[attr.Key] && strings.HasPrefix(attr.Val, "/") { - // Make URL absolute - attr.Val = "/https://" + baseURL.Host + attr.Val - } - */ - } -} diff --git a/proxychain/strategy/cloudflare.go b/proxychain/strategy/cloudflare.go deleted file mode 100644 index 6387736..0000000 --- a/proxychain/strategy/cloudflare.go +++ /dev/null @@ -1,8 +0,0 @@ -package strategy - -/* -var Cloudflare = proxy.Strategy{ - tactic.NoCookie(), - // ... other tactics ... -} -*/