Kevin Pham
2023-11-20 15:37:07 -06:00
parent 1d88f14de2
commit 5d46adc486
5 changed files with 142 additions and 74 deletions

View File

View File

@@ -22,7 +22,8 @@ applying request and response modifications along the way.
request modifiers (ReqMods) and response modifiers (ResMods) before passing the
upstream response back to the client.
-- ProxyChains can be reused to avoid memory allocations.
+- ProxyChains can be reused to avoid memory allocations. However, they are not concurrent-safe
+so a ProxyChainPool should be used with mutexes to avoid memory errors.
---
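The ProxyChainPool referenced in the added doc comment is not implemented anywhere in this diff. As a rough illustration only, a mutex-guarded pool of reusable chains might look like the sketch below (the type, fields, and methods are assumptions, not code from this commit):

// Illustrative sketch: assumes it lives in package proxychain next to
// NewProxyChain and imports "sync" for the mutex.
type ProxyChainPool struct {
	mu     sync.Mutex
	chains []*ProxyChain
}

// Get returns a reusable ProxyChain, creating one if the pool is empty.
func (p *ProxyChainPool) Get() *ProxyChain {
	p.mu.Lock()
	defer p.mu.Unlock()
	if n := len(p.chains); n > 0 {
		chain := p.chains[n-1]
		p.chains = p.chains[:n-1]
		return chain
	}
	return NewProxyChain()
}

// Put returns a chain to the pool once its request has finished.
func (p *ProxyChainPool) Put(chain *ProxyChain) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.chains = append(p.chains, chain)
}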
@@ -48,6 +49,7 @@ proxychain.NewProxyChain().
).
SetResultModifications(
tx.BlockIncomingCookies(),
+tx.RewriteHTMLResourceURLs()
).
Execute()
@@ -130,7 +132,6 @@ func (chain *ProxyChain) AddRuleset(rs *ruleset.RuleSet) *ProxyChain {
}
func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
-log.Println("ir 1")
if chain.Context == nil {
chain.abortErr = chain.abort(errors.New("no context set"))
return nil, chain.abortErr
@@ -140,7 +141,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
if err != nil {
return nil, err
}
-log.Println("ir 2")
chain.Request = req
switch chain.Context.Method() {
case "GET":
@@ -157,7 +157,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
return nil, fmt.Errorf("unsupported request method from client: '%s'", chain.Context.Method())
}
-log.Println("ir 3")
/*
// copy client request headers to upstream request headers
forwardHeaders := func(key []byte, val []byte) {
@@ -166,7 +165,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
clientHeaders := &chain.Context.Request().Header
clientHeaders.VisitAll(forwardHeaders)
*/
-log.Println("ir 4")
return req, nil
}
@@ -184,18 +182,14 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
if chain.Request.URL.Scheme == "" {
return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues")
}
-log.Println("A")
// Apply requestModifications to proxychain
for _, applyRequestModificationsTo := range chain.requestModifications {
-log.Println("AA")
-log.Println(applyRequestModificationsTo)
err := applyRequestModificationsTo(chain)
if err != nil {
return nil, chain.abort(err)
}
}
-log.Println("B")
// Send Request Upstream
resp, err := chain.Client.Do(chain.Request)
@@ -203,7 +197,6 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
return nil, chain.abort(err)
}
chain.Response = resp
-log.Println("C")
//defer resp.Body.Close()
@@ -220,7 +213,6 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
return nil, chain.abort(err)
}
}
-log.Println("D")
return chain.Response.Body, nil
}
@@ -231,15 +223,11 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
// be returned to the client
func (chain *ProxyChain) Execute() error {
defer chain._reset()
-log.Println("1")
body, err := chain._execute()
-log.Println("2")
if err != nil {
log.Println(err)
return err
}
-log.Println("3")
-log.Println(chain)
if chain.Context == nil {
return errors.New("no context set")
}
@@ -281,13 +269,11 @@ func (chain *ProxyChain) extractUrl() (*url.URL, error) {
if err != nil {
reqUrl = chain.Context.Params("*") // fallback
}
-fmt.Println(reqUrl)
urlQuery, err := url.Parse(reqUrl)
if err != nil {
return nil, fmt.Errorf("error parsing request URL '%s': %v", reqUrl, err)
}
-fmt.Println(urlQuery)
// Handle standard paths
// eg: https://localhost:8080/https://realsite.com/images/foobar.jpg -> https://realsite.com/images/foobar.jpg
@@ -327,6 +313,7 @@ func (chain *ProxyChain) SetFiberCtx(ctx *fiber.Ctx) *ProxyChain {
chain.abortErr = chain.abort(err)
}
chain.Request.URL = url
+fmt.Printf("extracted URL: %s\n", chain.Request.URL)
return chain
}

View File

@@ -1,19 +0,0 @@
-package proxychain
-import "time"
-// Cache provides an interface for caching mechanisms.
-// It supports operations to get, set, and invalidate cache entries.
-// Implementations should ensure thread safety and efficiency.
-type Cache interface {
-// Get retrieves a cached value by its key. Returns the value and a boolean indicating whether the key was found.
-Get(key string) (value interface{}, found bool)
-// Set stores a value associated with a key in the cache for a specified time-to-live (ttl).
-// If ttl is zero, the cache item has no expiration.
-Set(key string, value interface{}, ttl time.Duration)
-// Invalidate removes a value from the cache by its key. If the key does not exist,
-// it should perform a no-op or return a suitable error.
-Invalidate(key string) error
-}
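For reference, the contract the removed Cache interface described could be satisfied by a small map-backed implementation along these lines (an illustrative sketch only, assuming imports of "sync" and "time"; not code from this repository):

// memoryCache is a mutex-guarded, map-backed cache matching the removed interface.
type memoryCache struct {
	mu    sync.Mutex
	items map[string]entry
}

type entry struct {
	value     interface{}
	expiresAt time.Time // zero value means no expiration
}

func (c *memoryCache) Get(key string) (interface{}, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()
	e, ok := c.items[key]
	if !ok || (!e.expiresAt.IsZero() && time.Now().After(e.expiresAt)) {
		return nil, false
	}
	return e.value, true
}

func (c *memoryCache) Set(key string, value interface{}, ttl time.Duration) {
	c.mu.Lock()
	defer c.mu.Unlock()
	var exp time.Time
	if ttl > 0 {
		exp = time.Now().Add(ttl)
	}
	c.items[key] = entry{value: value, expiresAt: exp}
}

func (c *memoryCache) Invalidate(key string) error {
	c.mu.Lock()
	defer c.mu.Unlock()
	delete(c.items, key)
	return nil
}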

View File

@@ -2,6 +2,7 @@ package responsemodifers
import (
"bytes"
+"fmt"
"io"
"ladder/proxychain"
"log"
@@ -11,8 +12,37 @@ import (
"golang.org/x/net/html" "golang.org/x/net/html"
) )
// Define list of HTML attributes to try to rewrite
var AttributesToRewrite map[string]bool
func init() {
AttributesToRewrite = map[string]bool{
"src": true,
"href": true,
/*
"action": true,
"srcset": true,
"poster": true,
"data": true,
"cite": true,
"formaction": true,
"background": true,
"usemap": true,
"longdesc": true,
"manifest": true,
"archive": true,
"codebase": true,
"icon": true,
"pluginspage": true,
*/
}
}
// HTMLResourceURLRewriter is a struct that rewrites URLs within HTML resources to use a specified proxy URL.
// It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes.
// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
type HTMLResourceURLRewriter struct { type HTMLResourceURLRewriter struct {
proxyURL *url.URL // proxyURL is the URL of the proxy, not the upstream URL; TODO: implement baseURL string // eg: https://proxiedsite.com (note, no trailing '/')
tokenizer *html.Tokenizer tokenizer *html.Tokenizer
currentToken html.Token currentToken html.Token
tokenBuffer *bytes.Buffer tokenBuffer *bytes.Buffer
@@ -20,17 +50,19 @@ type HTMLResourceURLRewriter struct {
currentTokenProcessed bool
}
-func NewHTMLResourceURLRewriter(src io.ReadCloser, proxyURL *url.URL) *HTMLResourceURLRewriter {
-log.Println("tokenize")
+// NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter.
+// It initializes the tokenizer with the provided source and sets the proxy URL.
+func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL string) *HTMLResourceURLRewriter {
return &HTMLResourceURLRewriter{
tokenizer: html.NewTokenizer(src),
currentToken: html.Token{},
currentTokenIndex: 0,
tokenBuffer: new(bytes.Buffer),
-proxyURL: proxyURL,
+baseURL: baseURL,
}
}
+// Close resets the internal state of HTMLResourceURLRewriter, clearing buffers and token data.
func (r *HTMLResourceURLRewriter) Close() error {
r.tokenBuffer.Reset()
r.currentToken = html.Token{}
@@ -39,6 +71,8 @@ func (r *HTMLResourceURLRewriter) Close() error {
return nil
}
+// Read processes the HTML content, rewriting URLs and managing the state of tokens.
+// It reads HTML content, token by token, rewriting URLs to route through the specified proxy.
func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
if r.currentToken.Data == "" || r.currentTokenProcessed {
@@ -55,6 +89,13 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
// flush the current token into an internal buffer
// to handle fragmented tokens
r.currentToken = r.tokenizer.Token()
+// patch tokens with URLs
+isTokenWithAttribute := r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken
+if isTokenWithAttribute {
+patchResourceURL(&r.currentToken, r.baseURL)
+}
r.tokenBuffer.Reset()
r.tokenBuffer.WriteString(r.currentToken.String())
r.currentTokenProcessed = false
@@ -62,44 +103,111 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
}
n, err := r.tokenBuffer.Read(p)
if err == io.EOF || r.tokenBuffer.Len() == 0 {
r.currentTokenProcessed = true
-err = nil // Reset error to nil because EOF in this context is expected and not an actual error
+err = nil // EOF in this context is expected and not an actual error
}
return n, err
}
-// RewriteHTMLResourceURLs updates src/href attributes in HTML content to route through the proxy.
+func patchResourceURL(token *html.Token, baseURL string) {
+for i := range token.Attr {
+attr := &token.Attr[i]
+// don't touch attributes except for the ones we defined
+_, exists := AttributesToRewrite[attr.Key]
+if !exists {
+continue
+}
+isRelativePath := strings.HasPrefix(attr.Val, "/")
+//log.Printf("PRE '%s'='%s'", attr.Key, attr.Val)
+// double check if attribute is valid http URL before modifying
+if isRelativePath {
+_, err := url.Parse(fmt.Sprintf("http://localhost%s", attr.Val))
+if err != nil {
+return
+}
+} else {
+u, err := url.Parse(attr.Val)
+if err != nil {
+return
+}
+if !(u.Scheme == "http" || u.Scheme == "https") {
+return
+}
+}
+// patch relative paths
+// <img src="/favicon.png"> -> <img src="/http://images.cdn.proxiedsite.com/favicon.png">
+if isRelativePath {
+log.Printf("BASEURL patch: %s\n", baseURL)
+attr.Val = fmt.Sprintf(
+"/%s/%s",
+baseURL,
+//url.QueryEscape(
+strings.TrimPrefix(attr.Val, "/"),
+//),
+)
+log.Printf("url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+continue
+}
+// patch absolute paths to relative path pointing to ladder proxy
+// <img src="http://images.cdn.proxiedsite.com/favicon.png"> -> <img src="/http://images.cdn.proxiedsite.com/favicon.png">
+//log.Printf("absolute patch: %s\n", attr.Val)
+attr.Val = fmt.Sprintf(
+"/%s",
+//url.QueryEscape(attr.Val),
+//url.QueryEscape(
+strings.TrimPrefix(attr.Val, "/"),
+//),
+//attr.Val,
+)
+log.Printf("url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+}
+// RewriteHTMLResourceURLs modifies HTTP responses
+// to rewrite URL attributes in HTML content (such as src, href)
+// - `<img src='/relative_path'>` -> `<img src='/https://proxiedsite.com/relative_path'>`
+// - This function is designed to allow the proxified page
+// to still be browsable by routing all resource URLs through the proxy.
+//
+// ---
+//
+// - It works by replacing the io.ReadCloser of the http.Response.Body
+// with another io.ReadCloser (HTMLResourceURLRewriter) that wraps the first one.
+//
+// - This process can be done multiple times, so that the response will
+// be streamed and modified through each pass without buffering the entire response in memory.
+//
+// - HTMLResourceURLRewriter reads the http.Response.Body stream,
+// parsing each HTML token one at a time and replacing attribute tags.
+//
+// - When ProxyChain.Execute() is called, the response body will be read from the server
+// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
+// without ever buffering the entire HTTP response in memory.
func RewriteHTMLResourceURLs() proxychain.ResponseModification {
return func(chain *proxychain.ProxyChain) error {
-log.Println("rhru")
+// return early if it's not HTML
ct := chain.Response.Header.Get("content-type")
-log.Println(ct)
if !strings.HasPrefix(ct, "text/html") {
return nil
}
-log.Println("rhru2")
-// chain.Response.Body is an unread http.Response.Body
-chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, chain.Request.URL)
+// should be site being requested to proxy
+baseUrl := fmt.Sprintf("%s://%s", chain.Request.URL.Scheme, chain.Request.URL.Host)
+/*
+log.Println("--------------------")
+log.Println(baseUrl)
+log.Println("--------------------")
+*/
+chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, baseUrl)
return nil
}
}
-func rewriteToken(token *html.Token, baseURL *url.URL) {
-log.Println(token.String())
-attrsToRewrite := map[string]bool{"href": true, "src": true, "action": true, "srcset": true}
-for i := range token.Attr {
-attr := &token.Attr[i]
-if attrsToRewrite[attr.Key] {
-attr.Val = "/" + attr.Val
-}
-/*
-if attrsToRewrite[attr.Key] && strings.HasPrefix(attr.Val, "/") {
-// Make URL absolute
-attr.Val = "/https://" + baseURL.Host + attr.Val
-}
-*/
-}
-}
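To make the streaming behavior described in the RewriteHTMLResourceURLs doc comment concrete, here is a minimal sketch of the body-wrapping pattern (illustrative only; wrapBody and the commented second wrapper are assumptions, not code from this commit):

// Each modification swaps http.Response.Body for a reader that wraps the
// previous one; bytes stream through every wrapper as the client reads,
// so the full response is never buffered in memory.
func wrapBody(resp *http.Response, baseURL string) {
	resp.Body = NewHTMLResourceURLRewriter(resp.Body, baseURL)
	// A second (hypothetical) modification would compose the same way:
	// resp.Body = newAnotherWrapper(resp.Body)
}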

View File

@@ -1,8 +0,0 @@
-package strategy
-/*
-var Cloudflare = proxy.Strategy{
-tactic.NoCookie(),
-// ... other tactics ...
-}
-*/