refactor rewriters to modify html in single pass with multiple rewriters; improve html rewriter edge case handling
This commit is contained in:
@@ -13,24 +13,9 @@ import (
|
||||
// - `<img src='/relative_path'>` -> `<img src='/https://proxiedsite.com/relative_path'>`
|
||||
// - This function is designed to allow the proxified page
|
||||
// to still be browsible by routing all resource URLs through the proxy.
|
||||
//
|
||||
// ---
|
||||
//
|
||||
// - It works by replacing the io.ReadCloser of the http.Response.Body
|
||||
// with another io.ReaderCloser (HTMLResourceRewriter) that wraps the first one.
|
||||
//
|
||||
// - This process can be done multiple times, so that the response will
|
||||
// be streamed and modified through each pass without buffering the entire response in memory.
|
||||
//
|
||||
// - HTMLResourceRewriter reads the http.Response.Body stream,
|
||||
// parsing each HTML token one at a time and replacing attribute tags.
|
||||
//
|
||||
// - When ProxyChain.Execute() is called, the response body will be read from the server
|
||||
// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
|
||||
// without ever buffering the entire HTTP response in memory.
|
||||
func RewriteHTMLResourceURLs() proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// return early if it's not HTML
|
||||
// don't add rewriter if it's not even html
|
||||
ct := chain.Response.Header.Get("content-type")
|
||||
if !strings.HasPrefix(ct, "text/html") {
|
||||
return nil
|
||||
@@ -40,12 +25,10 @@ func RewriteHTMLResourceURLs() proxychain.ResponseModification {
|
||||
originalURI := chain.Context.Request().URI()
|
||||
proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())
|
||||
|
||||
chain.Response.Body = rewriters.
|
||||
NewHTMLResourceURLRewriter(
|
||||
chain.Response.Body,
|
||||
chain.Request.URL,
|
||||
proxyURL,
|
||||
)
|
||||
// the rewriting actually happens in chain.Execute() as the client is streaming the response body back
|
||||
rr := rewriters.NewHTMLTokenURLRewriter(chain.Request.URL, proxyURL)
|
||||
// we just queue it up here
|
||||
chain.AddHTMLTokenRewriter(rr)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user