refactor rewriters to modify html in single pass with multiple rewriters; improve html rewriter edge case handling
@@ -10,6 +10,7 @@ import (
 	"strings"
 
 	"ladder/pkg/ruleset"
+	rr "ladder/proxychain/responsemodifers/rewriters"
 
 	"github.com/gofiber/fiber/v2"
 )
@@ -35,6 +36,7 @@ import (
 
 	rx "ladder/pkg/proxychain/requestmodifers"
 	tx "ladder/pkg/proxychain/responsemodifers"
+	"ladder/pkg/proxychain/responsemodifers/rewriters"
 	"ladder/internal/proxychain"
 
 )
@@ -87,6 +89,7 @@ type ProxyChain struct {
 	Response             *http.Response
 	requestModifications []RequestModification
 	resultModifications  []ResponseModification
+	htmlTokenRewriters   []rr.IHTMLTokenRewriter
 	Ruleset              *ruleset.RuleSet
 	debugMode            bool
 	abortErr             error
@@ -169,75 +172,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
 	return req, nil
 }
-
-// _execute sends the request for the ProxyChain and returns the raw body only
-// the caller is responsible for returning a response back to the requestor
-// the caller is also responsible for calling chain._reset() when they are done with the body
-func (chain *ProxyChain) _execute() (io.Reader, error) {
-	if chain.validateCtxIsSet() != nil || chain.abortErr != nil {
-		return nil, chain.abortErr
-	}
-	if chain.Request == nil {
-		return nil, errors.New("proxychain request not yet initialized")
-	}
-	if chain.Request.URL.Scheme == "" {
-		return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues")
-	}
-
-	// Apply requestModifications to proxychain
-	for _, applyRequestModificationsTo := range chain.requestModifications {
-		err := applyRequestModificationsTo(chain)
-		if err != nil {
-			return nil, chain.abort(err)
-		}
-	}
-
-	// Send Request Upstream
-	resp, err := chain.Client.Do(chain.Request)
-	if err != nil {
-		return nil, chain.abort(err)
-	}
-	chain.Response = resp
-
-	//defer resp.Body.Close()
-
-	/* todo: move to rsm
-	for k, v := range resp.Header {
-		chain.Context.Set(k, resp.Header.Get(k))
-	}
-	*/
-
-	// Apply ResponseModifiers to proxychain
-	for _, applyResultModificationsTo := range chain.resultModifications {
-		err := applyResultModificationsTo(chain)
-		if err != nil {
-			return nil, chain.abort(err)
-		}
-	}
-
-	return chain.Response.Body, nil
-}
-
-// Execute sends the request for the ProxyChain and returns the request to the sender
-// and resets the fields so that the ProxyChain can be reused.
-// if any step in the ProxyChain fails, the request will abort and a 500 error will
-// be returned to the client
-func (chain *ProxyChain) Execute() error {
-	defer chain._reset()
-	body, err := chain._execute()
-	if err != nil {
-		log.Println(err)
-		return err
-	}
-	if chain.Context == nil {
-		return errors.New("no context set")
-	}
-	// Return request back to client
-	chain.Context.Set("content-type", chain.Response.Header.Get("content-type"))
-	return chain.Context.SendStream(body)
-
-	//return chain.Context.SendStream(body)
-}
 
 // reconstructUrlFromReferer reconstructs the URL using the referer's scheme, host, and the relative path / queries
 func reconstructUrlFromReferer(referer *url.URL, relativeUrl *url.URL) (*url.URL, error) {
 
@@ -322,6 +256,13 @@ func (chain *ProxyChain) extractUrl() (*url.URL, error) {
 		return reconstructUrlFromReferer(referer, relativePath)
 	}
 
+// AddHTMLTokenRewriter adds an HTMLTokenRewriter to the chain.
+// HTMLTokenRewriters modify the body response by parsing the HTML.
+func (chain *ProxyChain) AddHTMLTokenRewriter(rr rr.IHTMLTokenRewriter) *ProxyChain {
+	chain.htmlTokenRewriters = append(chain.htmlTokenRewriters, rr)
+	return chain
+}
+
 // SetFiberCtx takes the request ctx from the client
 // for the modifiers and execute function to use.
 // it must be set every time a new request comes through
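As an illustration (not part of this commit), a response modifier is expected to use AddHTMLTokenRewriter as in the sketch below: it only queues a rewriter on the chain, and the actual rewriting happens later, in a single tokenizer pass, when _execute() streams the body. The helper name queueRewriter is hypothetical.

package proxychain

import (
	rr "ladder/proxychain/responsemodifers/rewriters"
)

// queueRewriter is a hypothetical ResponseModification showing the intended
// pattern: register the rewriter here and let chain._execute() apply every
// queued rewriter to the body in one tokenizer pass.
func queueRewriter(rw rr.IHTMLTokenRewriter) ResponseModification {
	return func(chain *ProxyChain) error {
		chain.AddHTMLTokenRewriter(rw)
		return nil
	}
}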
@@ -398,3 +339,86 @@ func NewProxyChain() *ProxyChain {
 	chain.Client = http.DefaultClient
 	return chain
 }
+
+/// ========================================================================================================
+
+// _execute sends the request for the ProxyChain and returns the raw body only
+// the caller is responsible for returning a response back to the requestor
+// the caller is also responsible for calling chain._reset() when they are done with the body
+func (chain *ProxyChain) _execute() (io.Reader, error) {
+	if chain.validateCtxIsSet() != nil || chain.abortErr != nil {
+		return nil, chain.abortErr
+	}
+	if chain.Request == nil {
+		return nil, errors.New("proxychain request not yet initialized")
+	}
+	if chain.Request.URL.Scheme == "" {
+		return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues")
+	}
+
+	// Apply requestModifications to proxychain
+	for _, applyRequestModificationsTo := range chain.requestModifications {
+		err := applyRequestModificationsTo(chain)
+		if err != nil {
+			return nil, chain.abort(err)
+		}
+	}
+
+	// Send Request Upstream
+	resp, err := chain.Client.Do(chain.Request)
+	if err != nil {
+		return nil, chain.abort(err)
+	}
+	chain.Response = resp
+
+	/* todo: move to rsm
+	for k, v := range resp.Header {
+		chain.Context.Set(k, resp.Header.Get(k))
+	}
+	*/
+
+	// Apply ResponseModifiers to proxychain
+	for _, applyResultModificationsTo := range chain.resultModifications {
+		err := applyResultModificationsTo(chain)
+		if err != nil {
+			return nil, chain.abort(err)
+		}
+	}
+
+	// stream request back to client, possibly rewriting the body
+	if len(chain.htmlTokenRewriters) == 0 {
+		return chain.Response.Body, nil
+	}
+
+	ct := chain.Response.Header.Get("content-type")
+	switch {
+	case strings.HasPrefix(ct, "text/html"):
+		fmt.Println("fooox")
+		return rr.NewHTMLRewriter(chain.Response.Body, chain.htmlTokenRewriters), nil
+	default:
+		return chain.Response.Body, nil
+	}
+}
+
+// Execute sends the request for the ProxyChain and returns the request to the sender
+// and resets the fields so that the ProxyChain can be reused.
+// if any step in the ProxyChain fails, the request will abort and a 500 error will
+// be returned to the client
+func (chain *ProxyChain) Execute() error {
+	defer chain._reset()
+	body, err := chain._execute()
+	if err != nil {
+		log.Println(err)
+		return err
+	}
+	if chain.Context == nil {
+		return errors.New("no context set")
+	}
+
+	// Return request back to client
+	chain.Context.Set("content-type", chain.Response.Header.Get("content-type"))
+	return chain.Context.SendStream(body)
+
+	//return chain.Context.SendStream(body)
+}
@@ -4,6 +4,9 @@ import (
 	"ladder/proxychain"
 )
 
+// TODO: handle edge case where CSP is specified in meta tag:
+// <meta http-equiv="Content-Security-Policy" content="default-src 'self'">
+
 // BypassContentSecurityPolicy modifies response headers to prevent the browser
 // from enforcing any CSP restrictions. This should run at the end of the chain.
 func BypassContentSecurityPolicy() proxychain.ResponseModification {
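The IHTMLTokenRewriter interface introduced later in this commit offers one possible home for that TODO. The sketch below is a hypothetical illustration only (the name metaCSPNeutralizer is invented here, not part of the commit); it blanks out the policy carried by such a meta tag so the browser has nothing to enforce.

package rewriters

import (
	"strings"

	"golang.org/x/net/html"
)

// metaCSPNeutralizer is a hypothetical token rewriter that targets
// <meta http-equiv="Content-Security-Policy" ...> tags.
type metaCSPNeutralizer struct{}

var _ IHTMLTokenRewriter = (*metaCSPNeutralizer)(nil)

func (m *metaCSPNeutralizer) ShouldModify(t *html.Token) bool {
	if t.Type != html.StartTagToken && t.Type != html.SelfClosingTagToken {
		return false
	}
	if t.Data != "meta" {
		return false
	}
	for _, a := range t.Attr {
		if a.Key == "http-equiv" && strings.EqualFold(a.Val, "Content-Security-Policy") {
			return true
		}
	}
	return false
}

func (m *metaCSPNeutralizer) ModifyToken(t *html.Token) (string, string) {
	// blank out the policy; the tag is kept but carries no directives
	for i := range t.Attr {
		if t.Attr[i].Key == "content" {
			t.Attr[i].Val = ""
		}
	}
	return "", ""
}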
proxychain/responsemodifers/inject_script.go (new file, 27 lines)
@@ -0,0 +1,27 @@
+package responsemodifers
+
+import (
+	_ "embed"
+	"ladder/proxychain"
+	"ladder/proxychain/responsemodifers/rewriters"
+	"strings"
+)
+
+// InjectScript modifies HTTP responses
+// to execute javascript at a particular time.
+func InjectScript(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
+	return func(chain *proxychain.ProxyChain) error {
+		// don't add rewriter if it's not even html
+		ct := chain.Response.Header.Get("content-type")
+		if !strings.HasPrefix(ct, "text/html") {
+			return nil
+		}
+
+		// the rewriting actually happens in chain.Execute() as the client is streaming the response body back
+		rr := rewriters.NewScriptInjectorRewriter(js, execTime)
+		// we just queue it up here
+		chain.AddHTMLTokenRewriter(rr)
+
+		return nil
+	}
+}
@@ -13,24 +13,9 @@ import (
 // - `<img src='/relative_path'>` -> `<img src='/https://proxiedsite.com/relative_path'>`
 // - This function is designed to allow the proxified page
 // to still be browsible by routing all resource URLs through the proxy.
-//
-// ---
-//
-// - It works by replacing the io.ReadCloser of the http.Response.Body
-// with another io.ReaderCloser (HTMLResourceRewriter) that wraps the first one.
-//
-// - This process can be done multiple times, so that the response will
-// be streamed and modified through each pass without buffering the entire response in memory.
-//
-// - HTMLResourceRewriter reads the http.Response.Body stream,
-// parsing each HTML token one at a time and replacing attribute tags.
-//
-// - When ProxyChain.Execute() is called, the response body will be read from the server
-// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
-// without ever buffering the entire HTTP response in memory.
 func RewriteHTMLResourceURLs() proxychain.ResponseModification {
 	return func(chain *proxychain.ProxyChain) error {
-		// return early if it's not HTML
+		// don't add rewriter if it's not even html
 		ct := chain.Response.Header.Get("content-type")
 		if !strings.HasPrefix(ct, "text/html") {
 			return nil
@@ -40,12 +25,10 @@ func RewriteHTMLResourceURLs() proxychain.ResponseModification {
 		originalURI := chain.Context.Request().URI()
 		proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())
 
-		chain.Response.Body = rewriters.
-			NewHTMLResourceURLRewriter(
-				chain.Response.Body,
-				chain.Request.URL,
-				proxyURL,
-			)
+		// the rewriting actually happens in chain.Execute() as the client is streaming the response body back
+		rr := rewriters.NewHTMLTokenURLRewriter(chain.Request.URL, proxyURL)
+		// we just queue it up here
+		chain.AddHTMLTokenRewriter(rr)
 
 		return nil
 	}
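Taken together with InjectScript above, response modifiers now only register work; one tokenizer pass applies every queued rewriter when the body is streamed. A rough usage sketch, assuming a *proxychain.ProxyChain whose Response and Context are already populated (the helper applyHTMLModifiers is hypothetical, not part of the commit):

package responsemodifers

import (
	"ladder/proxychain"
	"ladder/proxychain/responsemodifers/rewriters"
)

// applyHTMLModifiers queues two rewriters on the same chain; the body is
// tokenized exactly once, later, inside chain.Execute().
func applyHTMLModifiers(chain *proxychain.ProxyChain) error {
	mods := []proxychain.ResponseModification{
		RewriteHTMLResourceURLs(),
		InjectScript("console.log('page proxied')", rewriters.AfterDOMIdle),
	}
	for _, apply := range mods {
		if err := apply(chain); err != nil {
			return err
		}
	}
	// chain now holds two queued IHTMLTokenRewriters; chain.Execute() streams
	// the rewritten HTML to the client in a single pass.
	return nil
}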
@@ -0,0 +1,27 @@
+(() => {
+	document.addEventListener('DOMContentLoaded', (event) => {
+		initIdleMutationObserver();
+	});
+
+	function initIdleMutationObserver() {
+		let debounceTimer;
+		const debounceDelay = 500; // adjust the delay as needed
+
+		const observer = new MutationObserver((mutations) => {
+			// Clear the previous timer and set a new one
+			clearTimeout(debounceTimer);
+			debounceTimer = setTimeout(() => {
+				execute();
+				observer.disconnect(); // Disconnect after first execution
+			}, debounceDelay);
+		});
+
+		const config = { attributes: false, childList: true, subtree: true };
+		observer.observe(document.body, config);
+	}
+
+	function execute() {
+		'SCRIPT_CONTENT_PARAM'
+		//console.log('DOM is now idle. Executing...');
+	}
+})();
@@ -1,344 +0,0 @@
-package rewriters
-
-import (
-	"bytes"
-	_ "embed"
-	"fmt"
-	"io"
-	"log"
-	"net/url"
-	"strings"
-
-	"golang.org/x/net/html"
-)
-
-var attributesToRewrite map[string]bool
-var schemeBlacklist map[string]bool
-
-func init() {
-	// Define list of HTML attributes to try to rewrite
-	attributesToRewrite = map[string]bool{
-		"src":         true,
-		"href":        true,
-		"action":      true,
-		"srcset":      true,
-		"poster":      true,
-		"data":        true,
-		"cite":        true,
-		"formaction":  true,
-		"background":  true,
-		"usemap":      true,
-		"longdesc":    true,
-		"manifest":    true,
-		"archive":     true,
-		"codebase":    true,
-		"icon":        true,
-		"pluginspage": true,
-	}
-
-	// define URIs to NOT rewrite
-	// for example: don't overwrite <img src="data:image/png;base64;iVBORw...">"
-	schemeBlacklist = map[string]bool{
-		"data":       true,
-		"tel":        true,
-		"mailto":     true,
-		"file":       true,
-		"blob":       true,
-		"javascript": true,
-		"about":      true,
-		"magnet":     true,
-		"ws":         true,
-		"wss":        true,
-		"ftp":        true,
-	}
-}
-
-// HTMLResourceURLRewriter is a struct that rewrites URLs within HTML resources to use a specified proxy URL.
-// It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes.
-// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
-type HTMLResourceURLRewriter struct {
-	baseURL               *url.URL
-	tokenizer             *html.Tokenizer
-	currentToken          html.Token
-	tokenBuffer           *bytes.Buffer
-	scriptContentBuffer   *bytes.Buffer
-	insideScript          bool
-	currentTokenIndex     int
-	currentTokenProcessed bool
-	proxyURL              string // ladder URL, not proxied site URL
-}
-
-// NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter.
-// It initializes the tokenizer with the provided source and sets the proxy URL.
-func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL, proxyURL string) *HTMLResourceURLRewriter {
-	return &HTMLResourceURLRewriter{
-		tokenizer:           html.NewTokenizer(src),
-		currentToken:        html.Token{},
-		currentTokenIndex:   0,
-		tokenBuffer:         new(bytes.Buffer),
-		scriptContentBuffer: new(bytes.Buffer),
-		insideScript:        false,
-		baseURL:             baseURL,
-		proxyURL:            proxyURL,
-	}
-}
-
-// Close resets the internal state of HTMLResourceURLRewriter, clearing buffers and token data.
-func (r *HTMLResourceURLRewriter) Close() error {
-	r.tokenBuffer.Reset()
-	r.currentToken = html.Token{}
-	r.currentTokenIndex = 0
-	r.currentTokenProcessed = false
-	return nil
-}
-
-// Read processes the HTML content, rewriting URLs and managing the state of tokens.
-// It reads HTML content, token by token, rewriting URLs to route through the specified proxy.
-func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
-
-	if r.currentToken.Data == "" || r.currentTokenProcessed {
-		tokenType := r.tokenizer.Next()
-
-		// done reading html, close out reader
-		if tokenType == html.ErrorToken {
-			if r.tokenizer.Err() == io.EOF {
-				return 0, io.EOF
-			}
-			return 0, r.tokenizer.Err()
-		}
-
-		// flush the current token into an internal buffer
-		// to handle fragmented tokens
-		r.currentToken = r.tokenizer.Token()
-
-		// patch tokens with URLs
-		isTokenWithAttribute := r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken
-		if isTokenWithAttribute {
-			patchResourceURL(&r.currentToken, r.baseURL, r.proxyURL)
-		}
-
-		r.tokenBuffer.Reset()
-
-		// unescape script contents, not sure why tokenizer will escape things
-		switch tokenType {
-		case html.StartTagToken:
-			if r.currentToken.Data == "script" {
-				r.insideScript = true
-				r.scriptContentBuffer.Reset() // Reset buffer for new script contents
-			}
-			r.tokenBuffer.WriteString(r.currentToken.String()) // Write the start tag
-		case html.EndTagToken:
-			if r.currentToken.Data == "script" {
-				r.insideScript = false
-				modScript := modifyInlineScript(r.scriptContentBuffer)
-				r.tokenBuffer.WriteString(modScript)
-			}
-			r.tokenBuffer.WriteString(r.currentToken.String())
-		default:
-			if r.insideScript {
-				r.scriptContentBuffer.WriteString(r.currentToken.String())
-			} else {
-				r.tokenBuffer.WriteString(r.currentToken.String())
-			}
-		}
-
-		// inject <script> right after <head>
-		isHeadToken := (r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken) && r.currentToken.Data == "head"
-		if isHeadToken {
-			params := map[string]string{
-				"R_PROXYURL": r.proxyURL,
-				"R_BASEURL":  fmt.Sprintf("%s://%s", r.baseURL.Scheme, r.baseURL.Host),
-			}
-			injectScriptWithParams(r.tokenBuffer, rewriteJSResourceUrlsScript, params)
-		}
-
-		r.currentTokenProcessed = false
-		r.currentTokenIndex = 0
-	}
-
-	n, err := r.tokenBuffer.Read(p)
-	if err == io.EOF || r.tokenBuffer.Len() == 0 {
-		r.currentTokenProcessed = true
-		err = nil // EOF in this context is expected and not an actual error
-	}
-	return n, err
-}
-
-// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
-//
-//go:embed js_resource_url_rewriter.js
-var rewriteJSResourceUrlsScript string
-
-func injectScript(tokenBuffer *bytes.Buffer, script string) {
-	tokenBuffer.WriteString(
-		fmt.Sprintf("\n<script>\n%s\n</script>\n", script),
-	)
-}
-
-func injectScriptWithParams(tokenBuffer *bytes.Buffer, script string, params map[string]string) {
-	for old, new := range params {
-		script = strings.ReplaceAll(script, old, new)
-	}
-	tokenBuffer.WriteString(
-		fmt.Sprintf("\n<script>\n%s\n</script>\n", script),
-	)
-}
-
-// possible ad-blocking / bypassing opportunity here
-func modifyInlineScript(scriptContentBuffer *bytes.Buffer) string {
-	return html.UnescapeString(scriptContentBuffer.String())
-}
-
-// Root-relative URLs: These are relative to the root path and start with a "/".
-func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
-	// doublecheck this is a valid relative URL
-	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
-	_, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
-	if err != nil {
-		log.Println(err)
-		return
-	}
-
-	//log.Printf("BASEURL patch: %s\n", baseURL)
-
-	attr.Val = fmt.Sprintf(
-		"/%s://%s/%s",
-		baseURL.Scheme,
-		baseURL.Host,
-		strings.TrimPrefix(attr.Val, "/"),
-	)
-	attr.Val = url.QueryEscape(attr.Val)
-	attr.Val = fmt.Sprintf("/%s", attr.Val)
-
-	log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
-func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
-	attr.Val = fmt.Sprintf(
-		"%s://%s/%s%s",
-		baseURL.Scheme,
-		strings.Trim(baseURL.Host, "/"),
-		strings.Trim(baseURL.RawPath, "/"),
-		strings.Trim(attr.Val, "/"),
-	)
-	attr.Val = url.QueryEscape(attr.Val)
-	attr.Val = fmt.Sprintf("/%s", attr.Val)
-	log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
-func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
-	attr.Val = strings.TrimPrefix(attr.Val, "/")
-	handleRootRelativePath(attr, baseURL)
-	log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
-	// check if valid URL
-	u, err := url.Parse(attr.Val)
-	if err != nil {
-		return
-	}
-	if !(u.Scheme == "http" || u.Scheme == "https") {
-		return
-	}
-	attr.Val = fmt.Sprintf(
-		"/%s",
-		url.QueryEscape(
-			strings.TrimPrefix(attr.Val, "/"),
-		),
-	)
-	log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
-	var srcSetBuilder strings.Builder
-	srcSetItems := strings.Split(attr.Val, ",")
-
-	for i, srcItem := range srcSetItems {
-		srcParts := strings.Fields(srcItem) // Fields splits around whitespace, trimming them
-
-		if len(srcParts) == 0 {
-			continue // skip empty items
-		}
-
-		// Process URL part
-		urlPart := processURLPart(srcParts[0], baseURL)
-
-		// First srcset item without a descriptor
-		if i == 0 && (len(srcParts) == 1 || !strings.HasSuffix(srcParts[1], "x")) {
-			srcSetBuilder.WriteString(urlPart)
-		} else {
-			srcSetBuilder.WriteString(fmt.Sprintf("%s %s", urlPart, srcParts[1]))
-		}
-
-		if i < len(srcSetItems)-1 {
-			srcSetBuilder.WriteString(",") // Add comma for all but last item
-		}
-	}
-
-	attr.Val = srcSetBuilder.String()
-	log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
-}
-
-// only for srcset
-func processURLPart(urlPart string, baseURL *url.URL) string {
-	f := &html.Attribute{Val: urlPart, Key: "src"}
-
-	switch {
-	case strings.HasPrefix(urlPart, "//"):
-		handleProtocolRelativePath(f, baseURL)
-	case strings.HasPrefix(urlPart, "/"):
-		handleRootRelativePath(f, baseURL)
-	case strings.HasPrefix(urlPart, "https://"), strings.HasPrefix(urlPart, "http://"):
-		handleAbsolutePath(f, baseURL)
-	default:
-		handleDocumentRelativePath(f, baseURL)
-	}
-
-	return f.Val
-}
-
-func isBlackedlistedScheme(url string) bool {
-	spl := strings.Split(url, ":")
-	if len(spl) == 0 {
-		return false
-	}
-	scheme := spl[0]
-	return schemeBlacklist[scheme]
-}
-
-func patchResourceURL(token *html.Token, baseURL *url.URL, proxyURL string) {
-	for i := range token.Attr {
-		attr := &token.Attr[i]
-
-		switch {
-		// don't touch attributes except for the ones we defined
-		case !attributesToRewrite[attr.Key]:
-			continue
-		// don't rewrite special URIs that don't make network requests
-		case isBlackedlistedScheme(attr.Val):
-			continue
-		// don't double-overwrite the url
-		case strings.HasPrefix(attr.Val, proxyURL):
-			continue
-		case attr.Key == "srcset":
-			handleSrcSet(attr, baseURL)
-			continue
-		case strings.HasPrefix(attr.Val, "//"):
-			handleProtocolRelativePath(attr, baseURL)
-			continue
-		case strings.HasPrefix(attr.Val, "/"):
-			handleRootRelativePath(attr, baseURL)
-			continue
-		case strings.HasPrefix(attr.Val, "https://") || strings.HasPrefix(attr.Val, "http://"):
-			handleAbsolutePath(attr, baseURL)
-			continue
-		default:
-			handleDocumentRelativePath(attr, baseURL)
-			continue
-		}
-
-	}
-}
proxychain/responsemodifers/rewriters/html_rewriter.go (new file, 131 lines)
@@ -0,0 +1,131 @@
+package rewriters
+
+import (
+	"bytes"
+	"io"
+
+	"golang.org/x/net/html"
+)
+
+// IHTMLTokenRewriter defines an interface for modifying HTML tokens.
+type IHTMLTokenRewriter interface {
+	// ShouldModify determines whether a given HTML token requires modification.
+	ShouldModify(*html.Token) bool
+
+	// ModifyToken applies modifications to a given HTML token.
+	// It returns strings representing content to be prepended and
+	// appended to the token. If no modifications are required or if an error occurs,
+	// it returns empty strings for both 'prepend' and 'append'.
+	// Note: The original token is not modified if an error occurs.
+	ModifyToken(*html.Token) (prepend, append string)
+}
+
+// HTMLRewriter is a struct that can take multiple TokenHandlers and process all
+// HTML tokens from http.Response.Body in a single pass, making changes and returning a new io.ReadCloser
+//
+// - HTMLRewriter reads the http.Response.Body stream,
+// parsing each HTML token one at a time and making modifications (defined by implementations of IHTMLTokenRewriter)
+// in a single pass of the tokenizer.
+//
+// - When ProxyChain.Execute() is called, the response body will be read from the server
+// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
+// without ever buffering the entire HTTP response in memory.
+type HTMLRewriter struct {
+	tokenizer             *html.Tokenizer
+	currentToken          *html.Token
+	tokenBuffer           *bytes.Buffer
+	currentTokenProcessed bool
+	rewriters             []IHTMLTokenRewriter
+}
+
+// NewHTMLRewriter creates a new HTMLRewriter instance.
+// It processes HTML tokens from an io.ReadCloser source (typically http.Response.Body)
+// using a series of HTMLTokenRewriters. Each HTMLTokenRewriter in the 'rewriters' slice
+// applies its specific modifications to the HTML tokens.
+// The HTMLRewriter reads from the provided 'src', applies the modifications,
+// and returns the processed content as a new io.ReadCloser.
+// This new io.ReadCloser can be used to stream the modified content back to the client.
+//
+// Parameters:
+// - src: An io.ReadCloser representing the source of the HTML content, such as http.Response.Body.
+// - rewriters: A slice of HTMLTokenRewriters that define the modifications to be applied to the HTML tokens.
+//
+// Returns:
+// - A pointer to an HTMLRewriter, which implements io.ReadCloser, containing the modified HTML content.
+func NewHTMLRewriter(src io.ReadCloser, rewriters []IHTMLTokenRewriter) *HTMLRewriter {
+	return &HTMLRewriter{
+		tokenizer:             html.NewTokenizer(src),
+		currentToken:          nil,
+		tokenBuffer:           new(bytes.Buffer),
+		currentTokenProcessed: false,
+		rewriters:             rewriters,
+	}
+}
+
+// Close resets the internal state of HTMLRewriter, clearing buffers and token data.
+func (r *HTMLRewriter) Close() error {
+	r.tokenBuffer.Reset()
+	r.currentToken = nil
+	r.currentTokenProcessed = false
+	return nil
+}
+
+// Read processes the HTML content, rewriting URLs and managing the state of tokens.
+func (r *HTMLRewriter) Read(p []byte) (int, error) {
+
+	if r.currentToken == nil || r.currentToken.Data == "" || r.currentTokenProcessed {
+		tokenType := r.tokenizer.Next()
+
+		// done reading html, close out reader
+		if tokenType == html.ErrorToken {
+			if r.tokenizer.Err() == io.EOF {
+				return 0, io.EOF
+			}
+			return 0, r.tokenizer.Err()
+		}
+
+		// get the next token; reset buffer
+		t := r.tokenizer.Token()
+		r.currentToken = &t
+		r.tokenBuffer.Reset()
+
+		// buffer += "<prepends> <token> <appends>"
+		// process token through all registered rewriters
+		// rewriters will modify the token, and optionally
+		// return a <prepend> or <append> string token
+		appends := make([]string, 0, len(r.rewriters))
+		for _, rewriter := range r.rewriters {
+			if !rewriter.ShouldModify(r.currentToken) {
+				continue
+			}
+			prepend, a := rewriter.ModifyToken(r.currentToken)
+			appends = append(appends, a)
+			// add <prepends> to buffer
+			r.tokenBuffer.WriteString(prepend)
+		}
+
+		// add <token> to buffer
+		if tokenType == html.TextToken {
+			// don't unescape textTokens (such as inline scripts).
+			// Token.String() by default will escape the inputs, but
+			// we don't want to modify the original source
+			r.tokenBuffer.WriteString(r.currentToken.Data)
+		} else {
+			r.tokenBuffer.WriteString(r.currentToken.String())
+		}
+
+		// add <appends> to buffer
+		for _, a := range appends {
+			r.tokenBuffer.WriteString(a)
+		}
+
+		r.currentTokenProcessed = false
+	}
+
+	n, err := r.tokenBuffer.Read(p)
+	if err == io.EOF || r.tokenBuffer.Len() == 0 {
+		r.currentTokenProcessed = true
+		err = nil // EOF in this context is expected and not an actual error
+	}
+	return n, err
+}
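To make the contract concrete, the sketch below shows a minimal custom rewriter and how it would be handed to NewHTMLRewriter. It is an illustration only (commentInjector and wrapBody are invented names): ShouldModify filters tokens cheaply, and ModifyToken mutates the token in place and may return strings to emit before and after it.

package rewriters

import (
	"io"

	"golang.org/x/net/html"
)

// commentInjector is an illustrative IHTMLTokenRewriter: it appends an HTML
// comment immediately after the opening <head> tag.
type commentInjector struct{}

func (c *commentInjector) ShouldModify(t *html.Token) bool {
	return t.Type == html.StartTagToken && t.Data == "head"
}

func (c *commentInjector) ModifyToken(t *html.Token) (string, string) {
	// nothing emitted before the token, a comment emitted right after it
	return "", "<!-- processed by ladder -->"
}

// wrapBody shows the single-pass composition: one tokenizer, many rewriters.
func wrapBody(body io.ReadCloser, extra ...IHTMLTokenRewriter) io.ReadCloser {
	all := append([]IHTMLTokenRewriter{&commentInjector{}}, extra...)
	return NewHTMLRewriter(body, all)
}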
proxychain/responsemodifers/rewriters/html_token_url_rewriter.go (new file, 263 lines)
@@ -0,0 +1,263 @@
+package rewriters
+
+import (
+	_ "embed"
+	"fmt"
+	"log"
+	"net/url"
+	"regexp"
+	"strings"
+
+	"golang.org/x/net/html"
+)
+
+var rewriteAttrs map[string]map[string]bool
+var specialRewriteAttrs map[string]map[string]bool
+var schemeBlacklist map[string]bool
+
+func init() {
+	// define all tag/attributes which might contain URLs
+	// to attempt to rewrite to point to proxy instead
+	rewriteAttrs = map[string]map[string]bool{
+		"img":        {"src": true, "srcset": true, "longdesc": true, "usemap": true},
+		"a":          {"href": true},
+		"form":       {"action": true},
+		"link":       {"href": true, "manifest": true, "icon": true},
+		"script":     {"src": true},
+		"video":      {"src": true, "poster": true},
+		"audio":      {"src": true},
+		"iframe":     {"src": true, "longdesc": true},
+		"embed":      {"src": true},
+		"object":     {"data": true, "codebase": true},
+		"source":     {"src": true, "srcset": true},
+		"track":      {"src": true},
+		"area":       {"href": true},
+		"base":       {"href": true},
+		"blockquote": {"cite": true},
+		"del":        {"cite": true},
+		"ins":        {"cite": true},
+		"q":          {"cite": true},
+		"body":       {"background": true},
+		"button":     {"formaction": true},
+		"input":      {"src": true, "formaction": true},
+		"meta":       {"content": true},
+	}
+
+	// might contain URL but requires special handling
+	specialRewriteAttrs = map[string]map[string]bool{
+		"img":    {"srcset": true},
+		"source": {"srcset": true},
+		"meta":   {"content": true},
+	}
+
+	// define URIs to NOT rewrite
+	// for example: don't overwrite <img src="data:image/png;base64;iVBORw...">"
+	schemeBlacklist = map[string]bool{
+		"data":       true,
+		"tel":        true,
+		"mailto":     true,
+		"file":       true,
+		"blob":       true,
+		"javascript": true,
+		"about":      true,
+		"magnet":     true,
+		"ws":         true,
+		"wss":        true,
+		"ftp":        true,
+	}
+
+}
+
+// HTMLTokenURLRewriter implements HTMLTokenRewriter
+// it rewrites URLs within HTML resources to use a specified proxy URL.
+// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
+type HTMLTokenURLRewriter struct {
+	baseURL  *url.URL
+	proxyURL string // ladder URL, not proxied site URL
+}
+
+// NewHTMLTokenURLRewriter creates a new instance of HTMLResourceURLRewriter.
+// It initializes the tokenizer with the provided source and sets the proxy URL.
+func NewHTMLTokenURLRewriter(baseURL *url.URL, proxyURL string) *HTMLTokenURLRewriter {
+	return &HTMLTokenURLRewriter{
+		baseURL:  baseURL,
+		proxyURL: proxyURL,
+	}
+}
+
+func (r *HTMLTokenURLRewriter) ShouldModify(token *html.Token) bool {
+	attrLen := len(token.Attr)
+	if attrLen == 0 {
+		return false
+	}
+	if !(token.Type == html.StartTagToken || token.Type == html.SelfClosingTagToken) {
+		return false
+	}
+	return true
+}
+
+func (r *HTMLTokenURLRewriter) ModifyToken(token *html.Token) (string, string) {
+	for i := range token.Attr {
+		attr := &token.Attr[i]
+		switch {
+		// don't touch tag/attributes that don't contain URIs
+		case !rewriteAttrs[token.Data][attr.Key]:
+			continue
+		// don't touch attributes with special URIs (like data:)
+		case schemeBlacklist[strings.Split(attr.Key, ":")[0]]:
+			continue
+		// don't double-overwrite the url
+		case strings.HasPrefix(attr.Val, r.proxyURL):
+			continue
+		case strings.HasPrefix(attr.Val, "/http://"):
+			continue
+		case strings.HasPrefix(attr.Val, "/https://"):
+			continue
+		// handle special rewrites
+		case specialRewriteAttrs[token.Data][attr.Key]:
+			r.handleSpecialAttr(token, attr, r.baseURL)
+			continue
+		default:
+			// rewrite url
+			handleURLPart(attr, r.baseURL)
+		}
+	}
+	return "", ""
+}
+
+// dispatcher for ModifyURL based on URI type
+func handleURLPart(attr *html.Attribute, baseURL *url.URL) {
+	switch {
+	case strings.HasPrefix(attr.Key, "//"):
+		handleProtocolRelativePath(attr, baseURL)
+	case strings.HasPrefix(attr.Key, "/"):
+		handleRootRelativePath(attr, baseURL)
+	case strings.HasPrefix(attr.Key, "https://"):
+		handleAbsolutePath(attr, baseURL)
+	case strings.HasPrefix(attr.Key, "http://"):
+		handleAbsolutePath(attr, baseURL)
+	default:
+		handleDocumentRelativePath(attr, baseURL)
+	}
+}
+
+// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
+func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
+	attr.Val = strings.TrimPrefix(attr.Val, "/")
+	handleRootRelativePath(attr, baseURL)
+	log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// Root-relative URLs: These are relative to the root path and start with a "/".
+func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
+	// doublecheck this is a valid relative URL
+	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+	_, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
+	if err != nil {
+		log.Println(err)
+		return
+	}
+
+	//log.Printf("BASEURL patch: %s\n", baseURL)
+
+	attr.Val = fmt.Sprintf(
+		"/%s://%s/%s",
+		baseURL.Scheme,
+		baseURL.Host,
+		strings.TrimPrefix(attr.Val, "/"),
+	)
+	attr.Val = escape(attr.Val)
+	attr.Val = fmt.Sprintf("/%s", attr.Val)
+
+	log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
+func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
+	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+	attr.Val = fmt.Sprintf(
+		"%s://%s/%s%s",
+		baseURL.Scheme,
+		strings.Trim(baseURL.Host, "/"),
+		strings.Trim(baseURL.RawPath, "/"),
+		strings.Trim(attr.Val, "/"),
+	)
+	attr.Val = escape(attr.Val)
+	attr.Val = fmt.Sprintf("/%s", attr.Val)
+	log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// full URIs beginning with https?://proxiedsite.com
+func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
+	// check if valid URL
+	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
+	u, err := url.Parse(attr.Val)
+	if err != nil {
+		return
+	}
+	if !(u.Scheme == "http" || u.Scheme == "https") {
+		return
+	}
+	attr.Val = fmt.Sprintf("/%s", escape(strings.TrimPrefix(attr.Val, "/")))
+	log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+// handle edge cases for special attributes
+func (r *HTMLTokenURLRewriter) handleSpecialAttr(token *html.Token, attr *html.Attribute, baseURL *url.URL) {
+	switch {
+	// srcset attribute doesn't contain a single URL but a comma-separated list of URLs, each potentially followed by a space and a descriptor (like a width, pixel density, or other conditions).
+	case token.Data == "img" && attr.Key == "srcset":
+		handleSrcSet(attr, baseURL)
+	case token.Data == "source" && attr.Key == "srcset":
+		handleSrcSet(attr, baseURL)
+	// meta with http-equiv="refresh": The content attribute of a meta tag, when used for a refresh directive, contains a time interval followed by a URL, like content="5;url=http://example.com/".
+	case token.Data == "meta" && attr.Key == "content" && regexp.MustCompile(`^\d+;url=`).MatchString(attr.Val):
+		handleMetaRefresh(attr, baseURL)
+	default:
+		break
+	}
+}
+
+func handleMetaRefresh(attr *html.Attribute, baseURL *url.URL) {
+	sec := strings.Split(attr.Val, ";url=")[0]
+	url := strings.Split(attr.Val, ";url=")[1]
+	f := &html.Attribute{Val: url, Key: "src"}
+	handleURLPart(f, baseURL)
+	attr.Val = fmt.Sprintf("%s;url=%s", sec, url)
+}
+
+func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
+	var srcSetBuilder strings.Builder
+	srcSetItems := strings.Split(attr.Val, ",")
+
+	for i, srcItem := range srcSetItems {
+		srcParts := strings.Fields(srcItem) // Fields splits around whitespace, trimming them
+
+		if len(srcParts) == 0 {
+			continue // skip empty items
+		}
+
+		// rewrite each URL part by passing in fake attribute
+		f := &html.Attribute{Val: srcParts[0], Key: "src"}
+		handleURLPart(f, baseURL)
+		urlPart := f.Key
+
+		// First srcset item without a descriptor
+		if i == 0 && (len(srcParts) == 1 || !strings.HasSuffix(srcParts[1], "x")) {
+			srcSetBuilder.WriteString(urlPart)
+		} else {
+			srcSetBuilder.WriteString(fmt.Sprintf("%s %s", urlPart, srcParts[1]))
+		}
+
+		if i < len(srcSetItems)-1 {
+			srcSetBuilder.WriteString(",") // Add comma for all but last item
+		}
+	}
+
+	attr.Val = srcSetBuilder.String()
+	log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
+}
+
+func escape(str string) string {
+	return strings.ReplaceAll(url.PathEscape(str), "%2F", "/")
+}
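A small sketch of exercising the URL rewriter on a single token (hypothetical code, not part of the commit); per the doc comment above, the intent is that <img src='/relative_path'> ends up routed through the proxy as /https://proxiedsite.com/relative_path.

package rewriters

import (
	"fmt"
	"net/url"

	"golang.org/x/net/html"
)

// exampleRewriteImgSrc shows how a caller would drive the rewriter directly.
func exampleRewriteImgSrc() {
	base, _ := url.Parse("https://proxiedsite.com/article/1")
	rw := NewHTMLTokenURLRewriter(base, "http://localhost:8080")

	tok := html.Token{
		Type: html.StartTagToken,
		Data: "img",
		Attr: []html.Attribute{{Key: "src", Val: "/relative_path"}},
	}
	if rw.ShouldModify(&tok) {
		rw.ModifyToken(&tok) // rewrites tok.Attr in place
	}
	fmt.Println(tok.String()) // the src attribute should now route through the proxy
}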
@@ -285,3 +285,33 @@ const originalSetters = {};
 
 
 })();
+
+
+
+(() => {
+	document.addEventListener('DOMContentLoaded', (event) => {
+		initIdleMutationObserver();
+	});
+
+	function initIdleMutationObserver() {
+		let debounceTimer;
+		const debounceDelay = 500; // adjust the delay as needed
+
+		const observer = new MutationObserver((mutations) => {
+			// Clear the previous timer and set a new one
+			clearTimeout(debounceTimer);
+			debounceTimer = setTimeout(() => {
+				execute();
+				observer.disconnect(); // Disconnect after first execution
+			}, debounceDelay);
+		});
+
+		const config = { attributes: false, childList: true, subtree: true };
+		observer.observe(document.body, config);
+	}
+
+	function execute() {
+		console.log('DOM is now idle. Executing...');
+	}
+
+})();
@@ -0,0 +1,60 @@
+package rewriters
+
+import (
+	_ "embed"
+	"fmt"
+	"strings"
+
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
+)
+
+// ScriptInjectorRewriter implements HTMLTokenRewriter
+// ScriptInjectorRewriter is a struct that injects JS into the page
+// It uses an HTML tokenizer to process HTML content and injects JS at a specified location
+type ScriptInjectorRewriter struct {
+	execTime ScriptExecTime
+	script   string
+}
+
+type ScriptExecTime int
+
+const (
+	BeforeDOMContentLoaded ScriptExecTime = iota
+	AfterDOMContentLoaded
+	AfterDOMIdle
+)
+
+func (r *ScriptInjectorRewriter) ShouldModify(token *html.Token) bool {
+	// modify if token == <head>
+	return token.DataAtom == atom.Head && token.Type == html.StartTagToken
+}
+
+//go:embed after_dom_idle_script_injector.js
+var afterDomIdleScriptInjector string
+
+func (r *ScriptInjectorRewriter) ModifyToken(token *html.Token) (string, string) {
+	switch {
+	case r.execTime == BeforeDOMContentLoaded:
+		return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", r.script)
+
+	case r.execTime == AfterDOMContentLoaded:
+		return "", fmt.Sprintf("\n<script>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.script)
+
+	case r.execTime == AfterDOMIdle:
+		s := strings.Replace(afterDomIdleScriptInjector, `'SCRIPT_CONTENT_PARAM'`, r.script, 1)
+		return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", s)
+
+	default:
+		return "", ""
+	}
+}
+
+// NewScriptInjectorRewriter implements a HtmlTokenRewriter
+// and injects JS into the page for execution at a particular time
+func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
+	return &ScriptInjectorRewriter{
+		execTime: execTime,
+		script:   script,
+	}
+}
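End to end, the pieces above compose as in the sketch below (hypothetical code using an in-memory string instead of a live http.Response.Body): two injector rewriters are queued and both run during the same streaming tokenizer pass.

package rewriters

import (
	"io"
	"os"
	"strings"
)

// exampleInjectOnStream wraps an in-memory HTML body with two rewriters and
// streams the rewritten output; nothing is buffered in full.
func exampleInjectOnStream() error {
	page := "<html><head><title>t</title></head><body>hello</body></html>"
	body := io.NopCloser(strings.NewReader(page))

	rewritten := NewHTMLRewriter(body, []IHTMLTokenRewriter{
		NewScriptInjectorRewriter("console.log('early')", BeforeDOMContentLoaded),
		NewScriptInjectorRewriter("console.log('idle')", AfterDOMIdle),
	})
	defer rewritten.Close()

	_, err := io.Copy(os.Stdout, rewritten)
	return err
}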