fix "modifer" -> "modifier" typo everywhere
This commit is contained in:
@@ -0,0 +1,28 @@
|
||||
(() => {
  // Quiet period, in milliseconds, with no DOM mutations before the page is
  // considered idle. Adjust as needed.
  const debounceDelay = 500;

  document.addEventListener("DOMContentLoaded", () => {
    initIdleMutationObserver();
  });

  /**
   * Watches document.body for child-list changes and runs execute() once,
   * after debounceDelay milliseconds have passed without any mutation.
   */
  function initIdleMutationObserver() {
    let debounceTimer;

    // (Re)start the idle countdown; each mutation pushes the deadline back.
    const scheduleExecution = () => {
      clearTimeout(debounceTimer);
      debounceTimer = setTimeout(() => {
        // Disconnect first so an exception thrown by the injected script
        // cannot leave the observer attached and re-trigger execution.
        observer.disconnect();
        execute();
      }, debounceDelay);
    };

    const observer = new MutationObserver(scheduleExecution);

    const config = { attributes: false, childList: true, subtree: true };
    observer.observe(document.body, config);

    // Arm the timer immediately: a page that never mutates after
    // DOMContentLoaded is already idle, and without this the observer
    // callback would never fire and the script would never run.
    scheduleExecution();
  }

  // Body is a template placeholder that the Go injector substitutes with the
  // user-supplied script before the file is sent to the browser.
  function execute() {
    "{{AFTER_DOM_IDLE_SCRIPT}}";
    //console.log('DOM is now idle. Executing...');
  }
})();
|
||||
|
||||
3
proxychain/responsemodifiers/rewriters/css_rewriter.go
Normal file
3
proxychain/responsemodifiers/rewriters/css_rewriter.go
Normal file
@@ -0,0 +1,3 @@
|
||||
package rewriters
|
||||
|
||||
// todo: implement
|
||||
133
proxychain/responsemodifiers/rewriters/html_rewriter.go
Normal file
133
proxychain/responsemodifiers/rewriters/html_rewriter.go
Normal file
@@ -0,0 +1,133 @@
|
||||
package rewriters
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// IHTMLTokenRewriter defines an interface for modifying HTML tokens.
|
||||
type IHTMLTokenRewriter interface {
|
||||
// ShouldModify determines whether a given HTML token requires modification.
|
||||
ShouldModify(*html.Token) bool
|
||||
|
||||
// ModifyToken applies modifications to a given HTML token.
|
||||
// It returns strings representing content to be prepended and
|
||||
// appended to the token. If no modifications are required or if an error occurs,
|
||||
// it returns empty strings for both 'prepend' and 'append'.
|
||||
// Note: The original token is not modified if an error occurs.
|
||||
ModifyToken(*html.Token) (prepend, append string)
|
||||
}
|
||||
|
||||
// HTMLRewriter is a struct that can take multiple TokenHandlers and process all
|
||||
// HTML tokens from http.Response.Body in a single pass, making changes and returning a new io.ReadCloser
|
||||
//
|
||||
// - HTMLRewriter reads the http.Response.Body stream,
|
||||
// parsing each HTML token one at a time and making modifications (defined by implementations of IHTMLTokenRewriter)
|
||||
//
|
||||
// - When ProxyChain.Execute() is called, the response body will be read from the server
|
||||
// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
|
||||
// without ever buffering the entire HTTP response in memory.
|
||||
type HTMLRewriter struct {
|
||||
tokenizer *html.Tokenizer
|
||||
currentToken *html.Token
|
||||
tokenBuffer *bytes.Buffer
|
||||
currentTokenProcessed bool
|
||||
rewriters []IHTMLTokenRewriter
|
||||
}
|
||||
|
||||
// NewHTMLRewriter creates a new HTMLRewriter instance.
|
||||
// It processes HTML tokens from an io.ReadCloser source (typically http.Response.Body)
|
||||
// using a series of HTMLTokenRewriters. Each HTMLTokenRewriter in the 'rewriters' slice
|
||||
// applies its specific modifications to the HTML tokens.
|
||||
// The HTMLRewriter reads from the provided 'src', applies the modifications,
|
||||
// and returns the processed content as a new io.ReadCloser.
|
||||
// This new io.ReadCloser can be used to stream the modified content back to the client.
|
||||
//
|
||||
// Parameters:
|
||||
// - src: An io.ReadCloser representing the source of the HTML content, such as http.Response.Body.
|
||||
// - rewriters: A slice of HTMLTokenRewriters that define the modifications to be applied to the HTML tokens.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to an HTMLRewriter, which implements io.ReadCloser, containing the modified HTML content.
|
||||
func NewHTMLRewriter(src io.ReadCloser, rewriters ...IHTMLTokenRewriter) *HTMLRewriter {
|
||||
return &HTMLRewriter{
|
||||
tokenizer: html.NewTokenizer(src),
|
||||
currentToken: nil,
|
||||
tokenBuffer: new(bytes.Buffer),
|
||||
currentTokenProcessed: false,
|
||||
rewriters: rewriters,
|
||||
}
|
||||
}
|
||||
|
||||
// Close resets the internal state of HTMLRewriter, clearing buffers and token data.
|
||||
func (r *HTMLRewriter) Close() error {
|
||||
r.tokenBuffer.Reset()
|
||||
r.currentToken = nil
|
||||
r.currentTokenProcessed = false
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read processes the HTML content, rewriting URLs and managing the state of tokens.
|
||||
func (r *HTMLRewriter) Read(p []byte) (int, error) {
|
||||
if r.currentToken == nil || r.currentToken.Data == "" || r.currentTokenProcessed {
|
||||
tokenType := r.tokenizer.Next()
|
||||
|
||||
// done reading html, close out reader
|
||||
if tokenType == html.ErrorToken {
|
||||
if r.tokenizer.Err() == io.EOF {
|
||||
return 0, io.EOF
|
||||
}
|
||||
|
||||
return 0, r.tokenizer.Err()
|
||||
}
|
||||
|
||||
// get the next token; reset buffer
|
||||
t := r.tokenizer.Token()
|
||||
r.currentToken = &t
|
||||
r.tokenBuffer.Reset()
|
||||
|
||||
// buffer += "<prepends> <token> <appends>"
|
||||
// process token through all registered rewriters
|
||||
// rewriters will modify the token, and optionally
|
||||
// return a <prepend> or <append> string token
|
||||
appends := make([]string, 0, len(r.rewriters))
|
||||
for _, rewriter := range r.rewriters {
|
||||
if !rewriter.ShouldModify(r.currentToken) {
|
||||
continue
|
||||
}
|
||||
|
||||
prepend, a := rewriter.ModifyToken(r.currentToken)
|
||||
appends = append(appends, a)
|
||||
// add <prepends> to buffer
|
||||
r.tokenBuffer.WriteString(prepend)
|
||||
}
|
||||
|
||||
// add <token> to buffer
|
||||
if tokenType == html.TextToken {
|
||||
// don't unescape textTokens (such as inline scripts).
|
||||
// Token.String() by default will escape the inputs, but
|
||||
// we don't want to modify the original source
|
||||
r.tokenBuffer.WriteString(r.currentToken.Data)
|
||||
} else {
|
||||
r.tokenBuffer.WriteString(r.currentToken.String())
|
||||
}
|
||||
|
||||
// add <appends> to buffer
|
||||
for _, a := range appends {
|
||||
r.tokenBuffer.WriteString(a)
|
||||
}
|
||||
|
||||
r.currentTokenProcessed = false
|
||||
}
|
||||
|
||||
n, err := r.tokenBuffer.Read(p)
|
||||
if err == io.EOF || r.tokenBuffer.Len() == 0 {
|
||||
r.currentTokenProcessed = true
|
||||
err = nil // EOF in this context is expected and not an actual error
|
||||
}
|
||||
|
||||
return n, err
|
||||
}
|
||||
@@ -0,0 +1,288 @@
|
||||
package rewriters
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/url"
|
||||
"path"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html/atom"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// rewriteAttrs lists, per tag, every attribute that might contain a URL and
// should therefore be rewritten to point at the proxy.
var rewriteAttrs = map[string]map[string]bool{
	"img":        {"src": true, "srcset": true, "longdesc": true, "usemap": true},
	"a":          {"href": true},
	"form":       {"action": true},
	"link":       {"href": true, "manifest": true, "icon": true},
	"script":     {"src": true},
	"video":      {"src": true, "poster": true},
	"audio":      {"src": true},
	"iframe":     {"src": true, "longdesc": true},
	"embed":      {"src": true},
	"object":     {"data": true, "codebase": true},
	"source":     {"src": true, "srcset": true},
	"track":      {"src": true},
	"area":       {"href": true},
	"base":       {"href": true},
	"blockquote": {"cite": true},
	"del":        {"cite": true},
	"ins":        {"cite": true},
	"q":          {"cite": true},
	"body":       {"background": true},
	"button":     {"formaction": true},
	"input":      {"src": true, "formaction": true},
	"meta":       {"content": true},
}

// specialRewriteAttrs lists tag/attribute pairs that might contain a URL
// but need dedicated parsing rather than a plain URL rewrite.
var specialRewriteAttrs = map[string]map[string]bool{
	"img":    {"srcset": true},
	"source": {"srcset": true},
	"meta":   {"content": true},
}

// schemeBlacklist holds URI schemes that must NOT be rewritten,
// for example: <img src="data:image/png;base64;iVBORw...">
var schemeBlacklist = map[string]bool{
	"data":       true,
	"tel":        true,
	"mailto":     true,
	"file":       true,
	"blob":       true,
	"javascript": true,
	"about":      true,
	"magnet":     true,
	"ws":         true,
	"wss":        true,
	"ftp":        true,
}
|
||||
|
||||
// HTMLTokenURLRewriter is an IHTMLTokenRewriter that rewrites URLs found in
// HTML attributes so that they are fetched through the proxy, e.g.
//
//	<img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
type HTMLTokenURLRewriter struct {
	// proxyURL is the ladder (proxy) URL, not the proxied site URL.
	proxyURL string
	// baseURL is the URL of the proxied document; relative URLs are
	// expanded against it.
	baseURL *url.URL
}
|
||||
|
||||
// NewHTMLTokenURLRewriter creates a new instance of HTMLResourceURLRewriter.
|
||||
// It initializes the tokenizer with the provided source and sets the proxy URL.
|
||||
// baseURL might be https://medium.com/foobar
|
||||
// proxyURL is http://localhost:8080
|
||||
func NewHTMLTokenURLRewriter(baseURL *url.URL, proxyURL string) *HTMLTokenURLRewriter {
|
||||
return &HTMLTokenURLRewriter{
|
||||
baseURL: baseURL,
|
||||
proxyURL: proxyURL,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *HTMLTokenURLRewriter) ShouldModify(token *html.Token) bool {
|
||||
// fmt.Printf("touch token: %s\n", token.String())
|
||||
attrLen := len(token.Attr)
|
||||
if attrLen == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
if token.Type == html.StartTagToken {
|
||||
return true
|
||||
}
|
||||
|
||||
if token.Type == html.SelfClosingTagToken {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (r *HTMLTokenURLRewriter) ModifyToken(token *html.Token) (string, string) {
|
||||
for i := range token.Attr {
|
||||
attr := &token.Attr[i]
|
||||
|
||||
switch {
|
||||
// don't touch tag/attributes that don't contain URIs
|
||||
case !rewriteAttrs[token.Data][attr.Key]:
|
||||
continue
|
||||
// don't touch attributes with special URIs (like data:)
|
||||
case schemeBlacklist[strings.Split(attr.Val, ":")[0]]:
|
||||
continue
|
||||
// don't double-overwrite the url
|
||||
case strings.HasPrefix(attr.Val, r.proxyURL):
|
||||
continue
|
||||
case strings.HasPrefix(attr.Val, "/http://"):
|
||||
continue
|
||||
case strings.HasPrefix(attr.Val, "/https://"):
|
||||
continue
|
||||
// handle special rewrites
|
||||
case specialRewriteAttrs[token.Data][attr.Key]:
|
||||
r.handleSpecialAttr(token, attr, r.baseURL)
|
||||
continue
|
||||
default:
|
||||
// rewrite url
|
||||
handleURLPart(attr, r.baseURL)
|
||||
}
|
||||
}
|
||||
return "", ""
|
||||
}
|
||||
|
||||
// dispatcher for ModifyURL based on URI type
|
||||
func handleURLPart(attr *html.Attribute, baseURL *url.URL) {
|
||||
switch {
|
||||
case strings.HasPrefix(attr.Val, "//"):
|
||||
handleProtocolRelativePath(attr, baseURL)
|
||||
case strings.HasPrefix(attr.Val, "/"):
|
||||
handleRootRelativePath(attr, baseURL)
|
||||
case strings.HasPrefix(attr.Val, "https://"):
|
||||
handleAbsolutePath(attr, baseURL)
|
||||
case strings.HasPrefix(attr.Val, "http://"):
|
||||
handleAbsolutePath(attr, baseURL)
|
||||
default:
|
||||
handleDocumentRelativePath(attr, baseURL)
|
||||
}
|
||||
}
|
||||
|
||||
// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
|
||||
func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
attr.Val = strings.TrimPrefix(attr.Val, "/")
|
||||
handleRootRelativePath(attr, baseURL)
|
||||
log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// Root-relative URLs: These are relative to the root path and start with a "/".
|
||||
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
// Skip processing if it's already in the correct format
|
||||
if strings.HasPrefix(attr.Val, "/http://") || strings.HasPrefix(attr.Val, "/https://") {
|
||||
return
|
||||
}
|
||||
|
||||
// doublecheck this is a valid relative URL
|
||||
log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
|
||||
_, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
// log.Printf("BASEURL patch: %s\n", baseURL)
|
||||
|
||||
attr.Val = fmt.Sprintf(
|
||||
"%s://%s/%s",
|
||||
baseURL.Scheme,
|
||||
baseURL.Host,
|
||||
strings.TrimPrefix(attr.Val, "/"),
|
||||
)
|
||||
attr.Val = escape(attr.Val)
|
||||
attr.Val = fmt.Sprintf("/%s", attr.Val)
|
||||
|
||||
log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
|
||||
func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
|
||||
|
||||
if strings.HasPrefix(attr.Val, "#") {
|
||||
return
|
||||
}
|
||||
|
||||
relativePath := path.Join(strings.Trim(baseURL.RawPath, "/"), strings.Trim(attr.Val, "/"))
|
||||
attr.Val = fmt.Sprintf(
|
||||
"%s://%s/%s",
|
||||
baseURL.Scheme,
|
||||
strings.Trim(baseURL.Host, "/"),
|
||||
relativePath,
|
||||
)
|
||||
attr.Val = escape(attr.Val)
|
||||
attr.Val = fmt.Sprintf("/%s", attr.Val)
|
||||
|
||||
log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// full URIs beginning with https?://proxiedsite.com
|
||||
func handleAbsolutePath(attr *html.Attribute, _ *url.URL) {
|
||||
// check if valid URL
|
||||
log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
|
||||
|
||||
u, err := url.Parse(attr.Val)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if !(u.Scheme == "http" || u.Scheme == "https") {
|
||||
return
|
||||
}
|
||||
|
||||
attr.Val = fmt.Sprintf("/%s", escape(strings.TrimPrefix(attr.Val, "/")))
|
||||
// attr.Val = fmt.Sprintf("/%s", escape(attr.Val))
|
||||
|
||||
log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// handle edge cases for special attributes
|
||||
func (r *HTMLTokenURLRewriter) handleSpecialAttr(token *html.Token, attr *html.Attribute, baseURL *url.URL) {
|
||||
switch {
|
||||
// srcset attribute doesn't contain a single URL but a comma-separated list of URLs, each potentially followed by a space and a descriptor (like a width, pixel density, or other conditions).
|
||||
case token.DataAtom == atom.Img && attr.Key == "srcset":
|
||||
handleSrcSet(attr, baseURL)
|
||||
case token.DataAtom == atom.Source && attr.Key == "srcset":
|
||||
handleSrcSet(attr, baseURL)
|
||||
// meta with http-equiv="refresh": The content attribute of a meta tag, when used for a refresh directive, contains a time interval followed by a URL, like content="5;url=http://example.com/".
|
||||
case token.DataAtom == atom.Meta && attr.Key == "content" && regexp.MustCompile(`^\d+;url=`).MatchString(attr.Val):
|
||||
handleMetaRefresh(attr, baseURL)
|
||||
default:
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
func handleMetaRefresh(attr *html.Attribute, baseURL *url.URL) {
|
||||
sec := strings.Split(attr.Val, ";url=")[0]
|
||||
url := strings.Split(attr.Val, ";url=")[1]
|
||||
f := &html.Attribute{Val: url, Key: "src"}
|
||||
handleURLPart(f, baseURL)
|
||||
attr.Val = fmt.Sprintf("%s;url=%s", sec, f.Val)
|
||||
}
|
||||
|
||||
func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
|
||||
var srcSetBuilder strings.Builder
|
||||
srcSetItems := strings.Split(attr.Val, ",")
|
||||
|
||||
for i, srcItem := range srcSetItems {
|
||||
srcParts := strings.Fields(srcItem)
|
||||
|
||||
if len(srcParts) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
f := &html.Attribute{Val: srcParts[0], Key: "src"}
|
||||
handleURLPart(f, baseURL)
|
||||
|
||||
if i > 0 {
|
||||
srcSetBuilder.WriteString(", ")
|
||||
}
|
||||
|
||||
srcSetBuilder.WriteString(f.Val)
|
||||
if len(srcParts) > 1 {
|
||||
srcSetBuilder.WriteString(" ")
|
||||
srcSetBuilder.WriteString(strings.Join(srcParts[1:], " "))
|
||||
}
|
||||
}
|
||||
|
||||
attr.Val = srcSetBuilder.String()
|
||||
}
|
||||
|
||||
// escape path-escapes str but restores forward slashes, so the result can be
// embedded in a URL path while keeping its own "/" separators readable.
func escape(str string) string {
	escaped := url.PathEscape(str)
	escaped = strings.Replace(escaped, "%2F", "/", -1)

	return escaped
}
|
||||
@@ -0,0 +1,92 @@
|
||||
package rewriters
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
// ScriptInjectorRewriter implements HTMLTokenRewriter
|
||||
// ScriptInjectorRewriter is a struct that injects JS into the page
|
||||
// It uses an HTML tokenizer to process HTML content and injects JS at a specified location
|
||||
type ScriptInjectorRewriter struct {
|
||||
execTime ScriptExecTime
|
||||
script string
|
||||
}
|
||||
|
||||
// ScriptExecTime selects when an injected script is executed.
type ScriptExecTime int

const (
	// BeforeDOMContentLoaded runs the script inline, as soon as it is parsed.
	BeforeDOMContentLoaded ScriptExecTime = 0
	// AfterDOMContentLoaded runs the script from a DOMContentLoaded listener.
	AfterDOMContentLoaded ScriptExecTime = 1
	// AfterDOMIdle runs the script through the embedded DOM-idle template.
	AfterDOMIdle ScriptExecTime = 2
)
|
||||
|
||||
func (r *ScriptInjectorRewriter) ShouldModify(token *html.Token) bool {
|
||||
// modify if token == <head>
|
||||
return token.DataAtom == atom.Head && token.Type == html.StartTagToken
|
||||
}
|
||||
|
||||
//go:embed after_dom_idle_script_injector.js
|
||||
var afterDomIdleScriptInjector string
|
||||
|
||||
func (r *ScriptInjectorRewriter) ModifyToken(_ *html.Token) (string, string) {
|
||||
switch {
|
||||
case r.execTime == BeforeDOMContentLoaded:
|
||||
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", r.script)
|
||||
|
||||
case r.execTime == AfterDOMContentLoaded:
|
||||
return "", fmt.Sprintf("\n<script>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.script)
|
||||
|
||||
case r.execTime == AfterDOMIdle:
|
||||
s := strings.Replace(afterDomIdleScriptInjector, `'{{AFTER_DOM_IDLE_SCRIPT}}'`, r.script, 1)
|
||||
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", s)
|
||||
|
||||
default:
|
||||
return "", ""
|
||||
}
|
||||
}
|
||||
|
||||
// applies parameters by string replacement of the template script
|
||||
func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
|
||||
// Sort the keys by length in descending order
|
||||
keys := make([]string, 0, len(params))
|
||||
for key := range params {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
|
||||
sort.Slice(keys, func(i, j int) bool {
|
||||
return len(keys[i]) > len(keys[j])
|
||||
})
|
||||
|
||||
for _, key := range keys {
|
||||
r.script = strings.ReplaceAll(r.script, key, params[key])
|
||||
}
|
||||
}
|
||||
|
||||
// NewScriptInjectorRewriter implements a HtmlTokenRewriter
|
||||
// and injects JS into the page for execution at a particular time
|
||||
func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
|
||||
return &ScriptInjectorRewriter{
|
||||
execTime: execTime,
|
||||
script: script,
|
||||
}
|
||||
}
|
||||
|
||||
// NewScriptInjectorRewriterWith implements a HtmlTokenRewriter
|
||||
// and injects JS into the page for execution at a particular time
|
||||
// accepting arguments into the script, which will be added via a string replace
|
||||
// the params map represents the key-value pair of the params.
|
||||
// the key will be string replaced with the value
|
||||
func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter {
|
||||
rr := &ScriptInjectorRewriter{
|
||||
execTime: execTime,
|
||||
script: script,
|
||||
}
|
||||
rr.applyParams(params)
|
||||
return rr
|
||||
}
|
||||
Reference in New Issue
Block a user