incorporate URL encoding issue fix from ddba232a31

This commit is contained in:
Kevin Pham
2023-11-21 15:09:24 -06:00
parent dab77d786f
commit 0fc0942095
2 changed files with 51 additions and 19 deletions

View File

@@ -37,21 +37,20 @@ func init() {
"pluginspage": true, "pluginspage": true,
} }
// define URIs to NOT rewrite // define URIs to NOT rewrite
// for example: don't overwrite <img src="data:image/png;base64;iVBORw...">" // for example: don't overwrite <img src="data:image/png;base64;iVBORw...">"
schemeBlacklist = map[string]bool { schemeBlacklist = map[string]bool{
"data": true, "data": true,
"tel": true, "tel": true,
"mailto": true, "mailto": true,
"file": true, "file": true,
"blob": true, "blob": true,
"javascript": true, "javascript": true,
"about": true, "about": true,
"magnet": true, "magnet": true,
"ws": true, "ws": true,
"wss": true, "wss": true,
"ftp": true, "ftp": true,
} }
} }
@@ -63,6 +62,8 @@ type HTMLResourceURLRewriter struct {
tokenizer *html.Tokenizer tokenizer *html.Tokenizer
currentToken html.Token currentToken html.Token
tokenBuffer *bytes.Buffer tokenBuffer *bytes.Buffer
scriptContentBuffer *bytes.Buffer
insideScript bool
currentTokenIndex int currentTokenIndex int
currentTokenProcessed bool currentTokenProcessed bool
proxyURL string // ladder URL, not proxied site URL proxyURL string // ladder URL, not proxied site URL
@@ -72,12 +73,14 @@ type HTMLResourceURLRewriter struct {
// It initializes the tokenizer with the provided source and sets the proxy URL. // It initializes the tokenizer with the provided source and sets the proxy URL.
func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL, proxyURL string) *HTMLResourceURLRewriter { func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL, proxyURL string) *HTMLResourceURLRewriter {
return &HTMLResourceURLRewriter{ return &HTMLResourceURLRewriter{
tokenizer: html.NewTokenizer(src), tokenizer: html.NewTokenizer(src),
currentToken: html.Token{}, currentToken: html.Token{},
currentTokenIndex: 0, currentTokenIndex: 0,
tokenBuffer: new(bytes.Buffer), tokenBuffer: new(bytes.Buffer),
baseURL: baseURL, scriptContentBuffer: new(bytes.Buffer),
proxyURL: proxyURL, insideScript: false,
baseURL: baseURL,
proxyURL: proxyURL,
} }
} }
@@ -116,7 +119,29 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
} }
r.tokenBuffer.Reset() r.tokenBuffer.Reset()
r.tokenBuffer.WriteString(r.currentToken.String())
// unescape script contents, not sure why tokenizer will escape things
switch tokenType {
case html.StartTagToken:
if r.currentToken.Data == "script" {
r.insideScript = true
r.scriptContentBuffer.Reset() // Reset buffer for new script contents
}
r.tokenBuffer.WriteString(r.currentToken.String()) // Write the start tag
case html.EndTagToken:
if r.currentToken.Data == "script" {
r.insideScript = false
modScript := modifyInlineScript(r.scriptContentBuffer)
r.tokenBuffer.WriteString(modScript)
}
r.tokenBuffer.WriteString(r.currentToken.String())
default:
if r.insideScript {
r.scriptContentBuffer.WriteString(r.currentToken.String())
} else {
r.tokenBuffer.WriteString(r.currentToken.String())
}
}
// inject <script> right after <head> // inject <script> right after <head>
isHeadToken := (r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken) && r.currentToken.Data == "head" isHeadToken := (r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken) && r.currentToken.Data == "head"
@@ -147,6 +172,11 @@ func injectScript(tokenBuffer *bytes.Buffer, script string) {
) )
} }
// modifyInlineScript returns the text accumulated in scriptContentBuffer with
// its HTML entities (e.g. &lt;, &amp;, &#34;) decoded back into literal
// characters. The buffer itself is left untouched.
//
// This is also a possible hook for ad-blocking / bypassing logic.
func modifyInlineScript(scriptContentBuffer *bytes.Buffer) string {
	escapedScript := scriptContentBuffer.String()
	return html.UnescapeString(escapedScript)
}
// Root-relative URLs: These are relative to the root path and start with a "/". // Root-relative URLs: These are relative to the root path and start with a "/".
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) { func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
// doublecheck this is a valid relative URL // doublecheck this is a valid relative URL

View File

@@ -18,6 +18,8 @@
function rewriteURL(url) { function rewriteURL(url) {
const oldUrl = url const oldUrl = url
if (!url) return url if (!url) return url
let isStr = (typeof url.startsWith === 'function')
if (!isStr) return url
// don't rewrite invalid URIs // don't rewrite invalid URIs
try { new URL(url) } catch { return url } try { new URL(url) } catch { return url }