incorporate URL encoding issue fix from ddba232a31

This commit is contained in:
Kevin Pham
2023-11-21 15:09:24 -06:00
parent dab77d786f
commit 0fc0942095
2 changed files with 51 additions and 19 deletions

View File

@@ -37,10 +37,9 @@ func init() {
"pluginspage": true,
}
// define URIs to NOT rewrite
// for example: don't overwrite <img src="data:image/png;base64,iVBORw...">
schemeBlacklist = map[string]bool {
schemeBlacklist = map[string]bool{
"data": true,
"tel": true,
"mailto": true,
@@ -63,6 +62,8 @@ type HTMLResourceURLRewriter struct {
tokenizer *html.Tokenizer
currentToken html.Token
tokenBuffer *bytes.Buffer
scriptContentBuffer *bytes.Buffer
insideScript bool
currentTokenIndex int
currentTokenProcessed bool
proxyURL string // ladder URL, not proxied site URL
@@ -76,6 +77,8 @@ func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL, proxyURL st
currentToken: html.Token{},
currentTokenIndex: 0,
tokenBuffer: new(bytes.Buffer),
scriptContentBuffer: new(bytes.Buffer),
insideScript: false,
baseURL: baseURL,
proxyURL: proxyURL,
}
@@ -116,7 +119,29 @@ func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
}
r.tokenBuffer.Reset()
// unescape script contents, not sure why tokenizer will escape things
switch tokenType {
case html.StartTagToken:
if r.currentToken.Data == "script" {
r.insideScript = true
r.scriptContentBuffer.Reset() // Reset buffer for new script contents
}
r.tokenBuffer.WriteString(r.currentToken.String()) // Write the start tag
case html.EndTagToken:
if r.currentToken.Data == "script" {
r.insideScript = false
modScript := modifyInlineScript(r.scriptContentBuffer)
r.tokenBuffer.WriteString(modScript)
}
r.tokenBuffer.WriteString(r.currentToken.String())
default:
if r.insideScript {
r.scriptContentBuffer.WriteString(r.currentToken.String())
} else {
r.tokenBuffer.WriteString(r.currentToken.String())
}
}
// inject <script> right after <head>
isHeadToken := (r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken) && r.currentToken.Data == "head"
@@ -147,6 +172,11 @@ func injectScript(tokenBuffer *bytes.Buffer, script string) {
)
}
// modifyInlineScript returns the text accumulated in scriptContentBuffer with
// HTML entities (e.g. "&amp;", "&lt;", "&#34;") decoded back to their literal
// characters. It is called when a closing </script> tag is reached so the
// inline script body is emitted unescaped.
//
// Possible ad-blocking / bypassing opportunity here: this is the single point
// through which every inline script body passes before being written out.
func modifyInlineScript(scriptContentBuffer *bytes.Buffer) string {
	escapedScript := scriptContentBuffer.String()
	return html.UnescapeString(escapedScript)
}
// Root-relative URLs: These are relative to the root path and start with a "/".
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
// doublecheck this is a valid relative URL

View File

@@ -18,6 +18,8 @@
function rewriteURL(url) {
const oldUrl = url
if (!url) return url
let isStr = (typeof url.startsWith === 'function')
if (!isStr) return url
// don't rewrite invalid URIs
try { new URL(url) } catch { return url }