add response modifier to prevent the removal of elements by paywall script

This commit is contained in:
Kevin Pham
2023-12-01 21:36:51 -06:00
parent d9714fb449
commit dcdf75bad2
3 changed files with 82 additions and 5 deletions

View File

@@ -33,7 +33,7 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
SetRequestModifications( SetRequestModifications(
// rx.SpoofJA3fingerprint(ja3, "Googlebot"), // rx.SpoofJA3fingerprint(ja3, "Googlebot"),
// rx.MasqueradeAsFacebookBot(), // rx.MasqueradeAsFacebookBot(),
rx.MasqueradeAsGoogleBot(), //rx.MasqueradeAsGoogleBot(),
rx.DeleteOutgoingCookies(), rx.DeleteOutgoingCookies(),
rx.ForwardRequestHeaders(), rx.ForwardRequestHeaders(),
//rx.SpoofReferrerFromGoogleSearch(), //rx.SpoofReferrerFromGoogleSearch(),
@@ -42,14 +42,14 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
//rx.RequestArchiveIs(), //rx.RequestArchiveIs(),
). ).
AddResponseModifications( AddResponseModifications(
tx.InjectScriptBeforeDOMContentLoaded(`(() => {let d = document.createElement("div"); d.id = "adb-check"; document.body.append(d) })()`),
tx.ForwardResponseHeaders(), tx.ForwardResponseHeaders(),
tx.BypassCORS(), tx.BypassCORS(),
tx.BypassContentSecurityPolicy(), tx.BypassContentSecurityPolicy(),
// tx.DeleteIncomingCookies(), // tx.DeleteIncomingCookies(),
tx.RewriteHTMLResourceURLs(), tx.RewriteHTMLResourceURLs(),
tx.PatchDynamicResourceURLs(),
tx.PatchTrackerScripts(), tx.PatchTrackerScripts(),
tx.PatchDynamicResourceURLs(),
tx.BlockElementRemoval(".article-content"),
// tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"), // tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"),
). ).
Execute() Execute()

View File

@@ -0,0 +1,42 @@
package responsemodifers
import (
_ "embed"
"strings"
"ladder/proxychain"
"ladder/proxychain/responsemodifers/rewriters"
)
//go:embed block_element_removal.js
var blockElementRemoval string
// jsStringLiteralEscaper escapes text so it can be substituted into a
// double-quoted JavaScript string literal that is itself embedded in an
// inline <script> element.
var jsStringLiteralEscaper = strings.NewReplacer(
	`\`, `\\`,
	`"`, `\"`,
	"\n", `\n`,
	"\r", `\r`,
	"\u2028", `\u2028`,
	"\u2029", `\u2029`,
	// keep "</script>" inside a selector from terminating the script element
	"</", `<\/`,
)

// BlockElementRemoval prevents paywall javascript from removing a
// particular element by detecting the removal, then immediately reinserting it.
// This is useful when a page will return a "fake" 404, after flashing the content briefly.
// If the /outline/ API works, but the regular API doesn't, try this modifier.
//
// cssSelector is a CSS selector (ie: "div.article-content") identifying the
// element to protect. It is escaped before being substituted into the
// injected script, so selectors containing quotes or backslashes
// (ie: `a[data-role="body"]`) are safe to pass as-is.
//
// The returned modification is a no-op for responses whose Content-Type is
// not text/html.
func BlockElementRemoval(cssSelector string) proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		// don't add rewriter if it's not even html;
		// media types are case-insensitive, so normalize before comparing
		ct := strings.ToLower(strings.TrimSpace(chain.Response.Header.Get("content-type")))
		if !strings.HasPrefix(ct, "text/html") {
			return nil
		}

		// The script template places the placeholder inside double-quoted
		// string literals, so the value must be escaped for that context.
		params := map[string]string{
			"{{CSS_SELECTOR}}": jsStringLiteralEscaper.Replace(cssSelector),
		}

		rr := rewriters.NewScriptInjectorRewriterWithParams(
			blockElementRemoval,
			rewriters.BeforeDOMContentLoaded,
			params,
		)

		chain.Response.Body = rewriters.NewHTMLRewriter(chain.Response.Body, rr)

		return nil
	}
}

View File

@@ -0,0 +1,35 @@
/**
 * Monitors the DOM and restores protected elements if they are removed.
 *
 * This self-invoking function creates a MutationObserver on the whole document.
 * Whenever a removed node either matches the configured CSS selector or contains
 * a descendant that matches it, the removal is logged and, after a short delay,
 * the node is put back:
 *   - if an element matching the selector currently exists in the document,
 *     that element is replaced by the removed node;
 *   - otherwise the removed node is prepended to the document body.
 *
 * The selector is supplied by the server, which substitutes it for the template
 * placeholder inside the string literal assigned to `selector` below.
 */
(function () {
  const selector = "{{CSS_SELECTOR}}";
  const RESTORE_DELAY_MS = 50;

  // True when `node` is an element that matches the selector itself or
  // wraps a matching descendant. Non-element nodes (text, comments) never match.
  function holdsProtectedElement(node) {
    if (node.nodeType !== Node.ELEMENT_NODE) {
      return false;
    }
    return node.matches(selector) || node.querySelector(selector) !== null;
  }

  // Puts a removed node back into the document.
  function restore(node) {
    // The page may have only moved the node (remove + re-add); if it is
    // already attached again there is nothing to restore, and replacing a
    // descendant with its own ancestor would throw.
    if (node.isConnected) {
      return;
    }

    const existing = document.querySelector(selector);
    if (existing !== null) {
      existing.replaceWith(node);
    } else {
      (document.body || document.documentElement).prepend(node);
    }

    // Drop the mutation records produced by our own reinsertion; otherwise
    // replacing `existing` would be reported as a removal and restored again.
    observer.takeRecords();
  }

  function handleMutation(mutationList) {
    for (const mutation of mutationList) {
      if (mutation.type !== "childList") {
        continue;
      }
      for (const node of Array.from(mutation.removedNodes)) {
        if (!holdsProtectedElement(node)) {
          continue;
        }
        console.log(
          "proxychain: prevented removal of element containing '" + selector + "'",
        );
        console.log(node.outerHTML);
        setTimeout(() => restore(node), RESTORE_DELAY_MS);
      }
    }
  }

  const observer = new MutationObserver(handleMutation);
  observer.observe(document, { childList: true, subtree: true });
})();