diff --git a/handlers/proxy.go b/handlers/proxy.go index d781085..5d753f8 100644 --- a/handlers/proxy.go +++ b/handlers/proxy.go @@ -33,23 +33,23 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler { SetRequestModifications( // rx.SpoofJA3fingerprint(ja3, "Googlebot"), // rx.MasqueradeAsFacebookBot(), - rx.MasqueradeAsGoogleBot(), + //rx.MasqueradeAsGoogleBot(), rx.DeleteOutgoingCookies(), rx.ForwardRequestHeaders(), //rx.SpoofReferrerFromGoogleSearch(), rx.SpoofReferrerFromLinkedInPost(), - // rx.RequestWaybackMachine(), - // rx.RequestArchiveIs(), + //rx.RequestWaybackMachine(), + //rx.RequestArchiveIs(), ). AddResponseModifications( - tx.InjectScriptBeforeDOMContentLoaded(`(() => {let d = document.createElement("div"); d.id = "adb-check"; document.body.append(d) })()`), tx.ForwardResponseHeaders(), tx.BypassCORS(), tx.BypassContentSecurityPolicy(), // tx.DeleteIncomingCookies(), tx.RewriteHTMLResourceURLs(), - tx.PatchDynamicResourceURLs(), tx.PatchTrackerScripts(), + tx.PatchDynamicResourceURLs(), + tx.BlockElementRemoval(".article-content"), // tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"), ). Execute() diff --git a/proxychain/responsemodifers/block_element_removal.go b/proxychain/responsemodifers/block_element_removal.go new file mode 100644 index 0000000..97e40f6 --- /dev/null +++ b/proxychain/responsemodifers/block_element_removal.go @@ -0,0 +1,42 @@ +package responsemodifers + +import ( + _ "embed" + "strings" + + "ladder/proxychain" + "ladder/proxychain/responsemodifers/rewriters" +) + +//go:embed block_element_removal.js +var blockElementRemoval string + +// BlockElementRemoval prevents paywall javascript from removing a +// particular element by detecting the removal, then immediately reinserting it. +// This is useful when a page will return a "fake" 404, after flashing the content briefly. +// If the /outline/ API works, but the regular API doesn't, try this modifier. 
+func BlockElementRemoval(cssSelector string) proxychain.ResponseModification {
+	return func(chain *proxychain.ProxyChain) error {
+		// Only HTML documents can host the injected script, so every other
+		// content type is left untouched. Media types are case-insensitive
+		// (RFC 9110), so normalize before comparing; otherwise a header such
+		// as "Text/HTML; charset=utf-8" would be skipped.
+		ct := strings.ToLower(chain.Response.Header.Get("content-type"))
+		if !strings.HasPrefix(ct, "text/html") {
+			return nil
+		}
+
+		// The embedded script refers to the selector through this placeholder.
+		// NOTE(review): the placeholder sits inside a double-quoted JavaScript
+		// string in block_element_removal.js; confirm the injector escapes the
+		// value, or only pass selectors free of `"` and `\`.
+		params := map[string]string{
+			// ie: "div.article-content"
+			"{{CSS_SELECTOR}}": cssSelector,
+		}
+
+		rr := rewriters.NewScriptInjectorRewriterWithParams(
+			blockElementRemoval,
+			rewriters.BeforeDOMContentLoaded,
+			params,
+		)
+
+		// Swap the response body for an HTML rewriter that wraps the original
+		// body together with the script injector built above.
+		chain.Response.Body = rewriters.NewHTMLRewriter(chain.Response.Body, rr)
+
+		return nil
+	}
+}
diff --git a/proxychain/responsemodifers/block_element_removal.js b/proxychain/responsemodifers/block_element_removal.js
new file mode 100644
index 0000000..da8350e
--- /dev/null
+++ b/proxychain/responsemodifers/block_element_removal.js
@@ -0,0 +1,35 @@
+/**
+ * Monitors and restores specific DOM elements if they are removed.
+ *
+ * This self-invoking function creates a MutationObserver to watch for removal of elements matching
+ * "{{CSS_SELECTOR}}". If such an element is removed, it logs the event and attempts to restore the
+ * element after a 50ms delay. The restored element is reinserted at its original location or prepended
+ * to the document body if the original location is unavailable.
+ *
+ * A removed node counts when it matches the selector itself or contains a match. A node that is
+ * attached to the document again once the delay elapses is treated as moved and left alone.
+ */
+(function() {
+  const selector = "{{CSS_SELECTOR}}";
+  const restoreDelayMs = 50;
+
+  // True when the removed node is, or contains, an element matching the selector.
+  // Text and comment nodes have no matches()/querySelector(), so only elements qualify.
+  function holdsProtectedElement(node) {
+    return (
+      node.nodeType === Node.ELEMENT_NODE &&
+      (node.matches(selector) || node.querySelector(selector) !== null)
+    );
+  }
+
+  // Puts a removed node back into the document.
+  function restore(node, mutation) {
+    // Already attached again: the page moved the node instead of deleting it.
+    // Re-inserting could try to place an ancestor inside its own descendant.
+    if (node.isConnected) {
+      return;
+    }
+
+    const standIn = document.querySelector(selector);
+    const parent = mutation.target;
+    if (standIn !== null) {
+      // The page swapped in another matching element; put the original back in its place.
+      standIn.replaceWith(node);
+    } else if (parent.nodeType === Node.ELEMENT_NODE && parent.isConnected) {
+      // Original location is still available: reinsert before the old next sibling,
+      // or append when that sibling has left the parent in the meantime.
+      const next = mutation.nextSibling;
+      const anchor = next !== null && next.parentNode === parent ? next : null;
+      parent.insertBefore(node, anchor);
+    } else {
+      // document.body can be missing early in page load; fall back to the root element.
+      (document.body || document.documentElement).prepend(node);
+    }
+
+    // Discard the records produced by the restoration itself so that replacing the
+    // stand-in (which also matches the selector) does not trigger another restore.
+    observer.takeRecords();
+  }
+
+  function handleMutation(mutationList) {
+    for (const mutation of mutationList) {
+      if (mutation.type !== "childList") {
+        continue;
+      }
+      for (const node of Array.from(mutation.removedNodes)) {
+        if (!holdsProtectedElement(node)) {
+          continue;
+        }
+        console.log(
+          "proxychain: prevented removal of element containing '" + selector + "'",
+        );
+        console.log(node.outerHTML);
+        // Wait briefly so the removing script finishes before the node is put back.
+        setTimeout(() => restore(node, mutation), restoreDelayMs);
+      }
+    }
+  }
+
+  const observer = new MutationObserver(handleMutation);
+  observer.observe(document, { childList: true, subtree: true });
+})();