add subdomain url extractor; add 3p script blocker modifier
This commit is contained in:
@@ -34,7 +34,7 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
|
||||
SetRequestModifications(
|
||||
//rx.SpoofJA3fingerprint(ja3, "Googlebot"),
|
||||
rx.AddCacheBusterQuery(),
|
||||
rx.MasqueradeAsGoogleBot(),
|
||||
//rx.MasqueradeAsGoogleBot(),
|
||||
rx.ForwardRequestHeaders(),
|
||||
rx.DeleteOutgoingCookies(),
|
||||
rx.SpoofReferrerFromRedditPost(),
|
||||
@@ -44,15 +44,17 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
|
||||
).
|
||||
AddResponseModifications(
|
||||
//tx.ForwardResponseHeaders(),
|
||||
//tx.BlockThirdPartyScripts(),
|
||||
tx.DeleteIncomingCookies(),
|
||||
tx.DeleteLocalStorageData(),
|
||||
tx.DeleteSessionStorageData(),
|
||||
tx.BypassCORS(),
|
||||
tx.BypassContentSecurityPolicy(),
|
||||
tx.RewriteHTMLResourceURLs(),
|
||||
tx.PatchTrackerScripts(),
|
||||
tx.PatchDynamicResourceURLs(),
|
||||
//tx.BlockElementRemoval(".article-content"),
|
||||
tx.PatchTrackerScripts(),
|
||||
//tx.BlockElementRemoval(".article-content"), // techcrunch
|
||||
tx.BlockElementRemoval(".available-content"), // substack
|
||||
// tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"),
|
||||
)
|
||||
|
||||
|
||||
@@ -234,9 +234,53 @@ func preventRecursiveProxyRequest(urlQuery *url.URL, baseProxyURL string) *url.U
|
||||
return preventRecursiveProxyRequest(fixedURL, baseProxyURL)
|
||||
}
|
||||
|
||||
// extractURL extracts a URL from the request ctx. If the URL in the request
|
||||
// is a relative path, it reconstructs the full URL using the referer header.
|
||||
// extractURL extracts a URL from the request ctx
|
||||
func (chain *ProxyChain) extractURL() (*url.URL, error) {
|
||||
isLocal := strings.HasPrefix(chain.Context.BaseURL(), "http://localhost") || strings.HasPrefix(chain.Context.BaseURL(), "http://127.0.0.1")
|
||||
isReqPath := strings.HasPrefix(chain.Context.Path(), "/http")
|
||||
isAPI := strings.HasPrefix(chain.Context.Path(), "/api")
|
||||
isOutline := strings.HasPrefix(chain.Context.Path(), "/outline")
|
||||
|
||||
if isLocal || isReqPath || isAPI || isOutline {
|
||||
return chain.extractURLFromPath()
|
||||
}
|
||||
|
||||
u, err := url.Parse(chain.Context.BaseURL())
|
||||
if err != nil {
|
||||
return &url.URL{}, err
|
||||
}
|
||||
parts := strings.Split(u.Hostname(), ".")
|
||||
if len(parts) < 2 {
|
||||
fmt.Println("path")
|
||||
return chain.extractURLFromPath()
|
||||
}
|
||||
|
||||
return chain.extractURLFromSubdomain()
|
||||
}
|
||||
|
||||
// extractURLFromPath extracts a URL from the request ctx if subdomains are used.
|
||||
func (chain *ProxyChain) extractURLFromSubdomain() (*url.URL, error) {
|
||||
u, err := url.Parse(chain.Context.BaseURL())
|
||||
if err != nil {
|
||||
return &url.URL{}, err
|
||||
}
|
||||
parts := strings.Split(u.Hostname(), ".")
|
||||
if len(parts) < 2 {
|
||||
// no subdomain set, fallback to path extraction
|
||||
//panic("asdf")
|
||||
return chain.extractURLFromPath()
|
||||
}
|
||||
subdomain := strings.Join(parts[:len(parts)-2], ".")
|
||||
subURL := subdomain
|
||||
subURL = strings.ReplaceAll(subURL, "--", "|")
|
||||
subURL = strings.ReplaceAll(subURL, "-", ".")
|
||||
subURL = strings.ReplaceAll(subURL, "|", "-")
|
||||
return url.Parse(fmt.Sprintf("https://%s/%s", subURL, u.Path))
|
||||
}
|
||||
|
||||
// extractURLFromPath extracts a URL from the request ctx. If the URL in the request
|
||||
// is a relative path, it reconstructs the full URL using the referer header.
|
||||
func (chain *ProxyChain) extractURLFromPath() (*url.URL, error) {
|
||||
reqURL := chain.Context.Params("*")
|
||||
|
||||
fmt.Println("XXXXXXXXXXXXXXXX")
|
||||
@@ -316,7 +360,7 @@ func (chain *ProxyChain) validateCtxIsSet() error {
|
||||
if chain.Context != nil {
|
||||
return nil
|
||||
}
|
||||
err := errors.New("proxyChain was called without setting a fiber Ctx. Use ProxyChain.SetCtx()")
|
||||
err := errors.New("proxyChain was called without setting a fiber Ctx. Use ProxyChain.SetFiberCtx()")
|
||||
chain.abortErr = chain.abort(err)
|
||||
return chain.abortErr
|
||||
}
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
package requestmodifiers
|
||||
|
||||
// removed due to using a different TLS spoofing technique
|
||||
|
||||
/*
|
||||
import (
|
||||
//"github.com/Danny-Dasilva/CycleTLS/cycletls"
|
||||
//http "github.com/Danny-Dasilva/fhttp"
|
||||
//http "github.com/bogdanfinn/fhttp"
|
||||
|
||||
"golang.org/x/net/proxy"
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// SpoofJA3fingerprint modifies the TLS client and user agent to spoof a particular JA3 fingerprint
|
||||
// Some anti-bot WAFs such as cloudflare can fingerprint the fields of the TLS hello packet, and the order in which they appear
|
||||
// https://web.archive.org/web/20231126224326/https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/
|
||||
// https://web.archive.org/web/20231119065253/https://developers.cloudflare.com/bots/concepts/ja3-fingerprint/
|
||||
func SpoofJA3fingerprint(ja3 string, userAgent string) proxychain.RequestModification {
|
||||
//fmt.Println(ja3)
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// deep copy existing client while modifying http transport
|
||||
ja3SpoofClient := &http.Client{
|
||||
Transport: cycletls.NewTransport(ja3, userAgent),
|
||||
Timeout: chain.Client.Timeout,
|
||||
CheckRedirect: chain.Client.CheckRedirect,
|
||||
}
|
||||
|
||||
chain.SetOnceHTTPClient(ja3SpoofClient)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// SpoofJA3fingerprintWithProxy modifies the TLS client and user agent to spoof a particular JA3 fingerprint and use a proxy.ContextDialer from the "golang.org/x/net/proxy"
|
||||
// Some anti-bot WAFs such as cloudflare can fingerprint the fields of the TLS hello packet, and the order in which they appear
|
||||
// https://web.archive.org/web/20231126224326/https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/
|
||||
// https://web.archive.org/web/20231119065253/https://developers.cloudflare.com/bots/concepts/ja3-fingerprint/
|
||||
func SpoofJA3fingerprintWithProxy(ja3 string, userAgent string, proxy proxy.ContextDialer) proxychain.RequestModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
|
||||
// deep copy existing client while modifying http transport
|
||||
ja3SpoofClient := &http.Client{
|
||||
Transport: cycletls.NewTransportWithProxy(ja3, userAgent, proxy),
|
||||
Timeout: chain.Client.Timeout,
|
||||
CheckRedirect: chain.Client.CheckRedirect,
|
||||
}
|
||||
|
||||
chain.SetOnceHTTPClient(ja3SpoofClient)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
*/
|
||||
33
proxychain/responsemodifiers/block_third_party_scripts.go
Normal file
33
proxychain/responsemodifiers/block_third_party_scripts.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"ladder/proxychain"
|
||||
"ladder/proxychain/responsemodifiers/rewriters"
|
||||
)
|
||||
|
||||
// BlockThirdPartyScripts rewrites HTML and injects JS to block all third party JS from loading.
|
||||
func BlockThirdPartyScripts() proxychain.ResponseModification {
|
||||
// TODO: monkey patch fetch and XMLHttpRequest to firewall 3P JS as well.
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// don't add rewriter if it's not even html
|
||||
ct := chain.Response.Header.Get("content-type")
|
||||
if !strings.HasPrefix(ct, "text/html") {
|
||||
return nil
|
||||
}
|
||||
|
||||
// proxyURL is the URL of the ladder: http://localhost:8080 (ladder)
|
||||
originalURI := chain.Context.Request().URI()
|
||||
proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())
|
||||
|
||||
// replace http.Response.Body with a readcloser that wraps the original, modifying the html attributes
|
||||
rr := rewriters.NewBlockThirdPartyScriptsRewriter(chain.Request.URL, proxyURL)
|
||||
blockJSRewriter := rewriters.NewHTMLRewriter(chain.Response.Body, rr)
|
||||
chain.Response.Body = blockJSRewriter
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
//go:embed vendor/patch_google_analytics.js
|
||||
var gaPatch string
|
||||
|
||||
// PatchGoogleAnalytics replaces any request to google analytics with a no-op stub function.
|
||||
// Some sites will not display content until GA is loaded, so we fake one instead.
|
||||
// Credit to Raymond Hill @ github.com/gorhill/uBlock
|
||||
func PatchGoogleAnalytics() proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
|
||||
// preflight check
|
||||
isGADomain := chain.Request.URL.Host == "www.google-analytics.com" || chain.Request.URL.Host == "google-analytics.com"
|
||||
isGAPath := strings.HasSuffix(chain.Request.URL.Path, "analytics.js")
|
||||
if !(isGADomain || isGAPath) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// send modified js payload to client containing
|
||||
// stub functions from patch_google_analytics.js
|
||||
gaPatchReader := io.NopCloser(strings.NewReader(gaPatch))
|
||||
chain.Response.Body = gaPatchReader
|
||||
chain.Context.Set("content-type", "text/javascript")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
package rewriters
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
// BlockThirdPartyScriptsRewriter implements HTMLTokenRewriter
|
||||
// and blocks 3rd party JS in script tags by replacing the src attribute value "blocked"
|
||||
type BlockThirdPartyScriptsRewriter struct {
|
||||
baseURL *url.URL
|
||||
proxyURL string // ladder URL, not proxied site URL
|
||||
}
|
||||
|
||||
// NewBlockThirdPartyScriptsRewriter creates a new instance of BlockThirdPartyScriptsRewriter.
|
||||
// This rewriter will strip out 3rd party JS URLs from script tags.
|
||||
func NewBlockThirdPartyScriptsRewriter(baseURL *url.URL, proxyURL string) *BlockThirdPartyScriptsRewriter {
|
||||
return &BlockThirdPartyScriptsRewriter{
|
||||
baseURL: baseURL,
|
||||
proxyURL: proxyURL,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *BlockThirdPartyScriptsRewriter) ShouldModify(token *html.Token) bool {
|
||||
if token.DataAtom != atom.Script {
|
||||
return false
|
||||
}
|
||||
|
||||
// check for 3p .js urls in html elements
|
||||
for i := range token.Attr {
|
||||
attr := token.Attr[i]
|
||||
switch {
|
||||
case attr.Key != "src":
|
||||
continue
|
||||
case strings.HasPrefix(attr.Val, "/"):
|
||||
return false
|
||||
case !strings.HasPrefix(attr.Val, "http"):
|
||||
return false
|
||||
case strings.HasPrefix(attr.Val, r.proxyURL):
|
||||
return false
|
||||
case strings.HasPrefix(attr.Val, fmt.Sprintf("%s://%s", r.baseURL.Scheme, r.baseURL.Hostname())):
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (r *BlockThirdPartyScriptsRewriter) ModifyToken(token *html.Token) (string, string) {
|
||||
for i := range token.Attr {
|
||||
attr := &token.Attr[i]
|
||||
if attr.Key != "src" {
|
||||
continue
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(attr.Val, "http") {
|
||||
continue
|
||||
}
|
||||
log.Printf("INFO: blocked 3P js: '%s' on '%s'\n", attr.Val, r.baseURL.String())
|
||||
attr.Key = "blocked"
|
||||
}
|
||||
return "", ""
|
||||
}
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
@@ -16,6 +18,7 @@ import (
|
||||
type ScriptInjectorRewriter struct {
|
||||
execTime ScriptExecTime
|
||||
script string
|
||||
scriptMD5 string
|
||||
}
|
||||
|
||||
type ScriptExecTime int
|
||||
@@ -37,20 +40,27 @@ var afterDomIdleScriptInjector string
|
||||
func (r *ScriptInjectorRewriter) ModifyToken(_ *html.Token) (string, string) {
|
||||
switch {
|
||||
case r.execTime == BeforeDOMContentLoaded:
|
||||
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", r.script)
|
||||
return "", fmt.Sprintf("\n<script id='%s'>\n%s\n</script>\n", r.scriptMD5, r.script)
|
||||
|
||||
case r.execTime == AfterDOMContentLoaded:
|
||||
return "", fmt.Sprintf("\n<script>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.script)
|
||||
return "", fmt.Sprintf("\n<script id='%s'>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.scriptMD5, r.script)
|
||||
|
||||
case r.execTime == AfterDOMIdle:
|
||||
s := strings.Replace(afterDomIdleScriptInjector, `'{{AFTER_DOM_IDLE_SCRIPT}}'`, r.script, 1)
|
||||
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", s)
|
||||
return "", fmt.Sprintf("\n<script id='%s'>\n%s\n</script>\n", r.scriptMD5, s)
|
||||
|
||||
default:
|
||||
return "", ""
|
||||
}
|
||||
}
|
||||
|
||||
// generateMD5Hash returns the MD5 digest of input encoded as a lowercase
// hexadecimal string (32 characters).
func generateMD5Hash(input string) string {
	digest := md5.Sum([]byte(input))
	return hex.EncodeToString(digest[:])
}
|
||||
|
||||
// applies parameters by string replacement of the template script
|
||||
func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
|
||||
// Sort the keys by length in descending order
|
||||
@@ -71,9 +81,13 @@ func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
|
||||
// NewScriptInjectorRewriter implements a HtmlTokenRewriter
|
||||
// and injects JS into the page for execution at a particular time
|
||||
func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
|
||||
scriptMD5 := generateMD5Hash(script)
|
||||
executeOnceScript := fmt.Sprintf(`if (!document.getElementById("x-%s")) { %s; document.getElementById("%s").id = "x-%s" };`, scriptMD5, script, scriptMD5, scriptMD5)
|
||||
|
||||
return &ScriptInjectorRewriter{
|
||||
execTime: execTime,
|
||||
script: script,
|
||||
script: executeOnceScript,
|
||||
scriptMD5: scriptMD5,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,10 +97,7 @@ func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptIn
|
||||
// the params map represents the key-value pair of the params.
|
||||
// the key will be string replaced with the value
|
||||
func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter {
|
||||
rr := &ScriptInjectorRewriter{
|
||||
execTime: execTime,
|
||||
script: script,
|
||||
}
|
||||
rr := NewScriptInjectorRewriter(script, execTime)
|
||||
rr.applyParams(params)
|
||||
return rr
|
||||
}
|
||||
|
||||
@@ -160,6 +160,7 @@
|
||||
);
|
||||
|
||||
// monkey patch service worker registration
|
||||
/*
|
||||
const oldRegister = ServiceWorkerContainer.prototype.register;
|
||||
ServiceWorkerContainer.prototype.register = function (scriptURL, options) {
|
||||
return oldRegister.call(this, rewriteURL(scriptURL), options);
|
||||
@@ -169,6 +170,7 @@
|
||||
"register",
|
||||
"function register() { [native code] }",
|
||||
);
|
||||
*/
|
||||
|
||||
// monkey patch URL.toString() method
|
||||
const oldToString = URL.prototype.toString;
|
||||
|
||||
@@ -24,6 +24,10 @@ func init() {
|
||||
return tx.BlockElementRemoval(params[0])
|
||||
}
|
||||
|
||||
rsmModMap["BlockThirdPartyScripts"] = func(_ ...string) proxychain.ResponseModification {
|
||||
return tx.BlockThirdPartyScripts()
|
||||
}
|
||||
|
||||
rsmModMap["BypassCORS"] = func(_ ...string) proxychain.ResponseModification {
|
||||
return tx.BypassCORS()
|
||||
}
|
||||
@@ -92,10 +96,6 @@ func init() {
|
||||
return tx.PatchDynamicResourceURLs()
|
||||
}
|
||||
|
||||
rsmModMap["PatchGoogleAnalytics"] = func(_ ...string) proxychain.ResponseModification {
|
||||
return tx.PatchGoogleAnalytics()
|
||||
}
|
||||
|
||||
rsmModMap["PatchTrackerScripts"] = func(_ ...string) proxychain.ResponseModification {
|
||||
return tx.PatchTrackerScripts()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user