diff --git a/Makefile b/Makefile index 98f3097..c0ecca8 100644 --- a/Makefile +++ b/Makefile @@ -7,4 +7,7 @@ lint: install-linters: go install mvdan.cc/gofumpt@latest - go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.55.2 \ No newline at end of file + go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.55.2 + +run: + go run ./cmd/. \ No newline at end of file diff --git a/cmd/main.go b/cmd/main.go index e6aaf1f..32b468d 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -10,6 +10,7 @@ import ( "ladder/handlers" "ladder/internal/cli" + "ladder/proxychain/requestmodifers/bot" "github.com/akamensky/argparse" "github.com/gofiber/fiber/v2" @@ -54,6 +55,16 @@ func main() { Help: "Adds verbose logging", }) + randomGoogleBot := parser.Flag("", "random-googlebot", &argparse.Options{ + Required: false, + Help: "Update the list of trusted Googlebot IPs, and use a random one for each masqueraded request", + }) + + randomBingBot := parser.Flag("", "random-bingbot", &argparse.Options{ + Required: false, + Help: "Update the list of trusted Bingbot IPs, and use a random one for each masqueraded request", + }) + // TODO: add version flag that reads from handers/VERSION ruleset := parser.String("r", "ruleset", &argparse.Options{
@@ -81,6 +92,24 @@ func main() {
 		fmt.Print(parser.Usage(err))
 	}
 
+	// Refresh the Googlebot IP pool; a failure is reported and startup continues.
+	if *randomGoogleBot {
+		err := bot.GoogleBot.UpdatePool("https://developers.google.com/static/search/apis/ipranges/googlebot.json")
+		if err != nil {
+			fmt.Println("error while retrieving list of Googlebot IPs: " + err.Error())
+			fmt.Println("defaulting to known trusted Googlebot identity")
+		}
+	}
+
+	// Refresh the Bingbot IP pool (on BingBot, not GoogleBot); a failure is reported and startup continues.
+	if *randomBingBot {
+		err := bot.BingBot.UpdatePool("https://www.bing.com/toolbox/bingbot.json")
+		if err != nil {
+			fmt.Println("error while retrieving list of Bingbot IPs: " + err.Error())
+			fmt.Println("defaulting to known trusted Bingbot identity")
+		}
+	}
+
 	// utility cli flag to compile ruleset directory into single ruleset.yaml
 	if *mergeRulesets || 
*mergeRulesetsGzip { output := os.Stdout diff --git a/go.mod b/go.mod index 61613f7..a6454de 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module ladder go 1.21.1 require ( + github.com/3th1nk/cidr v0.2.0 github.com/akamensky/argparse v1.4.0 github.com/bogdanfinn/fhttp v0.5.24 github.com/bogdanfinn/tls-client v1.6.1 diff --git a/go.sum b/go.sum index c973ed8..73e7c65 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/3th1nk/cidr v0.2.0 h1:81jjEknszD8SHPLVTPPk+BZjNVqq1ND2YXLSChl6Lrs= +github.com/3th1nk/cidr v0.2.0/go.mod h1:XsSQnS4rEYyB2veDfnIGgViulFpIITPKtp3f0VxpiLw= github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EPxJRG4= github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc= github.com/akamensky/argparse v1.4.0 h1:YGzvsTqCvbEZhL8zZu2AiA5nq805NZh75JNj4ajn1xc= @@ -82,6 +84,7 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/tam7t/hpkp v0.0.0-20160821193359-2b70b4024ed5 h1:YqAladjX7xpA6BM04leXMWAEjS0mTZ5kUU9KRBriQJc= diff --git a/handlers/proxy.go b/handlers/proxy.go index 8682419..07165ac 100644 --- a/handlers/proxy.go +++ b/handlers/proxy.go @@ -31,23 +31,23 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler { SetFiberCtx(c). SetDebugLogging(opts.Verbose). 
SetRequestModifications( - //rx.SpoofJA3fingerprint(ja3, "Googlebot"), - //rx.MasqueradeAsFacebookBot(), + // rx.SpoofJA3fingerprint(ja3, "Googlebot"), + // rx.MasqueradeAsFacebookBot(), rx.MasqueradeAsGoogleBot(), rx.DeleteOutgoingCookies(), rx.ForwardRequestHeaders(), rx.SpoofReferrerFromGoogleSearch(), - //rx.RequestWaybackMachine(), - //rx.RequestArchiveIs(), + // rx.RequestWaybackMachine(), + // rx.RequestArchiveIs(), ). AddResponseModifications( tx.ForwardResponseHeaders(), tx.BypassCORS(), tx.BypassContentSecurityPolicy(), - //tx.DeleteIncomingCookies(), + // tx.DeleteIncomingCookies(), tx.RewriteHTMLResourceURLs(), tx.PatchDynamicResourceURLs(), - //tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"), + // tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"), ). Execute() diff --git a/proxychain/proxychain.go b/proxychain/proxychain.go index f4f36da..30d7f31 100644 --- a/proxychain/proxychain.go +++ b/proxychain/proxychain.go @@ -391,7 +391,7 @@ func (chain *ProxyChain) _reset() { chain.Context = nil chain.onceResponseModifications = []ResponseModification{} chain.onceRequestModifications = []RequestModification{} - //chain.onceClient = nil + // chain.onceClient = nil } // NewProxyChain initializes a new ProxyChain @@ -401,9 +401,9 @@ func NewProxyChain() *ProxyChain { options := []tls_client.HttpClientOption{ tls_client.WithTimeoutSeconds(20), tls_client.WithRandomTLSExtensionOrder(), - //tls_client.WithClientProfile(profiles.Chrome_117), - //tls_client.WithNotFollowRedirects(), - //tls_client.WithCookieJar(jar), // create cookieJar instance and pass it as argument + // tls_client.WithClientProfile(profiles.Chrome_117), + // tls_client.WithNotFollowRedirects(), + // tls_client.WithCookieJar(jar), // create cookieJar instance and pass it as argument } client, err := tls_client.NewHttpClient(tls_client.NewNoopLogger(), options...) 
if err != nil { @@ -459,7 +459,7 @@ func (chain *ProxyChain) _execute() (io.Reader, error) { return nil, chain.abort(err) } chain.Response = resp - //chain.onceClient = nil + // chain.onceClient = nil } else { resp, err := chain.Client.Do(chain.Request) if err != nil {
diff --git a/proxychain/requestmodifers/bot/bot.go b/proxychain/requestmodifers/bot/bot.go
new file mode 100644
index 0000000..96f0ad7
--- /dev/null
+++ b/proxychain/requestmodifers/bot/bot.go
@@ -0,0 +1,165 @@
+package bot
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"math/rand"
+	"net/http"
+	"time"
+
+	"github.com/3th1nk/cidr"
+)
+
+// Bot is the behaviour shared by every crawler identity in this package.
+type Bot interface {
+	// UpdatePool refreshes the IP pool from the JSON document served at url.
+	UpdatePool(url string) error
+	// GetRandomIP returns an address from a randomly chosen pool prefix.
+	GetRandomIP() string
+}
+
+// Compile-time check that *bot keeps satisfying Bot.
+var _ Bot = (*bot)(nil)
+
+// bot bundles the user agent, JA3 fingerprint and IP pool of one crawler.
+type bot struct {
+	UserAgent   string
+	Fingerprint string
+	IPPool      botPool
+}
+
+// botPool is the decoded form of a published IP range document.
+type botPool struct {
+	Timestamp string      `json:"creationTime"`
+	Prefixes  []botPrefix `json:"prefixes"`
+}
+
+// botPrefix is one pool entry; either field may be empty.
+type botPrefix struct {
+	IPv6 string `json:"ipv6Prefix,omitempty"`
+	IPv4 string `json:"ipv4Prefix,omitempty"`
+}
+
+// TODO: move pointers around, not global variables
+
+// GoogleBot is the Googlebot identity, seeded with a single built-in prefix.
+var GoogleBot = bot{
+	UserAgent: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; http://www.google.com/bot.html) Chrome/79.0.3945.120 Safari/537.36",
+
+	// https://github.com/trisulnsm/trisul-scripts/blob/master/lua/frontend_scripts/reassembly/ja3/prints/ja3fingerprint.json
+	Fingerprint: "769,49195-49199-49196-49200-52393-52392-52244-52243-49161-49171-49162-49172-156-157-47-53-10,65281-0-23-35-13-5-18-16-11-10-21,29-23-24,0",
+
+	IPPool: botPool{
+		Timestamp: "2023-11-28T23:00:56.000000",
+		Prefixes: []botPrefix{
+			{
+				IPv4: "34.100.182.96/28",
+			},
+		},
+	},
+}
+
+// BingBot is the Bingbot identity, seeded with a single built-in prefix.
+var BingBot = bot{
+	UserAgent: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/79.0.3945.120 Safari/537.36",
+	IPPool: botPool{
+		Timestamp: "2023-03-08T10:00:00.121331",
+		Prefixes: []botPrefix{
+			{
+				IPv4: "207.46.13.0/24",
+			},
+		},
+	},
+}
+
+// UpdatePool downloads the JSON prefix list served at url and, when it decodes
+// to at least one prefix, installs it as b.IPPool. On any error the current
+// pool is left untouched.
+func (b *bot) UpdatePool(url string) error {
+	client := &http.Client{Timeout: 10 * time.Second}
+
+	resp, err := client.Get(url)
+	if err != nil {
+		return fmt.Errorf("fetching IP pool: %w", err)
+	}
+	// Deferred before the status check so the body is closed on every path.
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("fetching IP pool from %q: status code %s", url, resp.Status)
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return fmt.Errorf("reading IP pool from %q: %w", url, err)
+	}
+
+	// Decode into a scratch value so a bad document cannot clobber the pool in use.
+	var pool botPool
+	if err := json.Unmarshal(body, &pool); err != nil {
+		return fmt.Errorf("decoding IP pool from %q: %w", url, err)
+	}
+
+	// An empty pool would leave GetRandomIP with nothing to choose from.
+	if len(pool.Prefixes) == 0 {
+		return fmt.Errorf("IP pool from %q has no prefixes", url)
+	}
+
+	b.IPPool = pool
+
+	return nil
+}
+
+// GetRandomIP picks a random prefix from the pool and returns an address from
+// it, preferring IPv4 over IPv6. If that prefix is unusable the first usable
+// prefix in the pool is used instead; an empty string means none was usable.
+func (b *bot) GetRandomIP() string {
+	prefixes := b.IPPool.Prefixes
+	// rand.Intn panics when its argument is 0, so bail out on an empty pool.
+	if len(prefixes) == 0 {
+		return ""
+	}
+
+	if ip, ok := ipFromPrefix(prefixes[rand.Intn(len(prefixes))]); ok {
+		return ip
+	}
+
+	// The random pick had no parsable CIDR; scan for any prefix that does.
+	for _, p := range prefixes {
+		if ip, ok := ipFromPrefix(p); ok {
+			return ip
+		}
+	}
+
+	return ""
+}
+
+// ipFromPrefix returns an address for p, trying its IPv4 CIDR before its IPv6
+// one. The boolean is false when neither is set or parsable.
+func ipFromPrefix(p botPrefix) (string, bool) {
+	for _, c := range []string{p.IPv4, p.IPv6} {
+		if c == "" {
+			continue
+		}
+
+		if ip, err := randomIPFromSubnet(c); err == nil {
+			return ip, true
+		}
+	}
+
+	return "", false
+}
+
+// randomIPFromSubnet parses the CIDR string c and returns the network address
+// of that block (not yet a random host, see TODO below).
+func randomIPFromSubnet(c string) (string, error) {
+	block, err := cidr.Parse(c)
+	if err != nil {
+		return "", err
+	}
+
+	// TODO: the beginning of the network is technically a viable IP to use
+	// but maybe a different solution would be better here
+	return block.Network().String(), nil
+}
diff --git a/proxychain/requestmodifers/forward_request_headers.go b/proxychain/requestmodifers/forward_request_headers.go index c329564..7e139dd 100644 --- a/proxychain/requestmodifers/forward_request_headers.go +++ b/proxychain/requestmodifers/forward_request_headers.go @@ -33,7 +33,7 @@ func ForwardRequestHeaders() proxychain.RequestModification { if forwardBlacklist[k] { return } - //fmt.Println(k, v) + // fmt.Println(k, v) chain.Request.Header.Set(k, v) } diff --git a/proxychain/requestmodifers/masquerade_as_trusted_bot.go b/proxychain/requestmodifers/masquerade_as_trusted_bot.go index 4f1c75a..f9bab95 100644 --- 
a/proxychain/requestmodifers/masquerade_as_trusted_bot.go
+++ b/proxychain/requestmodifers/masquerade_as_trusted_bot.go
@@ -2,26 +2,23 @@ package requestmodifers
 
 import (
 	"ladder/proxychain"
+	"ladder/proxychain/requestmodifers/bot"
 )
 
 // MasqueradeAsGoogleBot modifies user agent and x-forwarded for
 // to appear to be a Google Bot
 func MasqueradeAsGoogleBot() proxychain.RequestModification {
-	const botUA string = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; http://www.google.com/bot.html) Chrome/79.0.3945.120 Safari/537.36"
-	const botIP string = "66.249.78.8" // TODO: create a random ip pool from https://developers.google.com/static/search/apis/ipranges/googlebot.json
-	// https://github.com/trisulnsm/trisul-scripts/blob/master/lua/frontend_scripts/reassembly/ja3/prints/ja3fingerprint.json
-	const ja3 string = "769,49195-49199-49196-49200-52393-52392-52244-52243-49161-49171-49162-49172-156-157-47-53-10,65281-0-23-35-13-5-18-16-11-10-21,29-23-24,0"
-	// "741,49195-49199-49200-49161-49171-49162-49172-156-157-47-10-53-51-57,65281-0-23-35-13-13172-11-10,29-23-24,0"
+	ip := bot.GoogleBot.GetRandomIP()
 
-	return masqueradeAsTrustedBot(botUA, botIP, ja3)
+	return masqueradeAsTrustedBot(bot.GoogleBot.UserAgent, ip, bot.GoogleBot.Fingerprint)
 }
 
 // MasqueradeAsBingBot modifies user agent and x-forwarded for
 // to appear to be a Bing Bot
 func MasqueradeAsBingBot() proxychain.RequestModification {
-	const botUA string = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/79.0.3945.120 Safari/537.36"
-	const botIP string = "13.66.144.9" // https://www.bing.com/toolbox/bingbot.json
-	return masqueradeAsTrustedBot(botUA, botIP, "")
+	ip := bot.BingBot.GetRandomIP()
+
+	return masqueradeAsTrustedBot(bot.BingBot.UserAgent, ip, "") // BingBot defines no JA3 fingerprint
 }
 
 // MasqueradeAsWaybackMachineBot modifies user agent and x-forwarded for
@@ -84,7 +81,19 @@ func masqueradeAsTrustedBot(botUA string, botIP string, ja3 
string) proxychain.R return func(chain *proxychain.ProxyChain) error { chain.AddOnceRequestModifications( SpoofUserAgent(botUA), - SetRequestHeader("x-forwarded-for", botIP), + + // general / nginx + SetRequestHeader("X-Forwarded-For", botIP), + SetRequestHeader("X-Real-IP", botIP), + // akamai + SetRequestHeader("True-Client-IP", botIP), + // cloudflare + SetRequestHeader("CF-Connecting-IP", botIP), + // weblogic + SetRequestHeader("WL-Proxy-Client-IP", botIP), + // azure + SetRequestHeader("X-Cluster-Client-IP", botIP), + DeleteRequestHeader("referrer"), DeleteRequestHeader("origin"), ) diff --git a/proxychain/responsemodifers/api_content.go b/proxychain/responsemodifers/api_content.go index 9e12220..a428344 100644 --- a/proxychain/responsemodifers/api_content.go +++ b/proxychain/responsemodifers/api_content.go @@ -22,7 +22,7 @@ func APIContent() proxychain.ResponseModification { opts := trafilatura.Options{ IncludeImages: true, IncludeLinks: true, - //FavorPrecision: true, + // FavorPrecision: true, FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go // implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability" OriginalURL: chain.Request.URL, diff --git a/proxychain/responsemodifers/forward_response_headers.go b/proxychain/responsemodifers/forward_response_headers.go index a143d39..b6d0519 100644 --- a/proxychain/responsemodifers/forward_response_headers.go +++ b/proxychain/responsemodifers/forward_response_headers.go @@ -24,7 +24,7 @@ func init() { // ForwardResponseHeaders forwards the response headers from the upstream server to the client func ForwardResponseHeaders() proxychain.ResponseModification { return func(chain *proxychain.ProxyChain) error { - //fmt.Println(chain.Response.Header) + // fmt.Println(chain.Response.Header) for uname, headers := range chain.Response.Header { name := strings.ToLower(uname) if forwardBlacklist[name] { diff 
--git a/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go b/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go index 5489a78..a5b2407 100644 --- a/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go +++ b/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go @@ -92,7 +92,7 @@ func NewHTMLTokenURLRewriter(baseURL *url.URL, proxyURL string) *HTMLTokenURLRew } func (r *HTMLTokenURLRewriter) ShouldModify(token *html.Token) bool { - //fmt.Printf("touch token: %s\n", token.String()) + // fmt.Printf("touch token: %s\n", token.String()) attrLen := len(token.Attr) if attrLen == 0 { return false @@ -225,7 +225,7 @@ func handleAbsolutePath(attr *html.Attribute, _ *url.URL) { } attr.Val = fmt.Sprintf("/%s", escape(strings.TrimPrefix(attr.Val, "/"))) - //attr.Val = fmt.Sprintf("/%s", escape(attr.Val)) + // attr.Val = fmt.Sprintf("/%s", escape(attr.Val)) log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val) } @@ -283,6 +283,6 @@ func handleSrcSet(attr *html.Attribute, baseURL *url.URL) { } func escape(str string) string { - //return str + // return str return strings.ReplaceAll(url.PathEscape(str), "%2F", "/") }