From 8862b7de8b44d5963dfd6f583077bbdc23314758 Mon Sep 17 00:00:00 2001 From: Damian Bednarczyk Date: Thu, 30 Nov 2023 16:05:14 -0600 Subject: [PATCH] rough draft of more modular design --- Makefile | 5 +- cmd/main.go | 3 +- go.mod | 1 + go.sum | 3 + handlers/proxy.go | 12 +- internal/helpers/bot.go | 119 ++++++++++++++++++ internal/helpers/googlebot.go | 84 ------------- proxychain/proxychain.go | 10 +- .../forward_request_headers.go | 2 +- .../masquerade_as_trusted_bot.go | 9 +- .../forward_response_headers.go | 2 +- proxychain/responsemodifers/outline.go | 2 +- .../rewriters/html_token_url_rewriter.go | 6 +- 13 files changed, 147 insertions(+), 111 deletions(-) create mode 100644 internal/helpers/bot.go delete mode 100644 internal/helpers/googlebot.go diff --git a/Makefile b/Makefile index 98f3097..c0ecca8 100644 --- a/Makefile +++ b/Makefile @@ -7,4 +7,7 @@ lint: install-linters: go install mvdan.cc/gofumpt@latest - go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.55.2 \ No newline at end of file + go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.55.2 + +run: + go run ./cmd/. 
\ No newline at end of file diff --git a/cmd/main.go b/cmd/main.go index 30e1432..a9fff89 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -83,8 +83,7 @@ func main() { } if *randomGooglebot { - err := helpers.UpdateGooglebotIPs() - + err := helpers.GlobalGoogleBot.UpdatePool() if err != nil { fmt.Println("error while retrieving list of Googlebot IPs: " + err.Error()) fmt.Println("defaulting to known trusted Googlebot identity") diff --git a/go.mod b/go.mod index d751fb8..aac4eee 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module ladder go 1.21.1 require ( + github.com/3th1nk/cidr v0.2.0 github.com/akamensky/argparse v1.4.0 github.com/bogdanfinn/fhttp v0.5.24 github.com/bogdanfinn/tls-client v1.6.1 diff --git a/go.sum b/go.sum index 72701cf..87075ad 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/3th1nk/cidr v0.2.0 h1:81jjEknszD8SHPLVTPPk+BZjNVqq1ND2YXLSChl6Lrs= +github.com/3th1nk/cidr v0.2.0/go.mod h1:XsSQnS4rEYyB2veDfnIGgViulFpIITPKtp3f0VxpiLw= github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EPxJRG4= github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc= github.com/akamensky/argparse v1.4.0 h1:YGzvsTqCvbEZhL8zZu2AiA5nq805NZh75JNj4ajn1xc= @@ -76,6 +78,7 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/tam7t/hpkp v0.0.0-20160821193359-2b70b4024ed5 h1:YqAladjX7xpA6BM04leXMWAEjS0mTZ5kUU9KRBriQJc= diff --git 
a/handlers/proxy.go b/handlers/proxy.go index 8682419..07165ac 100644 --- a/handlers/proxy.go +++ b/handlers/proxy.go @@ -31,23 +31,23 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler { SetFiberCtx(c). SetDebugLogging(opts.Verbose). SetRequestModifications( - //rx.SpoofJA3fingerprint(ja3, "Googlebot"), - //rx.MasqueradeAsFacebookBot(), + // rx.SpoofJA3fingerprint(ja3, "Googlebot"), + // rx.MasqueradeAsFacebookBot(), rx.MasqueradeAsGoogleBot(), rx.DeleteOutgoingCookies(), rx.ForwardRequestHeaders(), rx.SpoofReferrerFromGoogleSearch(), - //rx.RequestWaybackMachine(), - //rx.RequestArchiveIs(), + // rx.RequestWaybackMachine(), + // rx.RequestArchiveIs(), ). AddResponseModifications( tx.ForwardResponseHeaders(), tx.BypassCORS(), tx.BypassContentSecurityPolicy(), - //tx.DeleteIncomingCookies(), + // tx.DeleteIncomingCookies(), tx.RewriteHTMLResourceURLs(), tx.PatchDynamicResourceURLs(), - //tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"), + // tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"), ). 
Execute() diff --git a/internal/helpers/bot.go b/internal/helpers/bot.go new file mode 100644 index 0000000..4e1632c --- /dev/null +++ b/internal/helpers/bot.go @@ -0,0 +1,119 @@ +package helpers + +import ( + "encoding/json" + "fmt" + "io" + "math/rand" + "net/http" + "time" + + "github.com/3th1nk/cidr" +) + +type Bot interface { + UpdatePool() error + GetRandomIdentity() string +} + +type GoogleBot struct { + UserAgent string + Fingerprint string + IPPool googleBotPool +} + +type googleBotPool struct { + Timestamp string `json:"creationTime"` + Prefixes []googleBotPrefix `json:"prefixes"` +} + +type googleBotPrefix struct { + IPv6 string `json:"ipv6Prefix,omitempty"` + IPv4 string `json:"ipv4Prefix,omitempty"` +} + +// const googleBotTimestampFormat string = "2006-01-02T15:04:05.999999" + +// TODO: move this thing's pointer around, not use it as a global variable +var GlobalGoogleBot = GoogleBot{ + UserAgent: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; http://www.google.com/bot.html) Chrome/79.0.3945.120 Safari/537.36", + + // https://github.com/trisulnsm/trisul-scripts/blob/master/lua/frontend_scripts/reassembly/ja3/prints/ja3fingerprint.json + Fingerprint: "769,49195-49199-49196-49200-52393-52392-52244-52243-49161-49171-49162-49172-156-157-47-53-10,65281-0-23-35-13-5-18-16-11-10-21,29-23-24,0", + + IPPool: googleBotPool{ + Timestamp: "2023-11-28T23:00:56.000000", + Prefixes: []googleBotPrefix{ + { + IPv4: "34.100.182.96/28", + }, + }, + }, +} + +func (bot *GoogleBot) UpdatePool() error { + client := &http.Client{Timeout: 10 * time.Second} + + resp, err := client.Get("https://developers.google.com/static/search/apis/ipranges/googlebot.json") + if err != nil { + return err + } + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("failed to update googlebot IP pool: status code %s", resp.Status) + } + + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + + err =
json.Unmarshal(body, &bot.IPPool) + + return err +} + +func (bot *GoogleBot) GetRandomIP() string { + count := len(bot.IPPool.Prefixes) + + var prefix googleBotPrefix + + if count == 1 { + prefix = bot.IPPool.Prefixes[0] + } else { + idx := rand.Intn(count) + prefix = bot.IPPool.Prefixes[idx] + } + + if prefix.IPv4 != "" { + ip, err := randomIPFromSubnet(prefix.IPv4) + if err == nil { + return ip + } + } + + if prefix.IPv6 != "" { + ip, err := randomIPFromSubnet(prefix.IPv6) + if err == nil { + return ip + } + } + + // fallback to default IP which is known to work + ip, _ := randomIPFromSubnet(bot.IPPool.Prefixes[0].IPv4) + + return ip +} + +func randomIPFromSubnet(c string) (string, error) { + block, err := cidr.Parse(c) + if err != nil { + return "", err + } + + // TODO: the beginning of the network is technically a viable IP to use + // but maybe a different solution would be better here + return block.Network().String(), nil +} diff --git a/internal/helpers/googlebot.go b/internal/helpers/googlebot.go deleted file mode 100644 index 58b09a2..0000000 --- a/internal/helpers/googlebot.go +++ /dev/null @@ -1,84 +0,0 @@ -package helpers - -import ( - "encoding/json" - "errors" - "io" - "math/rand" - "net/http" - "strings" - "time" -) - -type googlebotResp struct { - Timestamp time.Time - IPs []string -} - -var GooglebotIPs = googlebotResp{ - IPs: []string{"34.165.18.176"}, -} - -const timeFormat string = "2006-01-02T15:04:05.999999" - -func UpdateGooglebotIPs() error { - resp, err := http.Get("https://developers.google.com/static/search/apis/ipranges/googlebot.json") - if err != nil { - return err - } - - if resp.StatusCode != http.StatusOK { - return errors.New("non-200 status code recieved") - } - - defer resp.Body.Close() - - body, err := io.ReadAll(resp.Body) - if err != nil { - return err - } - - j := map[string]any{} - json.Unmarshal(body, &j) - - timestamp, err := time.Parse(timeFormat, j["creationTime"].(string)) - if err != nil { - return err - } - - 
prefixes := j["prefixes"].([]any) - - ips := make([]string, 0, 127) - - for _, prefix := range prefixes { - p := prefix.(map[string]any) - - if val, exists := p["ipv4Prefix"]; exists { - v := val.(string) - - v = strings.ReplaceAll(v, "/27", "") - v = strings.ReplaceAll(v, "/28", "") - - ips = append(ips, v) - } - - } - - GooglebotIPs = googlebotResp{ - Timestamp: timestamp, - IPs: ips, - } - - return nil -} - -func RandomGooglebotIP() string { - count := len(GooglebotIPs.IPs) - idx := 0 - - if count != 1 { - idx = rand.Intn(count) - } - - return GooglebotIPs.IPs[idx] -} diff --git a/proxychain/proxychain.go b/proxychain/proxychain.go index afc2e53..5085914 100644 --- a/proxychain/proxychain.go +++ b/proxychain/proxychain.go @@ -392,7 +392,7 @@ func (chain *ProxyChain) _reset() { chain.Context = nil chain.onceResponseModifications = []ResponseModification{} chain.onceRequestModifications = []RequestModification{} - //chain.onceClient = nil + // chain.onceClient = nil } // NewProxyChain initializes a new ProxyChain @@ -402,9 +402,9 @@ func NewProxyChain() *ProxyChain { options := []tls_client.HttpClientOption{ tls_client.WithTimeoutSeconds(20), tls_client.WithRandomTLSExtensionOrder(), - //tls_client.WithClientProfile(profiles.Chrome_117), - //tls_client.WithNotFollowRedirects(), - //tls_client.WithCookieJar(jar), // create cookieJar instance and pass it as argument + // tls_client.WithClientProfile(profiles.Chrome_117), + // tls_client.WithNotFollowRedirects(), + // tls_client.WithCookieJar(jar), // create cookieJar instance and pass it as argument } client, err := tls_client.NewHttpClient(tls_client.NewNoopLogger(), options...) 
if err != nil { @@ -460,7 +460,7 @@ func (chain *ProxyChain) _execute() (io.Reader, error) { return nil, chain.abort(err) } chain.Response = resp - //chain.onceClient = nil + // chain.onceClient = nil } else { resp, err := chain.Client.Do(chain.Request) if err != nil { diff --git a/proxychain/requestmodifers/forward_request_headers.go b/proxychain/requestmodifers/forward_request_headers.go index c329564..7e139dd 100644 --- a/proxychain/requestmodifers/forward_request_headers.go +++ b/proxychain/requestmodifers/forward_request_headers.go @@ -33,7 +33,7 @@ func ForwardRequestHeaders() proxychain.RequestModification { if forwardBlacklist[k] { return } - //fmt.Println(k, v) + // fmt.Println(k, v) chain.Request.Header.Set(k, v) } diff --git a/proxychain/requestmodifers/masquerade_as_trusted_bot.go b/proxychain/requestmodifers/masquerade_as_trusted_bot.go index 4d4d81a..658543b 100644 --- a/proxychain/requestmodifers/masquerade_as_trusted_bot.go +++ b/proxychain/requestmodifers/masquerade_as_trusted_bot.go @@ -8,14 +8,9 @@ import ( // MasqueradeAsGoogleBot modifies user agent and x-forwarded for // to appear to be a Google Bot func MasqueradeAsGoogleBot() proxychain.RequestModification { - const botUA string = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; http://www.google.com/bot.html) Chrome/79.0.3945.120 Safari/537.36" - var botIP string = helpers.RandomGooglebotIP() + ip := helpers.GlobalGoogleBot.GetRandomIP() - // https://github.com/trisulnsm/trisul-scripts/blob/master/lua/frontend_scripts/reassembly/ja3/prints/ja3fingerprint.json - const ja3 string = "769,49195-49199-49196-49200-52393-52392-52244-52243-49161-49171-49162-49172-156-157-47-53-10,65281-0-23-35-13-5-18-16-11-10-21,29-23-24,0" - // "741,49195-49199-49200-49161-49171-49162-49172-156-157-47-10-53-51-57,65281-0-23-35-13-13172-11-10,29-23-24,0" - - return masqueradeAsTrustedBot(botUA, botIP, ja3) + return masqueradeAsTrustedBot(helpers.GlobalGoogleBot.UserAgent, ip, 
helpers.GlobalGoogleBot.Fingerprint) } // MasqueradeAsBingBot modifies user agent and x-forwarded for diff --git a/proxychain/responsemodifers/forward_response_headers.go b/proxychain/responsemodifers/forward_response_headers.go index a143d39..b6d0519 100644 --- a/proxychain/responsemodifers/forward_response_headers.go +++ b/proxychain/responsemodifers/forward_response_headers.go @@ -24,7 +24,7 @@ func init() { // ForwardResponseHeaders forwards the response headers from the upstream server to the client func ForwardResponseHeaders() proxychain.ResponseModification { return func(chain *proxychain.ProxyChain) error { - //fmt.Println(chain.Response.Header) + // fmt.Println(chain.Response.Header) for uname, headers := range chain.Response.Header { name := strings.ToLower(uname) if forwardBlacklist[name] { diff --git a/proxychain/responsemodifers/outline.go b/proxychain/responsemodifers/outline.go index be1f6cb..a8e4dbd 100644 --- a/proxychain/responsemodifers/outline.go +++ b/proxychain/responsemodifers/outline.go @@ -25,7 +25,7 @@ func APIOutline() proxychain.ResponseModification { opts := trafilatura.Options{ IncludeImages: true, IncludeLinks: true, - //FavorPrecision: true, + // FavorPrecision: true, FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go // implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability" OriginalURL: chain.Request.URL, diff --git a/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go b/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go index 5489a78..a5b2407 100644 --- a/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go +++ b/proxychain/responsemodifers/rewriters/html_token_url_rewriter.go @@ -92,7 +92,7 @@ func NewHTMLTokenURLRewriter(baseURL *url.URL, proxyURL string) *HTMLTokenURLRew } func (r *HTMLTokenURLRewriter) ShouldModify(token *html.Token) bool { - //fmt.Printf("touch token: 
%s\n", token.String()) + // fmt.Printf("touch token: %s\n", token.String()) attrLen := len(token.Attr) if attrLen == 0 { return false @@ -225,7 +225,7 @@ func handleAbsolutePath(attr *html.Attribute, _ *url.URL) { } attr.Val = fmt.Sprintf("/%s", escape(strings.TrimPrefix(attr.Val, "/"))) - //attr.Val = fmt.Sprintf("/%s", escape(attr.Val)) + // attr.Val = fmt.Sprintf("/%s", escape(attr.Val)) log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val) } @@ -283,6 +283,6 @@ func handleSrcSet(attr *html.Attribute, baseURL *url.URL) { } func escape(str string) string { - //return str + // return str return strings.ReplaceAll(url.PathEscape(str), "%2F", "/") }