Merge pull request #53 from dxbednarczyk/origin/proxy_v2

Add option to use random IPs for trusted bots
This commit is contained in:
Kevin Pham
2023-12-01 13:01:57 -06:00
committed by GitHub
12 changed files with 200 additions and 28 deletions

View File

@@ -8,3 +8,6 @@ lint:
install-linters:
go install mvdan.cc/gofumpt@latest
go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.55.2
run:
go run ./cmd/.

View File

@@ -10,6 +10,7 @@ import (
"ladder/handlers"
"ladder/internal/cli"
"ladder/proxychain/requestmodifers/bot"
"github.com/akamensky/argparse"
"github.com/gofiber/fiber/v2"
@@ -54,6 +55,16 @@ func main() {
Help: "Adds verbose logging",
})
randomGoogleBot := parser.Flag("", "random-googlebot", &argparse.Options{
Required: false,
Help: "Update the list of trusted Googlebot IPs, and use a random one for each masqueraded request",
})
randomBingBot := parser.Flag("", "random-bingbot", &argparse.Options{
Required: false,
Help: "Update the list of trusted Bingbot IPs, and use a random one for each masqueraded request",
})
// TODO: add version flag that reads from handers/VERSION
ruleset := parser.String("r", "ruleset", &argparse.Options{
@@ -81,6 +92,22 @@ func main() {
fmt.Print(parser.Usage(err))
}
if *randomGoogleBot {
err := bot.GoogleBot.UpdatePool("https://developers.google.com/static/search/apis/ipranges/googlebot.json")
if err != nil {
fmt.Println("error while retrieving list of Googlebot IPs: " + err.Error())
fmt.Println("defaulting to known trusted Googlebot identity")
}
}
if *randomBingBot {
err := bot.GoogleBot.UpdatePool("https://www.bing.com/toolbox/bingbot.json")
if err != nil {
fmt.Println("error while retrieving list of Bingbot IPs: " + err.Error())
fmt.Println("defaulting to known trusted Bingbot identity")
}
}
// utility cli flag to compile ruleset directory into single ruleset.yaml
if *mergeRulesets || *mergeRulesetsGzip {
output := os.Stdout

1
go.mod
View File

@@ -3,6 +3,7 @@ module ladder
go 1.21.1
require (
github.com/3th1nk/cidr v0.2.0
github.com/akamensky/argparse v1.4.0
github.com/bogdanfinn/fhttp v0.5.24
github.com/bogdanfinn/tls-client v1.6.1

3
go.sum
View File

@@ -1,3 +1,5 @@
github.com/3th1nk/cidr v0.2.0 h1:81jjEknszD8SHPLVTPPk+BZjNVqq1ND2YXLSChl6Lrs=
github.com/3th1nk/cidr v0.2.0/go.mod h1:XsSQnS4rEYyB2veDfnIGgViulFpIITPKtp3f0VxpiLw=
github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EPxJRG4=
github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc=
github.com/akamensky/argparse v1.4.0 h1:YGzvsTqCvbEZhL8zZu2AiA5nq805NZh75JNj4ajn1xc=
@@ -82,6 +84,7 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tam7t/hpkp v0.0.0-20160821193359-2b70b4024ed5 h1:YqAladjX7xpA6BM04leXMWAEjS0mTZ5kUU9KRBriQJc=

View File

@@ -0,0 +1,129 @@
package bot
import (
"encoding/json"
"fmt"
"io"
"math/rand"
"net/http"
"time"
"github.com/3th1nk/cidr"
)
type Bot interface {
UpdatePool() error
GetRandomIdentity() string
}
type bot struct {
UserAgent string
Fingerprint string
IPPool botPool
}
type botPool struct {
Timestamp string `json:"creationTime"`
Prefixes []botPrefix `json:"prefixes"`
}
type botPrefix struct {
IPv6 string `json:"ipv6Prefix,omitempty"`
IPv4 string `json:"ipv4Prefix,omitempty"`
}
// TODO: move pointers around, not global variables
var GoogleBot = bot{
UserAgent: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; http://www.google.com/bot.html) Chrome/79.0.3945.120 Safari/537.36",
// https://github.com/trisulnsm/trisul-scripts/blob/master/lua/frontend_scripts/reassembly/ja3/prints/ja3fingerprint.json
Fingerprint: "769,49195-49199-49196-49200-52393-52392-52244-52243-49161-49171-49162-49172-156-157-47-53-10,65281-0-23-35-13-5-18-16-11-10-21,29-23-24,0",
IPPool: botPool{
Timestamp: "2023-11-28T23:00:56.000000",
Prefixes: []botPrefix{
{
IPv4: "34.100.182.96/28",
},
},
},
}
var BingBot = bot{
UserAgent: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/79.0.3945.120 Safari/537.36",
IPPool: botPool{
Timestamp: "2023-03-08T10:00:00.121331",
Prefixes: []botPrefix{
{
IPv4: "207.46.13.0/24",
},
},
},
}
func (b *bot) UpdatePool(url string) error {
client := &http.Client{Timeout: 10 * time.Second}
resp, err := client.Get(url)
if err != nil {
return err
}
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("failed to update googlebot IP pool: status code %s", resp.Status)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return err
}
err = json.Unmarshal(body, &b.IPPool)
return err
}
func (b *bot) GetRandomIP() string {
count := len(b.IPPool.Prefixes)
var prefix botPrefix
if count == 1 {
prefix = b.IPPool.Prefixes[0]
} else {
idx := rand.Intn(count)
prefix = b.IPPool.Prefixes[idx]
}
if prefix.IPv4 != "" {
ip, err := randomIPFromSubnet(prefix.IPv4)
if err == nil {
return ip
}
}
if prefix.IPv6 != "" {
ip, err := randomIPFromSubnet(prefix.IPv6)
if err == nil {
return ip
}
}
// fallback to default IP which is known to work
ip, _ := randomIPFromSubnet(b.IPPool.Prefixes[0].IPv4)
return ip
}
func randomIPFromSubnet(c string) (string, error) {
block, err := cidr.Parse(c)
if err != nil {
return "", err
}
// TODO: the beginning of the network is technically a viable IP to use
// but maybe a different solution would be better here
return block.Network().String(), nil
}

View File

@@ -2,26 +2,23 @@ package requestmodifers
import (
"ladder/proxychain"
"ladder/proxychain/requestmodifers/bot"
)
// MasqueradeAsGoogleBot modifies user agent and x-forwarded for
// to appear to be a Google Bot
func MasqueradeAsGoogleBot() proxychain.RequestModification {
const botUA string = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; http://www.google.com/bot.html) Chrome/79.0.3945.120 Safari/537.36"
const botIP string = "66.249.78.8" // TODO: create a random ip pool from https://developers.google.com/static/search/apis/ipranges/googlebot.json
// https://github.com/trisulnsm/trisul-scripts/blob/master/lua/frontend_scripts/reassembly/ja3/prints/ja3fingerprint.json
const ja3 string = "769,49195-49199-49196-49200-52393-52392-52244-52243-49161-49171-49162-49172-156-157-47-53-10,65281-0-23-35-13-5-18-16-11-10-21,29-23-24,0"
// "741,49195-49199-49200-49161-49171-49162-49172-156-157-47-10-53-51-57,65281-0-23-35-13-13172-11-10,29-23-24,0"
ip := bot.GoogleBot.GetRandomIP()
return masqueradeAsTrustedBot(botUA, botIP, ja3)
return masqueradeAsTrustedBot(bot.GoogleBot.UserAgent, ip, bot.GoogleBot.Fingerprint)
}
// MasqueradeAsBingBot modifies user agent and x-forwarded for
// to appear to be a Bing Bot
func MasqueradeAsBingBot() proxychain.RequestModification {
const botUA string = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/79.0.3945.120 Safari/537.36"
const botIP string = "13.66.144.9" // https://www.bing.com/toolbox/bingbot.json
return masqueradeAsTrustedBot(botUA, botIP, "")
ip := bot.BingBot.GetRandomIP()
return masqueradeAsTrustedBot(bot.BingBot.Fingerprint, ip, "")
}
// MasqueradeAsWaybackMachineBot modifies user agent and x-forwarded for
@@ -84,7 +81,19 @@ func masqueradeAsTrustedBot(botUA string, botIP string, ja3 string) proxychain.R
return func(chain *proxychain.ProxyChain) error {
chain.AddOnceRequestModifications(
SpoofUserAgent(botUA),
SetRequestHeader("x-forwarded-for", botIP),
// general / nginx
SetRequestHeader("X-Forwarded-For", botIP),
SetRequestHeader("X-Real-IP", botIP),
// akamai
SetRequestHeader("True-Client-IP", botIP),
// cloudflare
SetRequestHeader("CF-Connecting-IP", botIP),
// weblogic
SetRequestHeader("WL-Proxy-Client-IP", botIP),
// azure
SetRequestHeader("X-Cluster-Client-IP", botIP),
DeleteRequestHeader("referrer"),
DeleteRequestHeader("origin"),
)