rough draft of more modular design

This commit is contained in:
Damian Bednarczyk
2023-11-30 16:05:14 -06:00
parent dbaf1029c5
commit 8862b7de8b
13 changed files with 147 additions and 111 deletions

View File

@@ -8,3 +8,6 @@ lint:
install-linters:
go install mvdan.cc/gofumpt@latest
go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.55.2
run:
go run ./cmd/.

View File

@@ -83,8 +83,7 @@ func main() {
}
if *randomGooglebot {
err := helpers.UpdateGooglebotIPs()
err := helpers.GlobalGoogleBot.UpdatePool()
if err != nil {
fmt.Println("error while retrieving list of Googlebot IPs: " + err.Error())
fmt.Println("defaulting to known trusted Googlebot identity")

1
go.mod
View File

@@ -3,6 +3,7 @@ module ladder
go 1.21.1
require (
github.com/3th1nk/cidr v0.2.0
github.com/akamensky/argparse v1.4.0
github.com/bogdanfinn/fhttp v0.5.24
github.com/bogdanfinn/tls-client v1.6.1

3
go.sum
View File

@@ -1,3 +1,5 @@
github.com/3th1nk/cidr v0.2.0 h1:81jjEknszD8SHPLVTPPk+BZjNVqq1ND2YXLSChl6Lrs=
github.com/3th1nk/cidr v0.2.0/go.mod h1:XsSQnS4rEYyB2veDfnIGgViulFpIITPKtp3f0VxpiLw=
github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EPxJRG4=
github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc=
github.com/akamensky/argparse v1.4.0 h1:YGzvsTqCvbEZhL8zZu2AiA5nq805NZh75JNj4ajn1xc=
@@ -76,6 +78,7 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tam7t/hpkp v0.0.0-20160821193359-2b70b4024ed5 h1:YqAladjX7xpA6BM04leXMWAEjS0mTZ5kUU9KRBriQJc=

View File

@@ -31,23 +31,23 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
SetFiberCtx(c).
SetDebugLogging(opts.Verbose).
SetRequestModifications(
//rx.SpoofJA3fingerprint(ja3, "Googlebot"),
//rx.MasqueradeAsFacebookBot(),
// rx.SpoofJA3fingerprint(ja3, "Googlebot"),
// rx.MasqueradeAsFacebookBot(),
rx.MasqueradeAsGoogleBot(),
rx.DeleteOutgoingCookies(),
rx.ForwardRequestHeaders(),
rx.SpoofReferrerFromGoogleSearch(),
//rx.RequestWaybackMachine(),
//rx.RequestArchiveIs(),
// rx.RequestWaybackMachine(),
// rx.RequestArchiveIs(),
).
AddResponseModifications(
tx.ForwardResponseHeaders(),
tx.BypassCORS(),
tx.BypassContentSecurityPolicy(),
//tx.DeleteIncomingCookies(),
// tx.DeleteIncomingCookies(),
tx.RewriteHTMLResourceURLs(),
tx.PatchDynamicResourceURLs(),
//tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"),
// tx.SetContentSecurityPolicy("default-src * 'unsafe-inline' 'unsafe-eval' data: blob:;"),
).
Execute()

119
internal/helpers/bot.go Normal file
View File

@@ -0,0 +1,119 @@
package helpers
import (
"encoding/json"
"fmt"
"io"
"math/rand"
"net/http"
"time"
"github.com/3th1nk/cidr"
)
// Bot is the method set expected from a crawler-identity helper.
//
// NOTE(review): GoogleBot in this file defines GetRandomIP, not
// GetRandomIdentity, so it does not satisfy this interface as written —
// confirm which name is intended.
type Bot interface {
// UpdatePool returns a non-nil error on failure.
UpdatePool() error
// GetRandomIdentity returns a string; presumably an IP address to present
// upstream — TODO confirm once an implementation exists.
GetRandomIdentity() string
}
// GoogleBot groups the values used to present requests as Googlebot.
type GoogleBot struct {
// UserAgent is the User-Agent string to send.
UserAgent string
// Fingerprint is a fingerprint string; the value set on GlobalGoogleBot is
// taken from a JA3 fingerprint list.
Fingerprint string
// IPPool holds the IP prefixes that GetRandomIP selects from and that
// UpdatePool overwrites.
IPPool googleBotPool
}
// googleBotPool is the decode target for the JSON document fetched by
// UpdatePool.
type googleBotPool struct {
// Timestamp is the document's "creationTime" value, kept as the raw string.
Timestamp string `json:"creationTime"`
// Prefixes is the document's "prefixes" array.
Prefixes []googleBotPrefix `json:"prefixes"`
}
// googleBotPrefix is one entry of googleBotPool.Prefixes. GetRandomIP treats
// an empty field as absent.
type googleBotPrefix struct {
// IPv6 is the entry's "ipv6Prefix" value, a CIDR string.
IPv6 string `json:"ipv6Prefix,omitempty"`
// IPv4 is the entry's "ipv4Prefix" value, a CIDR string.
IPv4 string `json:"ipv4Prefix,omitempty"`
}
// const googleBotTimestampFormat string = "2006-01-02T15:04:05.999999"
// TODO: pass a pointer to this value around instead of using it as a global variable
// GlobalGoogleBot is the package-level GoogleBot value. It starts out with a
// fixed user agent, a fixed fingerprint and a pool containing a single IPv4
// prefix, so it can be used before UpdatePool has been called.
var GlobalGoogleBot = GoogleBot{
UserAgent: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; http://www.google.com/bot.html) Chrome/79.0.3945.120 Safari/537.36",
// https://github.com/trisulnsm/trisul-scripts/blob/master/lua/frontend_scripts/reassembly/ja3/prints/ja3fingerprint.json
Fingerprint: "769,49195-49199-49196-49200-52393-52392-52244-52243-49161-49171-49162-49172-156-157-47-53-10,65281-0-23-35-13-5-18-16-11-10-21,29-23-24,0",
// Default pool used until UpdatePool replaces it.
IPPool: googleBotPool{
Timestamp: "2023-11-28T23:00:56.000000",
Prefixes: []googleBotPrefix{
{
IPv4: "34.100.182.96/28",
},
},
},
}
// UpdatePool downloads Google's published Googlebot IP ranges and replaces
// bot.IPPool with them.
//
// The pool is replaced only when the request succeeds with HTTP 200, the body
// decodes as JSON, and the decoded document contains at least one prefix. On
// any failure the previous pool is left untouched and a non-nil error is
// returned.
func (bot *GoogleBot) UpdatePool() error {
	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Get("https://developers.google.com/static/search/apis/ipranges/googlebot.json")
	if err != nil {
		return err
	}
	// Register the close before any early return so the non-200 path does not
	// leak the response body.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("failed to update googlebot IP pool: status code %s", resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return err
	}

	// Decode into a scratch value so a malformed payload cannot leave
	// bot.IPPool partially overwritten.
	var pool googleBotPool
	if err := json.Unmarshal(body, &pool); err != nil {
		return err
	}

	// GetRandomIP indexes into Prefixes, so never install an empty list.
	if len(pool.Prefixes) == 0 {
		return fmt.Errorf("failed to update googlebot IP pool: response contained no prefixes")
	}

	bot.IPPool = pool
	return nil
}
// GetRandomIP picks a random prefix from bot.IPPool and returns an address
// derived from it via randomIPFromSubnet, trying the prefix's IPv4 range
// first and its IPv6 range second.
//
// If the pool is empty, or neither range of the chosen prefix can be parsed,
// it falls back to a fixed Googlebot range instead of panicking or returning
// an empty string.
func (bot *GoogleBot) GetRandomIP() string {
	// Same range as the default pool entry in GlobalGoogleBot. The fallback
	// must not depend on Prefixes[0], which UpdatePool may have replaced with
	// an IPv6-only entry.
	const fallbackPrefix = "34.100.182.96/28"

	// rand.Intn panics for n <= 0, so only sample from a non-empty pool.
	if count := len(bot.IPPool.Prefixes); count > 0 {
		prefix := bot.IPPool.Prefixes[rand.Intn(count)]
		for _, subnet := range []string{prefix.IPv4, prefix.IPv6} {
			if subnet == "" {
				continue
			}
			if ip, err := randomIPFromSubnet(subnet); err == nil {
				return ip
			}
		}
	}

	// The error is ignored deliberately: fallbackPrefix is a constant,
	// well-formed CIDR literal.
	ip, _ := randomIPFromSubnet(fallbackPrefix)
	return ip
}
// randomIPFromSubnet parses the CIDR string c and returns the string form of
// the block's Network() address. A parse failure is returned unchanged with
// an empty string.
//
// TODO: despite the name, no randomness is involved yet. The first address of
// the network is technically usable, but a different selection strategy may
// be better here.
func randomIPFromSubnet(c string) (string, error) {
	parsed, parseErr := cidr.Parse(c)
	if parseErr == nil {
		return parsed.Network().String(), nil
	}
	return "", parseErr
}

View File

@@ -1,84 +0,0 @@
package helpers
import (
"encoding/json"
"errors"
"io"
"math/rand"
"net/http"
"strings"
"time"
)
// googlebotResp is the stored result of UpdateGooglebotIPs.
type googlebotResp struct {
// Timestamp is the parsed "creationTime" value of the fetched document.
Timestamp time.Time
// IPs holds the IPv4 prefixes from the document with their "/27" or "/28"
// suffix removed.
IPs []string
}
// GooglebotIPs is the package-level list read by RandomGooglebotIP and
// replaced by UpdateGooglebotIPs. It starts with a single address so it is
// usable before any update has run.
var GooglebotIPs = googlebotResp{
IPs: []string{"34.165.18.176"},
}
// timeFormat is the time.Parse layout applied to the document's
// "creationTime" value.
const timeFormat string = "2006-01-02T15:04:05.999999"
// UpdateGooglebotIPs downloads Google's published Googlebot IP ranges and
// replaces GooglebotIPs with the IPv4 base addresses found in them.
//
// GooglebotIPs is replaced only when the request returns HTTP 200, the body
// decodes as JSON, the creation timestamp parses with timeFormat, and at
// least one IPv4 prefix is present. On any failure the previous value is kept
// and a non-nil error is returned.
func UpdateGooglebotIPs() error {
	// Bound the request so a stalled connection cannot block the caller.
	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Get("https://developers.google.com/static/search/apis/ipranges/googlebot.json")
	if err != nil {
		return err
	}
	// Register the close before the status check so the non-200 path does not
	// leak the response body.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return errors.New("non-200 status code recieved")
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return err
	}

	// Decode into a typed value: malformed or unexpected JSON becomes an error
	// instead of a panic from an unchecked type assertion.
	var payload struct {
		CreationTime string `json:"creationTime"`
		Prefixes     []struct {
			IPv4 string `json:"ipv4Prefix"`
		} `json:"prefixes"`
	}
	if err := json.Unmarshal(body, &payload); err != nil {
		return err
	}

	timestamp, err := time.Parse(timeFormat, payload.CreationTime)
	if err != nil {
		return err
	}

	ips := make([]string, 0, len(payload.Prefixes))
	for _, prefix := range payload.Prefixes {
		if prefix.IPv4 == "" {
			// Entry carries no IPv4 range (e.g. IPv6-only).
			continue
		}
		// Drop the mask whatever its length, not only "/27" and "/28".
		addr, _, _ := strings.Cut(prefix.IPv4, "/")
		ips = append(ips, addr)
	}

	// RandomGooglebotIP indexes into IPs, so never install an empty list.
	if len(ips) == 0 {
		return errors.New("no IPv4 prefixes in googlebot response")
	}

	GooglebotIPs = googlebotResp{
		Timestamp: timestamp,
		IPs:       ips,
	}
	return nil
}
// RandomGooglebotIP returns a randomly chosen entry of GooglebotIPs.IPs.
//
// If the list is empty it returns a fixed fallback address rather than
// panicking.
func RandomGooglebotIP() string {
	// Same address as the initial GooglebotIPs entry.
	const fallbackIP = "34.165.18.176"

	ips := GooglebotIPs.IPs
	// rand.Intn panics for n <= 0, so an empty list needs its own path.
	if len(ips) == 0 {
		return fallbackIP
	}
	return ips[rand.Intn(len(ips))]
}

View File

@@ -392,7 +392,7 @@ func (chain *ProxyChain) _reset() {
chain.Context = nil
chain.onceResponseModifications = []ResponseModification{}
chain.onceRequestModifications = []RequestModification{}
//chain.onceClient = nil
// chain.onceClient = nil
}
// NewProxyChain initializes a new ProxyChain
@@ -402,9 +402,9 @@ func NewProxyChain() *ProxyChain {
options := []tls_client.HttpClientOption{
tls_client.WithTimeoutSeconds(20),
tls_client.WithRandomTLSExtensionOrder(),
//tls_client.WithClientProfile(profiles.Chrome_117),
//tls_client.WithNotFollowRedirects(),
//tls_client.WithCookieJar(jar), // create cookieJar instance and pass it as argument
// tls_client.WithClientProfile(profiles.Chrome_117),
// tls_client.WithNotFollowRedirects(),
// tls_client.WithCookieJar(jar), // create cookieJar instance and pass it as argument
}
client, err := tls_client.NewHttpClient(tls_client.NewNoopLogger(), options...)
if err != nil {
@@ -460,7 +460,7 @@ func (chain *ProxyChain) _execute() (io.Reader, error) {
return nil, chain.abort(err)
}
chain.Response = resp
//chain.onceClient = nil
// chain.onceClient = nil
} else {
resp, err := chain.Client.Do(chain.Request)
if err != nil {

View File

@@ -33,7 +33,7 @@ func ForwardRequestHeaders() proxychain.RequestModification {
if forwardBlacklist[k] {
return
}
//fmt.Println(k, v)
// fmt.Println(k, v)
chain.Request.Header.Set(k, v)
}

View File

@@ -8,14 +8,9 @@ import (
// MasqueradeAsGoogleBot modifies user agent and x-forwarded for
// to appear to be a Google Bot
func MasqueradeAsGoogleBot() proxychain.RequestModification {
const botUA string = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; http://www.google.com/bot.html) Chrome/79.0.3945.120 Safari/537.36"
var botIP string = helpers.RandomGooglebotIP()
ip := helpers.GlobalGoogleBot.GetRandomIP()
// https://github.com/trisulnsm/trisul-scripts/blob/master/lua/frontend_scripts/reassembly/ja3/prints/ja3fingerprint.json
const ja3 string = "769,49195-49199-49196-49200-52393-52392-52244-52243-49161-49171-49162-49172-156-157-47-53-10,65281-0-23-35-13-5-18-16-11-10-21,29-23-24,0"
// "741,49195-49199-49200-49161-49171-49162-49172-156-157-47-10-53-51-57,65281-0-23-35-13-13172-11-10,29-23-24,0"
return masqueradeAsTrustedBot(botUA, botIP, ja3)
return masqueradeAsTrustedBot(helpers.GlobalGoogleBot.UserAgent, ip, helpers.GlobalGoogleBot.Fingerprint)
}
// MasqueradeAsBingBot modifies user agent and x-forwarded for

View File

@@ -24,7 +24,7 @@ func init() {
// ForwardResponseHeaders forwards the response headers from the upstream server to the client
func ForwardResponseHeaders() proxychain.ResponseModification {
return func(chain *proxychain.ProxyChain) error {
//fmt.Println(chain.Response.Header)
// fmt.Println(chain.Response.Header)
for uname, headers := range chain.Response.Header {
name := strings.ToLower(uname)
if forwardBlacklist[name] {

View File

@@ -25,7 +25,7 @@ func APIOutline() proxychain.ResponseModification {
opts := trafilatura.Options{
IncludeImages: true,
IncludeLinks: true,
//FavorPrecision: true,
// FavorPrecision: true,
FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go
// implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability"
OriginalURL: chain.Request.URL,

View File

@@ -92,7 +92,7 @@ func NewHTMLTokenURLRewriter(baseURL *url.URL, proxyURL string) *HTMLTokenURLRew
}
func (r *HTMLTokenURLRewriter) ShouldModify(token *html.Token) bool {
//fmt.Printf("touch token: %s\n", token.String())
// fmt.Printf("touch token: %s\n", token.String())
attrLen := len(token.Attr)
if attrLen == 0 {
return false
@@ -225,7 +225,7 @@ func handleAbsolutePath(attr *html.Attribute, _ *url.URL) {
}
attr.Val = fmt.Sprintf("/%s", escape(strings.TrimPrefix(attr.Val, "/")))
//attr.Val = fmt.Sprintf("/%s", escape(attr.Val))
// attr.Val = fmt.Sprintf("/%s", escape(attr.Val))
log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
@@ -283,6 +283,6 @@ func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
}
// escape percent-encodes str with url.PathEscape and then turns every encoded
// slash ("%2F") back into a literal "/", so path separators survive.
func escape(str string) string {
	encoded := url.PathEscape(str)
	return strings.Replace(encoded, "%2F", "/", -1)
}