Compare commits
13 Commits
proxy_v2_f
...
ladder_tes
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb409f96d4 | ||
|
|
d71ebe5137 | ||
|
|
6c54d31086 | ||
|
|
5d55a2f3f0 | ||
|
|
7668713b1a | ||
|
|
bfd647e526 | ||
|
|
efa43a6f36 | ||
|
|
854dafbcfa | ||
|
|
a4e016b36c | ||
|
|
0e620e46ab | ||
|
|
0fc0942095 | ||
|
|
dab77d786f | ||
|
|
543192afbe |
@@ -86,8 +86,9 @@ func main() {
|
||||
|
||||
app := fiber.New(
|
||||
fiber.Config{
|
||||
Prefork: *prefork,
|
||||
GETOnly: true,
|
||||
Prefork: *prefork,
|
||||
GETOnly: false,
|
||||
ReadBufferSize: 4096 * 4, // increase max header size
|
||||
},
|
||||
)
|
||||
|
||||
@@ -138,5 +139,6 @@ func main() {
|
||||
}
|
||||
|
||||
app.Get("/*", handlers.NewProxySiteHandler(proxyOpts))
|
||||
app.Post("/*", handlers.NewProxySiteHandler(proxyOpts))
|
||||
log.Fatal(app.Listen(":" + *port))
|
||||
}
|
||||
|
||||
@@ -53,16 +53,23 @@ func NewProxySiteHandler(opts *ProxyOptions) fiber.Handler {
|
||||
return func(c *fiber.Ctx) error {
|
||||
proxychain := proxychain.
|
||||
NewProxyChain().
|
||||
SetFiberCtx(c).
|
||||
SetDebugLogging(opts.Verbose).
|
||||
SetRequestModifications(
|
||||
rx.DeleteOutgoingCookies(),
|
||||
//rx.RequestArchiveIs(),
|
||||
rx.MasqueradeAsGoogleBot(),
|
||||
).
|
||||
AddResponseModifications(
|
||||
tx.BypassCORS(),
|
||||
tx.BypassContentSecurityPolicy(),
|
||||
tx.DeleteIncomingCookies(),
|
||||
tx.RewriteHTMLResourceURLs(),
|
||||
)
|
||||
return proxychain.SetFiberCtx(c).Execute()
|
||||
tx.PatchDynamicResourceURLs(),
|
||||
).
|
||||
Execute()
|
||||
|
||||
return proxychain
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"ladder/pkg/ruleset"
|
||||
rr "ladder/proxychain/responsemodifers/rewriters"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
)
|
||||
@@ -35,6 +36,7 @@ import (
|
||||
|
||||
rx "ladder/pkg/proxychain/requestmodifers"
|
||||
tx "ladder/pkg/proxychain/responsemodifers"
|
||||
"ladder/pkg/proxychain/responsemodifers/rewriters"
|
||||
"ladder/internal/proxychain"
|
||||
|
||||
)
|
||||
@@ -87,6 +89,7 @@ type ProxyChain struct {
|
||||
Response *http.Response
|
||||
requestModifications []RequestModification
|
||||
resultModifications []ResponseModification
|
||||
htmlTokenRewriters []rr.IHTMLTokenRewriter
|
||||
Ruleset *ruleset.RuleSet
|
||||
debugMode bool
|
||||
abortErr error
|
||||
@@ -169,75 +172,6 @@ func (chain *ProxyChain) _initialize_request() (*http.Request, error) {
|
||||
return req, nil
|
||||
}
|
||||
|
||||
// _execute sends the request for the ProxyChain and returns the raw body only
|
||||
// the caller is responsible for returning a response back to the requestor
|
||||
// the caller is also responsible for calling chain._reset() when they are done with the body
|
||||
func (chain *ProxyChain) _execute() (io.Reader, error) {
|
||||
if chain.validateCtxIsSet() != nil || chain.abortErr != nil {
|
||||
return nil, chain.abortErr
|
||||
}
|
||||
if chain.Request == nil {
|
||||
return nil, errors.New("proxychain request not yet initialized")
|
||||
}
|
||||
if chain.Request.URL.Scheme == "" {
|
||||
return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues")
|
||||
}
|
||||
|
||||
// Apply requestModifications to proxychain
|
||||
for _, applyRequestModificationsTo := range chain.requestModifications {
|
||||
err := applyRequestModificationsTo(chain)
|
||||
if err != nil {
|
||||
return nil, chain.abort(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Send Request Upstream
|
||||
resp, err := chain.Client.Do(chain.Request)
|
||||
if err != nil {
|
||||
return nil, chain.abort(err)
|
||||
}
|
||||
chain.Response = resp
|
||||
|
||||
//defer resp.Body.Close()
|
||||
|
||||
/* todo: move to rsm
|
||||
for k, v := range resp.Header {
|
||||
chain.Context.Set(k, resp.Header.Get(k))
|
||||
}
|
||||
*/
|
||||
|
||||
// Apply ResponseModifiers to proxychain
|
||||
for _, applyResultModificationsTo := range chain.resultModifications {
|
||||
err := applyResultModificationsTo(chain)
|
||||
if err != nil {
|
||||
return nil, chain.abort(err)
|
||||
}
|
||||
}
|
||||
|
||||
return chain.Response.Body, nil
|
||||
}
|
||||
|
||||
// Execute sends the request for the ProxyChain and returns the request to the sender
|
||||
// and resets the fields so that the ProxyChain can be reused.
|
||||
// if any step in the ProxyChain fails, the request will abort and a 500 error will
|
||||
// be returned to the client
|
||||
func (chain *ProxyChain) Execute() error {
|
||||
defer chain._reset()
|
||||
body, err := chain._execute()
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return err
|
||||
}
|
||||
if chain.Context == nil {
|
||||
return errors.New("no context set")
|
||||
}
|
||||
// Return request back to client
|
||||
chain.Context.Set("content-type", chain.Response.Header.Get("content-type"))
|
||||
return chain.Context.SendStream(body)
|
||||
|
||||
//return chain.Context.SendStream(body)
|
||||
}
|
||||
|
||||
// reconstructUrlFromReferer reconstructs the URL using the referer's scheme, host, and the relative path / queries
|
||||
func reconstructUrlFromReferer(referer *url.URL, relativeUrl *url.URL) (*url.URL, error) {
|
||||
|
||||
@@ -248,10 +182,10 @@ func reconstructUrlFromReferer(referer *url.URL, relativeUrl *url.URL) (*url.URL
|
||||
}
|
||||
|
||||
if realUrl.Scheme == "" || realUrl.Host == "" {
|
||||
return nil, fmt.Errorf("invalid referer URL: '%s' on request '%s", referer, relativeUrl)
|
||||
return nil, fmt.Errorf("invalid referer URL: '%s' on request '%s", referer.String(), relativeUrl.String())
|
||||
}
|
||||
|
||||
log.Printf("'%s' -> '%s'\n", relativeUrl.String(), realUrl.String())
|
||||
log.Printf("rewrite relative URL using referer: '%s' -> '%s'\n", relativeUrl.String(), realUrl.String())
|
||||
|
||||
return &url.URL{
|
||||
Scheme: referer.Scheme,
|
||||
@@ -261,13 +195,38 @@ func reconstructUrlFromReferer(referer *url.URL, relativeUrl *url.URL) (*url.URL
|
||||
}, nil
|
||||
}
|
||||
|
||||
// preventRecursiveProxyRequest strips leading copies of the proxy's own
// origin from a requested URL, so that a request such as
// http://localhost:8080/http://localhost:8080/https://example.com is
// resolved to https://example.com instead of proxying the proxy itself.
//
//   - urlQuery is the URL extracted from the incoming request.
//   - baseProxyURL is the proxy origin (scheme://host) to strip.
//
// The returned URL is urlQuery itself when it does not start with
// baseProxyURL. If a stripped remainder cannot be parsed, the last URL that
// parsed successfully is returned and the failure is logged.
func preventRecursiveProxyRequest(urlQuery *url.URL, baseProxyURL string) *url.URL {
	// Every string has the empty prefix; without this guard the stripping
	// loop below would never terminate.
	if baseProxyURL == "" {
		return urlQuery
	}

	current := urlQuery
	for {
		u := current.String()
		if !strings.HasPrefix(u, baseProxyURL) {
			return current
		}

		// Drop one copy of the proxy origin plus the path separator after it.
		stripped := strings.TrimPrefix(strings.TrimPrefix(u, baseProxyURL), "/")
		fixedURL, err := url.Parse(stripped)
		if err != nil {
			log.Printf("proxychain: failed to fix recursive request: '%s' -> '%s'\n", baseProxyURL, u)
			return current
		}
		current = fixedURL
	}
}
|
||||
|
||||
// extractUrl extracts a URL from the request ctx. If the URL in the request
|
||||
// is a relative path, it reconstructs the full URL using the referer header.
|
||||
func (chain *ProxyChain) extractUrl() (*url.URL, error) {
|
||||
// try to extract url-encoded
|
||||
reqUrl, err := url.QueryUnescape(chain.Context.Params("*"))
|
||||
if err != nil {
|
||||
reqUrl = chain.Context.Params("*") // fallback
|
||||
reqUrl := chain.Context.Params("*")
|
||||
|
||||
// sometimes client requests doubleroot '//'
|
||||
// there is a bug somewhere else, but this is a workaround until we find it
|
||||
if strings.HasPrefix(reqUrl, "/") || strings.HasPrefix(reqUrl, `%2F`) {
|
||||
reqUrl = strings.TrimPrefix(reqUrl, "/")
|
||||
reqUrl = strings.TrimPrefix(reqUrl, `%2F`)
|
||||
}
|
||||
|
||||
// unescape url query
|
||||
uReqUrl, err := url.QueryUnescape(reqUrl)
|
||||
if err == nil {
|
||||
reqUrl = uReqUrl
|
||||
}
|
||||
|
||||
urlQuery, err := url.Parse(reqUrl)
|
||||
@@ -275,6 +234,11 @@ func (chain *ProxyChain) extractUrl() (*url.URL, error) {
|
||||
return nil, fmt.Errorf("error parsing request URL '%s': %v", reqUrl, err)
|
||||
}
|
||||
|
||||
// prevent recursive proxy requests
|
||||
fullURL := chain.Context.Request().URI()
|
||||
proxyURL := fmt.Sprintf("%s://%s", fullURL.Scheme(), fullURL.Host())
|
||||
urlQuery = preventRecursiveProxyRequest(urlQuery, proxyURL)
|
||||
|
||||
// Handle standard paths
|
||||
// eg: https://localhost:8080/https://realsite.com/images/foobar.jpg -> https://realsite.com/images/foobar.jpg
|
||||
isRelativePath := urlQuery.Scheme == ""
|
||||
@@ -292,6 +256,16 @@ func (chain *ProxyChain) extractUrl() (*url.URL, error) {
|
||||
return reconstructUrlFromReferer(referer, relativePath)
|
||||
}
|
||||
|
||||
// AddBodyRewriter adds a HTMLTokenRewriter to the chain.
|
||||
// - HTMLTokenRewriters modify the body response by parsing the HTML
|
||||
// and making changes to the DOM as it streams to the client
|
||||
// - In most cases, you don't need to use this method. It's usually called by
|
||||
// a ResponseModifier to batch queue changes for performance reasons.
|
||||
func (chain *ProxyChain) AddHTMLTokenRewriter(rr rr.IHTMLTokenRewriter) *ProxyChain {
|
||||
chain.htmlTokenRewriters = append(chain.htmlTokenRewriters, rr)
|
||||
return chain
|
||||
}
|
||||
|
||||
// SetFiberCtx takes the request ctx from the client
|
||||
// for the modifiers and execute function to use.
|
||||
// it must be set everytime a new request comes through
|
||||
@@ -368,3 +342,86 @@ func NewProxyChain() *ProxyChain {
|
||||
chain.Client = http.DefaultClient
|
||||
return chain
|
||||
}
|
||||
|
||||
/// ========================================================================================================
|
||||
|
||||
// _execute sends the request for the ProxyChain and returns the raw body only
|
||||
// the caller is responsible for returning a response back to the requestor
|
||||
// the caller is also responsible for calling chain._reset() when they are done with the body
|
||||
func (chain *ProxyChain) _execute() (io.Reader, error) {
|
||||
if chain.validateCtxIsSet() != nil || chain.abortErr != nil {
|
||||
return nil, chain.abortErr
|
||||
}
|
||||
if chain.Request == nil {
|
||||
return nil, errors.New("proxychain request not yet initialized")
|
||||
}
|
||||
if chain.Request.URL.Scheme == "" {
|
||||
return nil, errors.New("request url not set or invalid. Check ProxyChain ReqMods for issues")
|
||||
}
|
||||
|
||||
// Apply requestModifications to proxychain
|
||||
for _, applyRequestModificationsTo := range chain.requestModifications {
|
||||
err := applyRequestModificationsTo(chain)
|
||||
if err != nil {
|
||||
return nil, chain.abort(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Send Request Upstream
|
||||
resp, err := chain.Client.Do(chain.Request)
|
||||
if err != nil {
|
||||
return nil, chain.abort(err)
|
||||
}
|
||||
chain.Response = resp
|
||||
|
||||
/* todo: move to rsm
|
||||
for k, v := range resp.Header {
|
||||
chain.Context.Set(k, resp.Header.Get(k))
|
||||
}
|
||||
*/
|
||||
|
||||
// Apply ResponseModifiers to proxychain
|
||||
for _, applyResultModificationsTo := range chain.resultModifications {
|
||||
err := applyResultModificationsTo(chain)
|
||||
if err != nil {
|
||||
return nil, chain.abort(err)
|
||||
}
|
||||
}
|
||||
|
||||
// stream request back to client, possibly rewriting the body
|
||||
if len(chain.htmlTokenRewriters) == 0 {
|
||||
return chain.Response.Body, nil
|
||||
}
|
||||
|
||||
ct := chain.Response.Header.Get("content-type")
|
||||
switch {
|
||||
case strings.HasPrefix(ct, "text/html"):
|
||||
fmt.Println("fooox")
|
||||
return rr.NewHTMLRewriter(chain.Response.Body, chain.htmlTokenRewriters), nil
|
||||
default:
|
||||
return chain.Response.Body, nil
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Execute sends the request for the ProxyChain and returns the request to the sender
|
||||
// and resets the fields so that the ProxyChain can be reused.
|
||||
// if any step in the ProxyChain fails, the request will abort and a 500 error will
|
||||
// be returned to the client
|
||||
func (chain *ProxyChain) Execute() error {
|
||||
defer chain._reset()
|
||||
body, err := chain._execute()
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return err
|
||||
}
|
||||
if chain.Context == nil {
|
||||
return errors.New("no context set")
|
||||
}
|
||||
|
||||
// Return request back to client
|
||||
chain.Context.Set("content-type", chain.Response.Header.Get("content-type"))
|
||||
return chain.Context.SendStream(body)
|
||||
|
||||
//return chain.Context.SendStream(body)
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ func resolveWithGoogleDoH(host string) (string, error) {
|
||||
return "", fmt.Errorf("no DoH DNS record found for %s", host)
|
||||
}
|
||||
|
||||
// ResolveWithGoogleDoH modifies a ProxyChain's client to make the request but resolve the URL
|
||||
// ResolveWithGoogleDoH modifies a ProxyChain's client to make the request by resolving the URL
|
||||
// using Google's DNS over HTTPs service
|
||||
func ResolveWithGoogleDoH() proxychain.RequestModification {
|
||||
return func(px *proxychain.ProxyChain) error {
|
||||
|
||||
44
proxychain/requestmodifers/spoof_referrer_from_baidu_post.go
Normal file
44
proxychain/requestmodifers/spoof_referrer_from_baidu_post.go
Normal file
@@ -0,0 +1,44 @@
|
||||
package requestmodifers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"ladder/proxychain"
|
||||
"math/rand"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// SpoofReferrerFromBaiduSearch modifies the referrer header
|
||||
// pretending to be from a BaiduSearch
|
||||
func SpoofReferrerFromBaiduSearch() proxychain.RequestModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// https://www.baidu.com/link?url=5biIeDvUIihawf3Zbbysach2Xn4H3w3FzO6LZKgSs-B5Yt4M4RUFikokOk5zetf2&wd=&eqid=9da80d8208009b8480000706655d5ed6
|
||||
referrer := fmt.Sprintf("https://baidu.com/link?url=%s", generateRandomBaiduURL())
|
||||
chain.AddRequestModifications(
|
||||
SpoofReferrer(referrer),
|
||||
SetRequestHeader("sec-fetch-site", "cross-site"),
|
||||
SetRequestHeader("sec-fetch-dest", "document"),
|
||||
SetRequestHeader("sec-fetch-mode", "navigate"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// utility functions ==================
|
||||
|
||||
// generateRandomString returns a string of `length` bytes, each picked at
// random from charset. It returns "" when charset is empty or length is not
// positive.
//
// The output is not cryptographically secure; it is only meant to produce
// plausible-looking identifiers.
func generateRandomString(charset string, length int) string {
	// rand.Intn panics on a zero-sized range, so an empty charset must be
	// rejected up front.
	if len(charset) == 0 || length <= 0 {
		return ""
	}

	// Mixing the clock with the package-level generator keeps two calls made
	// within the same clock tick from sharing a seed (and thus a sequence).
	seed := time.Now().UnixNano() ^ rand.Int63()
	rng := rand.New(rand.NewSource(seed))

	var sb strings.Builder
	sb.Grow(length)
	for i := 0; i < length; i++ {
		sb.WriteByte(charset[rng.Intn(len(charset))])
	}
	return sb.String()
}
|
||||
|
||||
func generateRandomBaiduURL() string {
|
||||
const alphanumericCharset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||
const hexCharset = "0123456789abcdef"
|
||||
randomAlphanumeric := generateRandomString(alphanumericCharset, 30) // Length before "-"
|
||||
randomHex := generateRandomString(hexCharset, 16) // Length of eqid
|
||||
return randomAlphanumeric + "-" + "&wd=&eqid=" + randomHex
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package requestmodifers
|
||||
|
||||
import (
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// SpoofReferrerFromBingSearch modifies the referrer header
|
||||
// pretending to be from a bing search site
|
||||
func SpoofReferrerFromBingSearch() proxychain.RequestModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
chain.AddRequestModifications(
|
||||
SpoofReferrer("https://www.bing.com/"),
|
||||
SetRequestHeader("sec-fetch-site", "cross-site"),
|
||||
SetRequestHeader("sec-fetch-dest", "document"),
|
||||
SetRequestHeader("sec-fetch-mode", "navigate"),
|
||||
ModifyQueryParams("utm_source", "bing"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package requestmodifers
|
||||
|
||||
import (
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// SpoofReferrerFromGoogleSearch modifies the referrer header
|
||||
// pretending to be from a google search site
|
||||
func SpoofReferrerFromGoogleSearch() proxychain.RequestModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
chain.AddRequestModifications(
|
||||
SpoofReferrer("https://www.google.com/"),
|
||||
SetRequestHeader("sec-fetch-site", "cross-site"),
|
||||
SetRequestHeader("sec-fetch-dest", "document"),
|
||||
SetRequestHeader("sec-fetch-mode", "navigate"),
|
||||
ModifyQueryParams("utm_source", "google"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
package requestmodifers
|
||||
|
||||
import (
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// SpoofReferrerFromLinkedInPost modifies the referrer header
|
||||
// pretending to be from a linkedin post
|
||||
func SpoofReferrerFromLinkedInPost() proxychain.RequestModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
chain.AddRequestModifications(
|
||||
SpoofReferrer("https://www.linkedin.com/"),
|
||||
SetRequestHeader("sec-fetch-site", "cross-site"),
|
||||
SetRequestHeader("sec-fetch-dest", "document"),
|
||||
SetRequestHeader("sec-fetch-mode", "navigate"),
|
||||
ModifyQueryParams("utm_campaign", "post"),
|
||||
ModifyQueryParams("utm_medium", "web"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
24
proxychain/requestmodifers/spoof_referrer_from_naver_post.go
Normal file
24
proxychain/requestmodifers/spoof_referrer_from_naver_post.go
Normal file
@@ -0,0 +1,24 @@
|
||||
package requestmodifers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// SpoofReferrerFromNaverSearch modifies the referrer header
|
||||
// pretending to be from a Naver search (popular in South Korea)
|
||||
func SpoofReferrerFromNaverSearch() proxychain.RequestModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
referrer := fmt.Sprintf(
|
||||
"https://search.naver.com/search.naver?where=nexearch&sm=top_hty&fbm=0&ie=utf8&query=%s",
|
||||
chain.Request.URL.Host,
|
||||
)
|
||||
chain.AddRequestModifications(
|
||||
SpoofReferrer(referrer),
|
||||
SetRequestHeader("sec-fetch-site", "cross-site"),
|
||||
SetRequestHeader("sec-fetch-dest", "document"),
|
||||
SetRequestHeader("sec-fetch-mode", "navigate"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package requestmodifers
|
||||
|
||||
import (
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// SpoofReferrerFromPinterestPost modifies the referrer header
|
||||
// pretending to be from a pinterest post
|
||||
func SpoofReferrerFromPinterestPost() proxychain.RequestModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
chain.AddRequestModifications(
|
||||
SpoofReferrer("https://www.pinterest.com/"),
|
||||
SetRequestHeader("sec-fetch-site", "cross-site"),
|
||||
SetRequestHeader("sec-fetch-dest", "document"),
|
||||
SetRequestHeader("sec-fetch-mode", "navigate"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
19
proxychain/requestmodifers/spoof_referrer_from_qq_post.go
Normal file
19
proxychain/requestmodifers/spoof_referrer_from_qq_post.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package requestmodifers
|
||||
|
||||
import (
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// SpoofReferrerFromQQPost modifies the referrer header
|
||||
// pretending to be from a QQ post (popular social media in China)
|
||||
func SpoofReferrerFromQQPost() proxychain.RequestModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
chain.AddRequestModifications(
|
||||
SpoofReferrer("https://new.qq.com/'"),
|
||||
SetRequestHeader("sec-fetch-site", "cross-site"),
|
||||
SetRequestHeader("sec-fetch-dest", "document"),
|
||||
SetRequestHeader("sec-fetch-mode", "navigate"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package requestmodifers
|
||||
|
||||
import (
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// SpoofReferrerFromRedditPost modifies the referrer header
|
||||
// pretending to be from a reddit post
|
||||
func SpoofReferrerFromRedditPost() proxychain.RequestModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
chain.AddRequestModifications(
|
||||
SpoofReferrer("https://www.reddit.com/"),
|
||||
SetRequestHeader("sec-fetch-site", "cross-site"),
|
||||
SetRequestHeader("sec-fetch-dest", "document"),
|
||||
SetRequestHeader("sec-fetch-mode", "navigate"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package requestmodifers
|
||||
|
||||
import (
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// SpoofReferrerFromTumblrPost modifies the referrer header
|
||||
// pretending to be from a tumblr post
|
||||
func SpoofReferrerFromTumblrPost() proxychain.RequestModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
chain.AddRequestModifications(
|
||||
SpoofReferrer("https://www.tumblr.com/"),
|
||||
SetRequestHeader("sec-fetch-site", "cross-site"),
|
||||
SetRequestHeader("sec-fetch-dest", "document"),
|
||||
SetRequestHeader("sec-fetch-mode", "navigate"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package requestmodifers
|
||||
|
||||
import (
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// SpoofReferrerFromTwitterPost modifies the referrer header
|
||||
// pretending to be from a twitter post
|
||||
func SpoofReferrerFromTwitterPost() proxychain.RequestModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
chain.AddRequestModifications(
|
||||
SpoofReferrer("https://t.co/"),
|
||||
SetRequestHeader("sec-fetch-site", "cross-site"),
|
||||
SetRequestHeader("sec-fetch-dest", "document"),
|
||||
SetRequestHeader("sec-fetch-mode", "navigate"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package requestmodifers
|
||||
|
||||
import (
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// SpoofReferrerFromVkontaktePost modifies the referrer header
|
||||
// pretending to be from a vkontakte post (popular in Russia)
|
||||
func SpoofReferrerFromVkontaktePost() proxychain.RequestModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
chain.AddRequestModifications(
|
||||
SpoofReferrer("https://away.vk.com/"),
|
||||
SetRequestHeader("sec-fetch-site", "cross-site"),
|
||||
SetRequestHeader("sec-fetch-dest", "document"),
|
||||
SetRequestHeader("sec-fetch-mode", "navigate"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
22
proxychain/requestmodifers/spoof_referrer_from_weibo_post.go
Normal file
22
proxychain/requestmodifers/spoof_referrer_from_weibo_post.go
Normal file
@@ -0,0 +1,22 @@
|
||||
package requestmodifers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"ladder/proxychain"
|
||||
"math/rand"
|
||||
)
|
||||
|
||||
// SpoofReferrerFromWeiboPost modifies the referrer header
|
||||
// pretending to be from a Weibo post (popular in China)
|
||||
func SpoofReferrerFromWeiboPost() proxychain.RequestModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
referrer := fmt.Sprintf("http://weibo.com/u/%d", rand.Intn(90001))
|
||||
chain.AddRequestModifications(
|
||||
SpoofReferrer(referrer),
|
||||
SetRequestHeader("sec-fetch-site", "cross-site"),
|
||||
SetRequestHeader("sec-fetch-dest", "document"),
|
||||
SetRequestHeader("sec-fetch-mode", "navigate"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -11,4 +11,4 @@ func SpoofXForwardedFor(ip string) proxychain.RequestModification {
|
||||
px.Request.Header.Set("X-FORWARDED-FOR", ip)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4,6 +4,9 @@ import (
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// TODO: handle edge case where CSP is specified in meta tag:
|
||||
// <meta http-equiv="Content-Security-Policy" content="default-src 'self'">
|
||||
|
||||
// BypassContentSecurityPolicy modifies response headers to prevent the browser
|
||||
// from enforcing any CSP restrictions. This should run at the end of the chain.
|
||||
func BypassContentSecurityPolicy() proxychain.ResponseModification {
|
||||
|
||||
27
proxychain/responsemodifers/inject_script.go
Normal file
27
proxychain/responsemodifers/inject_script.go
Normal file
@@ -0,0 +1,27 @@
|
||||
package responsemodifers
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"ladder/proxychain"
|
||||
"ladder/proxychain/responsemodifers/rewriters"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// InjectScript modifies HTTP responses
|
||||
// to execute javascript at a particular time.
|
||||
func InjectScript(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// don't add rewriter if it's not even html
|
||||
ct := chain.Response.Header.Get("content-type")
|
||||
if !strings.HasPrefix(ct, "text/html") {
|
||||
return nil
|
||||
}
|
||||
|
||||
// the rewriting actually happens in chain.Execute() as the client is streaming the response body back
|
||||
rr := rewriters.NewScriptInjectorRewriter(js, execTime)
|
||||
// we just queue it up here
|
||||
chain.AddHTMLTokenRewriter(rr)
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
// Overrides the global fetch and XMLHttpRequest open methods to modify the request URLs.
|
||||
// Also overrides the attribute setter prototype to modify the request URLs
|
||||
// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
|
||||
(() => {
|
||||
function rewriteURL(url) {
|
||||
if (!url) return url
|
||||
if (url.startsWith(window.location.origin)) return url
|
||||
|
||||
if (url.startsWith("//")) {
|
||||
url = `${window.location.origin}/${encodeURIComponent(url.substring(2))}`;
|
||||
} else if (url.startsWith("/")) {
|
||||
url = `${window.location.origin}/${encodeURIComponent(url.substring(1))}`;
|
||||
} else if (url.startsWith("http://") || url.startsWith("https://")) {
|
||||
url = `${window.location.origin}/${encodeURIComponent(url)}`;
|
||||
}
|
||||
return url;
|
||||
};
|
||||
|
||||
// monkey patch fetch
|
||||
const oldFetch = globalThis.fetch ;
|
||||
globalThis.fetch = async (url, init) => {
|
||||
return oldFetch(rewriteURL(url), init)
|
||||
}
|
||||
|
||||
// monkey patch xmlhttprequest
|
||||
const oldOpen = XMLHttpRequest.prototype.open;
|
||||
XMLHttpRequest.prototype.open = function(method, url, async = true, user = null, password = null) {
|
||||
return oldOpen.call(this, method, rewriteURL(url), async, user, password);
|
||||
};
|
||||
|
||||
|
||||
// Monkey patch setter methods
|
||||
const elements = [
|
||||
{ tag: 'a', attribute: 'href' },
|
||||
{ tag: 'img', attribute: 'src' },
|
||||
{ tag: 'script', attribute: 'src' },
|
||||
{ tag: 'link', attribute: 'href' },
|
||||
{ tag: 'iframe', attribute: 'src' },
|
||||
{ tag: 'audio', attribute: 'src' },
|
||||
{ tag: 'video', attribute: 'src' },
|
||||
{ tag: 'source', attribute: 'src' },
|
||||
{ tag: 'embed', attribute: 'src' },
|
||||
{ tag: 'object', attribute: 'src' },
|
||||
{ tag: 'input', attribute: 'src' },
|
||||
{ tag: 'track', attribute: 'src' },
|
||||
{ tag: 'form', attribute: 'action' },
|
||||
];
|
||||
|
||||
elements.forEach(({ tag, attribute }) => {
|
||||
const proto = document.createElement(tag).constructor.prototype;
|
||||
const descriptor = Object.getOwnPropertyDescriptor(proto, attribute);
|
||||
if (descriptor && descriptor.set) {
|
||||
Object.defineProperty(proto, attribute, {
|
||||
...descriptor,
|
||||
set(value) {
|
||||
return descriptor.set.call(this, rewriteURL(value));
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
})();
|
||||
55
proxychain/responsemodifers/patch_dynamic_resource_urls.go
Normal file
55
proxychain/responsemodifers/patch_dynamic_resource_urls.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package responsemodifers
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"ladder/proxychain"
|
||||
"ladder/proxychain/responsemodifers/rewriters"
|
||||
"strings"
|
||||
)
|
||||
|
||||
//go:embed patch_dynamic_resource_urls.js
|
||||
var patchDynamicResourceURLsScript string
|
||||
|
||||
// PatchDynamicResourceURLs patches the javascript runtime to rewrite URLs client-side.
|
||||
// - This function is designed to allow the proxified page
|
||||
// to still be browsible by routing all resource URLs through the proxy.
|
||||
// - Native APIs capable of network requests will be hooked
|
||||
// and the URLs arguments modified to point to the proxy instead.
|
||||
// - fetch('/relative_path') -> fetch('/https://proxiedsite.com/relative_path')
|
||||
// - Element.setAttribute('src', "/assets/img.jpg") -> Element.setAttribute('src', "/https://proxiedsite.com/assets/img.jpg") -> fetch('/https://proxiedsite.com/relative_path')
|
||||
func PatchDynamicResourceURLs() proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// don't add rewriter if it's not even html
|
||||
ct := chain.Response.Header.Get("content-type")
|
||||
if !strings.HasPrefix(ct, "text/html") {
|
||||
return nil
|
||||
}
|
||||
|
||||
// this is the original URL sent by client:
|
||||
// http://localhost:8080/http://proxiedsite.com/foo/bar
|
||||
originalURI := chain.Context.Request().URI()
|
||||
|
||||
// this is the extracted URL that the client requests to proxy
|
||||
// http://proxiedsite.com/foo/bar
|
||||
reqURL := chain.Request.URL
|
||||
|
||||
params := map[string]string{
|
||||
// ie: http://localhost:8080
|
||||
"{{PROXY_ORIGIN}}": fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host()),
|
||||
// ie: http://proxiedsite.com
|
||||
"{{ORIGIN}}": fmt.Sprintf("%s://%s", reqURL.Scheme, reqURL.Host),
|
||||
}
|
||||
|
||||
// the rewriting actually happens in chain.Execute() as the client is streaming the response body back
|
||||
rr := rewriters.NewScriptInjectorRewriterWithParams(
|
||||
patchDynamicResourceURLsScript,
|
||||
rewriters.BeforeDOMContentLoaded,
|
||||
params,
|
||||
)
|
||||
// we just queue it up here
|
||||
chain.AddHTMLTokenRewriter(rr)
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
325
proxychain/responsemodifers/patch_dynamic_resource_urls.js
Normal file
325
proxychain/responsemodifers/patch_dynamic_resource_urls.js
Normal file
@@ -0,0 +1,325 @@
|
||||
// Overrides the global fetch and XMLHttpRequest open methods to modify the request URLs.
|
||||
// Also overrides the attribute setter prototype to modify the request URLs
|
||||
// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
|
||||
(() => {
|
||||
|
||||
// ============== PARAMS ===========================
|
||||
// if the original request was: http://localhost:8080/http://proxiedsite.com/foo/bar
|
||||
// proxyOrigin is http://localhost:8080
|
||||
const proxyOrigin = "{{PROXY_ORIGIN}}";
|
||||
//const proxyOrigin = globalThis.window.location.origin;
|
||||
|
||||
// if the original request was: http://localhost:8080/http://proxiedsite.com/foo/bar
|
||||
// origin is http://proxiedsite.com
|
||||
const origin = "{{ORIGIN}}";
|
||||
//const origin = (new URL(decodeURIComponent(globalThis.window.location.pathname.substring(1)))).origin
|
||||
// ============== END PARAMS ======================
|
||||
|
||||
const blacklistedSchemes = [
|
||||
"ftp:",
|
||||
"mailto:",
|
||||
"tel:",
|
||||
"file:",
|
||||
"blob:",
|
||||
"javascript:",
|
||||
"about:",
|
||||
"magnet:",
|
||||
"ws:",
|
||||
"wss:",
|
||||
];
|
||||
|
||||
/**
 * Rewrites a resource URL so that the request is routed through the proxy.
 *
 * Values that are empty, not strings, use a blacklisted scheme, cannot be
 * parsed as a URL, or already point at the proxy are returned unchanged.
 *
 * @param {*} url - URL (or arbitrary value) handed to a hooked API
 * @returns {*} the rewritten URL, or the input unchanged
 */
function rewriteURL(url) {
  const oldUrl = url
  if (!url) return url
  // only strings are rewritten; Request/URL objects pass through untouched
  if (typeof url.startsWith !== 'function') return url

  // don't rewrite special URIs: match on the scheme prefix, since the URL
  // as a whole never equals a bare scheme such as "mailto:"
  const lowered = url.toLowerCase()
  if (blacklistedSchemes.some((scheme) => lowered.startsWith(scheme))) return url;

  // don't rewrite invalid URIs
  try { new URL(url, origin) } catch { return url }

  // don't double rewrite
  if (url.startsWith(proxyOrigin)) return url;
  if (url.startsWith(`/${proxyOrigin}`)) return url;
  if (url.startsWith(`/${origin}`)) return url;
  if (url.startsWith(`/http://`)) return url;
  if (url.startsWith(`/https://`)) return url;
  if (url.startsWith(`/http%3A%2F%2F`)) return url;
  if (url.startsWith(`/https%3A%2F%2F`)) return url;
  if (url.startsWith(`/%2Fhttp`)) return url;

  //console.log(`proxychain: origin: ${origin} // proxyOrigin: ${proxyOrigin} // original: ${oldUrl}`)

  if (url.startsWith("//")) {
    // protocol-relative URL: it names its own host and only inherits the
    // scheme, so resolve it to an absolute URL instead of treating it as a
    // path on the proxied origin (new URL cannot throw here, see above)
    url = `/${encodeURIComponent(new URL(url, origin).href)}`;
  } else if (url.startsWith("/")) {
    url = `/${origin}/${encodeURIComponent(url.substring(1))}`;
  } else if (url.startsWith(origin)) {
    url = `/${encodeURIComponent(url)}`
  } else if (url.startsWith("http://") || url.startsWith("https://")) {
    url = `/${proxyOrigin}/${encodeURIComponent(url)}`;
  }
  console.log(`proxychain: rewrite JS URL: ${oldUrl} -> ${url}`)
  return url;
};
|
||||
|
||||
// Anti-bot protections (cloudflare, akamai bot manager, ...) sometimes check whether
// JS functions have been hooked. This wraps an already-patched function in a thin
// forwarder whose toString() reports `originalToString`.
//
// objectOrName: either an object owning `method`, or the name of a global.
// method: name of the function to disguise. When objectOrName is a string equal to
//         `method` and the global is a function, the global itself is disguised.
// originalToString: the string the disguised function's toString() returns.
function hideMonkeyPatch(objectOrName, method, originalToString) {
  const lookedUpByName = typeof objectOrName === 'string';
  const target = lookedUpByName ? globalThis[objectOrName] : objectOrName;

  // builds a forwarder around `fn` that reports the supplied source text
  const disguise = (fn) => {
    const forwarder = function(...args) {
      return fn.apply(this, args);
    };
    forwarder.toString = () => originalToString;
    return forwarder;
  };

  if (lookedUpByName && typeof target === 'function' && method === objectOrName) {
    globalThis[objectOrName] = disguise(target);
    return;
  }

  if (target && typeof target[method] === 'function') {
    target[method] = disguise(target[method]);
    return;
  }

  console.warn(`proxychain: cannot hide monkey patch: ${method} is not a function on the provided object.`);
}
|
||||
|
||||
// Replace the global fetch with a version that rewrites the requested URL
// before delegating to the original implementation.
const oldFetch = fetch;
fetch = async (resource, options) => oldFetch(rewriteURL(resource), options);
hideMonkeyPatch('fetch', 'fetch', 'function fetch() { [native code] }')
|
||||
|
||||
// monkey patch xmlhttprequest: open() is where the request URL is supplied,
// so that is where it gets rewritten.
const oldOpen = XMLHttpRequest.prototype.open;
XMLHttpRequest.prototype.open = function(method, url, async = true, user = null, password = null) {
  return oldOpen.call(this, method, rewriteURL(url), async, user, password);
};
hideMonkeyPatch(XMLHttpRequest.prototype, 'open', 'function(){if("function"==typeof eo)return eo.apply(this,arguments)}');

// send() takes a single optional request body, not (method, url); the body is
// forwarded untouched. The wrapper is kept so both methods report the same
// disguised toString().
const oldSend = XMLHttpRequest.prototype.send;
XMLHttpRequest.prototype.send = function(body) {
  return oldSend.call(this, body);
};
hideMonkeyPatch(XMLHttpRequest.prototype, 'send', 'function(){if("function"==typeof eo)return eo.apply(this,arguments)}');
|
||||
|
||||
|
||||
// monkey patch service worker registration
// ServiceWorkerContainer is only exposed in secure contexts; touching it elsewhere
// throws a ReferenceError that would abort every patch installed after this point.
if (typeof ServiceWorkerContainer !== 'undefined') {
  const oldRegister = ServiceWorkerContainer.prototype.register;
  ServiceWorkerContainer.prototype.register = function(scriptURL, options) {
    return oldRegister.call(this, rewriteURL(scriptURL), options)
  }
  hideMonkeyPatch(ServiceWorkerContainer.prototype, 'register', 'function register() { [native code] }')
}
|
||||
|
||||
// monkey patch URL.toString(): serialize with the original method, then
// hand back the proxied form of the result
const oldToString = URL.prototype.toString
URL.prototype.toString = function() {
  return rewriteURL(oldToString.call(this))
}
hideMonkeyPatch(URL.prototype, 'toString', 'function toString() { [native code] }')
|
||||
|
||||
// monkey patch URL.toJSON() method
// (patches toJSON itself; previously this block re-patched toString by mistake)
const oldToJson = URL.prototype.toJSON
URL.prototype.toJSON = function() {
  let originalURL = oldToJson.call(this)
  return rewriteURL(originalURL)
}
hideMonkeyPatch(URL.prototype, 'toJSON', 'function toJSON() { [native code] }')
|
||||
|
||||
// Monkey patch the URL.href accessor: reads return the rewritten URL and
// writes store the rewritten value through the original setter.
const originalHrefDescriptor = Object.getOwnPropertyDescriptor(URL.prototype, 'href');
Object.defineProperty(URL.prototype, 'href', {
  get() {
    return rewriteURL(originalHrefDescriptor.get.call(this))
  },
  set(newValue) {
    originalHrefDescriptor.set.call(this, rewriteURL(newValue));
  }
});
|
||||
|
||||
// TODO: do one more pass of this by manually traversing the DOM
|
||||
// AFTER all the JS and page has loaded just in case
|
||||
|
||||
// Tag/attribute pairs whose value may hold a URL. Used both to patch the
// attribute accessors and to rewrite markup assigned via innerHTML/outerHTML.
const elements = [
  ['a', 'href'],
  ['img', 'src'],
  // ['img', 'srcset'], // TODO: handle srcset
  ['script', 'src'],
  ['link', 'href'],
  ['link', 'icon'],
  ['iframe', 'src'],
  ['audio', 'src'],
  ['video', 'src'],
  ['source', 'src'],
  // ['source', 'srcset'], // TODO: handle srcset
  ['embed', 'src'],
  ['embed', 'pluginspage'],
  ['html', 'manifest'],
  ['object', 'src'],
  ['input', 'src'],
  ['track', 'src'],
  ['form', 'action'],
  ['area', 'href'],
  ['base', 'href'],
  ['blockquote', 'cite'],
  ['del', 'cite'],
  ['ins', 'cite'],
  ['q', 'cite'],
  ['button', 'formaction'],
  ['input', 'formaction'],
  ['meta', 'content'],
  ['object', 'data'],
].map(([tag, attribute]) => ({ tag, attribute }));
|
||||
|
||||
// For every tag/attribute pair, wrap the property accessor found on the element's
// prototype so that values are passed through rewriteURL on both set and get.
// Pairs whose prototype has no such accessor (or no setter) are skipped.
elements.forEach(({ tag, attribute }) => {
  const proto = document.createElement(tag).constructor.prototype;
  const descriptor = Object.getOwnPropertyDescriptor(proto, attribute);
  if (!descriptor || !descriptor.set) return;

  // calling rewriteURL will end up calling a setter for href,
  // leading to a recursive loop and a Maximum call stack size exceeded
  // error, so we guard against this with per-element semaphore flags
  const isRewritingSetKey = Symbol.for('isRewritingSet');
  const isRewritingGetKey = Symbol.for('isRewritingGet');

  Object.defineProperty(proto, attribute, {
    ...descriptor,
    set(value) {
      if (this[isRewritingSetKey]) {
        // Directly set the value without rewriting
        descriptor.set.call(this, value);
        return;
      }
      this[isRewritingSetKey] = true;
      try {
        descriptor.set.call(this, rewriteURL(value));
      } finally {
        // always release the flag, otherwise one throwing assignment would
        // disable rewriting on this element for good
        this[isRewritingSetKey] = false;
      }
    },
    get() {
      if (this[isRewritingGetKey]) {
        return descriptor.get.call(this);
      }
      this[isRewritingGetKey] = true;
      try {
        return rewriteURL(descriptor.get.call(this));
      } finally {
        this[isRewritingGetKey] = false;
      }
    }
  });
});
|
||||
|
||||
|
||||
// sometimes, libraries will set the Element.innerHTML or Element.outerHTML directly with a string instead of setters.
// in this case, we intercept it, create a fake DOM, parse it and then rewrite all attributes that could
// contain a URL. Then we return the replacement innerHTML/outerHTML with redirected links.
//
// html: the markup string being assigned.
// elements: list of { tag, attribute } pairs to rewrite; a tag may appear several times.
// Returns the rewritten markup, or `html` unchanged when called re-entrantly.
function rewriteInnerHTML(html, elements) {
  const isRewritingHTMLKey = Symbol.for('isRewritingHTML');

  // Check if already processing
  if (document[isRewritingHTMLKey]) {
    return html;
  }

  const tempContainer = document.createElement('div');
  document[isRewritingHTMLKey] = true;

  try {
    tempContainer.innerHTML = html;

    // Group attributes by tag. A plain tag -> attribute Map would keep only the
    // last attribute listed for a tag and silently skip the others
    // (e.g. link[href], input[src], object[src]).
    const attributesByTag = new Map();
    for (const { tag, attribute } of elements) {
      if (!attributesByTag.has(tag)) attributesByTag.set(tag, []);
      attributesByTag.get(tag).push(attribute);
    }

    // Loop-based DOM traversal
    for (const node of tempContainer.querySelectorAll('*')) {
      const attributes = attributesByTag.get(node.tagName.toLowerCase());
      if (!attributes) continue;
      for (const attribute of attributes) {
        if (!node.hasAttribute(attribute)) continue;
        node.setAttribute(attribute, rewriteURL(node.getAttribute(attribute)));
      }
    }

    return tempContainer.innerHTML;
  } finally {
    // Clear the flag
    document[isRewritingHTMLKey] = false;
  }
}
|
||||
|
||||
|
||||
// Keep the native innerHTML/outerHTML setters and install replacements that
// run assigned markup through rewriteInnerHTML first.
const originalSetters = {};

for (const property of ['innerHTML', 'outerHTML']) {
  const descriptor = Object.getOwnPropertyDescriptor(Element.prototype, property);
  if (!(descriptor && descriptor.set)) continue;

  originalSetters[property] = descriptor.set;

  Object.defineProperty(Element.prototype, property, {
    ...descriptor,
    set(value) {
      const isRewritingHTMLKey = Symbol.for('isRewritingHTML');
      if (this[isRewritingHTMLKey]) {
        // re-entrant call on the same element: use the native setter as-is
        originalSetters[property].call(this, value);
        return;
      }
      this[isRewritingHTMLKey] = true;
      try {
        // rewrite the markup, then store it through the native setter
        descriptor.set.call(this, rewriteInnerHTML(value, elements));
      } finally {
        this[isRewritingHTMLKey] = false;
      }
    }
  });
}
|
||||
|
||||
|
||||
})();
|
||||
|
||||
|
||||
|
||||
// Runs execute() once the DOM has been "idle", i.e. no child-list mutation was
// observed under document.body for `debounceDelay` milliseconds.
(() => {
  const debounceDelay = 500; // adjust the delay as needed

  function execute() {
    console.log('DOM is now idle. Executing...');
  }

  function initIdleMutationObserver() {
    let debounceTimer;

    // (re)start the idle countdown
    const armTimer = () => {
      clearTimeout(debounceTimer);
      debounceTimer = setTimeout(() => {
        execute();
        observer.disconnect(); // Disconnect after first execution
      }, debounceDelay);
    };

    // every observed mutation pushes the deadline back
    const observer = new MutationObserver(armTimer);

    const config = { attributes: false, childList: true, subtree: true };
    observer.observe(document.body, config);

    // A page that never mutates after load is idle from the start; without
    // this initial countdown execute() would never run on such pages.
    armTimer();
  }

  // DOMContentLoaded does not fire again for listeners added after it fired.
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', () => {
      initIdleMutationObserver();
    });
  } else {
    initIdleMutationObserver();
  }
})();
|
||||
@@ -1,262 +1,35 @@
|
||||
package responsemodifers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"io"
|
||||
"ladder/proxychain"
|
||||
"net/url"
|
||||
"ladder/proxychain/responsemodifers/rewriters"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// AttributesToRewrite is the set of HTML attribute names whose values are
// treated as URLs and rewritten to point at the proxy.
var AttributesToRewrite = map[string]bool{
	"src":         true,
	"href":        true,
	"action":      true,
	"srcset":      true, // TODO: fix
	"poster":      true,
	"data":        true,
	"cite":        true,
	"formaction":  true,
	"background":  true,
	"usemap":      true,
	"longdesc":    true,
	"manifest":    true,
	"archive":     true,
	"codebase":    true,
	"icon":        true,
	"pluginspage": true,
}
|
||||
|
||||
// HTMLResourceURLRewriter is a struct that rewrites URLs within HTML resources to use a specified proxy URL.
|
||||
// It uses an HTML tokenizer to process HTML content and rewrites URLs in src/href attributes.
|
||||
// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
|
||||
type HTMLResourceURLRewriter struct {
|
||||
baseURL *url.URL
|
||||
tokenizer *html.Tokenizer
|
||||
currentToken html.Token
|
||||
tokenBuffer *bytes.Buffer
|
||||
currentTokenIndex int
|
||||
currentTokenProcessed bool
|
||||
}
|
||||
|
||||
// NewHTMLResourceURLRewriter creates a new instance of HTMLResourceURLRewriter.
|
||||
// It initializes the tokenizer with the provided source and sets the proxy URL.
|
||||
func NewHTMLResourceURLRewriter(src io.ReadCloser, baseURL *url.URL) *HTMLResourceURLRewriter {
|
||||
return &HTMLResourceURLRewriter{
|
||||
tokenizer: html.NewTokenizer(src),
|
||||
currentToken: html.Token{},
|
||||
currentTokenIndex: 0,
|
||||
tokenBuffer: new(bytes.Buffer),
|
||||
baseURL: baseURL,
|
||||
}
|
||||
}
|
||||
|
||||
// Close resets the internal state of HTMLResourceURLRewriter, clearing buffers and token data.
|
||||
func (r *HTMLResourceURLRewriter) Close() error {
|
||||
r.tokenBuffer.Reset()
|
||||
r.currentToken = html.Token{}
|
||||
r.currentTokenIndex = 0
|
||||
r.currentTokenProcessed = false
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read processes the HTML content, rewriting URLs and managing the state of tokens.
|
||||
// It reads HTML content, token by token, rewriting URLs to route through the specified proxy.
|
||||
func (r *HTMLResourceURLRewriter) Read(p []byte) (int, error) {
|
||||
|
||||
if r.currentToken.Data == "" || r.currentTokenProcessed {
|
||||
tokenType := r.tokenizer.Next()
|
||||
|
||||
// done reading html, close out reader
|
||||
if tokenType == html.ErrorToken {
|
||||
if r.tokenizer.Err() == io.EOF {
|
||||
return 0, io.EOF
|
||||
}
|
||||
return 0, r.tokenizer.Err()
|
||||
}
|
||||
|
||||
// flush the current token into an internal buffer
|
||||
// to handle fragmented tokens
|
||||
r.currentToken = r.tokenizer.Token()
|
||||
|
||||
// patch tokens with URLs
|
||||
isTokenWithAttribute := r.currentToken.Type == html.StartTagToken || r.currentToken.Type == html.SelfClosingTagToken
|
||||
if isTokenWithAttribute {
|
||||
patchResourceURL(&r.currentToken, r.baseURL)
|
||||
}
|
||||
|
||||
r.tokenBuffer.Reset()
|
||||
r.tokenBuffer.WriteString(html.UnescapeString(r.currentToken.String()))
|
||||
r.currentTokenProcessed = false
|
||||
r.currentTokenIndex = 0
|
||||
}
|
||||
|
||||
n, err := r.tokenBuffer.Read(p)
|
||||
if err == io.EOF || r.tokenBuffer.Len() == 0 {
|
||||
r.currentTokenProcessed = true
|
||||
err = nil // EOF in this context is expected and not an actual error
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Root-relative URLs: These are relative to the root path and start with a "/".
|
||||
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
// doublecheck this is a valid relative URL
|
||||
_, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
//log.Printf("BASEURL patch: %s\n", baseURL)
|
||||
|
||||
attr.Val = fmt.Sprintf(
|
||||
"/%s://%s/%s",
|
||||
baseURL.Scheme,
|
||||
baseURL.Host,
|
||||
strings.TrimPrefix(attr.Val, "/"),
|
||||
)
|
||||
attr.Val = url.QueryEscape(attr.Val)
|
||||
attr.Val = fmt.Sprintf("/%s", attr.Val)
|
||||
|
||||
//log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
|
||||
func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
attr.Val = fmt.Sprintf(
|
||||
"%s://%s/%s%s",
|
||||
baseURL.Scheme,
|
||||
strings.Trim(baseURL.Host, "/"),
|
||||
strings.Trim(baseURL.RawPath, "/"),
|
||||
strings.Trim(attr.Val, "/"),
|
||||
)
|
||||
attr.Val = url.QueryEscape(attr.Val)
|
||||
attr.Val = fmt.Sprintf("/%s", attr.Val)
|
||||
//log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
|
||||
func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
attr.Val = strings.TrimPrefix(attr.Val, "/")
|
||||
handleRootRelativePath(attr, baseURL)
|
||||
//log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
// check if valid URL
|
||||
u, err := url.Parse(attr.Val)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if !(u.Scheme == "http" || u.Scheme == "https") {
|
||||
return
|
||||
}
|
||||
attr.Val = fmt.Sprintf(
|
||||
"/%s",
|
||||
url.QueryEscape(
|
||||
strings.TrimPrefix(attr.Val, "/"),
|
||||
),
|
||||
)
|
||||
//log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
|
||||
for i, src := range strings.Split(attr.Val, ",") {
|
||||
src = strings.Trim(src, " ")
|
||||
for j, s := range strings.Split(src, " ") {
|
||||
s = strings.Trim(s, " ")
|
||||
if j == 0 {
|
||||
f := &html.Attribute{Val: s, Key: attr.Key}
|
||||
switch {
|
||||
case strings.HasPrefix(s, "//"):
|
||||
handleProtocolRelativePath(f, baseURL)
|
||||
case strings.HasPrefix(s, "/"):
|
||||
handleRootRelativePath(f, baseURL)
|
||||
case strings.HasPrefix(s, "https://") || strings.HasPrefix(s, "http://"):
|
||||
handleAbsolutePath(f, baseURL)
|
||||
default:
|
||||
handleDocumentRelativePath(f, baseURL)
|
||||
}
|
||||
s = f.Val
|
||||
}
|
||||
if i == 0 && j == 0 {
|
||||
attr.Val = s
|
||||
continue
|
||||
}
|
||||
attr.Val = fmt.Sprintf("%s %s", attr.Val, s)
|
||||
}
|
||||
attr.Val = fmt.Sprintf("%s,", attr.Val)
|
||||
}
|
||||
attr.Val = strings.TrimSuffix(attr.Val, ",")
|
||||
}
|
||||
|
||||
// TODO: figure out how to handle these
|
||||
// srcset
|
||||
func patchResourceURL(token *html.Token, baseURL *url.URL) {
|
||||
for i := range token.Attr {
|
||||
attr := &token.Attr[i]
|
||||
|
||||
switch {
|
||||
// dont touch attributes except for the ones we defined
|
||||
case !AttributesToRewrite[attr.Key]:
|
||||
continue
|
||||
case attr.Key == "srcset":
|
||||
handleSrcSet(attr, baseURL)
|
||||
continue
|
||||
case strings.HasPrefix(attr.Val, "//"):
|
||||
handleProtocolRelativePath(attr, baseURL)
|
||||
continue
|
||||
case strings.HasPrefix(attr.Val, "/"):
|
||||
handleRootRelativePath(attr, baseURL)
|
||||
continue
|
||||
case strings.HasPrefix(attr.Val, "https://") || strings.HasPrefix(attr.Val, "http://"):
|
||||
handleAbsolutePath(attr, baseURL)
|
||||
continue
|
||||
default:
|
||||
handleDocumentRelativePath(attr, baseURL)
|
||||
continue
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// RewriteHTMLResourceURLs modifies HTTP responses
|
||||
// to rewrite URLs attributes in HTML content (such as src, href)
|
||||
// - `<img src='/relative_path'>` -> `<img src='/https://proxiedsite.com/relative_path'>`
|
||||
// - This function is designed to allow the proxified page
|
||||
// to still be browsible by routing all resource URLs through the proxy.
|
||||
//
|
||||
// ---
|
||||
//
|
||||
// - It works by replacing the io.ReadCloser of the http.Response.Body
|
||||
// with another io.ReaderCloser (HTMLResourceRewriter) that wraps the first one.
|
||||
//
|
||||
// - This process can be done multiple times, so that the response will
|
||||
// be streamed and modified through each pass without buffering the entire response in memory.
|
||||
//
|
||||
// - HTMLResourceRewriter reads the http.Response.Body stream,
|
||||
// parsing each HTML token one at a time and replacing attribute tags.
|
||||
//
|
||||
// - When ProxyChain.Execute() is called, the response body will be read from the server
|
||||
// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
|
||||
// without ever buffering the entire HTTP response in memory.
|
||||
func RewriteHTMLResourceURLs() proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// return early if it's not HTML
|
||||
// don't add rewriter if it's not even html
|
||||
ct := chain.Response.Header.Get("content-type")
|
||||
if !strings.HasPrefix(ct, "text/html") {
|
||||
return nil
|
||||
}
|
||||
|
||||
chain.Response.Body = NewHTMLResourceURLRewriter(chain.Response.Body, chain.Request.URL)
|
||||
// proxyURL is the URL of the ladder: http://localhost:8080 (ladder)
|
||||
originalURI := chain.Context.Request().URI()
|
||||
proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())
|
||||
|
||||
// the rewriting actually happens in chain.Execute() as the client is streaming the response body back
|
||||
rr := rewriters.NewHTMLTokenURLRewriter(chain.Request.URL, proxyURL)
|
||||
// we just queue it up here
|
||||
chain.AddHTMLTokenRewriter(rr)
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
// Runs the injected script once the DOM has been "idle", i.e. no child-list
// mutation was observed under document.body for `debounceDelay` milliseconds.
(() => {
  const debounceDelay = 500; // adjust the delay as needed

  function execute() {
    // NOTE(review): placeholder, presumably substituted with the script to run
    // before this file is served - confirm against the code that embeds it.
    'SCRIPT_CONTENT_PARAM'
    //console.log('DOM is now idle. Executing...');
  }

  function initIdleMutationObserver() {
    let debounceTimer;

    // (re)start the idle countdown
    const armTimer = () => {
      clearTimeout(debounceTimer);
      debounceTimer = setTimeout(() => {
        execute();
        observer.disconnect(); // Disconnect after first execution
      }, debounceDelay);
    };

    // every observed mutation pushes the deadline back
    const observer = new MutationObserver(armTimer);

    const config = { attributes: false, childList: true, subtree: true };
    observer.observe(document.body, config);

    // A page that never mutates after load is idle from the start; without
    // this initial countdown execute() would never run on such pages.
    armTimer();
  }

  // DOMContentLoaded does not fire again for listeners added after it fired.
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', () => {
      initIdleMutationObserver();
    });
  } else {
    initIdleMutationObserver();
  }
})();
|
||||
3
proxychain/responsemodifers/rewriters/css_rewriter.go
Normal file
3
proxychain/responsemodifers/rewriters/css_rewriter.go
Normal file
@@ -0,0 +1,3 @@
|
||||
package rewriters
|
||||
|
||||
// todo: implement
|
||||
131
proxychain/responsemodifers/rewriters/html_rewriter.go
Normal file
131
proxychain/responsemodifers/rewriters/html_rewriter.go
Normal file
@@ -0,0 +1,131 @@
|
||||
package rewriters
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// IHTMLTokenRewriter defines an interface for modifying HTML tokens.
|
||||
type IHTMLTokenRewriter interface {
|
||||
// ShouldModify determines whether a given HTML token requires modification.
|
||||
ShouldModify(*html.Token) bool
|
||||
|
||||
// ModifyToken applies modifications to a given HTML token.
|
||||
// It returns strings representing content to be prepended and
|
||||
// appended to the token. If no modifications are required or if an error occurs,
|
||||
// it returns empty strings for both 'prepend' and 'append'.
|
||||
// Note: The original token is not modified if an error occurs.
|
||||
ModifyToken(*html.Token) (prepend, append string)
|
||||
}
|
||||
|
||||
// HTMLRewriter is a struct that can take multiple TokenHandlers and process all
|
||||
// HTML tokens from http.Response.Body in a single pass, making changes and returning a new io.ReadCloser
|
||||
//
|
||||
// - HTMLRewriter reads the http.Response.Body stream,
|
||||
// parsing each HTML token one at a time and making modifications (defined by implementations of IHTMLTokenRewriter)
|
||||
// in a single pass of the tokenizer.
|
||||
//
|
||||
// - When ProxyChain.Execute() is called, the response body will be read from the server
|
||||
// and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
|
||||
// without ever buffering the entire HTTP response in memory.
|
||||
type HTMLRewriter struct {
|
||||
tokenizer *html.Tokenizer
|
||||
currentToken *html.Token
|
||||
tokenBuffer *bytes.Buffer
|
||||
currentTokenProcessed bool
|
||||
rewriters []IHTMLTokenRewriter
|
||||
}
|
||||
|
||||
// NewHTMLRewriter creates a new HTMLRewriter instance.
|
||||
// It processes HTML tokens from an io.ReadCloser source (typically http.Response.Body)
|
||||
// using a series of HTMLTokenRewriters. Each HTMLTokenRewriter in the 'rewriters' slice
|
||||
// applies its specific modifications to the HTML tokens.
|
||||
// The HTMLRewriter reads from the provided 'src', applies the modifications,
|
||||
// and returns the processed content as a new io.ReadCloser.
|
||||
// This new io.ReadCloser can be used to stream the modified content back to the client.
|
||||
//
|
||||
// Parameters:
|
||||
// - src: An io.ReadCloser representing the source of the HTML content, such as http.Response.Body.
|
||||
// - rewriters: A slice of HTMLTokenRewriters that define the modifications to be applied to the HTML tokens.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to an HTMLRewriter, which implements io.ReadCloser, containing the modified HTML content.
|
||||
func NewHTMLRewriter(src io.ReadCloser, rewriters []IHTMLTokenRewriter) *HTMLRewriter {
|
||||
return &HTMLRewriter{
|
||||
tokenizer: html.NewTokenizer(src),
|
||||
currentToken: nil,
|
||||
tokenBuffer: new(bytes.Buffer),
|
||||
currentTokenProcessed: false,
|
||||
rewriters: rewriters,
|
||||
}
|
||||
}
|
||||
|
||||
// Close resets the internal state of HTMLRewriter, clearing buffers and token data.
|
||||
func (r *HTMLRewriter) Close() error {
|
||||
r.tokenBuffer.Reset()
|
||||
r.currentToken = nil
|
||||
r.currentTokenProcessed = false
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read processes the HTML content, rewriting URLs and managing the state of tokens.
|
||||
func (r *HTMLRewriter) Read(p []byte) (int, error) {
|
||||
|
||||
if r.currentToken == nil || r.currentToken.Data == "" || r.currentTokenProcessed {
|
||||
tokenType := r.tokenizer.Next()
|
||||
|
||||
// done reading html, close out reader
|
||||
if tokenType == html.ErrorToken {
|
||||
if r.tokenizer.Err() == io.EOF {
|
||||
return 0, io.EOF
|
||||
}
|
||||
return 0, r.tokenizer.Err()
|
||||
}
|
||||
|
||||
// get the next token; reset buffer
|
||||
t := r.tokenizer.Token()
|
||||
r.currentToken = &t
|
||||
r.tokenBuffer.Reset()
|
||||
|
||||
// buffer += "<prepends> <token> <appends>"
|
||||
// process token through all registered rewriters
|
||||
// rewriters will modify the token, and optionally
|
||||
// return a <prepend> or <append> string token
|
||||
appends := make([]string, 0, len(r.rewriters))
|
||||
for _, rewriter := range r.rewriters {
|
||||
if !rewriter.ShouldModify(r.currentToken) {
|
||||
continue
|
||||
}
|
||||
prepend, a := rewriter.ModifyToken(r.currentToken)
|
||||
appends = append(appends, a)
|
||||
// add <prepends> to buffer
|
||||
r.tokenBuffer.WriteString(prepend)
|
||||
}
|
||||
|
||||
// add <token> to buffer
|
||||
if tokenType == html.TextToken {
|
||||
// don't unescape textTokens (such as inline scripts).
|
||||
// Token.String() by default will escape the inputs, but
|
||||
// we don't want to modify the original source
|
||||
r.tokenBuffer.WriteString(r.currentToken.Data)
|
||||
} else {
|
||||
r.tokenBuffer.WriteString(r.currentToken.String())
|
||||
}
|
||||
|
||||
// add <appends> to buffer
|
||||
for _, a := range appends {
|
||||
r.tokenBuffer.WriteString(a)
|
||||
}
|
||||
|
||||
r.currentTokenProcessed = false
|
||||
}
|
||||
|
||||
n, err := r.tokenBuffer.Read(p)
|
||||
if err == io.EOF || r.tokenBuffer.Len() == 0 {
|
||||
r.currentTokenProcessed = true
|
||||
err = nil // EOF in this context is expected and not an actual error
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
263
proxychain/responsemodifers/rewriters/html_token_url_rewriter.go
Normal file
263
proxychain/responsemodifers/rewriters/html_token_url_rewriter.go
Normal file
@@ -0,0 +1,263 @@
|
||||
package rewriters
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
var (
	// rewriteAttrs lists, per tag, the attributes which might contain URLs
	// and should be rewritten to point to the proxy instead.
	rewriteAttrs = map[string]map[string]bool{
		"img":        {"src": true, "srcset": true, "longdesc": true, "usemap": true},
		"a":          {"href": true},
		"form":       {"action": true},
		"link":       {"href": true, "manifest": true, "icon": true},
		"script":     {"src": true},
		"video":      {"src": true, "poster": true},
		"audio":      {"src": true},
		"iframe":     {"src": true, "longdesc": true},
		"embed":      {"src": true},
		"object":     {"data": true, "codebase": true},
		"source":     {"src": true, "srcset": true},
		"track":      {"src": true},
		"area":       {"href": true},
		"base":       {"href": true},
		"blockquote": {"cite": true},
		"del":        {"cite": true},
		"ins":        {"cite": true},
		"q":          {"cite": true},
		"body":       {"background": true},
		"button":     {"formaction": true},
		"input":      {"src": true, "formaction": true},
		"meta":       {"content": true},
	}

	// specialRewriteAttrs lists tag/attribute pairs that might contain a URL
	// but require special handling.
	specialRewriteAttrs = map[string]map[string]bool{
		"img":    {"srcset": true},
		"source": {"srcset": true},
		"meta":   {"content": true},
	}

	// schemeBlacklist lists URI schemes that are NOT rewritten,
	// for example: <img src="data:image/png;base64;iVBORw...">
	schemeBlacklist = map[string]bool{
		"data":       true,
		"tel":        true,
		"mailto":     true,
		"file":       true,
		"blob":       true,
		"javascript": true,
		"about":      true,
		"magnet":     true,
		"ws":         true,
		"wss":        true,
		"ftp":        true,
	}
)
|
||||
|
||||
// HTMLTokenURLRewriter is a token rewriter that points the URLs found in HTML
// attributes at the proxy.
// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
type HTMLTokenURLRewriter struct {
	// baseURL is the URL that references found in the document are rewritten against.
	baseURL *url.URL
	// proxyURL is the ladder URL, not the proxied site URL.
	proxyURL string
}

// NewHTMLTokenURLRewriter returns an HTMLTokenURLRewriter holding the given
// base URL and proxy (ladder) URL.
func NewHTMLTokenURLRewriter(baseURL *url.URL, proxyURL string) *HTMLTokenURLRewriter {
	rewriter := new(HTMLTokenURLRewriter)
	rewriter.baseURL = baseURL
	rewriter.proxyURL = proxyURL
	return rewriter
}
|
||||
|
||||
func (r *HTMLTokenURLRewriter) ShouldModify(token *html.Token) bool {
|
||||
attrLen := len(token.Attr)
|
||||
if attrLen == 0 {
|
||||
return false
|
||||
}
|
||||
if !(token.Type == html.StartTagToken || token.Type == html.SelfClosingTagToken) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (r *HTMLTokenURLRewriter) ModifyToken(token *html.Token) (string, string) {
|
||||
for i := range token.Attr {
|
||||
attr := &token.Attr[i]
|
||||
switch {
|
||||
// don't touch tag/attributes that don't contain URIs
|
||||
case !rewriteAttrs[token.Data][attr.Key]:
|
||||
continue
|
||||
// don't touch attributes with special URIs (like data:)
|
||||
case schemeBlacklist[strings.Split(attr.Key, ":")[0]]:
|
||||
continue
|
||||
// don't double-overwrite the url
|
||||
case strings.HasPrefix(attr.Val, r.proxyURL):
|
||||
continue
|
||||
case strings.HasPrefix(attr.Val, "/http://"):
|
||||
continue
|
||||
case strings.HasPrefix(attr.Val, "/https://"):
|
||||
continue
|
||||
// handle special rewrites
|
||||
case specialRewriteAttrs[token.Data][attr.Key]:
|
||||
r.handleSpecialAttr(token, attr, r.baseURL)
|
||||
continue
|
||||
default:
|
||||
// rewrite url
|
||||
handleURLPart(attr, r.baseURL)
|
||||
}
|
||||
}
|
||||
return "", ""
|
||||
}
|
||||
|
||||
// dispatcher for ModifyURL based on URI type
|
||||
func handleURLPart(attr *html.Attribute, baseURL *url.URL) {
|
||||
switch {
|
||||
case strings.HasPrefix(attr.Key, "//"):
|
||||
handleProtocolRelativePath(attr, baseURL)
|
||||
case strings.HasPrefix(attr.Key, "/"):
|
||||
handleRootRelativePath(attr, baseURL)
|
||||
case strings.HasPrefix(attr.Key, "https://"):
|
||||
handleAbsolutePath(attr, baseURL)
|
||||
case strings.HasPrefix(attr.Key, "http://"):
|
||||
handleAbsolutePath(attr, baseURL)
|
||||
default:
|
||||
handleDocumentRelativePath(attr, baseURL)
|
||||
}
|
||||
}
|
||||
|
||||
// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
|
||||
func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
attr.Val = strings.TrimPrefix(attr.Val, "/")
|
||||
handleRootRelativePath(attr, baseURL)
|
||||
log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// Root-relative URLs: These are relative to the root path and start with a "/".
|
||||
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
// doublecheck this is a valid relative URL
|
||||
log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
|
||||
_, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
//log.Printf("BASEURL patch: %s\n", baseURL)
|
||||
|
||||
attr.Val = fmt.Sprintf(
|
||||
"/%s://%s/%s",
|
||||
baseURL.Scheme,
|
||||
baseURL.Host,
|
||||
strings.TrimPrefix(attr.Val, "/"),
|
||||
)
|
||||
attr.Val = escape(attr.Val)
|
||||
attr.Val = fmt.Sprintf("/%s", attr.Val)
|
||||
|
||||
log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
|
||||
func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
|
||||
attr.Val = fmt.Sprintf(
|
||||
"%s://%s/%s%s",
|
||||
baseURL.Scheme,
|
||||
strings.Trim(baseURL.Host, "/"),
|
||||
strings.Trim(baseURL.RawPath, "/"),
|
||||
strings.Trim(attr.Val, "/"),
|
||||
)
|
||||
attr.Val = escape(attr.Val)
|
||||
attr.Val = fmt.Sprintf("/%s", attr.Val)
|
||||
log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// full URIs beginning with https?://proxiedsite.com
|
||||
func handleAbsolutePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
// check if valid URL
|
||||
log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
|
||||
u, err := url.Parse(attr.Val)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if !(u.Scheme == "http" || u.Scheme == "https") {
|
||||
return
|
||||
}
|
||||
attr.Val = fmt.Sprintf("/%s", escape(strings.TrimPrefix(attr.Val, "/")))
|
||||
log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// handle edge cases for special attributes
|
||||
func (r *HTMLTokenURLRewriter) handleSpecialAttr(token *html.Token, attr *html.Attribute, baseURL *url.URL) {
|
||||
switch {
|
||||
// srcset attribute doesn't contain a single URL but a comma-separated list of URLs, each potentially followed by a space and a descriptor (like a width, pixel density, or other conditions).
|
||||
case token.Data == "img" && attr.Key == "srcset":
|
||||
handleSrcSet(attr, baseURL)
|
||||
case token.Data == "source" && attr.Key == "srcset":
|
||||
handleSrcSet(attr, baseURL)
|
||||
// meta with http-equiv="refresh": The content attribute of a meta tag, when used for a refresh directive, contains a time interval followed by a URL, like content="5;url=http://example.com/".
|
||||
case token.Data == "meta" && attr.Key == "content" && regexp.MustCompile(`^\d+;url=`).MatchString(attr.Val):
|
||||
handleMetaRefresh(attr, baseURL)
|
||||
default:
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
func handleMetaRefresh(attr *html.Attribute, baseURL *url.URL) {
|
||||
sec := strings.Split(attr.Val, ";url=")[0]
|
||||
url := strings.Split(attr.Val, ";url=")[1]
|
||||
f := &html.Attribute{Val: url, Key: "src"}
|
||||
handleURLPart(f, baseURL)
|
||||
attr.Val = fmt.Sprintf("%s;url=%s", sec, url)
|
||||
}
|
||||
|
||||
func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
|
||||
var srcSetBuilder strings.Builder
|
||||
srcSetItems := strings.Split(attr.Val, ",")
|
||||
|
||||
for i, srcItem := range srcSetItems {
|
||||
srcParts := strings.Fields(srcItem) // Fields splits around whitespace, trimming them
|
||||
|
||||
if len(srcParts) == 0 {
|
||||
continue // skip empty items
|
||||
}
|
||||
|
||||
// rewrite each URL part by passing in fake attribute
|
||||
f := &html.Attribute{Val: srcParts[0], Key: "src"}
|
||||
handleURLPart(f, baseURL)
|
||||
urlPart := f.Key
|
||||
|
||||
// First srcset item without a descriptor
|
||||
if i == 0 && (len(srcParts) == 1 || !strings.HasSuffix(srcParts[1], "x")) {
|
||||
srcSetBuilder.WriteString(urlPart)
|
||||
} else {
|
||||
srcSetBuilder.WriteString(fmt.Sprintf("%s %s", urlPart, srcParts[1]))
|
||||
}
|
||||
|
||||
if i < len(srcSetItems)-1 {
|
||||
srcSetBuilder.WriteString(",") // Add comma for all but last item
|
||||
}
|
||||
}
|
||||
|
||||
attr.Val = srcSetBuilder.String()
|
||||
log.Printf("srcset url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// escape percent-encodes str as a URL path segment and then restores the
// forward slashes, so path separators stay readable while every other
// character that url.PathEscape encodes remains encoded.
func escape(str string) string {
	encoded := url.PathEscape(str)
	return strings.ReplaceAll(encoded, "%2F", "/")
}
|
||||
@@ -0,0 +1,91 @@
|
||||
package rewriters
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
// ScriptExecTime selects the point in the page lifecycle at which an injected
// script is executed.
type ScriptExecTime int

const (
	// BeforeDOMContentLoaded runs the script from a plain inline <script> tag.
	BeforeDOMContentLoaded ScriptExecTime = iota
	// AfterDOMContentLoaded runs the script from a DOMContentLoaded listener.
	AfterDOMContentLoaded
	// AfterDOMIdle runs the script through the embedded
	// after_dom_idle_script_injector.js template.
	AfterDOMIdle
)

// ScriptInjectorRewriter implements HTMLTokenRewriter.
// It injects JS into the page: while the HTML token stream is processed, the
// configured script is emitted at the <head> start tag, wrapped according to
// execTime.
type ScriptInjectorRewriter struct {
	// execTime decides how the script is wrapped before injection.
	execTime ScriptExecTime
	// script is the JS source to inject.
	script string
}
|
||||
|
||||
func (r *ScriptInjectorRewriter) ShouldModify(token *html.Token) bool {
|
||||
// modify if token == <head>
|
||||
return token.DataAtom == atom.Head && token.Type == html.StartTagToken
|
||||
}
|
||||
|
||||
//go:embed after_dom_idle_script_injector.js
|
||||
var afterDomIdleScriptInjector string
|
||||
|
||||
func (r *ScriptInjectorRewriter) ModifyToken(token *html.Token) (string, string) {
|
||||
switch {
|
||||
case r.execTime == BeforeDOMContentLoaded:
|
||||
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", r.script)
|
||||
|
||||
case r.execTime == AfterDOMContentLoaded:
|
||||
return "", fmt.Sprintf("\n<script>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.script)
|
||||
|
||||
case r.execTime == AfterDOMIdle:
|
||||
s := strings.Replace(afterDomIdleScriptInjector, `'SCRIPT_CONTENT_PARAM'`, r.script, 1)
|
||||
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", s)
|
||||
|
||||
default:
|
||||
return "", ""
|
||||
}
|
||||
}
|
||||
|
||||
// applies parameters by string replacement of the template script
|
||||
func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
|
||||
// Sort the keys by length in descending order
|
||||
keys := make([]string, 0, len(params))
|
||||
for key := range params {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
sort.Slice(keys, func(i, j int) bool {
|
||||
return len(keys[i]) > len(keys[j])
|
||||
})
|
||||
|
||||
for _, key := range keys {
|
||||
r.script = strings.ReplaceAll(r.script, key, params[key])
|
||||
}
|
||||
}
|
||||
|
||||
// NewScriptInjectorRewriter implements a HtmlTokenRewriter
|
||||
// and injects JS into the page for execution at a particular time
|
||||
func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
|
||||
return &ScriptInjectorRewriter{
|
||||
execTime: execTime,
|
||||
script: script,
|
||||
}
|
||||
}
|
||||
|
||||
// NewScriptInjectorRewriterWith implements a HtmlTokenRewriter
|
||||
// and injects JS into the page for execution at a particular time
|
||||
// accepting arguments into the script, which will be added via a string replace
|
||||
// the params map represents the key-value pair of the params.
|
||||
// the key will be string replaced with the value
|
||||
func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter {
|
||||
rr := &ScriptInjectorRewriter{
|
||||
execTime: execTime,
|
||||
script: script,
|
||||
}
|
||||
rr.applyParams(params)
|
||||
return rr
|
||||
}
|
||||
91
tests/package-lock.json
generated
Normal file
91
tests/package-lock.json
generated
Normal file
@@ -0,0 +1,91 @@
|
||||
{
|
||||
"name": "tests",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "tests",
|
||||
"version": "1.0.0",
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"@playwright/test": "^1.40.0",
|
||||
"@types/node": "^20.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@playwright/test": {
|
||||
"version": "1.40.0",
|
||||
"resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.40.0.tgz",
|
||||
"integrity": "sha512-PdW+kn4eV99iP5gxWNSDQCbhMaDVej+RXL5xr6t04nbKLCBwYtA046t7ofoczHOm8u6c+45hpDKQVZqtqwkeQg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"playwright": "1.40.0"
|
||||
},
|
||||
"bin": {
|
||||
"playwright": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "20.10.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.10.0.tgz",
|
||||
"integrity": "sha512-D0WfRmU9TQ8I9PFx9Yc+EBHw+vSpIub4IDvQivcp26PtPrdMGAq5SDcpXEo/epqa/DXotVpekHiLNTg3iaKXBQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~5.26.4"
|
||||
}
|
||||
},
|
||||
"node_modules/fsevents": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
|
||||
"integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/playwright": {
|
||||
"version": "1.40.0",
|
||||
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.40.0.tgz",
|
||||
"integrity": "sha512-gyHAgQjiDf1m34Xpwzaqb76KgfzYrhK7iih+2IzcOCoZWr/8ZqmdBw+t0RU85ZmfJMgtgAiNtBQ/KS2325INXw==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"playwright-core": "1.40.0"
|
||||
},
|
||||
"bin": {
|
||||
"playwright": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"fsevents": "2.3.2"
|
||||
}
|
||||
},
|
||||
"node_modules/playwright-core": {
|
||||
"version": "1.40.0",
|
||||
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.40.0.tgz",
|
||||
"integrity": "sha512-fvKewVJpGeca8t0ipM56jkVSU6Eo0RmFvQ/MaCQNDYm+sdvKkMBBWTE1FdeMqIdumRaXXjZChWHvIzCGM/tA/Q==",
|
||||
"dev": true,
|
||||
"bin": {
|
||||
"playwright-core": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "5.26.5",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
}
|
||||
14
tests/package.json
Normal file
14
tests/package.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "tests",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"@playwright/test": "^1.40.0",
|
||||
"@types/node": "^20.10.0"
|
||||
}
|
||||
}
|
||||
77
tests/playwright.config.ts
Normal file
77
tests/playwright.config.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
import { defineConfig, devices } from "@playwright/test";
|
||||
|
||||
/**
|
||||
* Read environment variables from file.
|
||||
* https://github.com/motdotla/dotenv
|
||||
*/
|
||||
// require('dotenv').config();
|
||||
|
||||
// True when running under CI (any non-empty CI environment variable).
const isCI = Boolean(process.env.CI);

/**
 * Playwright test configuration.
 * See https://playwright.dev/docs/test-configuration.
 */
export default defineConfig({
  // Directory, relative to this config file, that holds the spec files.
  testDir: "./tests",
  // Run tests in files in parallel.
  fullyParallel: true,
  // Fail the build on CI if test.only was accidentally left in the source code.
  forbidOnly: isCI,
  // Retry on CI only.
  retries: isCI ? 2 : 0,
  // Opt out of parallel tests on CI.
  workers: isCI ? 1 : undefined,
  // Reporter to use. See https://playwright.dev/docs/test-reporters
  reporter: "html",
  // Shared settings for all the projects below.
  // See https://playwright.dev/docs/api/class-testoptions.
  use: {
    // Base URL to use in actions like `await page.goto('/')`.
    // baseURL: 'http://127.0.0.1:3000',

    // Collect trace when retrying the failed test.
    // See https://playwright.dev/docs/trace-viewer
    trace: "on-first-retry",
  },

  // Configure projects for major browsers.
  projects: [
    {
      name: "chromium",
      use: { ...devices["Desktop Chrome"] },
    },
    /*
    {
      name: 'firefox',
      use: { ...devices['Desktop Firefox'] },
    },

    {
      name: 'webkit',
      use: { ...devices['Desktop Safari'] },
    },
    */

    // Test against mobile viewports.
    // {
    //   name: 'Mobile Chrome',
    //   use: { ...devices['Pixel 5'] },
    // },
    // {
    //   name: 'Mobile Safari',
    //   use: { ...devices['iPhone 12'] },
    // },

    // Test against branded browsers.
    // {
    //   name: 'Microsoft Edge',
    //   use: { ...devices['Desktop Edge'], channel: 'msedge' },
    // },
    // {
    //   name: 'Google Chrome',
    //   use: { ...devices['Desktop Chrome'], channel: 'chrome' },
    // },
  ],
  // Run your local dev server before starting the tests.
  // webServer: {
  //   command: 'npm run start',
  //   url: 'http://127.0.0.1:3000',
  //   reuseExistingServer: !process.env.CI,
  // },
});
|
||||
2
tests/run_test.sh
Normal file
2
tests/run_test.sh
Normal file
@@ -0,0 +1,2 @@
|
||||
#!/usr/bin/env bash
# Runs the Playwright test suite and then opens the HTML report.
# The report is shown even when tests fail, so failures can be inspected.
npx playwright test
npx playwright show-report
|
||||
18
tests/tests/www-wellandtribune-ca.spec.ts
Normal file
18
tests/tests/www-wellandtribune-ca.spec.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
import { expect, test } from "@playwright/test";

// Text shown by the site when the article is behind the paywall.
const paywallText = "This article is exclusive to subscribers.";
const articleURL =
  "https://www.wellandtribune.ca/news/niagara-region/niagara-transit-commission-rejects-council-request-to-reduce-its-budget-increase/article_e9fb424c-8df5-58ae-a6c3-3648e2a9df66.html";

// Address of the locally running Ladder proxy under test.
const ladderURL = "http://localhost:8080";
const domain = new URL(articleURL).host;

// Sanity check: the article really is paywalled when fetched directly.
test(`${domain} has paywall by default`, async ({ page }) => {
  await page.goto(articleURL);
  await expect(page.getByText(paywallText)).toBeVisible();
});

// Through Ladder the paywall notice must be absent or hidden.
test(`${domain} + Ladder doesn't have paywall`, async ({ page }) => {
  await page.goto(`${ladderURL}/${articleURL}`);
  await expect(page.getByText(paywallText)).toBeHidden();
});
|
||||
Reference in New Issue
Block a user