fix "modifer" -> "modifier" typo everywhere
This commit is contained in:
56
proxychain/responsemodifiers/api/error_api.go
Normal file
56
proxychain/responsemodifiers/api/error_api.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
// Error is the top-level JSON envelope returned to API clients when a
// request fails. Success is always false in an error response.
type Error struct {
	Success bool         `json:"success"`
	Error   ErrorDetails `json:"error"`
}

// ErrorDetails carries the outermost error message, the dynamic Go type
// of the error value, and the root cause found by unwrapping the chain.
type ErrorDetails struct {
	Message string `json:"message"`
	Type    string `json:"type"`
	Cause   string `json:"cause"`
}

// CreateAPIErrReader serializes err into the standard API error envelope
// (see Error) and returns it as an io.ReadCloser suitable for use as an
// HTTP response body. A nil err yields a generic placeholder envelope.
func CreateAPIErrReader(err error) io.ReadCloser {
	if err == nil {
		// Keep the schema identical to Error so clients can always
		// unmarshal the same shape. (Previously the "error" field was a
		// bare string here, which broke the documented schema.)
		return io.NopCloser(bytes.NewBufferString(
			`{"success":false,"error":{"message":"No error provided","type":"","cause":""}}`))
	}

	apiErr := Error{
		Success: false,
		Error: ErrorDetails{
			Message: err.Error(),
			Type:    reflect.TypeOf(err).String(),
			Cause:   getBaseError(err).Error(),
		},
	}

	// Serialize the envelope into JSON.
	jsonData, jsonErr := json.Marshal(apiErr)
	if jsonErr != nil {
		// Fallback must also match the Error schema.
		return io.NopCloser(bytes.NewBufferString(
			`{"success":false,"error":{"message":"Failed to serialize error","type":"","cause":""}}`))
	}

	// Return the JSON data as an io.ReadCloser.
	return io.NopCloser(bytes.NewBuffer(jsonData))
}

// getBaseError unwraps err repeatedly and returns the innermost (root)
// error in the chain; err itself if it wraps nothing.
func getBaseError(err error) error {
	for {
		unwrapped := errors.Unwrap(err)
		if unwrapped == nil {
			return err
		}
		err = unwrapped
	}
}
|
||||
174
proxychain/responsemodifiers/api/outline_api.go
Normal file
174
proxychain/responsemodifiers/api/outline_api.go
Normal file
@@ -0,0 +1,174 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"github.com/go-shiori/dom"
|
||||
"github.com/markusmobius/go-trafilatura"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// =======================================================================================
|
||||
// credit @joncrangle https://github.com/everywall/ladder/issues/38#issuecomment-1831252934
|
||||
|
||||
// ImageContent represents an <img> element extracted from the article.
type ImageContent struct {
	Type    string `json:"type"`
	URL     string `json:"url"`
	Alt     string `json:"alt"`
	Caption string `json:"caption"`
}

// LinkContent represents an <a> element: its destination and inner text.
type LinkContent struct {
	Type string `json:"type"`
	Href string `json:"href"`
	Data string `json:"data"`
}

// TextContent represents a text-bearing element (headings, paragraphs).
type TextContent struct {
	Type string `json:"type"`
	Data string `json:"data"`
}

// ListContent represents a <ul> or <ol> element and its <li> children.
type ListContent struct {
	Type      string            `json:"type"`
	ListItems []ListItemContent `json:"listItems"`
}

// ListItemContent holds the inner text of a single <li> element.
type ListItemContent struct {
	Data string `json:"data"`
}
|
||||
|
||||
type JSONDocument struct {
|
||||
Success bool `json:"success"`
|
||||
Error ErrorDetails `json:"error"`
|
||||
Metadata struct {
|
||||
Title string `json:"title"`
|
||||
Author string `json:"author"`
|
||||
URL string `json:"url"`
|
||||
Hostname string `json:"hostname"`
|
||||
Image string `json:"image"`
|
||||
Description string `json:"description"`
|
||||
Sitename string `json:"sitename"`
|
||||
Date string `json:"date"`
|
||||
Categories []string `json:"categories"`
|
||||
Tags []string `json:"tags"`
|
||||
License string `json:"license"`
|
||||
} `json:"metadata"`
|
||||
Content []interface{} `json:"content"`
|
||||
Comments string `json:"comments"`
|
||||
}
|
||||
|
||||
func ExtractResultToAPIResponse(extract *trafilatura.ExtractResult) *JSONDocument {
|
||||
jsonDoc := &JSONDocument{}
|
||||
|
||||
// Populate success
|
||||
jsonDoc.Success = true
|
||||
|
||||
// Populate metadata
|
||||
jsonDoc.Metadata.Title = extract.Metadata.Title
|
||||
jsonDoc.Metadata.Author = extract.Metadata.Author
|
||||
jsonDoc.Metadata.URL = extract.Metadata.URL
|
||||
jsonDoc.Metadata.Hostname = extract.Metadata.Hostname
|
||||
jsonDoc.Metadata.Description = extract.Metadata.Description
|
||||
jsonDoc.Metadata.Image = extract.Metadata.Image
|
||||
jsonDoc.Metadata.Sitename = extract.Metadata.Sitename
|
||||
jsonDoc.Metadata.Date = extract.Metadata.Date.Format("2006-01-02")
|
||||
jsonDoc.Metadata.Categories = extract.Metadata.Categories
|
||||
jsonDoc.Metadata.Tags = extract.Metadata.Tags
|
||||
jsonDoc.Metadata.License = extract.Metadata.License
|
||||
jsonDoc.Metadata.Hostname = extract.Metadata.Hostname
|
||||
|
||||
// Populate content
|
||||
if extract.ContentNode != nil {
|
||||
jsonDoc.Content = parseContent(extract.ContentNode)
|
||||
}
|
||||
|
||||
// Populate comments
|
||||
if extract.CommentsNode != nil {
|
||||
jsonDoc.Comments = dom.OuterHTML(extract.CommentsNode)
|
||||
}
|
||||
|
||||
return jsonDoc
|
||||
}
|
||||
|
||||
func parseContent(node *html.Node) []interface{} {
|
||||
var content []interface{}
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
switch child.Data {
|
||||
case "img":
|
||||
image := ImageContent{
|
||||
Type: "img",
|
||||
URL: dom.GetAttribute(child, "src"),
|
||||
Alt: dom.GetAttribute(child, "alt"),
|
||||
Caption: dom.GetAttribute(child, "caption"),
|
||||
}
|
||||
content = append(content, image)
|
||||
|
||||
case "a":
|
||||
link := LinkContent{
|
||||
Type: "a",
|
||||
Href: dom.GetAttribute(child, "href"),
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, link)
|
||||
|
||||
case "h1":
|
||||
text := TextContent{
|
||||
Type: "h1",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
|
||||
case "h2":
|
||||
text := TextContent{
|
||||
Type: "h2",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
|
||||
case "h3":
|
||||
text := TextContent{
|
||||
Type: "h3",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
|
||||
case "h4":
|
||||
text := TextContent{
|
||||
Type: "h4",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
|
||||
case "h5":
|
||||
text := TextContent{
|
||||
Type: "h5",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
|
||||
case "ul", "ol":
|
||||
list := ListContent{
|
||||
Type: child.Data,
|
||||
ListItems: []ListItemContent{},
|
||||
}
|
||||
for listItem := child.FirstChild; listItem != nil; listItem = listItem.NextSibling {
|
||||
if listItem.Data == "li" {
|
||||
listItemContent := ListItemContent{
|
||||
Data: dom.InnerText(listItem),
|
||||
}
|
||||
list.ListItems = append(list.ListItems, listItemContent)
|
||||
}
|
||||
}
|
||||
content = append(content, list)
|
||||
|
||||
default:
|
||||
text := TextContent{
|
||||
Type: "p",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
}
|
||||
}
|
||||
|
||||
return content
|
||||
}
|
||||
47
proxychain/responsemodifiers/api_content.go
Normal file
47
proxychain/responsemodifiers/api_content.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io"
|
||||
|
||||
"github.com/markusmobius/go-trafilatura"
|
||||
|
||||
"ladder/proxychain"
|
||||
"ladder/proxychain/responsemodifiers/api"
|
||||
)
|
||||
|
||||
// APIContent creates an JSON representation of the article and returns it as an API response.
|
||||
func APIContent() proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// we set content-type twice here, in case another response modifier
|
||||
// tries to forward over the original headers
|
||||
chain.Context.Set("content-type", "application/json")
|
||||
chain.Response.Header.Set("content-type", "application/json")
|
||||
|
||||
// extract dom contents
|
||||
opts := trafilatura.Options{
|
||||
IncludeImages: true,
|
||||
IncludeLinks: true,
|
||||
// FavorPrecision: true,
|
||||
FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go
|
||||
// implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability"
|
||||
OriginalURL: chain.Request.URL,
|
||||
}
|
||||
|
||||
result, err := trafilatura.Extract(chain.Response.Body, opts)
|
||||
if err != nil {
|
||||
chain.Response.Body = api.CreateAPIErrReader(err)
|
||||
return nil
|
||||
}
|
||||
|
||||
res := api.ExtractResultToAPIResponse(result)
|
||||
jsonData, err := json.MarshalIndent(res, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
chain.Response.Body = io.NopCloser(bytes.NewReader(jsonData))
|
||||
return nil
|
||||
}
|
||||
}
|
||||
70
proxychain/responsemodifiers/api_content_test.go
Normal file
70
proxychain/responsemodifiers/api_content_test.go
Normal file
@@ -0,0 +1,70 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/url"
|
||||
"testing"
|
||||
|
||||
"ladder/proxychain/responsemodifiers/api"
|
||||
)
|
||||
|
||||
func TestCreateAPIErrReader(t *testing.T) {
|
||||
_, baseErr := url.Parse("://this is an invalid url")
|
||||
wrappedErr := fmt.Errorf("wrapped error: %w", baseErr)
|
||||
|
||||
readCloser := api.CreateAPIErrReader(wrappedErr)
|
||||
defer readCloser.Close()
|
||||
|
||||
// Read and unmarshal the JSON output
|
||||
data, err := io.ReadAll(readCloser)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to read from ReadCloser: %v", err)
|
||||
}
|
||||
fmt.Println(string(data))
|
||||
|
||||
var apiErr api.Error
|
||||
err = json.Unmarshal(data, &apiErr)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal JSON: %v", err)
|
||||
}
|
||||
|
||||
// Verify the structure of the APIError
|
||||
if apiErr.Success {
|
||||
t.Errorf("Expected Success to be false, got true")
|
||||
}
|
||||
|
||||
if apiErr.Error.Message != wrappedErr.Error() {
|
||||
t.Errorf("Expected error message to be '%v', got '%v'", wrappedErr.Error(), apiErr.Error.Message)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateAPIErrReader2(t *testing.T) {
|
||||
_, baseErr := url.Parse("://this is an invalid url")
|
||||
|
||||
readCloser := api.CreateAPIErrReader(baseErr)
|
||||
defer readCloser.Close()
|
||||
|
||||
// Read and unmarshal the JSON output
|
||||
data, err := io.ReadAll(readCloser)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to read from ReadCloser: %v", err)
|
||||
}
|
||||
fmt.Println(string(data))
|
||||
|
||||
var apiErr api.Error
|
||||
err = json.Unmarshal(data, &apiErr)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal JSON: %v", err)
|
||||
}
|
||||
|
||||
// Verify the structure of the APIError
|
||||
if apiErr.Success {
|
||||
t.Errorf("Expected Success to be false, got true")
|
||||
}
|
||||
|
||||
if apiErr.Error.Message != baseErr.Error() {
|
||||
t.Errorf("Expected error message to be '%v', got '%v'", baseErr.Error(), apiErr.Error.Message)
|
||||
}
|
||||
}
|
||||
42
proxychain/responsemodifiers/block_element_removal.go
Normal file
42
proxychain/responsemodifiers/block_element_removal.go
Normal file
@@ -0,0 +1,42 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"strings"
|
||||
|
||||
"ladder/proxychain"
|
||||
"ladder/proxychain/responsemodifiers/rewriters"
|
||||
)
|
||||
|
||||
//go:embed block_element_removal.js
|
||||
var blockElementRemoval string
|
||||
|
||||
// BlockElementRemoval prevents paywall javascript from removing a
|
||||
// particular element by detecting the removal, then immediately reinserting it.
|
||||
// This is useful when a page will return a "fake" 404, after flashing the content briefly.
|
||||
// If the /outline/ API works, but the regular API doesn't, try this modifier.
|
||||
func BlockElementRemoval(cssSelector string) proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// don't add rewriter if it's not even html
|
||||
ct := chain.Response.Header.Get("content-type")
|
||||
if !strings.HasPrefix(ct, "text/html") {
|
||||
return nil
|
||||
}
|
||||
|
||||
params := map[string]string{
|
||||
// ie: "div.article-content"
|
||||
"{{CSS_SELECTOR}}": cssSelector,
|
||||
}
|
||||
|
||||
rr := rewriters.NewScriptInjectorRewriterWithParams(
|
||||
blockElementRemoval,
|
||||
rewriters.BeforeDOMContentLoaded,
|
||||
params,
|
||||
)
|
||||
|
||||
htmlRewriter := rewriters.NewHTMLRewriter(chain.Response.Body, rr)
|
||||
chain.Response.Body = htmlRewriter
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
35
proxychain/responsemodifiers/block_element_removal.js
Normal file
35
proxychain/responsemodifiers/block_element_removal.js
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
 * Monitors and restores specific DOM elements if they are removed.
 *
 * This self-invoking function creates a MutationObserver to watch for removal of elements matching
 * "{{CSS_SELECTOR}}". If such an element is removed, it logs the event and attempts to restore the
 * element after a 50ms delay. The restored element is reinserted at its original location or prepended
 * to the document body if the original location is unavailable.
 */
(function () {
  function handleMutation(mutationList) {
    for (const mutation of mutationList) {
      if (mutation.type !== "childList") continue;
      for (const node of Array.from(mutation.removedNodes)) {
        // Element nodes only (text nodes have no outerHTML).
        if (!node.outerHTML) continue;
        // Check the removed node itself as well as its descendants;
        // the original only used querySelector (descendants) and so
        // missed direct removal of the target element.
        const matchesSelf = node.matches && node.matches("{{CSS_SELECTOR}}");
        if (!matchesSelf && !node.querySelector("{{CSS_SELECTOR}}")) continue;
        // Log the actual selector instead of the hardcoded
        // 'article-content' string from the original.
        console.log(
          "proxychain: prevented removal of element matching '{{CSS_SELECTOR}}'",
        );
        console.log(node.outerHTML);
        setTimeout(() => {
          let e = document.querySelector("{{CSS_SELECTOR}}");
          if (e != null) {
            e.replaceWith(node);
          } else {
            document.body.prepend(node);
          }
        }, 50);
      }
    }
  }

  const observer = new MutationObserver(handleMutation);
  observer.observe(document, { childList: true, subtree: true });
})();
|
||||
21
proxychain/responsemodifiers/bypass_cors.go
Normal file
21
proxychain/responsemodifiers/bypass_cors.go
Normal file
@@ -0,0 +1,21 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// BypassCORS modifies response headers to prevent the browser
|
||||
// from enforcing any CORS restrictions. This should run at the end of the chain.
|
||||
func BypassCORS() proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
chain.AddOnceResponseModifications(
|
||||
SetResponseHeader("Access-Control-Allow-Origin", "*"),
|
||||
SetResponseHeader("Access-Control-Expose-Headers", "*"),
|
||||
SetResponseHeader("Access-Control-Allow-Credentials", "true"),
|
||||
SetResponseHeader("Access-Control-Allow-Methods", "GET, PUT, POST, DELETE, HEAD, OPTIONS, PATCH"),
|
||||
SetResponseHeader("Access-Control-Allow-Headers", "*"),
|
||||
DeleteResponseHeader("X-Frame-Options"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
30
proxychain/responsemodifiers/bypass_csp.go
Normal file
30
proxychain/responsemodifiers/bypass_csp.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// TODO: handle edge case where CSP is specified in meta tag:
|
||||
// <meta http-equiv="Content-Security-Policy" content="default-src 'self'">
|
||||
|
||||
// BypassContentSecurityPolicy modifies response headers to prevent the browser
|
||||
// from enforcing any CSP restrictions. This should run at the end of the chain.
|
||||
func BypassContentSecurityPolicy() proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
chain.AddOnceResponseModifications(
|
||||
DeleteResponseHeader("Content-Security-Policy"),
|
||||
DeleteResponseHeader("Content-Security-Policy-Report-Only"),
|
||||
DeleteResponseHeader("X-Content-Security-Policy"),
|
||||
DeleteResponseHeader("X-WebKit-CSP"),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// SetContentSecurityPolicy modifies response headers to a specific CSP
|
||||
func SetContentSecurityPolicy(csp string) proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
chain.Response.Header.Set("Content-Security-Policy", csp)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
53
proxychain/responsemodifiers/forward_response_headers.go
Normal file
53
proxychain/responsemodifiers/forward_response_headers.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// forwardBlacklist lists (lowercased) transport/hop-by-hop headers that
// must not be forwarded from the upstream response to the client.
// Initialized with a composite literal instead of the original init()
// function — static data needs no init side effects.
var forwardBlacklist = map[string]bool{
	"content-length":            true,
	"content-encoding":          true,
	"transfer-encoding":         true,
	"strict-transport-security": true,
	"connection":                true,
	"keep-alive":                true,
}
|
||||
|
||||
// ForwardResponseHeaders forwards the response headers from the upstream server to the client
|
||||
func ForwardResponseHeaders() proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// fmt.Println(chain.Response.Header)
|
||||
for uname, headers := range chain.Response.Header {
|
||||
name := strings.ToLower(uname)
|
||||
if forwardBlacklist[name] {
|
||||
continue
|
||||
}
|
||||
|
||||
// patch location header to forward to proxy instead
|
||||
if name == "location" {
|
||||
u, err := url.Parse(chain.Context.BaseURL())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
newLocation := fmt.Sprintf("%s://%s/%s", u.Scheme, u.Host, headers[0])
|
||||
chain.Context.Set("location", newLocation)
|
||||
}
|
||||
|
||||
// forward headers
|
||||
for _, value := range headers {
|
||||
fmt.Println(name, value)
|
||||
chain.Context.Set(name, value)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
189
proxychain/responsemodifiers/generate_readable_outline.go
Normal file
189
proxychain/responsemodifiers/generate_readable_outline.go
Normal file
@@ -0,0 +1,189 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"embed"
|
||||
"fmt"
|
||||
"html/template"
|
||||
"io"
|
||||
"log"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"ladder/proxychain"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
|
||||
//"github.com/go-shiori/dom"
|
||||
"github.com/markusmobius/go-trafilatura"
|
||||
)
|
||||
|
||||
//go:embed generate_readable_outline.html
|
||||
var templateFS embed.FS
|
||||
|
||||
// GenerateReadableOutline creates an reader-friendly distilled representation of the article.
|
||||
// This is a reliable way of bypassing soft-paywalled articles, where the content is hidden, but still present in the DOM.
|
||||
func GenerateReadableOutline() proxychain.ResponseModification {
|
||||
// get template only once, and resuse for subsequent calls
|
||||
f := "generate_readable_outline.html"
|
||||
tmpl, err := template.ParseFS(templateFS, f)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("tx.GenerateReadableOutline Error: %s not found", f))
|
||||
}
|
||||
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// ===========================================================
|
||||
// 1. extract dom contents using reading mode algo
|
||||
// ===========================================================
|
||||
opts := trafilatura.Options{
|
||||
IncludeImages: false,
|
||||
IncludeLinks: true,
|
||||
FavorRecall: true,
|
||||
Deduplicate: true,
|
||||
FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go
|
||||
// implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability"
|
||||
OriginalURL: chain.Request.URL,
|
||||
}
|
||||
|
||||
extract, err := trafilatura.Extract(chain.Response.Body, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// 2. render generate_readable_outline.html template using metadata from step 1
|
||||
// ============================================================================
|
||||
|
||||
// render DOM to string without H1 title
|
||||
removeFirstH1(extract.ContentNode)
|
||||
// rewrite all links to stay on /outline/ path
|
||||
rewriteHrefLinks(extract.ContentNode, chain.Context.BaseURL(), chain.APIPrefix)
|
||||
var b bytes.Buffer
|
||||
html.Render(&b, extract.ContentNode)
|
||||
distilledHTML := b.String()
|
||||
|
||||
// populate template parameters
|
||||
data := map[string]interface{}{
|
||||
"Success": true,
|
||||
"Image": extract.Metadata.Image,
|
||||
"Description": extract.Metadata.Description,
|
||||
"Sitename": extract.Metadata.Sitename,
|
||||
"Hostname": extract.Metadata.Hostname,
|
||||
"Url": "/" + chain.Request.URL.String(),
|
||||
"Title": extract.Metadata.Title, // todo: modify CreateReadableDocument so we don't have <h1> titles duplicated?
|
||||
"Date": extract.Metadata.Date.String(),
|
||||
"Author": createWikipediaSearchLinks(extract.Metadata.Author),
|
||||
//"Author": extract.Metadata.Author,
|
||||
"Body": distilledHTML,
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// 3. queue sending the response back to the client by replacing the response body
|
||||
// (the response body will be read as a stream in proxychain.Execute() later on.)
|
||||
// ============================================================================
|
||||
pr, pw := io.Pipe() // pipe io.writer contents into io.reader
|
||||
|
||||
// Use a goroutine for writing to the pipe so we don't deadlock the request
|
||||
go func() {
|
||||
defer pw.Close()
|
||||
|
||||
err := tmpl.Execute(pw, data) // <- render template
|
||||
if err != nil {
|
||||
log.Printf("WARN: GenerateReadableOutline template rendering error: %s\n", err)
|
||||
}
|
||||
}()
|
||||
|
||||
chain.Context.Set("content-type", "text/html")
|
||||
chain.Response.Body = pr // <- replace response body reader with our new reader from pipe
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================
|
||||
// DOM Rendering helpers
|
||||
// =============================================
|
||||
|
||||
func removeFirstH1(n *html.Node) {
|
||||
var recurse func(*html.Node) bool
|
||||
recurse = func(n *html.Node) bool {
|
||||
if n.Type == html.ElementNode && n.DataAtom == atom.H1 {
|
||||
return true // Found the first H1, return true to stop
|
||||
}
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if recurse(c) {
|
||||
n.RemoveChild(c)
|
||||
return false // Removed first H1, no need to continue
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
recurse(n)
|
||||
}
|
||||
|
||||
func rewriteHrefLinks(n *html.Node, baseURL string, apiPath string) {
|
||||
u, err := url.Parse(baseURL)
|
||||
if err != nil {
|
||||
log.Printf("GenerateReadableOutline :: rewriteHrefLinks error - %s\n", err)
|
||||
}
|
||||
apiPath = strings.Trim(apiPath, "/")
|
||||
proxyURL := fmt.Sprintf("%s://%s", u.Scheme, u.Host)
|
||||
newProxyURL := fmt.Sprintf("%s/%s", proxyURL, apiPath)
|
||||
|
||||
var recurse func(*html.Node) bool
|
||||
recurse = func(n *html.Node) bool {
|
||||
if n.Type == html.ElementNode && n.DataAtom == atom.A {
|
||||
for i := range n.Attr {
|
||||
attr := n.Attr[i]
|
||||
if attr.Key != "href" {
|
||||
continue
|
||||
}
|
||||
// rewrite url on a.href: http://localhost:8080/https://example.com -> http://localhost:8080/outline/https://example.com
|
||||
attr.Val = strings.Replace(attr.Val, proxyURL, newProxyURL, 1)
|
||||
// rewrite relative URLs too
|
||||
if strings.HasPrefix(attr.Val, "/") {
|
||||
attr.Val = fmt.Sprintf("/%s%s", apiPath, attr.Val)
|
||||
}
|
||||
n.Attr[i].Val = attr.Val
|
||||
log.Println(attr.Val)
|
||||
}
|
||||
}
|
||||
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
recurse(c)
|
||||
}
|
||||
return false
|
||||
}
|
||||
recurse(n)
|
||||
}
|
||||
|
||||
// createWikipediaSearchLinks takes in comma- or semicolon-separated terms,
// then turns each term into an <a> link searching Wikipedia for it.
// A comma is appended to the last link of each semicolon-separated group
// except the final one.
func createWikipediaSearchLinks(searchTerms string) string {
	semiColonSplit := strings.Split(searchTerms, ";")

	var links []string
	for i, termGroup := range semiColonSplit {
		for _, term := range strings.Split(termGroup, ",") {
			trimmedTerm := strings.TrimSpace(term)
			if trimmedTerm == "" {
				continue
			}

			encodedTerm := url.QueryEscape(trimmedTerm)
			wikiURL := fmt.Sprintf("https://en.wikipedia.org/w/index.php?search=%s", encodedTerm)
			links = append(links, fmt.Sprintf("<a href=\"%s\">%s</a>", wikiURL, trimmedTerm))
		}

		// If it's not the last group, add a comma to the last link.
		// Guard len(links) > 0: a leading empty group (e.g. ";author")
		// used to index links[-1] and panic.
		if len(links) > 0 && i < len(semiColonSplit)-1 {
			links[len(links)-1] = links[len(links)-1] + ","
		}
	}

	return strings.Join(links, " ")
}
|
||||
380
proxychain/responsemodifiers/generate_readable_outline.html
Normal file
380
proxychain/responsemodifiers/generate_readable_outline.html
Normal file
@@ -0,0 +1,380 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<link rel="stylesheet" href="/styles.css" />
|
||||
<script src="/script.js" defer></script>
|
||||
<script>
|
||||
const handleThemeChange = () => {
|
||||
let theme = localStorage.getItem("theme");
|
||||
if (theme === null) {
|
||||
localStorage.setItem("theme", "system");
|
||||
theme = "system";
|
||||
}
|
||||
if (
|
||||
theme === "dark" ||
|
||||
(theme === "system" &&
|
||||
window.matchMedia("(prefers-color-scheme: dark)").matches)
|
||||
) {
|
||||
document.documentElement.classList.add("dark");
|
||||
} else {
|
||||
document.documentElement.classList.remove("dark");
|
||||
}
|
||||
};
|
||||
handleThemeChange();
|
||||
</script>
|
||||
<title>ladder | {{.Title}}</title>
|
||||
</head>
|
||||
|
||||
<body
|
||||
class="antialiased bg-white dark:bg-slate-900 text-slate-900 dark:text-slate-200"
|
||||
>
|
||||
<div class="flex flex-col gap-4 max-w-3xl mx-4 lg:mx-auto pt-10">
|
||||
<div class="flex justify-between place-items-center">
|
||||
<div
|
||||
class="hover:drop-shadow-[0_0px_4px_rgba(122,167,209,.3)] transition-colors duration-300 focus:outline-none focus:ring focus:border-[#7AA7D1] ring-offset-2"
|
||||
>
|
||||
|
||||
<div class="flex">
|
||||
<a
|
||||
href="/"
|
||||
class="flex -ml-2 h-8 font-extrabold tracking-tight hover:no-underline focus:outline-none focus:ring focus:border-[#7AA7D1] ring-offset-2"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
viewBox="0 0 512 512"
|
||||
class="h-8 focus:outline-none focus:ring focus:border-[#7AA7D1] ring-offset-2"
|
||||
>
|
||||
<path
|
||||
fill="#7AA7D1"
|
||||
d="M262.074 485.246C254.809 485.265 247.407 485.534 240.165 484.99L226.178 483.306C119.737 468.826 34.1354 383.43 25.3176 274.714C24.3655 262.975 23.5876 253.161 24.3295 241.148C31.4284 126.212 123.985 31.919 238.633 24.1259L250.022 23.8366C258.02 23.8001 266.212 23.491 274.183 24.1306C320.519 27.8489 366.348 45.9743 402.232 75.4548L416.996 88.2751C444.342 114.373 464.257 146.819 475.911 182.72L480.415 197.211C486.174 219.054 488.67 242.773 487.436 265.259L486.416 275.75C478.783 352.041 436.405 418.1 369.36 455.394L355.463 462.875C326.247 477.031 294.517 484.631 262.074 485.246ZM253.547 72.4475C161.905 73.0454 83.5901 144.289 73.0095 234.5C69.9101 260.926 74.7763 292.594 83.9003 317.156C104.53 372.691 153.9 416.616 211.281 430.903C226.663 434.733 242.223 436.307 258.044 436.227C353.394 435.507 430.296 361.835 438.445 267.978C439.794 252.442 438.591 236.759 435.59 221.5C419.554 139.955 353.067 79.4187 269.856 72.7052C264.479 72.2714 258.981 72.423 253.586 72.4127L253.547 72.4475Z"
|
||||
/>
|
||||
<path
|
||||
fill="#7AA7D1"
|
||||
d="M153.196 310.121L133.153 285.021C140.83 283.798 148.978 285.092 156.741 284.353L156.637 277.725L124.406 278.002C123.298 277.325 122.856 276.187 122.058 275.193L116.089 267.862C110.469 260.975 103.827 254.843 98.6026 247.669C103.918 246.839 105.248 246.537 111.14 246.523L129.093 246.327C130.152 238.785 128.62 240.843 122.138 240.758C111.929 240.623 110.659 242.014 105.004 234.661L97.9953 225.654C94.8172 221.729 91.2219 218.104 88.2631 214.005C84.1351 208.286 90.1658 209.504 94.601 209.489L236.752 209.545C257.761 209.569 268.184 211.009 285.766 221.678L285.835 206.051C285.837 197.542 286.201 189.141 284.549 180.748C280.22 158.757 260.541 143.877 240.897 135.739C238.055 134.561 232.259 133.654 235.575 129.851C244.784 119.288 263.680 111.990 277.085 111.105C288.697 109.828 301.096 113.537 311.75 117.703C360.649 136.827 393.225 183.042 398.561 234.866C402.204 270.253 391.733 308.356 367.999 335.1C332.832 374.727 269.877 384.883 223.294 360.397C206.156 351.388 183.673 333.299 175.08 316.6C173.511 313.551 174.005 313.555 170.443 313.52L160.641 313.449C158.957 313.435 156.263 314.031 155.122 312.487L153.196 310.121Z"
|
||||
/>
|
||||
</svg>
|
||||
</a>
|
||||
<a
|
||||
href="/https://{{.Hostname}}"
|
||||
class="flex ml-1 h-8 font-extrabold tracking-tight hover:no-underline focus:outline-none focus:ring focus:border-[#7AA7D1] ring-offset-2"
|
||||
>
|
||||
<span class="text-3xl mr-1 text-[#7AA7D1] leading-8 align-middle">{{.Sitename}}</span>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="flex justify-center z-10">
|
||||
<div class="relative" id="dropdown">
|
||||
<button
|
||||
aria-expanded="false"
|
||||
onclick="toggleDropdown()"
|
||||
type="button"
|
||||
class="inline-flex items-center justify-center whitespace-nowrap rounded-full h-12 px-4 py-2 text-sm font-medium text-slate-600 dark:text-slate-400 ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 bg-white dark:bg-slate-900 hover:bg-slate-200 dark:hover:bg-slate-700 hover:text-slate-500 dark:hover:text-slate-200"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 24 24"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
stroke-width="2"
|
||||
stroke-linecap="round"
|
||||
stroke-linejoin="round"
|
||||
class="h-5 w-5"
|
||||
>
|
||||
<path
|
||||
d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.39a2 2 0 0 0-.73-2.73l-.15-.08a2 2 0 0 1-1-1.74v-.5a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z"
|
||||
/>
|
||||
<circle cx="12" cy="12" r="3" />
|
||||
</svg>
|
||||
</button>
|
||||
|
||||
<div
|
||||
id="dropdown_panel"
|
||||
class="hidden absolute right-0 mt-2 w-52 rounded-md bg-white dark:bg-slate-900 shadow-md border border-slate-400 dark:border-slate-700"
|
||||
>
|
||||
<div
|
||||
class="flex flex-col gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm"
|
||||
>
|
||||
Font family
|
||||
<div class="grid grid-cols-2 gap-2">
|
||||
<div>
|
||||
<input
|
||||
type="radio"
|
||||
name="font"
|
||||
id="sans-serif"
|
||||
value="sans-serif"
|
||||
class="peer hidden"
|
||||
checked
|
||||
/>
|
||||
<label
|
||||
for="sans-serif"
|
||||
tabindex="0"
|
||||
class="flex items-center justify-center h-10 cursor-pointer select-none rounded-md p-2 text-sm font-sans text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
|
||||
>Sans-serif</label
|
||||
>
|
||||
</div>
|
||||
<div>
|
||||
<input
|
||||
type="radio"
|
||||
name="font"
|
||||
id="serif"
|
||||
value="serif"
|
||||
class="peer hidden"
|
||||
/>
|
||||
<label
|
||||
for="serif"
|
||||
tabindex="0"
|
||||
class="flex items-center justify-center h-10 cursor-pointer select-none rounded-md p-2 text-sm font-serif text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
|
||||
>Serif</label
|
||||
>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div
|
||||
class="shrink-0 bg-slate-400 dark:bg-slate-700 h-[1px] w-full"
|
||||
></div>
|
||||
<div
|
||||
class="flex flex-col gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm"
|
||||
>
|
||||
Font size
|
||||
<div class="grid grid-cols-4 gap-2">
|
||||
<div>
|
||||
<input
|
||||
type="radio"
|
||||
name="fontsize"
|
||||
id="sm"
|
||||
value="text-sm"
|
||||
class="peer hidden"
|
||||
/>
|
||||
<label
|
||||
for="sm"
|
||||
tabindex="0"
|
||||
title="Small"
|
||||
class="flex items-end justify-center h-10 w-10 cursor-pointer select-none rounded-md p-2 text-sm text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
|
||||
>sm</label
|
||||
>
|
||||
</div>
|
||||
<div>
|
||||
<input
|
||||
type="radio"
|
||||
name="fontsize"
|
||||
id="base"
|
||||
value="text-base"
|
||||
class="peer hidden"
|
||||
checked
|
||||
/>
|
||||
<label
|
||||
for="base"
|
||||
tabindex="0"
|
||||
title="Medium"
|
||||
class="flex items-end justify-center h-10 w-10 cursor-pointer select-none rounded-md p-2 text-base text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
|
||||
>md</label
|
||||
>
|
||||
</div>
|
||||
<div>
|
||||
<input
|
||||
type="radio"
|
||||
name="fontsize"
|
||||
id="lg"
|
||||
value="text-lg"
|
||||
class="peer hidden"
|
||||
/>
|
||||
<label
|
||||
for="lg"
|
||||
tabindex="0"
|
||||
title="Large"
|
||||
class="flex items-end justify-center h-10 w-10 cursor-pointer select-none rounded-md p-2 text-lg text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
|
||||
>lg</label
|
||||
>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div
|
||||
class="shrink-0 bg-slate-200 dark:bg-slate-700 h-[1px] w-full"
|
||||
></div>
|
||||
<div
|
||||
class="flex flex-col gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm"
|
||||
>
|
||||
Appearance
|
||||
<div class="grid grid-cols-4 gap-2">
|
||||
<div>
|
||||
<input
|
||||
type="radio"
|
||||
name="theme"
|
||||
id="light"
|
||||
value="light"
|
||||
class="peer hidden"
|
||||
/>
|
||||
<label
|
||||
for="light"
|
||||
tabindex="0"
|
||||
title="Light"
|
||||
class="flex items-end justify-center h-10 w-10 cursor-pointer select-none rounded-md p-2 text-sm text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 24 24"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
stroke-width="2"
|
||||
stroke-linecap="round"
|
||||
stroke-linejoin="round"
|
||||
class="h-5 w-5"
|
||||
>
|
||||
<circle cx="12" cy="12" r="4" />
|
||||
<path d="M12 2v2" />
|
||||
<path d="M12 20v2" />
|
||||
<path d="m4.93 4.93 1.41 1.41" />
|
||||
<path d="m17.66 17.66 1.41 1.41" />
|
||||
<path d="M2 12h2" />
|
||||
<path d="M20 12h2" />
|
||||
<path d="m6.34 17.66-1.41 1.41" />
|
||||
<path d="m19.07 4.93-1.41 1.41" />
|
||||
</svg>
|
||||
</label>
|
||||
</div>
|
||||
<div>
|
||||
<input
|
||||
type="radio"
|
||||
name="theme"
|
||||
id="dark"
|
||||
value="dark"
|
||||
class="peer hidden"
|
||||
/>
|
||||
<label
|
||||
for="dark"
|
||||
tabindex="0"
|
||||
title="Dark"
|
||||
class="flex items-end justify-center h-10 w-10 cursor-pointer select-none rounded-md p-2 text-base text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="24"
|
||||
height="24"
|
||||
viewBox="0 0 24 24"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
stroke-width="2"
|
||||
stroke-linecap="round"
|
||||
stroke-linejoin="round"
|
||||
class="h-5 w-5"
|
||||
>
|
||||
<path d="M12 3a6 6 0 0 0 9 9 9 9 0 1 1-9-9Z" />
|
||||
</svg>
|
||||
</label>
|
||||
</div>
|
||||
<div>
|
||||
<input
|
||||
type="radio"
|
||||
name="theme"
|
||||
id="system"
|
||||
value="system"
|
||||
class="peer hidden"
|
||||
checked
|
||||
/>
|
||||
<label
|
||||
for="system"
|
||||
tabindex="0"
|
||||
title="System preference"
|
||||
class="flex items-end justify-center h-10 w-10 cursor-pointer select-none rounded-md p-2 text-lg text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="24"
|
||||
height="24"
|
||||
viewBox="0 0 24 24"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
stroke-width="2"
|
||||
stroke-linecap="round"
|
||||
stroke-linejoin="round"
|
||||
class="h-5 w-5"
|
||||
>
|
||||
<path d="M12 8a2.83 2.83 0 0 0 4 4 4 4 0 1 1-4-4" />
|
||||
<path d="M12 2v2" />
|
||||
<path d="M12 20v2" />
|
||||
<path d="m4.9 4.9 1.4 1.4" />
|
||||
<path d="m17.7 17.7 1.4 1.4" />
|
||||
<path d="M2 12h2" />
|
||||
<path d="M20 12h2" />
|
||||
<path d="m6.3 17.7-1.4 1.4" />
|
||||
<path d="m19.1 4.9-1.4 1.4" />
|
||||
</svg>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<main class="flex flex-col space-y-3">
|
||||
{{if not .Success}}
|
||||
<h1>
|
||||
Error
|
||||
</h1>
|
||||
<p>
|
||||
There was a problem querying
|
||||
<a href="{{.Params}}">{{.Params}}</a>
|
||||
</p>
|
||||
<code class="text-red-500 dark:text-red-400">
|
||||
{{.Error}}
|
||||
</code>
|
||||
{{else}}
|
||||
<div class="flex flex-col gap-1 mt-3">
|
||||
<h1>
|
||||
<a href="{{.Url}}" class="text-slate-900 dark:text-slate-200"> {{.Title}} </a>
|
||||
</h1>
|
||||
{{if ne .Date ""}}
|
||||
<small
|
||||
class="text-sm font-medium leading-none text-slate-600 dark:text-slate-400"
|
||||
>{{.Date}}</small
|
||||
>
|
||||
{{end}}
|
||||
{{if ne .Author ""}}
|
||||
<small
|
||||
class="text-sm font-medium leading-none text-slate-600 dark:text-slate-400"
|
||||
>{{.Author}}</small
|
||||
>
|
||||
{{end}}
|
||||
</div>
|
||||
|
||||
<div class="flex flex-col space-y-3">
|
||||
<div>
|
||||
<div class="grid grid-cols-1 justify-items-center">
|
||||
<div><img src="{{.Image}}" alt="{{.Description}}" class="h-auto w-auto object-cover max-w-full mx-auto rounded-md shadow-md dark:shadow-slate-700"/></div>
|
||||
<div class="mt-2 text-sm text-slate-600 dark:text-slate-400">{{.Description}}</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>{{.Body}}</div>
|
||||
{{end}}
|
||||
</main>
|
||||
|
||||
<div class="my-2"></div>
|
||||
<footer class="mx-4 text-center text-slate-600 dark:text-slate-400">
|
||||
<p>
|
||||
Code Licensed Under GPL v3.0 |
|
||||
<a
|
||||
href="https://github.com/everywall/ladder"
|
||||
class="hover:text-blue-500 dark:hover:text-blue-500 hover:underline underline-offset-2 transition-colors duration-300"
|
||||
>View Source</a
|
||||
>
|
||||
|
|
||||
<a
|
||||
href="https://github.com/everywall"
|
||||
class="hover:text-blue-500 dark:hover:text-blue-500 hover:underline underline-offset-2 transition-colors duration-300"
|
||||
>Everywall</a
|
||||
>
|
||||
</p>
|
||||
</footer>
|
||||
<div class="my-2"></div>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
41
proxychain/responsemodifiers/inject_script.go
Normal file
41
proxychain/responsemodifiers/inject_script.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"strings"
|
||||
|
||||
"ladder/proxychain"
|
||||
"ladder/proxychain/responsemodifiers/rewriters"
|
||||
)
|
||||
|
||||
// injectScript modifies HTTP responses
|
||||
// to execute javascript at a particular time.
|
||||
func injectScript(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// don't add rewriter if it's not even html
|
||||
ct := chain.Response.Header.Get("content-type")
|
||||
if !strings.HasPrefix(ct, "text/html") {
|
||||
return nil
|
||||
}
|
||||
|
||||
rr := rewriters.NewScriptInjectorRewriter(js, execTime)
|
||||
htmlRewriter := rewriters.NewHTMLRewriter(chain.Response.Body, rr)
|
||||
chain.Response.Body = htmlRewriter
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// InjectScriptBeforeDOMContentLoaded modifies HTTP responses to inject a JS before DOM Content is loaded (script tag in head).
// Thin wrapper around injectScript with the rewriters.BeforeDOMContentLoaded execution time.
func InjectScriptBeforeDOMContentLoaded(js string) proxychain.ResponseModification {
	return injectScript(js, rewriters.BeforeDOMContentLoaded)
}
|
||||
|
||||
// InjectScriptAfterDOMContentLoaded modifies HTTP responses to inject a JS after DOM Content is loaded (script tag in head).
// Thin wrapper around injectScript with the rewriters.AfterDOMContentLoaded execution time.
func InjectScriptAfterDOMContentLoaded(js string) proxychain.ResponseModification {
	return injectScript(js, rewriters.AfterDOMContentLoaded)
}
|
||||
|
||||
// InjectScriptAfterDOMIdle modifies HTTP responses to inject a JS after the DOM is idle (ie: js framework loaded).
// Thin wrapper around injectScript with the rewriters.AfterDOMIdle execution time.
func InjectScriptAfterDOMIdle(js string) proxychain.ResponseModification {
	return injectScript(js, rewriters.AfterDOMIdle)
}
|
||||
107
proxychain/responsemodifiers/modify_incoming_cookies.go
Normal file
107
proxychain/responsemodifiers/modify_incoming_cookies.go
Normal file
@@ -0,0 +1,107 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
http "github.com/bogdanfinn/fhttp"
|
||||
//"net/http"
|
||||
//http "github.com/Danny-Dasilva/fhttp"
|
||||
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// DeleteIncomingCookies prevents ALL cookies from being sent from the proxy server
// back down to the client.
// The ignored variadic parameter presumably keeps the signature parallel to
// DeleteIncomingCookiesExcept — TODO confirm against call sites.
func DeleteIncomingCookies(_ ...string) proxychain.ResponseModification {
	return func(px *proxychain.ProxyChain) error {
		// Dropping the Set-Cookie response header removes every cookie at once.
		px.Response.Header.Del("Set-Cookie")
		return nil
	}
}
|
||||
|
||||
// DeleteIncomingCookiesExcept prevents non-whitelisted cookies from being sent from the proxy server
|
||||
// to the client. Cookies whose names are in the whitelist are not removed.
|
||||
func DeleteIncomingCookiesExcept(whitelist ...string) proxychain.ResponseModification {
|
||||
return func(px *proxychain.ProxyChain) error {
|
||||
// Convert whitelist slice to a map for efficient lookups
|
||||
whitelistMap := make(map[string]struct{})
|
||||
for _, cookieName := range whitelist {
|
||||
whitelistMap[cookieName] = struct{}{}
|
||||
}
|
||||
|
||||
// If the response has no cookies, return early
|
||||
if px.Response.Header == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Filter the cookies in the response
|
||||
filteredCookies := []string{}
|
||||
for _, cookieStr := range px.Response.Header["Set-Cookie"] {
|
||||
cookie := parseCookie(cookieStr)
|
||||
|
||||
if _, found := whitelistMap[cookie.Name]; found {
|
||||
filteredCookies = append(filteredCookies, cookieStr)
|
||||
}
|
||||
}
|
||||
|
||||
// Update the Set-Cookie header with the filtered cookies
|
||||
if len(filteredCookies) > 0 {
|
||||
px.Response.Header["Set-Cookie"] = filteredCookies
|
||||
} else {
|
||||
px.Response.Header.Del("Set-Cookie")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// parseCookie parses a single Set-Cookie header value and returns the
// resulting http.Cookie. A value that yields no parseable cookie returns
// an empty (zero-value) cookie rather than panicking.
func parseCookie(cookieStr string) *http.Cookie {
	header := http.Header{}
	header.Add("Set-Cookie", cookieStr)
	// Set-Cookie is a *response* header, so it must be parsed through
	// http.Response.Cookies. (The previous http.Request.Cookies call reads
	// the "Cookie" request header, which always came back empty here and
	// made the [0] index panic.)
	response := http.Response{Header: header}
	cookies := response.Cookies()
	if len(cookies) == 0 {
		return &http.Cookie{}
	}
	return cookies[0]
}
|
||||
|
||||
// SetIncomingCookies adds a raw cookie string being sent from the proxy server down to the client.
// Note: Header.Set replaces any Set-Cookie values already present on the
// response with this single value.
func SetIncomingCookies(cookies string) proxychain.ResponseModification {
	return func(px *proxychain.ProxyChain) error {
		px.Response.Header.Set("Set-Cookie", cookies)
		return nil
	}
}
|
||||
|
||||
// SetIncomingCookie modifies a specific cookie in the response from the proxy server to the client.
|
||||
func SetIncomingCookie(name string, val string) proxychain.ResponseModification {
|
||||
return func(px *proxychain.ProxyChain) error {
|
||||
if px.Response.Header == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
updatedCookies := []string{}
|
||||
found := false
|
||||
|
||||
// Iterate over existing cookies and modify the one that matches the cookieName
|
||||
for _, cookieStr := range px.Response.Header["Set-Cookie"] {
|
||||
cookie := parseCookie(cookieStr)
|
||||
if cookie.Name == name {
|
||||
// Replace the cookie with the new value
|
||||
updatedCookies = append(updatedCookies, fmt.Sprintf("%s=%s", name, val))
|
||||
found = true
|
||||
} else {
|
||||
// Keep the cookie as is
|
||||
updatedCookies = append(updatedCookies, cookieStr)
|
||||
}
|
||||
}
|
||||
|
||||
// If the specified cookie wasn't found, add it
|
||||
if !found {
|
||||
updatedCookies = append(updatedCookies, fmt.Sprintf("%s=%s", name, val))
|
||||
}
|
||||
|
||||
// Update the Set-Cookie header
|
||||
px.Response.Header["Set-Cookie"] = updatedCookies
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
21
proxychain/responsemodifiers/modify_response_header.go
Normal file
21
proxychain/responsemodifiers/modify_response_header.go
Normal file
@@ -0,0 +1,21 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
"ladder/proxychain"
|
||||
)
|
||||
|
||||
// SetResponseHeader modifies response headers from the upstream server,
// setting key to value on the outgoing response (replacing any existing
// values for that key).
func SetResponseHeader(key string, value string) proxychain.ResponseModification {
	return func(px *proxychain.ProxyChain) error {
		px.Context.Response().Header.Set(key, value)
		return nil
	}
}
|
||||
|
||||
// DeleteResponseHeader removes response headers from the upstream server,
// deleting every value stored under key on the outgoing response.
func DeleteResponseHeader(key string) proxychain.ResponseModification {
	return func(px *proxychain.ProxyChain) error {
		px.Context.Response().Header.Del(key)
		return nil
	}
}
|
||||
56
proxychain/responsemodifiers/patch_dynamic_resource_urls.go
Normal file
56
proxychain/responsemodifiers/patch_dynamic_resource_urls.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"ladder/proxychain"
|
||||
"ladder/proxychain/responsemodifiers/rewriters"
|
||||
)
|
||||
|
||||
// patchDynamicResourceURLsScript holds the client-side JS payload embedded
// from patch_dynamic_resource_urls.js at build time.
//
//go:embed patch_dynamic_resource_urls.js
var patchDynamicResourceURLsScript string
|
||||
|
||||
// PatchDynamicResourceURLs patches the javascript runtime to rewrite URLs client-side.
|
||||
// - This function is designed to allow the proxified page
|
||||
// to still be browsible by routing all resource URLs through the proxy.
|
||||
// - Native APIs capable of network requests will be hooked
|
||||
// and the URLs arguments modified to point to the proxy instead.
|
||||
// - fetch('/relative_path') -> fetch('/https://proxiedsite.com/relative_path')
|
||||
// - Element.setAttribute('src', "/assets/img.jpg") -> Element.setAttribute('src', "/https://proxiedsite.com/assets/img.jpg") -> fetch('/https://proxiedsite.com/relative_path')
|
||||
func PatchDynamicResourceURLs() proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// don't add rewriter if it's not even html
|
||||
ct := chain.Response.Header.Get("content-type")
|
||||
if !strings.HasPrefix(ct, "text/html") {
|
||||
return nil
|
||||
}
|
||||
|
||||
// this is the original URL sent by client:
|
||||
// http://localhost:8080/http://proxiedsite.com/foo/bar
|
||||
originalURI := chain.Context.Request().URI()
|
||||
|
||||
// this is the extracted URL that the client requests to proxy
|
||||
// http://proxiedsite.com/foo/bar
|
||||
reqURL := chain.Request.URL
|
||||
|
||||
params := map[string]string{
|
||||
// ie: http://localhost:8080
|
||||
"{{PROXY_ORIGIN}}": fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host()),
|
||||
// ie: http://proxiedsite.com
|
||||
"{{ORIGIN}}": fmt.Sprintf("%s://%s", reqURL.Scheme, reqURL.Host),
|
||||
}
|
||||
|
||||
rr := rewriters.NewScriptInjectorRewriterWithParams(
|
||||
patchDynamicResourceURLsScript,
|
||||
rewriters.BeforeDOMContentLoaded,
|
||||
params,
|
||||
)
|
||||
|
||||
htmlRewriter := rewriters.NewHTMLRewriter(chain.Response.Body, rr)
|
||||
chain.Response.Body = htmlRewriter
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
366
proxychain/responsemodifiers/patch_dynamic_resource_urls.js
Normal file
366
proxychain/responsemodifiers/patch_dynamic_resource_urls.js
Normal file
@@ -0,0 +1,366 @@
|
||||
// Overrides the global fetch and XMLHttpRequest open methods to modify the request URLs.
|
||||
// Also overrides the attribute setter prototype to modify the request URLs
|
||||
// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
|
||||
(() => {
|
||||
// ============== PARAMS ===========================
|
||||
// if the original request was: http://localhost:8080/http://proxiedsite.com/foo/bar
|
||||
// proxyOrigin is http://localhost:8080
|
||||
const proxyOrigin = "{{PROXY_ORIGIN}}";
|
||||
//const proxyOrigin = globalThis.window.location.origin;
|
||||
|
||||
// if the original request was: http://localhost:8080/http://proxiedsite.com/foo/bar
|
||||
// origin is http://proxiedsite.com
|
||||
const origin = "{{ORIGIN}}";
|
||||
//const origin = (new URL(decodeURIComponent(globalThis.window.location.pathname.substring(1)))).origin
|
||||
// ============== END PARAMS ======================
|
||||
|
||||
const blacklistedSchemes = [
|
||||
"ftp:",
|
||||
"mailto:",
|
||||
"tel:",
|
||||
"file:",
|
||||
"blob:",
|
||||
"javascript:",
|
||||
"about:",
|
||||
"magnet:",
|
||||
"ws:",
|
||||
"wss:",
|
||||
];
|
||||
|
||||
// rewriteURL maps a URL that page JavaScript is about to request onto its
// proxied equivalent. Accepts a string, URL, or Request object and returns
// a string. Blacklisted-scheme, invalid, and already-proxied URLs are
// returned unchanged.
function rewriteURL(url) {
  if (!url) return url;

  // fetch url might be string, url, or request object;
  // handle all three by downcasting to string.
  if (typeof url !== "string") {
    // (was an implicit global `x = ...`; now properly scoped)
    const asString = String(url);
    url = asString === "[object Request]" ? url.url : asString;
  }

  const oldUrl = url;

  // don't rewrite special URIs
  // (was `blacklistedSchemes.includes(url)`, which compared the whole URL
  // against scheme strings and therefore never matched a real URL;
  // a prefix check is what was intended)
  if (blacklistedSchemes.some((s) => url.startsWith(s))) return url;

  // don't rewrite invalid URIs
  try {
    new URL(url, origin);
  } catch {
    return url;
  }

  // don't double rewrite
  if (url.startsWith(`${proxyOrigin}/http://`)) return url;
  if (url.startsWith(`${proxyOrigin}/https://`)) return url;
  if (url.startsWith(`/${proxyOrigin}`)) return url;
  if (url.startsWith(`/${origin}`)) return url;
  if (url.startsWith(`/http://`)) return url;
  if (url.startsWith(`/https://`)) return url;
  if (url.startsWith(`/http%3A%2F%2F`)) return url;
  if (url.startsWith(`/https%3A%2F%2F`)) return url;
  if (url.startsWith(`/%2Fhttp`)) return url;

  let scheme = origin.split(":")[0];

  if (url.startsWith("//")) {
    // protocol-relative URL: borrow the upstream origin's scheme
    url = `/${scheme}://${encodeURIComponent(url.substring(2))}`;
  } else if (url.startsWith("/")) {
    // site-relative URL: anchor it to the upstream origin
    url = `/${origin}/${encodeURIComponent(url.substring(1))}`;
  } else if (
    url.startsWith(proxyOrigin) && !url.startsWith(`${proxyOrigin}/http`)
  ) {
    // edge case where client js uses current url host to write an absolute path;
    // splice the upstream origin into the proxied URL.
    // (was `"".replace(...)`, which always produced an empty string)
    url = url.replace(proxyOrigin, `${proxyOrigin}/${origin}`);
  } else if (url.startsWith(origin)) {
    url = `/${encodeURIComponent(url)}`;
  } else if (url.startsWith("http://") || url.startsWith("https://")) {
    url = `/${proxyOrigin}/${encodeURIComponent(url)}`;
  }
  console.log(`proxychain: rewrite JS URL: ${oldUrl} -> ${url}`);
  return url;
}
|
||||
|
||||
/*
|
||||
// sometimes anti-bot protections like cloudflare or akamai bot manager check if JS is hooked
|
||||
function hideMonkeyPatch(objectOrName, method, originalToString) {
|
||||
let obj;
|
||||
let isGlobalFunction = false;
|
||||
|
||||
if (typeof objectOrName === "string") {
|
||||
obj = globalThis[objectOrName];
|
||||
isGlobalFunction = (typeof obj === "function") &&
|
||||
(method === objectOrName);
|
||||
} else {
|
||||
obj = objectOrName;
|
||||
}
|
||||
|
||||
if (isGlobalFunction) {
|
||||
const originalFunction = obj;
|
||||
globalThis[objectOrName] = function(...args) {
|
||||
return originalFunction.apply(this, args);
|
||||
};
|
||||
globalThis[objectOrName].toString = () => originalToString;
|
||||
} else if (obj && typeof obj[method] === "function") {
|
||||
const originalMethod = obj[method];
|
||||
obj[method] = function(...args) {
|
||||
return originalMethod.apply(this, args);
|
||||
};
|
||||
obj[method].toString = () => originalToString;
|
||||
} else {
|
||||
console.warn(
|
||||
`proxychain: cannot hide monkey patch: ${method} is not a function on the provided object.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
*/
|
||||
// hideMonkeyPatch is deliberately a no-op: the real implementation (kept
// above, commented out) re-wrapped patched functions and faked their
// toString() output so anti-bot checks can't detect the hooks.
// NOTE(review): all three parameters are unused while this is stubbed out.
function hideMonkeyPatch(objectOrName, method, originalToString) {
  return;
}
|
||||
|
||||
// monkey patch fetch
// Wraps the global fetch so every request URL passes through rewriteURL
// (and therefore the proxy) before dispatch; init options pass through.
const oldFetch = fetch;
fetch = async (url, init) => {
  return oldFetch(rewriteURL(url), init);
};
hideMonkeyPatch("fetch", "fetch", "function fetch() { [native code] }");
|
||||
|
||||
// monkey patch xmlhttprequest
// Rewrites the URL argument of XMLHttpRequest.open; every other argument
// is forwarded unchanged.
const oldOpen = XMLHttpRequest.prototype.open;
XMLHttpRequest.prototype.open = function (
  method,
  url,
  async = true,
  user = null,
  password = null,
) {
  return oldOpen.call(this, method, rewriteURL(url), async, user, password);
};
hideMonkeyPatch(
  XMLHttpRequest.prototype,
  "open",
  'function(){if("function"==typeof eo)return eo.apply(this,arguments)}',
);
|
||||
|
||||
// monkey patch xmlhttprequest send
// XMLHttpRequest.send takes only an optional request body — the URL was
// already rewritten in open(). The previous wrapper declared a
// (method, url) signature, which shifted the body into `method` and ran
// rewriteURL on undefined. The wrapper is kept (as a passthrough) so the
// function still presents as patched alongside open().
const oldSend = XMLHttpRequest.prototype.send;
XMLHttpRequest.prototype.send = function (body) {
  return oldSend.call(this, body);
};
hideMonkeyPatch(
  XMLHttpRequest.prototype,
  "send",
  'function(){if("function"==typeof eo)return eo.apply(this,arguments)}',
);
|
||||
|
||||
// monkey patch service worker registration
// Ensures service worker scripts are fetched through the proxy by
// rewriting the scriptURL before registration.
const oldRegister = ServiceWorkerContainer.prototype.register;
ServiceWorkerContainer.prototype.register = function (scriptURL, options) {
  return oldRegister.call(this, rewriteURL(scriptURL), options);
};
hideMonkeyPatch(
  ServiceWorkerContainer.prototype,
  "register",
  "function register() { [native code] }",
);
|
||||
|
||||
// monkey patch URL.toString() method
// Stringifying a URL object yields the proxied form of the URL.
const oldToString = URL.prototype.toString;
URL.prototype.toString = function () {
  let originalURL = oldToString.call(this);
  return rewriteURL(originalURL);
};
hideMonkeyPatch(
  URL.prototype,
  "toString",
  "function toString() { [native code] }",
);
|
||||
|
||||
// monkey patch URL.toJSON() method
// (previously this block re-patched toString a second time by mistake,
// double-wrapping it and leaving toJSON unhooked)
const oldToJson = URL.prototype.toJSON;
URL.prototype.toJSON = function () {
  let originalURL = oldToJson.call(this);
  return rewriteURL(originalURL);
};
hideMonkeyPatch(
  URL.prototype,
  "toJSON",
  "function toJSON() { [native code] }",
);
|
||||
|
||||
// Monkey patch URL.href getter and setter
// Both directions pass through rewriteURL: reads return the proxied form
// and writes store the proxied form.
const originalHrefDescriptor = Object.getOwnPropertyDescriptor(
  URL.prototype,
  "href",
);
Object.defineProperty(URL.prototype, "href", {
  get: function () {
    let originalHref = originalHrefDescriptor.get.call(this);
    return rewriteURL(originalHref);
  },
  set: function (newValue) {
    originalHrefDescriptor.set.call(this, rewriteURL(newValue));
  },
});
|
||||
|
||||
// TODO: do one more pass of this by manually traversing the DOM
// AFTER all the JS and page has loaded just in case

// Monkey patch setter
// Table of (tag, attribute) pairs whose property setters get hooked below,
// so URL-bearing attributes are rewritten whenever scripts assign them.
const elements = [
  { tag: "a", attribute: "href" },
  { tag: "img", attribute: "src" },
  // { tag: 'img', attribute: 'srcset' }, // TODO: handle srcset
  { tag: "script", attribute: "src" },
  { tag: "link", attribute: "href" },
  { tag: "link", attribute: "icon" },
  { tag: "iframe", attribute: "src" },
  { tag: "audio", attribute: "src" },
  { tag: "video", attribute: "src" },
  { tag: "source", attribute: "src" },
  // { tag: 'source', attribute: 'srcset' }, // TODO: handle srcset
  { tag: "embed", attribute: "src" },
  { tag: "embed", attribute: "pluginspage" },
  { tag: "html", attribute: "manifest" },
  { tag: "object", attribute: "src" },
  { tag: "input", attribute: "src" },
  { tag: "track", attribute: "src" },
  { tag: "form", attribute: "action" },
  { tag: "area", attribute: "href" },
  { tag: "base", attribute: "href" },
  { tag: "blockquote", attribute: "cite" },
  { tag: "del", attribute: "cite" },
  { tag: "ins", attribute: "cite" },
  { tag: "q", attribute: "cite" },
  { tag: "button", attribute: "formaction" },
  { tag: "input", attribute: "formaction" },
  { tag: "meta", attribute: "content" },
  { tag: "object", attribute: "data" },
];
|
||||
|
||||
// Patch the property setter/getter (e.g. HTMLImageElement.prototype.src)
// for every tag/attribute pair in `elements`, so direct property
// assignments and reads are rewritten through the proxy.
elements.forEach(({ tag, attribute }) => {
  const proto = document.createElement(tag).constructor.prototype;
  const descriptor = Object.getOwnPropertyDescriptor(proto, attribute);
  if (descriptor && descriptor.set) {
    Object.defineProperty(proto, attribute, {
      ...descriptor,
      set(value) {
        // calling rewriteURL will end up calling a setter for href,
        // leading to a recursive loop and a Maximum call stack size exceeded
        // error, so we guard against this with a local semaphore flag
        const isRewritingSetKey = Symbol.for("isRewritingSet");
        if (!this[isRewritingSetKey]) {
          this[isRewritingSetKey] = true;
          descriptor.set.call(this, rewriteURL(value));
          //descriptor.set.call(this, value);
          this[isRewritingSetKey] = false;
        } else {
          // Directly set the value without rewriting
          descriptor.set.call(this, value);
        }
      },
      get() {
        // same re-entrancy guard as the setter above
        const isRewritingGetKey = Symbol.for("isRewritingGet");
        if (!this[isRewritingGetKey]) {
          this[isRewritingGetKey] = true;
          let oldURL = descriptor.get.call(this);
          let newURL = rewriteURL(oldURL);
          this[isRewritingGetKey] = false;
          return newURL;
        } else {
          return descriptor.get.call(this);
        }
      },
    });
  }
});
|
||||
|
||||
// Monkey-patch Element.setAttribute so attribute writes listed in the
// `elements` table (URL-bearing tag/attribute pairs) are routed through
// the proxy before being stored.
const originalSetAttribute = Element.prototype.setAttribute;
Element.prototype.setAttribute = function (name, value) {
  const tagName = this.tagName.toLowerCase();
  const attrName = name.toLowerCase();
  const needsRewrite = elements.some(
    (e) => e.tag === tagName && e.attribute === attrName,
  );
  originalSetAttribute.call(
    this,
    name,
    needsRewrite ? rewriteURL(value) : value,
  );
};
|
||||
|
||||
// sometimes, libraries will set the Element.innerHTML or Element.outerHTML directly with a string instead of setters.
// in this case, we intercept it, create a fake DOM, parse it and then rewrite all attributes that could
// contain a URL. Then we return the replacement innerHTML/outerHTML with redirected links.
function rewriteInnerHTML(html, elements) {
  const isRewritingHTMLKey = Symbol.for("isRewritingHTML");

  // Check if already processing (re-entrancy guard: assigning
  // tempContainer.innerHTML below would otherwise recurse through the
  // patched innerHTML setter).
  if (document[isRewritingHTMLKey]) {
    return html;
  }

  const tempContainer = document.createElement("div");
  document[isRewritingHTMLKey] = true;

  try {
    tempContainer.innerHTML = html;

    // Create a map for quick lookup
    // NOTE(review): a Map keeps only one attribute per tag, so tags listed
    // twice in `elements` (e.g. input src/formaction) keep the last entry
    // only — confirm intended.
    const elementsMap = new Map(elements.map((e) => [e.tag, e.attribute]));

    // Loop-based DOM traversal
    const nodes = [...tempContainer.querySelectorAll("*")];
    for (const node of nodes) {
      const attribute = elementsMap.get(node.tagName.toLowerCase());
      if (attribute && node.hasAttribute(attribute)) {
        const originalUrl = node.getAttribute(attribute);
        const rewrittenUrl = rewriteURL(originalUrl);
        node.setAttribute(attribute, rewrittenUrl);
      }
    }

    return tempContainer.innerHTML;
  } finally {
    // Clear the flag
    document[isRewritingHTMLKey] = false;
  }
}
|
||||
|
||||
// Store original setters
const originalSetters = {};

// Patch innerHTML/outerHTML so string assignments have their URL-bearing
// attributes rewritten via rewriteInnerHTML before reaching the real setter.
["innerHTML", "outerHTML"].forEach((property) => {
  const descriptor = Object.getOwnPropertyDescriptor(
    Element.prototype,
    property,
  );
  if (descriptor && descriptor.set) {
    originalSetters[property] = descriptor.set;

    Object.defineProperty(Element.prototype, property, {
      ...descriptor,
      set(value) {
        const isRewritingHTMLKey = Symbol.for("isRewritingHTML");
        if (!this[isRewritingHTMLKey]) {
          this[isRewritingHTMLKey] = true;
          try {
            // Use custom logic
            descriptor.set.call(this, rewriteInnerHTML(value, elements));
          } finally {
            this[isRewritingHTMLKey] = false;
          }
        } else {
          // Use original setter in recursive call
          originalSetters[property].call(this, value);
        }
      },
    });
  }
});
|
||||
})();
|
||||
33
proxychain/responsemodifiers/patch_google_analytics.go
Normal file
33
proxychain/responsemodifiers/patch_google_analytics.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"io"
|
||||
"ladder/proxychain"
|
||||
"strings"
|
||||
)
|
||||
|
||||
//go:embed patch_google_analytics.js
|
||||
var gaPatch string
|
||||
|
||||
// PatchGoogleAnalytics replaces any request to google analytics with a no-op stub function.
|
||||
// Some sites will not display content until GA is loaded, so we fake one instead.
|
||||
// Credit to Raymond Hill @ github.com/gorhill/uBlock
|
||||
func PatchGoogleAnalytics() proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
|
||||
// preflight check
|
||||
isGADomain := chain.Request.URL.Host == "www.google-analytics.com" || chain.Request.URL.Host == "google-analytics.com"
|
||||
isGAPath := strings.HasSuffix(chain.Request.URL.Path, "analytics.js")
|
||||
if !(isGADomain || isGAPath) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// send modified js payload to client containing
|
||||
// stub functions from patch_google_analytics.js
|
||||
gaPatchReader := io.NopCloser(strings.NewReader(gaPatch))
|
||||
chain.Response.Body = gaPatchReader
|
||||
chain.Context.Set("content-type", "text/javascript")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
109
proxychain/responsemodifiers/patch_google_analytics.js
Normal file
109
proxychain/responsemodifiers/patch_google_analytics.js
Normal file
@@ -0,0 +1,109 @@
|
||||
// uBlock Origin - a browser extension to block requests.
|
||||
// Copyright (C) 2019-present Raymond Hill
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see {http://www.gnu.org/licenses/}.
|
||||
//
|
||||
// Home: https://github.com/gorhill/uBlock
|
||||
|
||||
// Vendored from uBlock Origin (see license header above): a drop-in stub
// for analytics.js that preserves the public `ga` API surface while doing
// nothing, and drains any queued calls so hitCallback-style code still runs.
(function() {
  "use strict";
  // https://developers.google.com/analytics/devguides/collection/analyticsjs/
  const noopfn = function() {
  };
  //
  const Tracker = function() {
  };
  const p = Tracker.prototype;
  p.get = noopfn;
  p.set = noopfn;
  p.send = noopfn;
  //
  const w = window;
  const gaName = w.GoogleAnalyticsObject || "ga";
  const gaQueue = w[gaName];
  // https://github.com/uBlockOrigin/uAssets/pull/4115
  const ga = function() {
    const len = arguments.length;
    if (len === 0) return;
    const args = Array.from(arguments);
    let fn;
    let a = args[len - 1];
    if (a instanceof Object && a.hitCallback instanceof Function) {
      fn = a.hitCallback;
    } else if (a instanceof Function) {
      fn = () => {
        a(ga.create());
      };
    } else {
      const pos = args.indexOf("hitCallback");
      if (pos !== -1 && args[pos + 1] instanceof Function) {
        fn = args[pos + 1];
      }
    }
    if (fn instanceof Function === false) return;
    try {
      fn();
    } catch (ex) {
    }
  };
  ga.create = function() {
    return new Tracker();
  };
  ga.getByName = function() {
    return new Tracker();
  };
  ga.getAll = function() {
    return [new Tracker()];
  };
  ga.remove = noopfn;
  // https://github.com/uBlockOrigin/uAssets/issues/2107
  ga.loaded = true;
  w[gaName] = ga;
  // https://github.com/gorhill/uBlock/issues/3075
  const dl = w.dataLayer;
  if (dl instanceof Object) {
    if (dl.hide instanceof Object && typeof dl.hide.end === "function") {
      dl.hide.end();
      dl.hide.end = () => { };
    }
    if (typeof dl.push === "function") {
      const doCallback = function(item) {
        if (item instanceof Object === false) return;
        if (typeof item.eventCallback !== "function") return;
        setTimeout(item.eventCallback, 1);
        item.eventCallback = () => { };
      };
      dl.push = new Proxy(dl.push, {
        apply: function(target, thisArg, args) {
          doCallback(args[0]);
          return Reflect.apply(target, thisArg, args);
        },
      });
      if (Array.isArray(dl)) {
        const q = dl.slice();
        for (const item of q) {
          doCallback(item);
        }
      }
    }
  }
  // empty ga queue
  if (gaQueue instanceof Function && Array.isArray(gaQueue.q)) {
    const q = gaQueue.q.slice();
    gaQueue.q.length = 0;
    for (const entry of q) {
      ga(...entry);
    }
  }
})();
|
||||
100
proxychain/responsemodifiers/patch_tracker_scripts.go
Normal file
100
proxychain/responsemodifiers/patch_tracker_scripts.go
Normal file
@@ -0,0 +1,100 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"ladder/proxychain"
|
||||
"log"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
//go:embed vendor/ddg-tracker-surrogates/mapping.json
var mappingJSON []byte

//go:embed vendor/ddg-tracker-surrogates/surrogates/*
var surrogateFS embed.FS

// rules holds the parsed mapping.json contents; populated once in init().
var rules domainRules
||||
|
||||
func init() {
|
||||
err := json.Unmarshal([]byte(mappingJSON), &rules)
|
||||
if err != nil {
|
||||
log.Printf("[ERROR]: PatchTrackerScripts: failed to deserialize ladder/proxychain/responsemodifiers/vendor/ddg-tracker-surrogates/mapping.json")
|
||||
}
|
||||
}
|
||||
|
||||
// mapping.json schema
|
||||
type rule struct {
|
||||
RegexRule *regexp.Regexp `json:"regexRule"`
|
||||
Surrogate string `json:"surrogate"`
|
||||
Action string `json:"action,omitempty"`
|
||||
}
|
||||
|
||||
type domainRules map[string][]rule
|
||||
|
||||
func (r *rule) UnmarshalJSON(data []byte) error {
|
||||
type Tmp struct {
|
||||
RegexRule string `json:"regexRule"`
|
||||
Surrogate string `json:"surrogate"`
|
||||
Action string `json:"action,omitempty"`
|
||||
}
|
||||
|
||||
var tmp Tmp
|
||||
if err := json.Unmarshal(data, &tmp); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
regex := regexp.MustCompile(tmp.RegexRule)
|
||||
|
||||
r.RegexRule = regex
|
||||
r.Surrogate = tmp.Surrogate
|
||||
r.Action = tmp.Action
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// PatchTrackerScripts replaces any request to tracker scripts such as google analytics
|
||||
// with a no-op stub that mocks the API structure of the original scripts they replace.
|
||||
// Some pages depend on the existence of these structures for proper loading, so this may fix
|
||||
// some broken elements.
|
||||
// Surrogate script code borrowed from: DuckDuckGo Privacy Essentials browser extension for Firefox, Chrome. (Apache 2.0 license)
|
||||
func PatchTrackerScripts() proxychain.ResponseModification {
|
||||
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
|
||||
// preflight checks
|
||||
reqURL := chain.Request.URL.String()
|
||||
isTracker := false
|
||||
//
|
||||
|
||||
var surrogateScript io.ReadCloser
|
||||
for domain, domainRules := range rules {
|
||||
for _, rule := range domainRules {
|
||||
if !rule.RegexRule.MatchString(reqURL) {
|
||||
continue
|
||||
}
|
||||
|
||||
// found tracker script, replacing response body with nop stub from
|
||||
// ./vendor/ddg-tracker-surrogates/surrogates/{{rule.Surrogate}}
|
||||
isTracker = true
|
||||
script, err := surrogateFS.Open("vendor/ddg-tracker-surrogates/surrogates/" + rule.Surrogate)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
surrogateScript = io.NopCloser(script)
|
||||
log.Printf("INFO: PatchTrackerScripts :: injecting surrogate for '%s' => 'surrogates/%s'\n", domain, rule.Surrogate)
|
||||
break
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if !isTracker {
|
||||
return nil
|
||||
}
|
||||
|
||||
chain.Response.Body = surrogateScript
|
||||
chain.Context.Set("content-type", "text/javascript")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
36
proxychain/responsemodifiers/rewrite_http_resource_urls.go
Normal file
36
proxychain/responsemodifiers/rewrite_http_resource_urls.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package responsemodifiers
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"ladder/proxychain"
|
||||
"ladder/proxychain/responsemodifiers/rewriters"
|
||||
)
|
||||
|
||||
// RewriteHTMLResourceURLs modifies HTTP responses
|
||||
// to rewrite URLs attributes in HTML content (such as src, href)
|
||||
// - `<img src='/relative_path'>` -> `<img src='/https://proxiedsite.com/relative_path'>`
|
||||
// - This function is designed to allow the proxified page
|
||||
// to still be browsible by routing all resource URLs through the proxy.
|
||||
func RewriteHTMLResourceURLs() proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// don't add rewriter if it's not even html
|
||||
ct := chain.Response.Header.Get("content-type")
|
||||
if !strings.HasPrefix(ct, "text/html") {
|
||||
return nil
|
||||
}
|
||||
|
||||
// proxyURL is the URL of the ladder: http://localhost:8080 (ladder)
|
||||
originalURI := chain.Context.Request().URI()
|
||||
proxyURL := fmt.Sprintf("%s://%s", originalURI.Scheme(), originalURI.Host())
|
||||
|
||||
// replace http.Response.Body with a readcloser that wraps the original, modifying the html attributes
|
||||
rr := rewriters.NewHTMLTokenURLRewriter(chain.Request.URL, proxyURL)
|
||||
htmlRewriter := rewriters.NewHTMLRewriter(chain.Response.Body, rr)
|
||||
chain.Response.Body = htmlRewriter
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
// Template: wait for the initial DOM, then watch mutations; once the page
// has been quiet for `debounceDelay` ms, run the injected script once.
(() => {
  document.addEventListener("DOMContentLoaded", (event) => {
    initIdleMutationObserver();
  });

  function initIdleMutationObserver() {
    let debounceTimer;
    const debounceDelay = 500; // adjust the delay as needed

    const observer = new MutationObserver((mutations) => {
      // Clear the previous timer and set a new one
      clearTimeout(debounceTimer);
      debounceTimer = setTimeout(() => {
        execute();
        observer.disconnect(); // Disconnect after first execution
      }, debounceDelay);
    });

    const config = { attributes: false, childList: true, subtree: true };
    observer.observe(document.body, config);
  }

  function execute() {
    // Placeholder string replaced at injection time by
    // ScriptInjectorRewriter (script_injector_rewriter.go).
    "{{AFTER_DOM_IDLE_SCRIPT}}";
    //console.log('DOM is now idle. Executing...');
  }
})();
|
||||
|
||||
3
proxychain/responsemodifiers/rewriters/css_rewriter.go
Normal file
3
proxychain/responsemodifiers/rewriters/css_rewriter.go
Normal file
@@ -0,0 +1,3 @@
|
||||
package rewriters
|
||||
|
||||
// todo: implement
|
||||
133
proxychain/responsemodifiers/rewriters/html_rewriter.go
Normal file
133
proxychain/responsemodifiers/rewriters/html_rewriter.go
Normal file
@@ -0,0 +1,133 @@
|
||||
package rewriters
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// IHTMLTokenRewriter defines an interface for modifying HTML tokens.
// Implementations are driven by HTMLRewriter, which calls ShouldModify and
// then ModifyToken for every token in the stream.
type IHTMLTokenRewriter interface {
	// ShouldModify determines whether a given HTML token requires modification.
	ShouldModify(*html.Token) bool

	// ModifyToken applies modifications to a given HTML token.
	// It returns strings representing content to be prepended and
	// appended to the token. If no modifications are required or if an error occurs,
	// it returns empty strings for both 'prepend' and 'append'.
	// Note: The original token is not modified if an error occurs.
	ModifyToken(*html.Token) (prepend, append string)
}
|
||||
|
||||
// HTMLRewriter is a struct that can take multiple TokenHandlers and process all
// HTML tokens from http.Response.Body in a single pass, making changes and returning a new io.ReadCloser
//
//   - HTMLRewriter reads the http.Response.Body stream,
//     parsing each HTML token one at a time and making modifications (defined by implementations of IHTMLTokenRewriter)
//
//   - When ProxyChain.Execute() is called, the response body will be read from the server
//     and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
//     without ever buffering the entire HTTP response in memory.
type HTMLRewriter struct {
	tokenizer             *html.Tokenizer      // streams tokens from the wrapped source
	currentToken          *html.Token          // token currently being emitted; nil before first Read
	tokenBuffer           *bytes.Buffer        // rendered bytes of currentToken plus prepends/appends
	currentTokenProcessed bool                 // true once tokenBuffer has been fully drained
	rewriters             []IHTMLTokenRewriter // applied in order to every token
}
|
||||
|
||||
// NewHTMLRewriter creates a new HTMLRewriter instance.
|
||||
// It processes HTML tokens from an io.ReadCloser source (typically http.Response.Body)
|
||||
// using a series of HTMLTokenRewriters. Each HTMLTokenRewriter in the 'rewriters' slice
|
||||
// applies its specific modifications to the HTML tokens.
|
||||
// The HTMLRewriter reads from the provided 'src', applies the modifications,
|
||||
// and returns the processed content as a new io.ReadCloser.
|
||||
// This new io.ReadCloser can be used to stream the modified content back to the client.
|
||||
//
|
||||
// Parameters:
|
||||
// - src: An io.ReadCloser representing the source of the HTML content, such as http.Response.Body.
|
||||
// - rewriters: A slice of HTMLTokenRewriters that define the modifications to be applied to the HTML tokens.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to an HTMLRewriter, which implements io.ReadCloser, containing the modified HTML content.
|
||||
func NewHTMLRewriter(src io.ReadCloser, rewriters ...IHTMLTokenRewriter) *HTMLRewriter {
|
||||
return &HTMLRewriter{
|
||||
tokenizer: html.NewTokenizer(src),
|
||||
currentToken: nil,
|
||||
tokenBuffer: new(bytes.Buffer),
|
||||
currentTokenProcessed: false,
|
||||
rewriters: rewriters,
|
||||
}
|
||||
}
|
||||
|
||||
// Close resets the internal state of HTMLRewriter, clearing buffers and token data.
|
||||
func (r *HTMLRewriter) Close() error {
|
||||
r.tokenBuffer.Reset()
|
||||
r.currentToken = nil
|
||||
r.currentTokenProcessed = false
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read processes the HTML content, rewriting URLs and managing the state of tokens.
// It pulls one token at a time from the tokenizer, renders it (plus any
// prepend/append strings the rewriters produce) into tokenBuffer, then
// serves bytes from that buffer until drained.
func (r *HTMLRewriter) Read(p []byte) (int, error) {
	if r.currentToken == nil || r.currentToken.Data == "" || r.currentTokenProcessed {
		tokenType := r.tokenizer.Next()

		// done reading html, close out reader
		if tokenType == html.ErrorToken {
			if r.tokenizer.Err() == io.EOF {
				return 0, io.EOF
			}

			return 0, r.tokenizer.Err()
		}

		// get the next token; reset buffer
		t := r.tokenizer.Token()
		r.currentToken = &t
		r.tokenBuffer.Reset()

		// buffer += "<prepends> <token> <appends>"
		// process token through all registered rewriters
		// rewriters will modify the token, and optionally
		// return a <prepend> or <append> string token
		appends := make([]string, 0, len(r.rewriters))
		for _, rewriter := range r.rewriters {
			if !rewriter.ShouldModify(r.currentToken) {
				continue
			}

			prepend, a := rewriter.ModifyToken(r.currentToken)
			appends = append(appends, a)
			// add <prepends> to buffer
			r.tokenBuffer.WriteString(prepend)
		}

		// add <token> to buffer
		if tokenType == html.TextToken {
			// don't unescape textTokens (such as inline scripts).
			// Token.String() by default will escape the inputs, but
			// we don't want to modify the original source
			r.tokenBuffer.WriteString(r.currentToken.Data)
		} else {
			r.tokenBuffer.WriteString(r.currentToken.String())
		}

		// add <appends> to buffer
		for _, a := range appends {
			r.tokenBuffer.WriteString(a)
		}

		r.currentTokenProcessed = false
	}

	n, err := r.tokenBuffer.Read(p)
	if err == io.EOF || r.tokenBuffer.Len() == 0 {
		// buffer drained: mark the token consumed so the next Read advances
		r.currentTokenProcessed = true
		err = nil // EOF in this context is expected and not an actual error
	}

	return n, err
}
|
||||
@@ -0,0 +1,288 @@
|
||||
package rewriters
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/url"
|
||||
"path"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html/atom"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
var (
|
||||
rewriteAttrs map[string]map[string]bool
|
||||
specialRewriteAttrs map[string]map[string]bool
|
||||
schemeBlacklist map[string]bool
|
||||
)
|
||||
|
||||
// init populates the package-level lookup tables used by
// HTMLTokenURLRewriter: which tag/attribute pairs carry URLs, which need
// special parsing, and which URI schemes must never be rewritten.
func init() {
	// define all tag/attributes which might contain URLs
	// to attempt to rewrite to point to proxy instead
	rewriteAttrs = map[string]map[string]bool{
		"img":        {"src": true, "srcset": true, "longdesc": true, "usemap": true},
		"a":          {"href": true},
		"form":       {"action": true},
		"link":       {"href": true, "manifest": true, "icon": true},
		"script":     {"src": true},
		"video":      {"src": true, "poster": true},
		"audio":      {"src": true},
		"iframe":     {"src": true, "longdesc": true},
		"embed":      {"src": true},
		"object":     {"data": true, "codebase": true},
		"source":     {"src": true, "srcset": true},
		"track":      {"src": true},
		"area":       {"href": true},
		"base":       {"href": true},
		"blockquote": {"cite": true},
		"del":        {"cite": true},
		"ins":        {"cite": true},
		"q":          {"cite": true},
		"body":       {"background": true},
		"button":     {"formaction": true},
		"input":      {"src": true, "formaction": true},
		"meta":       {"content": true},
	}

	// might contain URL but requires special handling
	// (multi-URL srcset lists; meta refresh "N;url=..." values)
	specialRewriteAttrs = map[string]map[string]bool{
		"img":    {"srcset": true},
		"source": {"srcset": true},
		"meta":   {"content": true},
	}

	// define URIs to NOT rewrite
	// for example: don't overwrite <img src="data:image/png;base64;iVBORw...">"
	schemeBlacklist = map[string]bool{
		"data":       true,
		"tel":        true,
		"mailto":     true,
		"file":       true,
		"blob":       true,
		"javascript": true,
		"about":      true,
		"magnet":     true,
		"ws":         true,
		"wss":        true,
		"ftp":        true,
	}
}
|
||||
|
||||
// HTMLTokenURLRewriter implements HTMLTokenRewriter
// it rewrites URLs within HTML resources to use a specified proxy URL.
// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
type HTMLTokenURLRewriter struct {
	baseURL  *url.URL // URL of the proxied page; relative links resolve against this
	proxyURL string   // ladder URL, not proxied site URL
}
|
||||
|
||||
// NewHTMLTokenURLRewriter creates a new instance of HTMLResourceURLRewriter.
|
||||
// It initializes the tokenizer with the provided source and sets the proxy URL.
|
||||
// baseURL might be https://medium.com/foobar
|
||||
// proxyURL is http://localhost:8080
|
||||
func NewHTMLTokenURLRewriter(baseURL *url.URL, proxyURL string) *HTMLTokenURLRewriter {
|
||||
return &HTMLTokenURLRewriter{
|
||||
baseURL: baseURL,
|
||||
proxyURL: proxyURL,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *HTMLTokenURLRewriter) ShouldModify(token *html.Token) bool {
|
||||
// fmt.Printf("touch token: %s\n", token.String())
|
||||
attrLen := len(token.Attr)
|
||||
if attrLen == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
if token.Type == html.StartTagToken {
|
||||
return true
|
||||
}
|
||||
|
||||
if token.Type == html.SelfClosingTagToken {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ModifyToken rewrites every URL-bearing attribute of the token in place so
// it is served through the proxy. It never injects extra markup, so both
// returned strings are always empty. Case order below is load-bearing:
// skip-checks run before the special and default rewrites.
func (r *HTMLTokenURLRewriter) ModifyToken(token *html.Token) (string, string) {
	for i := range token.Attr {
		attr := &token.Attr[i]

		switch {
		// don't touch tag/attributes that don't contain URIs
		case !rewriteAttrs[token.Data][attr.Key]:
			continue
		// don't touch attributes with special URIs (like data:)
		case schemeBlacklist[strings.Split(attr.Val, ":")[0]]:
			continue
		// don't double-overwrite the url
		case strings.HasPrefix(attr.Val, r.proxyURL):
			continue
		case strings.HasPrefix(attr.Val, "/http://"):
			continue
		case strings.HasPrefix(attr.Val, "/https://"):
			continue
		// handle special rewrites (srcset lists, meta refresh)
		case specialRewriteAttrs[token.Data][attr.Key]:
			r.handleSpecialAttr(token, attr, r.baseURL)
			continue
		default:
			// rewrite url
			handleURLPart(attr, r.baseURL)
		}
	}
	return "", ""
}
|
||||
|
||||
// dispatcher for ModifyURL based on URI type
|
||||
func handleURLPart(attr *html.Attribute, baseURL *url.URL) {
|
||||
switch {
|
||||
case strings.HasPrefix(attr.Val, "//"):
|
||||
handleProtocolRelativePath(attr, baseURL)
|
||||
case strings.HasPrefix(attr.Val, "/"):
|
||||
handleRootRelativePath(attr, baseURL)
|
||||
case strings.HasPrefix(attr.Val, "https://"):
|
||||
handleAbsolutePath(attr, baseURL)
|
||||
case strings.HasPrefix(attr.Val, "http://"):
|
||||
handleAbsolutePath(attr, baseURL)
|
||||
default:
|
||||
handleDocumentRelativePath(attr, baseURL)
|
||||
}
|
||||
}
|
||||
|
||||
// Protocol-relative URLs: These start with "//" and will use the same protocol (http or https) as the current page.
|
||||
func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
attr.Val = strings.TrimPrefix(attr.Val, "/")
|
||||
handleRootRelativePath(attr, baseURL)
|
||||
log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// Root-relative URLs: These are relative to the root path and start with a "/".
// Rewritten to "/<scheme>://<host>/<path>" so the ladder can route them.
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
	// Skip processing if it's already in the correct format
	if strings.HasPrefix(attr.Val, "/http://") || strings.HasPrefix(attr.Val, "/https://") {
		return
	}

	// doublecheck this is a valid relative URL
	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
	_, err := url.Parse(fmt.Sprintf("http://localhost.com%s", attr.Val))
	if err != nil {
		log.Println(err)
		return
	}

	// log.Printf("BASEURL patch: %s\n", baseURL)

	// Rebuild as an absolute URL on the proxied host, escape it, then
	// prefix "/" so the result is a path on the ladder.
	attr.Val = fmt.Sprintf(
		"%s://%s/%s",
		baseURL.Scheme,
		baseURL.Host,
		strings.TrimPrefix(attr.Val, "/"),
	)
	attr.Val = escape(attr.Val)
	attr.Val = fmt.Sprintf("/%s", attr.Val)

	log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
|
||||
|
||||
// Document-relative URLs: These are relative to the current document's path and don't start with a "/".
|
||||
func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
|
||||
log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
|
||||
|
||||
if strings.HasPrefix(attr.Val, "#") {
|
||||
return
|
||||
}
|
||||
|
||||
relativePath := path.Join(strings.Trim(baseURL.RawPath, "/"), strings.Trim(attr.Val, "/"))
|
||||
attr.Val = fmt.Sprintf(
|
||||
"%s://%s/%s",
|
||||
baseURL.Scheme,
|
||||
strings.Trim(baseURL.Host, "/"),
|
||||
relativePath,
|
||||
)
|
||||
attr.Val = escape(attr.Val)
|
||||
attr.Val = fmt.Sprintf("/%s", attr.Val)
|
||||
|
||||
log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// full URIs beginning with https?://proxiedsite.com
|
||||
func handleAbsolutePath(attr *html.Attribute, _ *url.URL) {
|
||||
// check if valid URL
|
||||
log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)
|
||||
|
||||
u, err := url.Parse(attr.Val)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if !(u.Scheme == "http" || u.Scheme == "https") {
|
||||
return
|
||||
}
|
||||
|
||||
attr.Val = fmt.Sprintf("/%s", escape(strings.TrimPrefix(attr.Val, "/")))
|
||||
// attr.Val = fmt.Sprintf("/%s", escape(attr.Val))
|
||||
|
||||
log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
|
||||
}
|
||||
|
||||
// handle edge cases for special attributes
|
||||
func (r *HTMLTokenURLRewriter) handleSpecialAttr(token *html.Token, attr *html.Attribute, baseURL *url.URL) {
|
||||
switch {
|
||||
// srcset attribute doesn't contain a single URL but a comma-separated list of URLs, each potentially followed by a space and a descriptor (like a width, pixel density, or other conditions).
|
||||
case token.DataAtom == atom.Img && attr.Key == "srcset":
|
||||
handleSrcSet(attr, baseURL)
|
||||
case token.DataAtom == atom.Source && attr.Key == "srcset":
|
||||
handleSrcSet(attr, baseURL)
|
||||
// meta with http-equiv="refresh": The content attribute of a meta tag, when used for a refresh directive, contains a time interval followed by a URL, like content="5;url=http://example.com/".
|
||||
case token.DataAtom == atom.Meta && attr.Key == "content" && regexp.MustCompile(`^\d+;url=`).MatchString(attr.Val):
|
||||
handleMetaRefresh(attr, baseURL)
|
||||
default:
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
func handleMetaRefresh(attr *html.Attribute, baseURL *url.URL) {
|
||||
sec := strings.Split(attr.Val, ";url=")[0]
|
||||
url := strings.Split(attr.Val, ";url=")[1]
|
||||
f := &html.Attribute{Val: url, Key: "src"}
|
||||
handleURLPart(f, baseURL)
|
||||
attr.Val = fmt.Sprintf("%s;url=%s", sec, f.Val)
|
||||
}
|
||||
|
||||
func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
|
||||
var srcSetBuilder strings.Builder
|
||||
srcSetItems := strings.Split(attr.Val, ",")
|
||||
|
||||
for i, srcItem := range srcSetItems {
|
||||
srcParts := strings.Fields(srcItem)
|
||||
|
||||
if len(srcParts) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
f := &html.Attribute{Val: srcParts[0], Key: "src"}
|
||||
handleURLPart(f, baseURL)
|
||||
|
||||
if i > 0 {
|
||||
srcSetBuilder.WriteString(", ")
|
||||
}
|
||||
|
||||
srcSetBuilder.WriteString(f.Val)
|
||||
if len(srcParts) > 1 {
|
||||
srcSetBuilder.WriteString(" ")
|
||||
srcSetBuilder.WriteString(strings.Join(srcParts[1:], " "))
|
||||
}
|
||||
}
|
||||
|
||||
attr.Val = srcSetBuilder.String()
|
||||
}
|
||||
|
||||
// escape percent-encodes str as a URL path while keeping "/" separators
// intact, so the proxied URL stays readable and routable as a path.
func escape(str string) string {
	escaped := url.PathEscape(str)
	return strings.ReplaceAll(escaped, "%2F", "/")
}
|
||||
@@ -0,0 +1,92 @@
|
||||
package rewriters
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
// ScriptInjectorRewriter implements HTMLTokenRewriter.
// ScriptInjectorRewriter injects JS into the page: it is driven by an HTML
// tokenizer and inserts a <script> element immediately after the opening
// <head> tag (see ShouldModify / ModifyToken).
type ScriptInjectorRewriter struct {
	// execTime selects the wrapper emitted by ModifyToken (inline,
	// DOMContentLoaded listener, or the embedded DOM-idle template).
	execTime ScriptExecTime
	// script is the JavaScript source injected into the page.
	script string
}
|
||||
|
||||
// ScriptExecTime selects when an injected script executes relative to the
// page's lifecycle (see ScriptInjectorRewriter.ModifyToken).
type ScriptExecTime int

const (
	// BeforeDOMContentLoaded emits the script inline so it runs as soon as
	// the parser reaches it.
	BeforeDOMContentLoaded ScriptExecTime = iota
	// AfterDOMContentLoaded wraps the script in a DOMContentLoaded listener.
	AfterDOMContentLoaded
	// AfterDOMIdle splices the script into the embedded idle-scheduling
	// template (after_dom_idle_script_injector.js).
	AfterDOMIdle
)
|
||||
|
||||
func (r *ScriptInjectorRewriter) ShouldModify(token *html.Token) bool {
|
||||
// modify if token == <head>
|
||||
return token.DataAtom == atom.Head && token.Type == html.StartTagToken
|
||||
}
|
||||
|
||||
// afterDomIdleScriptInjector holds the embedded JS template whose
// '{{AFTER_DOM_IDLE_SCRIPT}}' placeholder is replaced with the payload
// script when execTime is AfterDOMIdle (see ModifyToken).
//
//go:embed after_dom_idle_script_injector.js
var afterDomIdleScriptInjector string
|
||||
|
||||
func (r *ScriptInjectorRewriter) ModifyToken(_ *html.Token) (string, string) {
|
||||
switch {
|
||||
case r.execTime == BeforeDOMContentLoaded:
|
||||
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", r.script)
|
||||
|
||||
case r.execTime == AfterDOMContentLoaded:
|
||||
return "", fmt.Sprintf("\n<script>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.script)
|
||||
|
||||
case r.execTime == AfterDOMIdle:
|
||||
s := strings.Replace(afterDomIdleScriptInjector, `'{{AFTER_DOM_IDLE_SCRIPT}}'`, r.script, 1)
|
||||
return "", fmt.Sprintf("\n<script>\n%s\n</script>\n", s)
|
||||
|
||||
default:
|
||||
return "", ""
|
||||
}
|
||||
}
|
||||
|
||||
// applies parameters by string replacement of the template script
|
||||
func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
|
||||
// Sort the keys by length in descending order
|
||||
keys := make([]string, 0, len(params))
|
||||
for key := range params {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
|
||||
sort.Slice(keys, func(i, j int) bool {
|
||||
return len(keys[i]) > len(keys[j])
|
||||
})
|
||||
|
||||
for _, key := range keys {
|
||||
r.script = strings.ReplaceAll(r.script, key, params[key])
|
||||
}
|
||||
}
|
||||
|
||||
// NewScriptInjectorRewriter implements a HtmlTokenRewriter
|
||||
// and injects JS into the page for execution at a particular time
|
||||
func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
|
||||
return &ScriptInjectorRewriter{
|
||||
execTime: execTime,
|
||||
script: script,
|
||||
}
|
||||
}
|
||||
|
||||
// NewScriptInjectorRewriterWith implements a HtmlTokenRewriter
|
||||
// and injects JS into the page for execution at a particular time
|
||||
// accepting arguments into the script, which will be added via a string replace
|
||||
// the params map represents the key-value pair of the params.
|
||||
// the key will be string replaced with the value
|
||||
func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter {
|
||||
rr := &ScriptInjectorRewriter{
|
||||
execTime: execTime,
|
||||
script: script,
|
||||
}
|
||||
rr.applyParams(params)
|
||||
return rr
|
||||
}
|
||||
1
proxychain/responsemodifiers/vendor/ddg-tracker-surrogates
vendored
Submodule
1
proxychain/responsemodifiers/vendor/ddg-tracker-surrogates
vendored
Submodule
Submodule proxychain/responsemodifiers/vendor/ddg-tracker-surrogates added at ba0d8cefe4
Reference in New Issue
Block a user