fix "modifer" -> "modifier" typo everywhere

This commit is contained in:
Kevin Pham
2023-12-03 17:04:30 -06:00
parent 117ded5653
commit 6c0721dcb8
67 changed files with 73 additions and 77 deletions

View File

@@ -0,0 +1,56 @@
package api
import (
"bytes"
"encoding/json"
"errors"
"io"
"reflect"
)
// Error is the JSON envelope written to the response body when a request fails.
type Error struct {
	Success bool         `json:"success"`
	Error   ErrorDetails `json:"error"`
}

// ErrorDetails describes the failure carried by an Error envelope.
type ErrorDetails struct {
	Message string `json:"message"` // message of the outermost error
	Type    string `json:"type"`    // dynamic Go type of the outermost error
	Cause   string `json:"cause"`   // message of the innermost wrapped error
}

// CreateAPIErrReader serializes err into an Error envelope and returns the
// JSON as an io.ReadCloser suitable for use as a response body.
//
// Every payload this function produces, including the one for a nil err and
// the fallback used if marshalling fails, has the shape of Error, so clients
// can always decode the body into that type.
func CreateAPIErrReader(err error) io.ReadCloser {
	details := ErrorDetails{Message: "No error provided"}
	if err != nil {
		details = ErrorDetails{
			Message: err.Error(),
			Type:    reflect.TypeOf(err).String(),
			Cause:   getBaseError(err).Error(),
		}
	}

	// Serialize the envelope into JSON.
	jsonData, jsonErr := json.Marshal(Error{Success: false, Error: details})
	if jsonErr != nil {
		// Hand-written fallback that still matches the Error schema.
		return io.NopCloser(bytes.NewBufferString(
			`{"success":false,"error":{"message":"Failed to serialize error","type":"","cause":""}}`))
	}

	return io.NopCloser(bytes.NewBuffer(jsonData))
}

// getBaseError follows the errors.Unwrap chain and returns the innermost
// error. An error that wraps nothing is returned unchanged.
func getBaseError(err error) error {
	for next := errors.Unwrap(err); next != nil; next = errors.Unwrap(err) {
		err = next
	}
	return err
}

View File

@@ -0,0 +1,174 @@
package api
import (
"github.com/go-shiori/dom"
"github.com/markusmobius/go-trafilatura"
"golang.org/x/net/html"
)
// =======================================================================================
// credit @joncrangle https://github.com/everywall/ladder/issues/38#issuecomment-1831252934
// Content element payloads. Each value describes one child node of the
// extracted article body; the Type field carries the element name.
type (
	// ImageContent describes an <img> element.
	ImageContent struct {
		Type    string `json:"type"`
		URL     string `json:"url"`
		Alt     string `json:"alt"`
		Caption string `json:"caption"`
	}

	// LinkContent describes an <a> element: its target and its inner text.
	LinkContent struct {
		Type string `json:"type"`
		Href string `json:"href"`
		Data string `json:"data"`
	}

	// TextContent describes a text-bearing element such as a heading or paragraph.
	TextContent struct {
		Type string `json:"type"`
		Data string `json:"data"`
	}

	// ListContent describes a <ul> or <ol> element and its items.
	ListContent struct {
		Type      string            `json:"type"`
		ListItems []ListItemContent `json:"listItems"`
	}

	// ListItemContent holds the inner text of a single <li>.
	ListItemContent struct {
		Data string `json:"data"`
	}
)
// JSONDocument is the JSON representation of an extracted article that is
// returned by the content API.
type JSONDocument struct {
// Success is set to true by ExtractResultToAPIResponse.
Success bool `json:"success"`
// Error is left at its zero value by ExtractResultToAPIResponse.
Error ErrorDetails `json:"error"`
// Metadata mirrors the metadata fields of the extraction result.
Metadata struct {
Title string `json:"title"`
Author string `json:"author"`
URL string `json:"url"`
Hostname string `json:"hostname"`
Image string `json:"image"`
Description string `json:"description"`
Sitename string `json:"sitename"`
// Date is written by ExtractResultToAPIResponse using the layout "2006-01-02".
Date string `json:"date"`
Categories []string `json:"categories"`
Tags []string `json:"tags"`
License string `json:"license"`
} `json:"metadata"`
// Content holds the values produced by parseContent: ImageContent,
// LinkContent, TextContent and ListContent, in document order.
Content []interface{} `json:"content"`
// Comments is the outer HTML of the extracted comments node, if there is one.
Comments string `json:"comments"`
}
// ExtractResultToAPIResponse converts a trafilatura extraction result into the
// JSONDocument returned by the content API.
//
// extract must be non-nil. Metadata fields are copied as-is; the date is
// formatted as "2006-01-02" and left empty when the extractor found no date,
// rather than being reported as the zero time "0001-01-01". Content and
// Comments are only populated when the corresponding node is present.
func ExtractResultToAPIResponse(extract *trafilatura.ExtractResult) *JSONDocument {
	jsonDoc := &JSONDocument{}

	// Populate success
	jsonDoc.Success = true

	// Populate metadata
	jsonDoc.Metadata.Title = extract.Metadata.Title
	jsonDoc.Metadata.Author = extract.Metadata.Author
	jsonDoc.Metadata.URL = extract.Metadata.URL
	jsonDoc.Metadata.Hostname = extract.Metadata.Hostname
	jsonDoc.Metadata.Description = extract.Metadata.Description
	jsonDoc.Metadata.Image = extract.Metadata.Image
	jsonDoc.Metadata.Sitename = extract.Metadata.Sitename
	if !extract.Metadata.Date.IsZero() {
		jsonDoc.Metadata.Date = extract.Metadata.Date.Format("2006-01-02")
	}
	jsonDoc.Metadata.Categories = extract.Metadata.Categories
	jsonDoc.Metadata.Tags = extract.Metadata.Tags
	jsonDoc.Metadata.License = extract.Metadata.License

	// Populate content
	if extract.ContentNode != nil {
		jsonDoc.Content = parseContent(extract.ContentNode)
	}

	// Populate comments
	if extract.CommentsNode != nil {
		jsonDoc.Comments = dom.OuterHTML(extract.CommentsNode)
	}

	return jsonDoc
}
// parseContent walks the direct children of node and converts each one into a
// typed content element:
//
//   - "img"          -> ImageContent (src, alt and caption attributes)
//   - "a"            -> LinkContent (href attribute and inner text)
//   - "h1" .. "h6"   -> TextContent typed with the heading's own tag name
//   - "ul" / "ol"    -> ListContent with one item per direct <li> child
//   - anything else  -> TextContent of type "p" holding the node's inner text
//
// Only direct children are inspected; nested structure is flattened into the
// inner text of the child it belongs to.
func parseContent(node *html.Node) []interface{} {
	var content []interface{}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		switch child.Data {
		case "img":
			content = append(content, ImageContent{
				Type:    "img",
				URL:     dom.GetAttribute(child, "src"),
				Alt:     dom.GetAttribute(child, "alt"),
				Caption: dom.GetAttribute(child, "caption"),
			})

		case "a":
			content = append(content, LinkContent{
				Type: "a",
				Href: dom.GetAttribute(child, "href"),
				Data: dom.InnerText(child),
			})

		case "h1", "h2", "h3", "h4", "h5", "h6":
			// All heading levels share one shape; h6 is included so it is
			// no longer reported as a plain paragraph.
			content = append(content, TextContent{
				Type: child.Data,
				Data: dom.InnerText(child),
			})

		case "ul", "ol":
			list := ListContent{
				Type: child.Data,
				// Non-nil so an empty list marshals as [] rather than null.
				ListItems: []ListItemContent{},
			}
			for listItem := child.FirstChild; listItem != nil; listItem = listItem.NextSibling {
				if listItem.Data == "li" {
					list.ListItems = append(list.ListItems, ListItemContent{
						Data: dom.InnerText(listItem),
					})
				}
			}
			content = append(content, list)

		default:
			content = append(content, TextContent{
				Type: "p",
				Data: dom.InnerText(child),
			})
		}
	}

	return content
}

View File

@@ -0,0 +1,47 @@
package responsemodifiers
import (
"bytes"
"encoding/json"
"io"
"github.com/markusmobius/go-trafilatura"
"ladder/proxychain"
"ladder/proxychain/responsemodifiers/api"
)
// APIContent returns a response modification that replaces the response body
// with a JSON representation of the article extracted from it.
//
// When extraction fails, the body becomes the JSON error document produced by
// api.CreateAPIErrReader and the modification itself reports success. A JSON
// marshalling failure is returned to the caller.
func APIContent() proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		// The content type is set in both places in case another response
		// modifier tries to forward over the original headers.
		const jsonMIME = "application/json"
		chain.Context.Set("content-type", jsonMIME)
		chain.Response.Header.Set("content-type", jsonMIME)

		// Extract the DOM contents.
		extracted, extractErr := trafilatura.Extract(chain.Response.Body, trafilatura.Options{
			IncludeImages: true,
			IncludeLinks:  true,
			// FavorPrecision: true,
			FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go
			// implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability"
			OriginalURL: chain.Request.URL,
		})
		if extractErr != nil {
			chain.Response.Body = api.CreateAPIErrReader(extractErr)
			return nil
		}

		payload, marshalErr := json.MarshalIndent(api.ExtractResultToAPIResponse(extracted), "", " ")
		if marshalErr != nil {
			return marshalErr
		}

		chain.Response.Body = io.NopCloser(bytes.NewReader(payload))
		return nil
	}
}

View File

@@ -0,0 +1,70 @@
package responsemodifiers
import (
"encoding/json"
"fmt"
"io"
"net/url"
"testing"
"ladder/proxychain/responsemodifiers/api"
)
// TestCreateAPIErrReader checks that a wrapped error is serialized into an
// api.Error document that reports failure and carries the wrapper's message.
func TestCreateAPIErrReader(t *testing.T) {
	_, parseErr := url.Parse("://this is an invalid url")
	wrapped := fmt.Errorf("wrapped error: %w", parseErr)

	body := api.CreateAPIErrReader(wrapped)
	defer body.Close()

	// Read and unmarshal the JSON output
	raw, readErr := io.ReadAll(body)
	if readErr != nil {
		t.Fatalf("Failed to read from ReadCloser: %v", readErr)
	}
	fmt.Println(string(raw))

	var decoded api.Error
	if unmarshalErr := json.Unmarshal(raw, &decoded); unmarshalErr != nil {
		t.Fatalf("Failed to unmarshal JSON: %v", unmarshalErr)
	}

	// Verify the structure of the decoded document
	if decoded.Success {
		t.Errorf("Expected Success to be false, got true")
	}
	if want := wrapped.Error(); decoded.Error.Message != want {
		t.Errorf("Expected error message to be '%v', got '%v'", want, decoded.Error.Message)
	}
}
// TestCreateAPIErrReader2 checks that an unwrapped error is serialized into an
// api.Error document that reports failure and carries the error's message.
func TestCreateAPIErrReader2(t *testing.T) {
	_, parseErr := url.Parse("://this is an invalid url")

	body := api.CreateAPIErrReader(parseErr)
	defer body.Close()

	// Read and unmarshal the JSON output
	raw, readErr := io.ReadAll(body)
	if readErr != nil {
		t.Fatalf("Failed to read from ReadCloser: %v", readErr)
	}
	fmt.Println(string(raw))

	var decoded api.Error
	if unmarshalErr := json.Unmarshal(raw, &decoded); unmarshalErr != nil {
		t.Fatalf("Failed to unmarshal JSON: %v", unmarshalErr)
	}

	// Verify the structure of the decoded document
	if decoded.Success {
		t.Errorf("Expected Success to be false, got true")
	}
	if want := parseErr.Error(); decoded.Error.Message != want {
		t.Errorf("Expected error message to be '%v', got '%v'", want, decoded.Error.Message)
	}
}

View File

@@ -0,0 +1,42 @@
package responsemodifiers
import (
_ "embed"
"strings"
"ladder/proxychain"
"ladder/proxychain/responsemodifiers/rewriters"
)
// blockElementRemoval holds the contents of block_element_removal.js,
// embedded at build time. BlockElementRemoval passes it to the script
// injector as the script to inject.
//go:embed block_element_removal.js
var blockElementRemoval string
// BlockElementRemoval injects a script that stops paywall javascript from
// removing a particular element: the removal is detected and the element is
// reinserted right away. This helps with pages that flash the content briefly
// and then show a "fake" 404.
// If the /outline/ API works, but the regular API doesn't, try this modifier.
//
// cssSelector identifies the element to protect, ie: "div.article-content".
// Responses whose content-type does not start with "text/html" are left untouched.
func BlockElementRemoval(cssSelector string) proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		// Nothing to rewrite unless the response is html.
		if contentType := chain.Response.Header.Get("content-type"); !strings.HasPrefix(contentType, "text/html") {
			return nil
		}

		injector := rewriters.NewScriptInjectorRewriterWithParams(
			blockElementRemoval,
			rewriters.BeforeDOMContentLoaded,
			map[string]string{"{{CSS_SELECTOR}}": cssSelector},
		)
		chain.Response.Body = rewriters.NewHTMLRewriter(chain.Response.Body, injector)

		return nil
	}
}

View File

@@ -0,0 +1,35 @@
/**
 * Restores DOM content matching a CSS selector when page scripts remove it.
 *
 * This self-invoking function creates a MutationObserver on the whole document. When a removed
 * element matches the configured selector, or contains an element that does, the event is logged
 * and, after a 50ms delay, the removed node is put back: it replaces the element currently
 * matching the selector, or is prepended to the document body when nothing matches.
 */
(function() {
  // Placeholder filled in with the selector passed to the BlockElementRemoval response modifier.
  const selector = "{{CSS_SELECTOR}}";

  // True when the removed node is, or contains, the protected element.
  // Text and comment nodes have no matches()/querySelector() and are skipped.
  function isProtected(node) {
    if (node.nodeType !== Node.ELEMENT_NODE) {
      return false;
    }
    return node.matches(selector) || node.querySelector(selector) !== null;
  }

  function restore(node) {
    const current = document.querySelector(selector);
    if (current != null) {
      current.replaceWith(node);
    } else {
      document.body.prepend(node);
    }
  }

  function handleMutation(mutationList) {
    for (const mutation of mutationList) {
      if (mutation.type !== "childList") {
        continue;
      }
      for (const node of Array.from(mutation.removedNodes)) {
        if (!isProtected(node)) {
          continue;
        }
        console.log(
          "proxychain: prevented removal of element containing '" + selector + "'",
        );
        console.log(node.outerHTML);
        // Wait briefly so the page script finishes its own DOM changes first.
        setTimeout(() => restore(node), 50);
      }
    }
  }

  const observer = new MutationObserver(handleMutation);
  observer.observe(document, { childList: true, subtree: true });
})();

View File

@@ -0,0 +1,21 @@
package responsemodifiers
import (
"ladder/proxychain"
)
// BypassCORS modifies response headers to prevent the browser
// from enforcing any CORS restrictions. This should run at the end of the chain.
//
// It registers, through AddOnceResponseModifications, modifications that set
// the Access-Control-Allow-* and Access-Control-Expose-Headers headers and
// delete X-Frame-Options.
//
// NOTE(review): "Access-Control-Allow-Origin: *" is sent together with
// "Access-Control-Allow-Credentials: true"; browsers are expected to reject
// credentialed requests for a wildcard origin. Confirm whether the request's
// Origin should be echoed back instead.
func BypassCORS() proxychain.ResponseModification {
return func(chain *proxychain.ProxyChain) error {
chain.AddOnceResponseModifications(
SetResponseHeader("Access-Control-Allow-Origin", "*"),
SetResponseHeader("Access-Control-Expose-Headers", "*"),
SetResponseHeader("Access-Control-Allow-Credentials", "true"),
SetResponseHeader("Access-Control-Allow-Methods", "GET, PUT, POST, DELETE, HEAD, OPTIONS, PATCH"),
SetResponseHeader("Access-Control-Allow-Headers", "*"),
// Dropped in addition to the CORS headers listed above.
DeleteResponseHeader("X-Frame-Options"),
)
return nil
}
}

View File

@@ -0,0 +1,30 @@
package responsemodifiers
import (
"ladder/proxychain"
)
// BypassContentSecurityPolicy modifies response headers to prevent the browser
// from enforcing any CSP restrictions. This should run at the end of the chain.
//
// It registers, through AddOnceResponseModifications, deletions of the
// standard, report-only and legacy vendor-prefixed CSP headers.
//
// TODO: handle edge case where CSP is specified in meta tag:
// <meta http-equiv="Content-Security-Policy" content="default-src 'self'">
func BypassContentSecurityPolicy() proxychain.ResponseModification {
return func(chain *proxychain.ProxyChain) error {
chain.AddOnceResponseModifications(
DeleteResponseHeader("Content-Security-Policy"),
DeleteResponseHeader("Content-Security-Policy-Report-Only"),
DeleteResponseHeader("X-Content-Security-Policy"),
DeleteResponseHeader("X-WebKit-CSP"),
)
return nil
}
}
// SetContentSecurityPolicy returns a response modification that sets the
// Content-Security-Policy response header to csp, replacing any existing value.
func SetContentSecurityPolicy(csp string) proxychain.ResponseModification {
	return func(px *proxychain.ProxyChain) error {
		px.Response.Header.Set("Content-Security-Policy", csp)
		return nil
	}
}

View File

@@ -0,0 +1,53 @@
package responsemodifiers
import (
"fmt"
"net/url"
"strings"
"ladder/proxychain"
)
// forwardBlacklist lists, in lower case, the upstream response headers that
// ForwardResponseHeaders must not copy to the client.
var forwardBlacklist = map[string]bool{
	"content-length":            true,
	"content-encoding":          true,
	"transfer-encoding":         true,
	"strict-transport-security": true,
	"connection":                true,
	"keep-alive":                true,
}
// ForwardResponseHeaders forwards the response headers from the upstream
// server to the client.
//
// Headers named in forwardBlacklist are skipped. A Location header is not
// forwarded verbatim: its first value is prefixed with the proxy's own scheme
// and host so that redirects stay on the proxy. An error is returned only when
// the proxy base URL cannot be parsed.
func ForwardResponseHeaders() proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		for uname, headers := range chain.Response.Header {
			name := strings.ToLower(uname)
			if forwardBlacklist[name] || len(headers) == 0 {
				continue
			}

			// patch location header to forward to proxy instead
			if name == "location" {
				u, err := url.Parse(chain.Context.BaseURL())
				if err != nil {
					return err
				}
				newLocation := fmt.Sprintf("%s://%s/%s", u.Scheme, u.Host, headers[0])
				chain.Context.Set("location", newLocation)
				// Skip the generic forwarding below, which would overwrite
				// the patched value with the upstream one.
				continue
			}

			// forward headers
			for _, value := range headers {
				chain.Context.Set(name, value)
			}
		}

		return nil
	}
}

View File

@@ -0,0 +1,189 @@
package responsemodifiers
import (
"bytes"
"embed"
"fmt"
"html/template"
"io"
"log"
"net/url"
"strings"
"ladder/proxychain"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
//"github.com/go-shiori/dom"
"github.com/markusmobius/go-trafilatura"
)
// templateFS embeds generate_readable_outline.html, the page template that
// GenerateReadableOutline parses and renders.
//go:embed generate_readable_outline.html
var templateFS embed.FS
// GenerateReadableOutline creates a reader-friendly distilled representation of the article.
// This is a reliable way of bypassing soft-paywalled articles, where the content is hidden, but still present in the DOM.
//
// The page template is parsed once, when GenerateReadableOutline is called;
// a template that cannot be parsed is a programming error and panics. The
// returned modification extracts the article from the response body, renders
// it through the template and replaces the response body with the result.
// Extraction and DOM rendering errors are returned; template execution errors
// are logged from the rendering goroutine.
func GenerateReadableOutline() proxychain.ResponseModification {
	// get template only once, and reuse for subsequent calls
	f := "generate_readable_outline.html"
	tmpl, err := template.ParseFS(templateFS, f)
	if err != nil {
		panic(fmt.Errorf("tx.GenerateReadableOutline Error: parsing %s: %w", f, err))
	}

	return func(chain *proxychain.ProxyChain) error {
		// ===========================================================
		// 1. extract dom contents using reading mode algo
		// ===========================================================
		opts := trafilatura.Options{
			IncludeImages:      false,
			IncludeLinks:       true,
			FavorRecall:        true,
			Deduplicate:        true,
			FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go
			// implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability"
			OriginalURL: chain.Request.URL,
		}

		extract, err := trafilatura.Extract(chain.Response.Body, opts)
		if err != nil {
			return err
		}

		// ============================================================================
		// 2. render generate_readable_outline.html template using metadata from step 1
		// ============================================================================

		// The extractor may produce no content node; render an empty body then.
		var distilledHTML string
		if extract.ContentNode != nil {
			// render DOM to string without H1 title
			removeFirstH1(extract.ContentNode)
			// rewrite all links to stay on /outline/ path
			rewriteHrefLinks(extract.ContentNode, chain.Context.BaseURL(), chain.APIPrefix)

			var b bytes.Buffer
			if err := html.Render(&b, extract.ContentNode); err != nil {
				return fmt.Errorf("rendering distilled content: %w", err)
			}
			distilledHTML = b.String()
		}

		// The template hides the date when it is empty, so an unknown
		// (zero) date must not be formatted.
		var date string
		if !extract.Metadata.Date.IsZero() {
			date = extract.Metadata.Date.String()
		}

		// populate template parameters
		data := map[string]interface{}{
			"Success":     true,
			"Image":       extract.Metadata.Image,
			"Description": extract.Metadata.Description,
			"Sitename":    extract.Metadata.Sitename,
			"Hostname":    extract.Metadata.Hostname,
			"Url":         "/" + chain.Request.URL.String(),
			"Title":       extract.Metadata.Title, // todo: modify CreateReadableDocument so we don't have <h1> titles duplicated?
			"Date":        date,
			// NOTE(review): Author is a plain string, so html/template escapes
			// the generated <a> markup. Wrapping it in template.HTML is only
			// safe once createWikipediaSearchLinks HTML-escapes its terms.
			"Author": createWikipediaSearchLinks(extract.Metadata.Author),
			//"Author": extract.Metadata.Author,
			// Body is markup rendered from the distilled DOM; it is marked as
			// template.HTML so the template does not escape it into text.
			"Body": template.HTML(distilledHTML),
		}

		// ============================================================================
		// 3. queue sending the response back to the client by replacing the response body
		// (the response body will be read as a stream in proxychain.Execute() later on.)
		// ============================================================================
		pr, pw := io.Pipe() // pipe io.writer contents into io.reader

		// Use a goroutine for writing to the pipe so we don't deadlock the request
		go func() {
			defer pw.Close()

			err := tmpl.Execute(pw, data) // <- render template
			if err != nil {
				log.Printf("WARN: GenerateReadableOutline template rendering error: %s\n", err)
			}
		}()

		chain.Context.Set("content-type", "text/html")
		chain.Response.Body = pr // <- replace response body reader with our new reader from pipe

		return nil
	}
}
// =============================================
// DOM Rendering helpers
// =============================================
// removeFirstH1 detaches the first <h1> descendant of n, in document order,
// from its parent. At most one element is removed; n itself is never removed.
// It does nothing when n is nil or contains no <h1>.
func removeFirstH1(n *html.Node) {
	if n == nil {
		return
	}

	// find returns the first H1 below cur using a pre-order walk.
	var find func(*html.Node) *html.Node
	find = func(cur *html.Node) *html.Node {
		for c := cur.FirstChild; c != nil; c = c.NextSibling {
			if c.Type == html.ElementNode && c.DataAtom == atom.H1 {
				return c
			}
			if h1 := find(c); h1 != nil {
				return h1
			}
		}
		return nil
	}

	// Remove only after the search has finished, so the walk stops at the
	// first match instead of continuing through sibling subtrees.
	if h1 := find(n); h1 != nil {
		h1.Parent.RemoveChild(h1)
	}
}
// rewriteHrefLinks rewrites the href of every <a> element at or below n so
// that links keep going through the API path of the proxy.
//
// baseURL is the proxy's own base URL and apiPath is the API prefix (leading
// and trailing slashes are ignored). Absolute links containing the proxy
// origin get apiPath inserted after it, and hrefs starting with "/" are
// prefixed with "/<apiPath>". If baseURL cannot be parsed the error is logged
// and the tree is left untouched.
func rewriteHrefLinks(n *html.Node, baseURL string, apiPath string) {
	if n == nil {
		return
	}

	u, err := url.Parse(baseURL)
	if err != nil {
		log.Printf("GenerateReadableOutline :: rewriteHrefLinks error - %s\n", err)
		// u is nil here; continuing would dereference it.
		return
	}

	apiPath = strings.Trim(apiPath, "/")
	proxyURL := fmt.Sprintf("%s://%s", u.Scheme, u.Host)
	newProxyURL := fmt.Sprintf("%s/%s", proxyURL, apiPath)

	var walk func(*html.Node)
	walk = func(cur *html.Node) {
		if cur.Type == html.ElementNode && cur.DataAtom == atom.A {
			for i := range cur.Attr {
				if cur.Attr[i].Key != "href" {
					continue
				}
				// rewrite url on a.href: http://localhost:8080/https://example.com -> http://localhost:8080/outline/https://example.com
				href := strings.Replace(cur.Attr[i].Val, proxyURL, newProxyURL, 1)
				// rewrite relative URLs too
				if strings.HasPrefix(href, "/") {
					href = fmt.Sprintf("/%s%s", apiPath, href)
				}
				cur.Attr[i].Val = href
			}
		}

		for c := cur.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(n)
}
// createWikipediaSearchLinks takes in comma or semicolon separated terms,
// then turns them into <a> links searching for the term.
//
// Terms separated by commas form a group and are joined with a space; groups
// separated by semicolons are joined with ", ". Empty terms and empty groups
// are ignored, so stray separators never produce dangling commas. The term is
// query-escaped in the href and HTML-escaped in the link text, so the returned
// markup is safe to embed as raw HTML.
func createWikipediaSearchLinks(searchTerms string) string {
	var groups []string

	for _, termGroup := range strings.Split(searchTerms, ";") {
		var links []string

		for _, term := range strings.Split(termGroup, ",") {
			trimmedTerm := strings.TrimSpace(term)
			if trimmedTerm == "" {
				continue
			}

			wikiURL := fmt.Sprintf("https://en.wikipedia.org/w/index.php?search=%s", url.QueryEscape(trimmedTerm))
			link := fmt.Sprintf("<a href=\"%s\">%s</a>", wikiURL, template.HTMLEscapeString(trimmedTerm))
			links = append(links, link)
		}

		// Only groups that produced links take part in the comma-separated
		// output; indexing the last link of an empty group used to panic.
		if len(links) > 0 {
			groups = append(groups, strings.Join(links, " "))
		}
	}

	return strings.Join(groups, ", ")
}

View File

@@ -0,0 +1,380 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<link rel="stylesheet" href="/styles.css" />
<script src="/script.js" defer></script>
<script>
// handleThemeChange applies the stored colour theme to the page.
// It reads "theme" from localStorage (storing "system" when nothing is saved yet)
// and toggles the "dark" class on the root element accordingly.
const handleThemeChange = () => {
let theme = localStorage.getItem("theme");
if (theme === null) {
// First visit: persist the default so later reads find a value.
localStorage.setItem("theme", "system");
theme = "system";
}
// Dark mode applies when chosen explicitly, or when following the
// system preference and the system currently prefers a dark scheme.
if (
theme === "dark" ||
(theme === "system" &&
window.matchMedia("(prefers-color-scheme: dark)").matches)
) {
document.documentElement.classList.add("dark");
} else {
document.documentElement.classList.remove("dark");
}
};
// Run immediately, from this inline script in <head>.
handleThemeChange();
</script>
<title>ladder | {{.Title}}</title>
</head>
<body
class="antialiased bg-white dark:bg-slate-900 text-slate-900 dark:text-slate-200"
>
<div class="flex flex-col gap-4 max-w-3xl mx-4 lg:mx-auto pt-10">
<div class="flex justify-between place-items-center">
<div
class="hover:drop-shadow-[0_0px_4px_rgba(122,167,209,.3)] transition-colors duration-300 focus:outline-none focus:ring focus:border-[#7AA7D1] ring-offset-2"
>
<div class="flex">
<a
href="/"
class="flex -ml-2 h-8 font-extrabold tracking-tight hover:no-underline focus:outline-none focus:ring focus:border-[#7AA7D1] ring-offset-2"
>
<svg
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
viewBox="0 0 512 512"
class="h-8 focus:outline-none focus:ring focus:border-[#7AA7D1] ring-offset-2"
>
<path
fill="#7AA7D1"
d="M262.074 485.246C254.809 485.265 247.407 485.534 240.165 484.99L226.178 483.306C119.737 468.826 34.1354 383.43 25.3176 274.714C24.3655 262.975 23.5876 253.161 24.3295 241.148C31.4284 126.212 123.985 31.919 238.633 24.1259L250.022 23.8366C258.02 23.8001 266.212 23.491 274.183 24.1306C320.519 27.8489 366.348 45.9743 402.232 75.4548L416.996 88.2751C444.342 114.373 464.257 146.819 475.911 182.72L480.415 197.211C486.174 219.054 488.67 242.773 487.436 265.259L486.416 275.75C478.783 352.041 436.405 418.1 369.36 455.394L355.463 462.875C326.247 477.031 294.517 484.631 262.074 485.246ZM253.547 72.4475C161.905 73.0454 83.5901 144.289 73.0095 234.5C69.9101 260.926 74.7763 292.594 83.9003 317.156C104.53 372.691 153.9 416.616 211.281 430.903C226.663 434.733 242.223 436.307 258.044 436.227C353.394 435.507 430.296 361.835 438.445 267.978C439.794 252.442 438.591 236.759 435.59 221.5C419.554 139.955 353.067 79.4187 269.856 72.7052C264.479 72.2714 258.981 72.423 253.586 72.4127L253.547 72.4475Z"
/>
<path
fill="#7AA7D1"
d="M153.196 310.121L133.153 285.021C140.83 283.798 148.978 285.092 156.741 284.353L156.637 277.725L124.406 278.002C123.298 277.325 122.856 276.187 122.058 275.193L116.089 267.862C110.469 260.975 103.827 254.843 98.6026 247.669C103.918 246.839 105.248 246.537 111.14 246.523L129.093 246.327C130.152 238.785 128.62 240.843 122.138 240.758C111.929 240.623 110.659 242.014 105.004 234.661L97.9953 225.654C94.8172 221.729 91.2219 218.104 88.2631 214.005C84.1351 208.286 90.1658 209.504 94.601 209.489L236.752 209.545C257.761 209.569 268.184 211.009 285.766 221.678L285.835 206.051C285.837 197.542 286.201 189.141 284.549 180.748C280.22 158.757 260.541 143.877 240.897 135.739C238.055 134.561 232.259 133.654 235.575 129.851C244.784 119.288 263.680 111.990 277.085 111.105C288.697 109.828 301.096 113.537 311.75 117.703C360.649 136.827 393.225 183.042 398.561 234.866C402.204 270.253 391.733 308.356 367.999 335.1C332.832 374.727 269.877 384.883 223.294 360.397C206.156 351.388 183.673 333.299 175.08 316.6C173.511 313.551 174.005 313.555 170.443 313.52L160.641 313.449C158.957 313.435 156.263 314.031 155.122 312.487L153.196 310.121Z"
/>
</svg>
</a>
<a
href="/https://{{.Hostname}}"
class="flex ml-1 h-8 font-extrabold tracking-tight hover:no-underline focus:outline-none focus:ring focus:border-[#7AA7D1] ring-offset-2"
>
<span class="text-3xl mr-1 text-[#7AA7D1] leading-8 align-middle">{{.Sitename}}</span>
</a>
</div>
</div>
<div class="flex justify-center z-10">
<div class="relative" id="dropdown">
<button
aria-expanded="closed"
onclick="toggleDropdown()"
type="button"
class="inline-flex items-center justify-center whitespace-nowrap rounded-full h-12 px-4 py-2 text-sm font-medium text-slate-600 dark:text-slate-400 ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 bg-white dark:bg-slate-900 hover:bg-slate-200 dark:hover:bg-slate-700 hover:text-slate-500 dark:hover:text-slate-200"
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
class="h-5 w-5"
>
<path
d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.39a2 2 0 0 0-.73-2.73l-.15-.08a2 2 0 0 1-1-1.74v-.5a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z"
/>
<circle cx="12" cy="12" r="3" />
</svg>
</button>
<div
id="dropdown_panel"
class="hidden absolute right-0 mt-2 w-52 rounded-md bg-white dark:bg-slate-900 shadow-md border border-slate-400 dark:border-slate-700"
>
<div
class="flex flex-col gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm"
>
Font family
<div class="grid grid-cols-2 gap-2">
<div>
<input
type="radio"
name="font"
id="sans-serif"
value="sans-serif"
class="peer hidden"
checked
/>
<label
for="sans-serif"
tabindex="0"
class="flex items-center justify-center h-10 cursor-pointer select-none rounded-md p-2 text-sm font-sans text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
>Sans-serif</label
>
</div>
<div>
<input
type="radio"
name="font"
id="serif"
value="serif"
class="peer hidden"
/>
<label
for="serif"
tabindex="0"
class="flex items-center justify-center h-10 cursor-pointer select-none rounded-md p-2 text-sm font-serif text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
>Serif</label
>
</div>
</div>
</div>
<div
class="shrink-0 bg-slate-400 dark:bg-slate-700 h-[1px] w-full"
></div>
<div
class="flex flex-col gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm"
>
Font size
<div class="grid grid-cols-4 gap-2">
<div>
<input
type="radio"
name="fontsize"
id="sm"
value="text-sm"
class="peer hidden"
/>
<label
for="sm"
tabindex="0"
title="Small"
class="flex items-end justify-center h-10 w-10 cursor-pointer select-none rounded-md p-2 text-sm text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
>sm</label
>
</div>
<div>
<input
type="radio"
name="fontsize"
id="base"
value="text-base"
class="peer hidden"
checked
/>
<label
for="base"
tabindex="0"
title="Medium"
class="flex items-end justify-center h-10 w-10 cursor-pointer select-none rounded-md p-2 text-base text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
>md</label
>
</div>
<div>
<input
type="radio"
name="fontsize"
id="lg"
value="text-lg"
class="peer hidden"
/>
<label
for="lg"
tabindex="0"
title="Large"
class="flex items-end justify-center h-10 w-10 cursor-pointer select-none rounded-md p-2 text-lg text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
>lg</label
>
</div>
</div>
</div>
<div
class="shrink-0 bg-slate-200 dark:bg-slate-700 h-[1px] w-full"
></div>
<div
class="flex flex-col gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm"
>
Appearance
<div class="grid grid-cols-4 gap-2">
<div>
<input
type="radio"
name="theme"
id="light"
value="light"
class="peer hidden"
/>
<label
for="light"
tabindex="0"
title="Light"
class="flex items-end justify-center h-10 w-10 cursor-pointer select-none rounded-md p-2 text-sm text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
>
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
class="h-5 w-5"
>
<circle cx="12" cy="12" r="4" />
<path d="M12 2v2" />
<path d="M12 20v2" />
<path d="m4.93 4.93 1.41 1.41" />
<path d="m17.66 17.66 1.41 1.41" />
<path d="M2 12h2" />
<path d="M20 12h2" />
<path d="m6.34 17.66-1.41 1.41" />
<path d="m19.07 4.93-1.41 1.41" />
</svg>
</label>
</div>
<div>
<input
type="radio"
name="theme"
id="dark"
value="dark"
class="peer hidden"
/>
<label
for="dark"
tabindex="0"
title="Dark"
class="flex items-end justify-center h-10 w-10 cursor-pointer select-none rounded-md p-2 text-base text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
>
<svg
xmlns="http://www.w3.org/2000/svg"
width="24"
height="24"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
class="h-5 w-5"
>
<path d="M12 3a6 6 0 0 0 9 9 9 9 0 1 1-9-9Z" />
</svg>
</label>
</div>
<div>
<input
type="radio"
name="theme"
id="system"
value="system"
class="peer hidden"
checked
/>
<label
for="system"
tabindex="0"
title="System preference"
class="flex items-end justify-center h-10 w-10 cursor-pointer select-none rounded-md p-2 text-lg text-slate-600 dark:text-slate-200 text-center hover:bg-slate-200 dark:hover:bg-slate-700 peer-checked:bg-slate-200 dark:peer-checked:bg-slate-700"
>
<svg
xmlns="http://www.w3.org/2000/svg"
width="24"
height="24"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
class="h-5 w-5"
>
<path d="M12 8a2.83 2.83 0 0 0 4 4 4 4 0 1 1-4-4" />
<path d="M12 2v2" />
<path d="M12 20v2" />
<path d="m4.9 4.9 1.4 1.4" />
<path d="m17.7 17.7 1.4 1.4" />
<path d="M2 12h2" />
<path d="M20 12h2" />
<path d="m6.3 17.7-1.4 1.4" />
<path d="m19.1 4.9-1.4 1.4" />
</svg>
</label>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<main class="flex flex-col space-y-3">
{{if not .Success}}
<h1>
Error
</h1>
<p>
There was a problem querying
<a href="{{.Params}}">{{.Params}}</a>
</p>
<code class="text-red-500 dark:text-red-400">
{{.Error}}
</code>
{{else}}
<div class="flex flex-col gap-1 mt-3">
<h1>
<a href="{{.Url}}" class="text-slate-900 dark:text-slate-200"> {{.Title}} </a>
</h1>
{{if ne .Date ""}}
<small
class="text-sm font-medium leading-none text-slate-600 dark:text-slate-400"
>{{.Date}}</small
>
{{end}}
{{if ne .Author ""}}
<small
class="text-sm font-medium leading-none text-slate-600 dark:text-slate-400"
>{{.Author}}</small
>
{{end}}
</div>
<div class="flex flex-col space-y-3">
<div>
<div class="grid grid-cols-1 justify-items-center">
<div><img src="{{.Image}}" alt="{{.Description}}" class="h-auto w-auto object-cover max-w-full mx-auto rounded-md shadow-md dark:shadow-slate-700"/></div>
<div class="mt-2 text-sm text-slate-600 dark:text-slate-400">{{.Description}}</div>
</div>
</div>
<div>{{.Body}}</div>
</div>
{{end}}
</main>
<div class="my-2"></div>
<footer class="mx-4 text-center text-slate-600 dark:text-slate-400">
<p>
Code Licensed Under GPL v3.0 |
<a
href="https://github.com/everywall/ladder"
class="hover:text-blue-500 dark:hover:text-blue-500 hover:underline underline-offset-2 transition-colors duration-300"
>View Source</a
>
|
<a
href="https://github.com/everywall"
class="hover:text-blue-500 dark:hover:text-blue-500 hover:underline underline-offset-2 transition-colors duration-300"
>Everywall</a
>
</p>
</footer>
<div class="my-2"></div>
</div>
</body>
</html>

View File

@@ -0,0 +1,41 @@
package responsemodifiers
import (
_ "embed"
"strings"
"ladder/proxychain"
"ladder/proxychain/responsemodifiers/rewriters"
)
// injectScript returns a response modification that wraps the response body
// in an HTML rewriter injecting js, to be executed at the point given by
// execTime. Responses whose content-type does not start with "text/html" are
// passed through unchanged.
func injectScript(js string, execTime rewriters.ScriptExecTime) proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		// Only html responses get a rewriter.
		contentType := chain.Response.Header.Get("content-type")
		if strings.HasPrefix(contentType, "text/html") {
			injector := rewriters.NewScriptInjectorRewriter(js, execTime)
			chain.Response.Body = rewriters.NewHTMLRewriter(chain.Response.Body, injector)
		}

		return nil
	}
}
// InjectScriptBeforeDOMContentLoaded returns a ResponseModification that injects js
// into HTML responses using the rewriters.BeforeDOMContentLoaded execution time.
func InjectScriptBeforeDOMContentLoaded(js string) proxychain.ResponseModification {
	when := rewriters.BeforeDOMContentLoaded
	return injectScript(js, when)
}
// InjectScriptAfterDOMContentLoaded returns a ResponseModification that injects js
// into HTML responses using the rewriters.AfterDOMContentLoaded execution time.
func InjectScriptAfterDOMContentLoaded(js string) proxychain.ResponseModification {
	when := rewriters.AfterDOMContentLoaded
	return injectScript(js, when)
}
// InjectScriptAfterDOMIdle returns a ResponseModification that injects js
// into HTML responses using the rewriters.AfterDOMIdle execution time
// (ie: after a js framework has finished loading).
func InjectScriptAfterDOMIdle(js string) proxychain.ResponseModification {
	when := rewriters.AfterDOMIdle
	return injectScript(js, when)
}

View File

@@ -0,0 +1,107 @@
package responsemodifiers
import (
"fmt"
http "github.com/bogdanfinn/fhttp"
//"net/http"
//http "github.com/Danny-Dasilva/fhttp"
"ladder/proxychain"
)
// DeleteIncomingCookies removes every Set-Cookie header from the response, so that
// no cookie is passed from the proxy server down to the client.
// The variadic argument is accepted but ignored.
func DeleteIncomingCookies(_ ...string) proxychain.ResponseModification {
	stripAll := func(px *proxychain.ProxyChain) error {
		px.Response.Header.Del("Set-Cookie")
		return nil
	}
	return stripAll
}
// DeleteIncomingCookiesExcept drops every Set-Cookie header whose cookie name is not
// listed in whitelist; whitelisted cookies are passed through to the client unchanged.
// If nothing survives the filter, the Set-Cookie header is removed entirely.
func DeleteIncomingCookiesExcept(whitelist ...string) proxychain.ResponseModification {
	return func(px *proxychain.ProxyChain) error {
		// set of permitted cookie names
		allowed := make(map[string]struct{})
		for _, name := range whitelist {
			allowed[name] = struct{}{}
		}

		// no headers at all: nothing to filter
		if px.Response.Header == nil {
			return nil
		}

		kept := []string{}
		for _, raw := range px.Response.Header["Set-Cookie"] {
			if _, ok := allowed[parseCookie(raw).Name]; !ok {
				continue
			}
			kept = append(kept, raw)
		}

		if len(kept) == 0 {
			px.Response.Header.Del("Set-Cookie")
			return nil
		}
		px.Response.Header["Set-Cookie"] = kept
		return nil
	}
}
// parseCookie parses a raw Set-Cookie header value and returns the first cookie
// it describes.
//
// The value is parsed through http.Response, because Set-Cookie is a response
// header: Request.Cookies only reads the "Cookie" header and would therefore
// always yield an empty slice here (and indexing it would panic).
//
// If cookieStr does not contain a parsable cookie, an empty, non-nil
// *http.Cookie is returned so callers can safely read its Name.
func parseCookie(cookieStr string) *http.Cookie {
	header := http.Header{}
	header.Add("Set-Cookie", cookieStr)
	response := http.Response{Header: header}

	cookies := response.Cookies()
	if len(cookies) == 0 {
		return &http.Cookie{}
	}
	return cookies[0]
}
// SetIncomingCookies sets the response's Set-Cookie header to the given raw cookie
// string, which is what the proxy server will send down to the client.
func SetIncomingCookies(cookies string) proxychain.ResponseModification {
	overwrite := func(px *proxychain.ProxyChain) error {
		px.Response.Header.Set("Set-Cookie", cookies)
		return nil
	}
	return overwrite
}
// SetIncomingCookie sets the cookie called name to val in the response sent to the client.
// Every existing Set-Cookie entry with that name is replaced by "name=val";
// if no such entry exists, "name=val" is appended. Other cookies are kept as they are.
func SetIncomingCookie(name string, val string) proxychain.ResponseModification {
	return func(px *proxychain.ProxyChain) error {
		if px.Response.Header == nil {
			return nil
		}

		result := []string{}
		replaced := false

		for _, raw := range px.Response.Header["Set-Cookie"] {
			if parseCookie(raw).Name != name {
				// unrelated cookie: pass through untouched
				result = append(result, raw)
				continue
			}
			result = append(result, fmt.Sprintf("%s=%s", name, val))
			replaced = true
		}

		// the cookie was not present upstream, so add it
		if !replaced {
			result = append(result, fmt.Sprintf("%s=%s", name, val))
		}

		px.Response.Header["Set-Cookie"] = result
		return nil
	}
}

View File

@@ -0,0 +1,21 @@
package responsemodifiers
import (
"ladder/proxychain"
)
// SetResponseHeader returns a ResponseModification that sets the header key
// to value on the response held by the chain's Context.
func SetResponseHeader(key string, value string) proxychain.ResponseModification {
	return func(px *proxychain.ProxyChain) error {
		resp := px.Context.Response()
		resp.Header.Set(key, value)
		return nil
	}
}
// DeleteResponseHeader returns a ResponseModification that deletes the header key
// from the response held by the chain's Context.
func DeleteResponseHeader(key string) proxychain.ResponseModification {
	return func(px *proxychain.ProxyChain) error {
		resp := px.Context.Response()
		resp.Header.Del(key)
		return nil
	}
}

View File

@@ -0,0 +1,56 @@
package responsemodifiers
import (
_ "embed"
"fmt"
"strings"
"ladder/proxychain"
"ladder/proxychain/responsemodifiers/rewriters"
)
// patchDynamicResourceURLsScript holds the contents of patch_dynamic_resource_urls.js,
// embedded into the binary at build time.
//
//go:embed patch_dynamic_resource_urls.js
var patchDynamicResourceURLsScript string
// PatchDynamicResourceURLs injects a script into HTML responses that patches the
// javascript runtime so URLs are rewritten client-side.
//   - The goal is to keep the proxified page browsable by routing
//     resource URLs through the proxy.
//   - Native APIs capable of network requests are hooked and their URL
//     arguments are modified to point to the proxy instead, for example:
//     fetch('/relative_path') -> fetch('/https://proxiedsite.com/relative_path')
//     Element.setAttribute('src', "/assets/img.jpg") -> Element.setAttribute('src', "/https://proxiedsite.com/assets/img.jpg")
//
// Responses whose content-type does not start with "text/html" are left untouched.
func PatchDynamicResourceURLs() proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		if ct := chain.Response.Header.Get("content-type"); !strings.HasPrefix(ct, "text/html") {
			return nil
		}

		// URL as sent by the client, ie: http://localhost:8080/http://proxiedsite.com/foo/bar
		clientURI := chain.Context.Request().URI()
		// URL the client asked to proxy, ie: http://proxiedsite.com/foo/bar
		target := chain.Request.URL

		// ie: http://localhost:8080
		proxyOrigin := fmt.Sprintf("%s://%s", clientURI.Scheme(), clientURI.Host())
		// ie: http://proxiedsite.com
		siteOrigin := fmt.Sprintf("%s://%s", target.Scheme, target.Host)

		injector := rewriters.NewScriptInjectorRewriterWithParams(
			patchDynamicResourceURLsScript,
			rewriters.BeforeDOMContentLoaded,
			map[string]string{
				"{{PROXY_ORIGIN}}": proxyOrigin,
				"{{ORIGIN}}":       siteOrigin,
			},
		)

		chain.Response.Body = rewriters.NewHTMLRewriter(chain.Response.Body, injector)
		return nil
	}
}

View File

@@ -0,0 +1,366 @@
// Overrides the global fetch and XMLHttpRequest open methods to modify the request URLs.
// Also overrides the attribute setter prototype to modify the request URLs
// fetch("/relative_script.js") -> fetch("http://localhost:8080/relative_script.js")
(() => {
// ============== PARAMS ===========================
// if the original request was: http://localhost:8080/http://proxiedsite.com/foo/bar
// proxyOrigin is http://localhost:8080
const proxyOrigin = "{{PROXY_ORIGIN}}";
//const proxyOrigin = globalThis.window.location.origin;
// if the original request was: http://localhost:8080/http://proxiedsite.com/foo/bar
// origin is http://proxiedsite.com
const origin = "{{ORIGIN}}";
//const origin = (new URL(decodeURIComponent(globalThis.window.location.pathname.substring(1)))).origin
// ============== END PARAMS ======================
// URI scheme prefixes (each including its trailing colon) that rewriteURL
// consults when deciding that a URL must be left untouched.
const blacklistedSchemes = [
"ftp:",
"mailto:",
"tel:",
"file:",
"blob:",
"javascript:",
"about:",
"magnet:",
"ws:",
"wss:",
];
// rewriteURL takes a URL as used by page scripts (string, URL object or
// Request object) and returns a string that points at the proxy instead of
// the proxied site. Falsy input, special-scheme URIs, unparsable URIs and
// URLs that already look proxied are returned unchanged.
function rewriteURL(url) {
  if (!url) return url;

  // fetch url might be string, url, or request object
  // handle all three by downcasting to string
  if (typeof url !== "string") {
    const asString = String(url);
    url = asString === "[object Request]" ? url.url : asString;
  }

  const oldUrl = url;

  // don't rewrite special URIs: compare the scheme prefix, since an actual
  // URI (ie: "mailto:someone@example.com") is never equal to a bare scheme
  const lowered = url.toLowerCase();
  if (blacklistedSchemes.some((scheme) => lowered.startsWith(scheme))) {
    return url;
  }

  // don't rewrite invalid URIs
  try {
    new URL(url, origin);
  } catch {
    return url;
  }

  // don't double rewrite
  if (url.startsWith(`${proxyOrigin}/http://`)) return url;
  if (url.startsWith(`${proxyOrigin}/https://`)) return url;
  if (url.startsWith(`/${proxyOrigin}`)) return url;
  if (url.startsWith(`/${origin}`)) return url;
  if (url.startsWith(`/http://`)) return url;
  if (url.startsWith(`/https://`)) return url;
  if (url.startsWith(`/http%3A%2F%2F`)) return url;
  if (url.startsWith(`/https%3A%2F%2F`)) return url;
  if (url.startsWith(`/%2Fhttp`)) return url;

  //console.log(`proxychain: origin: ${origin} // proxyOrigin: ${proxyOrigin} // original: ${oldUrl}`)

  //originDomain = origin.replace("https://", "");
  let scheme = origin.split(":")[0];

  if (url.startsWith("//")) {
    // protocol-relative URL: reuse the proxied site's scheme
    url = `/${scheme}://${encodeURIComponent(url.substring(2))}`;
  } else if (url.startsWith("/")) {
    // root-relative URL: resolve against the proxied site's origin
    url = `/${origin}/${encodeURIComponent(url.substring(1))}`;
  } else if (
    url.startsWith(proxyOrigin) && !url.startsWith(`${proxyOrigin}/http`)
  ) {
    // edge case where client js uses current url host to write an absolute path;
    // insert the proxied origin right after the proxy origin
    url = url.replace(proxyOrigin, `${proxyOrigin}/${origin}`);
  } else if (url.startsWith(origin)) {
    url = `/${encodeURIComponent(url)}`;
  } else if (url.startsWith("http://") || url.startsWith("https://")) {
    url = `/${proxyOrigin}/${encodeURIComponent(url)}`;
  }

  console.log(`proxychain: rewrite JS URL: ${oldUrl} -> ${url}`);
  return url;
}
/*
// sometimes anti-bot protections like cloudflare or akamai bot manager check if JS is hooked
function hideMonkeyPatch(objectOrName, method, originalToString) {
let obj;
let isGlobalFunction = false;
if (typeof objectOrName === "string") {
obj = globalThis[objectOrName];
isGlobalFunction = (typeof obj === "function") &&
(method === objectOrName);
} else {
obj = objectOrName;
}
if (isGlobalFunction) {
const originalFunction = obj;
globalThis[objectOrName] = function(...args) {
return originalFunction.apply(this, args);
};
globalThis[objectOrName].toString = () => originalToString;
} else if (obj && typeof obj[method] === "function") {
const originalMethod = obj[method];
obj[method] = function(...args) {
return originalMethod.apply(this, args);
};
obj[method].toString = () => originalToString;
} else {
console.warn(
`proxychain: cannot hide monkey patch: ${method} is not a function on the provided object.`,
);
}
}
*/
// hideMonkeyPatch is currently a no-op stub: it ignores all of its arguments
// and returns undefined. It keeps the call sites below valid while the
// toString-masking implementation (commented out above) is disabled.
function hideMonkeyPatch(objectOrName, method, originalToString) {
return;
}
// monkey patch fetch: the first argument of every fetch() call is passed
// through rewriteURL before being handed to the original implementation
const nativeFetch = fetch;
fetch = async (resource, options) => nativeFetch(rewriteURL(resource), options);
hideMonkeyPatch("fetch", "fetch", "function fetch() { [native code] }");
// monkey patch xmlhttprequest: the URL is supplied to open(), so that is
// where it gets rewritten
const oldOpen = XMLHttpRequest.prototype.open;
XMLHttpRequest.prototype.open = function (
  method,
  url,
  async = true,
  user = null,
  password = null,
) {
  return oldOpen.call(this, method, rewriteURL(url), async, user, password);
};
hideMonkeyPatch(
  XMLHttpRequest.prototype,
  "open",
  'function(){if("function"==typeof eo)return eo.apply(this,arguments)}',
);

// send() takes the request body as its only argument (not a method and URL),
// so the wrapper just forwards the body unchanged
const oldSend = XMLHttpRequest.prototype.send;
XMLHttpRequest.prototype.send = function (body) {
  return oldSend.call(this, body);
};
hideMonkeyPatch(
  XMLHttpRequest.prototype,
  "send",
  'function(){if("function"==typeof eo)return eo.apply(this,arguments)}',
);
// monkey patch service worker registration
// ServiceWorkerContainer only exists in secure contexts; referencing it
// unguarded elsewhere would throw a ReferenceError and abort every patch
// that follows, so skip this patch when it is unavailable.
if (typeof ServiceWorkerContainer !== "undefined") {
  const oldRegister = ServiceWorkerContainer.prototype.register;
  ServiceWorkerContainer.prototype.register = function (scriptURL, options) {
    return oldRegister.call(this, rewriteURL(scriptURL), options);
  };
  hideMonkeyPatch(
    ServiceWorkerContainer.prototype,
    "register",
    "function register() { [native code] }",
  );
}
// monkey patch URL.toString() method so the serialized URL is passed
// through rewriteURL before being returned
const nativeURLToString = URL.prototype.toString;
URL.prototype.toString = function () {
  return rewriteURL(nativeURLToString.call(this));
};
hideMonkeyPatch(
  URL.prototype,
  "toString",
  "function toString() { [native code] }",
);
// monkey patch URL.toJSON() method so the serialized URL is passed
// through rewriteURL before being returned
const oldToJson = URL.prototype.toJSON;
URL.prototype.toJSON = function () {
  let originalURL = oldToJson.call(this);
  return rewriteURL(originalURL);
};
hideMonkeyPatch(
  URL.prototype,
  "toJSON",
  "function toJSON() { [native code] }",
);
// Monkey patch URL.href getter and setter: reads return the rewritten URL,
// writes store the rewritten value through the original setter
const originalHrefDescriptor = Object.getOwnPropertyDescriptor(
  URL.prototype,
  "href",
);
Object.defineProperty(URL.prototype, "href", {
  get() {
    return rewriteURL(originalHrefDescriptor.get.call(this));
  },
  set(newValue) {
    originalHrefDescriptor.set.call(this, rewriteURL(newValue));
  },
});
// TODO: do one more pass of this by manually traversing the DOM
// AFTER all the JS and page has loaded just in case
// Monkey patch setter
// Table of tag/attribute pairs whose values may hold a URL. A tag may appear
// more than once when several of its attributes are listed.
const elements = [
{ tag: "a", attribute: "href" },
{ tag: "img", attribute: "src" },
// { tag: 'img', attribute: 'srcset' }, // TODO: handle srcset
{ tag: "script", attribute: "src" },
{ tag: "link", attribute: "href" },
{ tag: "link", attribute: "icon" },
{ tag: "iframe", attribute: "src" },
{ tag: "audio", attribute: "src" },
{ tag: "video", attribute: "src" },
{ tag: "source", attribute: "src" },
// { tag: 'source', attribute: 'srcset' }, // TODO: handle srcset
{ tag: "embed", attribute: "src" },
{ tag: "embed", attribute: "pluginspage" },
{ tag: "html", attribute: "manifest" },
{ tag: "object", attribute: "src" },
{ tag: "input", attribute: "src" },
{ tag: "track", attribute: "src" },
{ tag: "form", attribute: "action" },
{ tag: "area", attribute: "href" },
{ tag: "base", attribute: "href" },
{ tag: "blockquote", attribute: "cite" },
{ tag: "del", attribute: "cite" },
{ tag: "ins", attribute: "cite" },
{ tag: "q", attribute: "cite" },
{ tag: "button", attribute: "formaction" },
{ tag: "input", attribute: "formaction" },
{ tag: "meta", attribute: "content" },
{ tag: "object", attribute: "data" },
];
// For every tag/attribute pair, wrap the matching DOM property accessor on
// the element's prototype so values are passed through rewriteURL on both
// read and write. Pairs without a settable property descriptor are skipped.
elements.forEach(({ tag, attribute }) => {
  const proto = document.createElement(tag).constructor.prototype;
  const descriptor = Object.getOwnPropertyDescriptor(proto, attribute);
  if (!descriptor || !descriptor.set) return;

  const isRewritingSetKey = Symbol.for("isRewritingSet");
  const isRewritingGetKey = Symbol.for("isRewritingGet");

  Object.defineProperty(proto, attribute, {
    ...descriptor,
    set(value) {
      // calling rewriteURL will end up calling a setter for href,
      // leading to a recursive loop and a Maximum call stack size exceeded
      // error, so we guard against this with a local semaphore flag
      if (this[isRewritingSetKey]) {
        // Directly set the value without rewriting
        descriptor.set.call(this, value);
        return;
      }
      this[isRewritingSetKey] = true;
      try {
        descriptor.set.call(this, rewriteURL(value));
      } finally {
        // always clear the flag, otherwise a throwing setter would
        // permanently disable rewriting for this element
        this[isRewritingSetKey] = false;
      }
    },
    get() {
      if (this[isRewritingGetKey]) {
        return descriptor.get.call(this);
      }
      this[isRewritingGetKey] = true;
      try {
        return rewriteURL(descriptor.get.call(this));
      } finally {
        this[isRewritingGetKey] = false;
      }
    },
  });
});
// monkey-patching Element.setAttribute: when the element's tag and the
// attribute name match an entry of `elements`, the value is rewritten
// before the original setAttribute is invoked
const originalSetAttribute = Element.prototype.setAttribute;
Element.prototype.setAttribute = function (name, value) {
  const tag = this.tagName.toLowerCase();
  for (const entry of elements) {
    if (tag === entry.tag && name.toLowerCase() === entry.attribute) {
      value = rewriteURL(value);
      break;
    }
  }
  originalSetAttribute.call(this, name, value);
};
// sometimes, libraries will set the Element.innerHTML or Element.outerHTML directly with a string instead of setters.
// in this case, we intercept it, create a fake DOM, parse it and then rewrite all attributes that could
// contain a URL. Then we return the replacement innerHTML/outerHTML with redirected links.
//
// html:     markup string about to be assigned
// elements: list of { tag, attribute } pairs whose values may hold URLs
// returns:  the markup with those attribute values passed through rewriteURL,
//           or html unchanged when a rewrite is already in progress
function rewriteInnerHTML(html, elements) {
  const isRewritingHTMLKey = Symbol.for("isRewritingHTML");

  // Check if already processing
  if (document[isRewritingHTMLKey]) {
    return html;
  }

  const tempContainer = document.createElement("div");
  document[isRewritingHTMLKey] = true;

  try {
    tempContainer.innerHTML = html;

    // Group attributes by tag. A tag can be listed with several attributes
    // (ie: link -> href, icon), so each tag maps to a list; a plain
    // tag -> attribute map would keep only the last attribute per tag.
    const attributesByTag = new Map();
    for (const { tag, attribute } of elements) {
      const known = attributesByTag.get(tag);
      if (known) {
        known.push(attribute);
      } else {
        attributesByTag.set(tag, [attribute]);
      }
    }

    // Loop-based DOM traversal
    const nodes = [...tempContainer.querySelectorAll("*")];
    for (const node of nodes) {
      const attributes = attributesByTag.get(node.tagName.toLowerCase());
      if (!attributes) continue;
      for (const attribute of attributes) {
        if (!node.hasAttribute(attribute)) continue;
        const originalUrl = node.getAttribute(attribute);
        node.setAttribute(attribute, rewriteURL(originalUrl));
      }
    }

    return tempContainer.innerHTML;
  } finally {
    // Clear the flag
    document[isRewritingHTMLKey] = false;
  }
}
// Replace the innerHTML/outerHTML setters on Element.prototype so assigned
// markup goes through rewriteInnerHTML first. The original setters are kept
// in originalSetters and used directly for re-entrant calls.
const originalSetters = {};
for (const property of ["innerHTML", "outerHTML"]) {
  const descriptor = Object.getOwnPropertyDescriptor(
    Element.prototype,
    property,
  );
  if (!descriptor || !descriptor.set) continue;

  originalSetters[property] = descriptor.set;
  const guardKey = Symbol.for("isRewritingHTML");

  Object.defineProperty(Element.prototype, property, {
    ...descriptor,
    set(value) {
      if (this[guardKey]) {
        // Use original setter in recursive call
        originalSetters[property].call(this, value);
        return;
      }
      this[guardKey] = true;
      try {
        // Use custom logic
        descriptor.set.call(this, rewriteInnerHTML(value, elements));
      } finally {
        this[guardKey] = false;
      }
    },
  });
}
})();

View File

@@ -0,0 +1,33 @@
package responsemodifiers
import (
_ "embed"
"io"
"ladder/proxychain"
"strings"
)
// gaPatch holds the contents of patch_google_analytics.js, embedded into the
// binary at build time.
//
//go:embed patch_google_analytics.js
var gaPatch string
// PatchGoogleAnalytics swaps the response body for a no-op stub script when the
// request targets a google analytics host or a path ending in "analytics.js".
// Some sites will not display content until GA is loaded, so a fake one is served instead.
// Credit to Raymond Hill @ github.com/gorhill/uBlock
func PatchGoogleAnalytics() proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		target := chain.Request.URL

		// preflight check
		isGADomain := false
		switch target.Host {
		case "www.google-analytics.com", "google-analytics.com":
			isGADomain = true
		}
		if !isGADomain && !strings.HasSuffix(target.Path, "analytics.js") {
			return nil
		}

		// send modified js payload to client containing
		// stub functions from patch_google_analytics.js
		chain.Response.Body = io.NopCloser(strings.NewReader(gaPatch))
		chain.Context.Set("content-type", "text/javascript")
		return nil
	}
}

View File

@@ -0,0 +1,109 @@
// uBlock Origin - a browser extension to block requests.
// Copyright (C) 2019-present Raymond Hill
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see {http://www.gnu.org/licenses/}.
//
// Home: https://github.com/gorhill/uBlock
// Installs a stub of the google analytics global (named by
// window.GoogleAnalyticsObject, or "ga" by default) whose tracker methods do
// nothing, while still invoking any callbacks that callers registered.
(function() {
"use strict";
// https://developers.google.com/analytics/devguides/collection/analyticsjs/
const noopfn = function() {
};
//
// Tracker stands in for an analytics tracker; get/set/send are all no-ops.
const Tracker = function() {
};
const p = Tracker.prototype;
p.get = noopfn;
p.set = noopfn;
p.send = noopfn;
//
const w = window;
const gaName = w.GoogleAnalyticsObject || "ga";
// whatever was stored under the global name before it is replaced below
const gaQueue = w[gaName];
// https://github.com/uBlockOrigin/uAssets/pull/4115
// ga looks for a callback in its arguments (a hitCallback property on the
// last argument, the last argument itself when it is a function, or the value
// following a "hitCallback" argument) and calls it, swallowing exceptions.
const ga = function() {
const len = arguments.length;
if (len === 0) return;
const args = Array.from(arguments);
let fn;
let a = args[len - 1];
if (a instanceof Object && a.hitCallback instanceof Function) {
fn = a.hitCallback;
} else if (a instanceof Function) {
fn = () => {
a(ga.create());
};
} else {
const pos = args.indexOf("hitCallback");
if (pos !== -1 && args[pos + 1] instanceof Function) {
fn = args[pos + 1];
}
}
if (fn instanceof Function === false) return;
try {
fn();
} catch (ex) {
}
};
ga.create = function() {
return new Tracker();
};
ga.getByName = function() {
return new Tracker();
};
ga.getAll = function() {
return [new Tracker()];
};
ga.remove = noopfn;
// https://github.com/uBlockOrigin/uAssets/issues/2107
ga.loaded = true;
w[gaName] = ga;
// https://github.com/gorhill/uBlock/issues/3075
const dl = w.dataLayer;
if (dl instanceof Object) {
if (dl.hide instanceof Object && typeof dl.hide.end === "function") {
dl.hide.end();
dl.hide.end = () => { };
}
if (typeof dl.push === "function") {
// schedules an item's eventCallback once, then replaces it with a no-op
const doCallback = function(item) {
if (item instanceof Object === false) return;
if (typeof item.eventCallback !== "function") return;
setTimeout(item.eventCallback, 1);
item.eventCallback = () => { };
};
// intercept future pushes so their eventCallback is scheduled as well
dl.push = new Proxy(dl.push, {
apply: function(target, thisArg, args) {
doCallback(args[0]);
return Reflect.apply(target, thisArg, args);
},
});
// handle items that were already in dataLayer
if (Array.isArray(dl)) {
const q = dl.slice();
for (const item of q) {
doCallback(item);
}
}
}
}
// empty ga queue
if (gaQueue instanceof Function && Array.isArray(gaQueue.q)) {
const q = gaQueue.q.slice();
gaQueue.q.length = 0;
for (const entry of q) {
ga(...entry);
}
}
})();

View File

@@ -0,0 +1,100 @@
package responsemodifiers
import (
"embed"
"encoding/json"
"io"
"ladder/proxychain"
"log"
"regexp"
)
// mappingJSON holds the raw bytes of vendor/ddg-tracker-surrogates/mapping.json,
// embedded into the binary at build time.
//
//go:embed vendor/ddg-tracker-surrogates/mapping.json
var mappingJSON []byte
// surrogateFS exposes the embedded files under
// vendor/ddg-tracker-surrogates/surrogates/.
//
//go:embed vendor/ddg-tracker-surrogates/surrogates/*
var surrogateFS embed.FS
// rules is populated from mappingJSON by init.
var rules domainRules
// init deserializes the embedded mapping.json into rules.
// A failure is logged, including the underlying error, and rules is left
// with whatever was decoded up to that point.
func init() {
	if err := json.Unmarshal(mappingJSON, &rules); err != nil {
		log.Printf("[ERROR]: PatchTrackerScripts: failed to deserialize ladder/proxychain/responsemodifiers/vendor/ddg-tracker-surrogates/mapping.json: %v", err)
	}
}
// mapping.json schema

// rule is a single entry of mapping.json: requests whose URL matches
// RegexRule are associated with the surrogate script named by Surrogate.
type rule struct {
	RegexRule *regexp.Regexp `json:"regexRule"`
	Surrogate string         `json:"surrogate"`
	Action    string         `json:"action,omitempty"`
}

// domainRules maps a key of mapping.json (a domain) to its list of rules.
type domainRules map[string][]rule

// UnmarshalJSON decodes a rule from JSON, compiling the "regexRule" string
// into a *regexp.Regexp.
//
// It returns an error if data is not valid JSON for a rule or if the
// regular expression does not compile; in that case r is left unmodified.
func (r *rule) UnmarshalJSON(data []byte) error {
	type Tmp struct {
		RegexRule string `json:"regexRule"`
		Surrogate string `json:"surrogate"`
		Action    string `json:"action,omitempty"`
	}

	var tmp Tmp
	if err := json.Unmarshal(data, &tmp); err != nil {
		return err
	}

	// Compile (not MustCompile): the pattern comes from data, so a bad
	// pattern must surface as a decode error rather than a panic.
	regex, err := regexp.Compile(tmp.RegexRule)
	if err != nil {
		return err
	}

	r.RegexRule = regex
	r.Surrogate = tmp.Surrogate
	r.Action = tmp.Action
	return nil
}
// PatchTrackerScripts replaces any request to tracker scripts such as google analytics
// with a no-op stub that mocks the API structure of the original scripts they replace.
// Some pages depend on the existence of these structures for proper loading, so this may fix
// some broken elements.
// Surrogate script code borrowed from: DuckDuckGo Privacy Essentials browser extension for Firefox, Chrome. (Apache 2.0 license)
//
// The returned modification leaves the response untouched when the request URL
// matches no rule, and returns an error when the matching surrogate file
// cannot be opened.
func PatchTrackerScripts() proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		// preflight checks
		reqURL := chain.Request.URL.String()

		var (
			matchedDomain string
			surrogateName string
			isTracker     bool
		)

		// stop at the first matching rule across all domains
	search:
		for domain, domainRules := range rules {
			for _, rule := range domainRules {
				// a rule without a compiled regex cannot match anything
				if rule.RegexRule == nil || !rule.RegexRule.MatchString(reqURL) {
					continue
				}
				matchedDomain = domain
				surrogateName = rule.Surrogate
				isTracker = true
				break search
			}
		}

		if !isTracker {
			return nil
		}

		// found tracker script, replacing response body with nop stub from
		// ./vendor/ddg-tracker-surrogates/surrogates/{{rule.Surrogate}}
		script, err := surrogateFS.Open("vendor/ddg-tracker-surrogates/surrogates/" + surrogateName)
		if err != nil {
			// report the failure to the caller instead of crashing the process
			return err
		}

		var surrogateScript io.ReadCloser = script
		log.Printf("INFO: PatchTrackerScripts :: injecting surrogate for '%s' => 'surrogates/%s'\n", matchedDomain, surrogateName)

		chain.Response.Body = surrogateScript
		chain.Context.Set("content-type", "text/javascript")
		return nil
	}
}

View File

@@ -0,0 +1,36 @@
package responsemodifiers
import (
_ "embed"
"fmt"
"strings"
"ladder/proxychain"
"ladder/proxychain/responsemodifiers/rewriters"
)
// RewriteHTMLResourceURLs wraps HTML response bodies in a rewriter that
// rewrites URL attributes (such as src, href) in the HTML content.
//   - `<img src='/relative_path'>` -> `<img src='/https://proxiedsite.com/relative_path'>`
//   - The goal is to keep the proxified page browsable by routing
//     resource URLs through the proxy.
//
// Responses whose content-type does not start with "text/html" are left untouched.
func RewriteHTMLResourceURLs() proxychain.ResponseModification {
	return func(chain *proxychain.ProxyChain) error {
		if ct := chain.Response.Header.Get("content-type"); !strings.HasPrefix(ct, "text/html") {
			return nil
		}

		// the URL of the ladder itself, ie: http://localhost:8080
		clientURI := chain.Context.Request().URI()
		ladderURL := fmt.Sprintf("%s://%s", clientURI.Scheme(), clientURI.Host())

		// swap the body for a readcloser that wraps the original and modifies the html attributes
		urlRewriter := rewriters.NewHTMLTokenURLRewriter(chain.Request.URL, ladderURL)
		chain.Response.Body = rewriters.NewHTMLRewriter(chain.Response.Body, urlRewriter)
		return nil
	}
}

View File

@@ -0,0 +1,28 @@
// Runs the injected script once the DOM has been "idle" (no child-list
// mutations) for debounceDelay milliseconds after DOMContentLoaded.
(() => {
  document.addEventListener("DOMContentLoaded", (event) => {
    initIdleMutationObserver();
  });

  function initIdleMutationObserver() {
    let debounceTimer;
    const debounceDelay = 500; // adjust the delay as needed

    // (re)start the idle countdown; when it elapses the script runs once
    const scheduleExecute = () => {
      clearTimeout(debounceTimer);
      debounceTimer = setTimeout(() => {
        observer.disconnect(); // only ever execute once
        execute();
      }, debounceDelay);
    };

    // every mutation postpones execution by another debounceDelay
    const observer = new MutationObserver((mutations) => {
      scheduleExecute();
    });

    const config = { attributes: false, childList: true, subtree: true };
    observer.observe(document.body, config);

    // Arm the timer right away: the observer callback only fires on
    // mutations, so a page that never mutates after DOMContentLoaded
    // would otherwise never execute the script.
    scheduleExecute();
  }

  function execute() {
    "{{AFTER_DOM_IDLE_SCRIPT}}";
    //console.log('DOM is now idle. Executing...');
  }
})();

View File

@@ -0,0 +1,3 @@
package rewriters
// todo: implement

View File

@@ -0,0 +1,133 @@
package rewriters
import (
"bytes"
"io"
"golang.org/x/net/html"
)
// IHTMLTokenRewriter defines an interface for modifying HTML tokens.
// HTMLRewriter calls ShouldModify for each token and, only when it returns
// true, calls ModifyToken on that same token.
type IHTMLTokenRewriter interface {
// ShouldModify determines whether a given HTML token requires modification.
ShouldModify(*html.Token) bool
// ModifyToken applies modifications to a given HTML token.
// It returns strings representing content to be prepended and
// appended to the token. If no modifications are required or if an error occurs,
// it returns empty strings for both 'prepend' and 'append'.
// Note: The original token is not modified if an error occurs.
ModifyToken(*html.Token) (prepend, append string)
}
// HTMLRewriter is a struct that can take multiple TokenHandlers and process all
// HTML tokens from http.Response.Body in a single pass, making changes and returning a new io.ReadCloser
//
//   - HTMLRewriter reads the http.Response.Body stream,
//     parsing each HTML token one at a time and making modifications (defined by implementations of IHTMLTokenRewriter)
//
//   - When ProxyChain.Execute() is called, the response body will be read from the server
//     and pulled through each ResponseModification which wraps the ProxyChain.Response.Body
//     without ever buffering the entire HTTP response in memory.
type HTMLRewriter struct {
	// src is the wrapped source; it is kept so Close can release it.
	src                   io.Closer
	tokenizer             *html.Tokenizer
	currentToken          *html.Token
	tokenBuffer           *bytes.Buffer
	currentTokenProcessed bool
	rewriters             []IHTMLTokenRewriter
}

// NewHTMLRewriter creates a new HTMLRewriter instance.
// It processes HTML tokens from an io.ReadCloser source (typically http.Response.Body)
// using a series of HTMLTokenRewriters. Each HTMLTokenRewriter in the 'rewriters' slice
// applies its specific modifications to the HTML tokens.
// The HTMLRewriter reads from the provided 'src', applies the modifications,
// and returns the processed content as a new io.ReadCloser.
// This new io.ReadCloser can be used to stream the modified content back to the client.
//
// Parameters:
//   - src: An io.ReadCloser representing the source of the HTML content, such as http.Response.Body.
//   - rewriters: A slice of HTMLTokenRewriters that define the modifications to be applied to the HTML tokens.
//
// Returns:
//   - A pointer to an HTMLRewriter, which implements io.ReadCloser, containing the modified HTML content.
func NewHTMLRewriter(src io.ReadCloser, rewriters ...IHTMLTokenRewriter) *HTMLRewriter {
	return &HTMLRewriter{
		src:                   src,
		tokenizer:             html.NewTokenizer(src),
		currentToken:          nil,
		tokenBuffer:           new(bytes.Buffer),
		currentTokenProcessed: false,
		rewriters:             rewriters,
	}
}

// Close resets the internal state of HTMLRewriter, clearing buffers and token data,
// and closes the wrapped source so the underlying body is not leaked.
// It returns the error reported by the source's Close, if any.
func (r *HTMLRewriter) Close() error {
	r.tokenBuffer.Reset()
	r.currentToken = nil
	r.currentTokenProcessed = false

	if r.src == nil {
		return nil
	}
	return r.src.Close()
}
// Read processes the HTML content, rewriting URLs and managing the state of tokens.
//
// Each call serves bytes from tokenBuffer, which holds the serialized form of
// one token ("<prepends> <token> <appends>"). When the buffer has been fully
// consumed, the next call pulls the next token from the tokenizer, runs it
// through every registered rewriter and refills the buffer.
// It returns io.EOF once the tokenizer reports io.EOF, or the tokenizer's
// error for any other tokenizer failure.
func (r *HTMLRewriter) Read(p []byte) (int, error) {
// a new token is needed when none is loaded yet, the loaded one has no
// data, or the loaded one has been completely written out
if r.currentToken == nil || r.currentToken.Data == "" || r.currentTokenProcessed {
tokenType := r.tokenizer.Next()
// done reading html, close out reader
if tokenType == html.ErrorToken {
if r.tokenizer.Err() == io.EOF {
return 0, io.EOF
}
return 0, r.tokenizer.Err()
}
// get the next token; reset buffer
t := r.tokenizer.Token()
r.currentToken = &t
r.tokenBuffer.Reset()
// buffer += "<prepends> <token> <appends>"
// process token through all registered rewriters
// rewriters will modify the token, and optionally
// return a <prepend> or <append> string token
appends := make([]string, 0, len(r.rewriters))
for _, rewriter := range r.rewriters {
if !rewriter.ShouldModify(r.currentToken) {
continue
}
prepend, a := rewriter.ModifyToken(r.currentToken)
appends = append(appends, a)
// add <prepends> to buffer
r.tokenBuffer.WriteString(prepend)
}
// add <token> to buffer
if tokenType == html.TextToken {
// don't unescape textTokens (such as inline scripts).
// Token.String() by default will escape the inputs, but
// we don't want to modify the original source
// NOTE(review): presumably the tokenizer has already unescaped entities
// in ordinary (non-script) text, so Data may differ from the original
// source for text such as "&lt;" — confirm this is acceptable.
r.tokenBuffer.WriteString(r.currentToken.Data)
} else {
r.tokenBuffer.WriteString(r.currentToken.String())
}
// add <appends> to buffer
for _, a := range appends {
r.tokenBuffer.WriteString(a)
}
r.currentTokenProcessed = false
}
n, err := r.tokenBuffer.Read(p)
// buffer drained: flag the token as done so the next call advances
if err == io.EOF || r.tokenBuffer.Len() == 0 {
r.currentTokenProcessed = true
err = nil // EOF in this context is expected and not an actual error
}
return n, err
}

View File

@@ -0,0 +1,288 @@
package rewriters
import (
_ "embed"
"fmt"
"log"
"net/url"
"path"
"regexp"
"strings"
"golang.org/x/net/html/atom"
"golang.org/x/net/html"
)
// Lookup tables filled in by init.
var (
// rewriteAttrs maps tag name -> attribute name -> true for attributes that might contain URLs.
rewriteAttrs map[string]map[string]bool
// specialRewriteAttrs is keyed the same way; it marks attributes that might contain a URL
// but require special handling.
specialRewriteAttrs map[string]map[string]bool
// schemeBlacklist holds URI schemes (without the colon) whose URIs are not rewritten.
schemeBlacklist map[string]bool
)
// init populates the package-level lookup tables rewriteAttrs,
// specialRewriteAttrs and schemeBlacklist.
func init() {
// define all tag/attributes which might contain URLs
// to attempt to rewrite to point to proxy instead
rewriteAttrs = map[string]map[string]bool{
"img": {"src": true, "srcset": true, "longdesc": true, "usemap": true},
"a": {"href": true},
"form": {"action": true},
"link": {"href": true, "manifest": true, "icon": true},
"script": {"src": true},
"video": {"src": true, "poster": true},
"audio": {"src": true},
"iframe": {"src": true, "longdesc": true},
"embed": {"src": true},
"object": {"data": true, "codebase": true},
"source": {"src": true, "srcset": true},
"track": {"src": true},
"area": {"href": true},
"base": {"href": true},
"blockquote": {"cite": true},
"del": {"cite": true},
"ins": {"cite": true},
"q": {"cite": true},
"body": {"background": true},
"button": {"formaction": true},
"input": {"src": true, "formaction": true},
"meta": {"content": true},
}
// might contain URL but requires special handling
specialRewriteAttrs = map[string]map[string]bool{
"img": {"srcset": true},
"source": {"srcset": true},
"meta": {"content": true},
}
// define URIs to NOT rewrite
// for example: don't overwrite <img src="data:image/png;base64,iVBORw...">
schemeBlacklist = map[string]bool{
"data": true,
"tel": true,
"mailto": true,
"file": true,
"blob": true,
"javascript": true,
"about": true,
"magnet": true,
"ws": true,
"wss": true,
"ftp": true,
}
}
// HTMLTokenURLRewriter is a token rewriter (it provides ShouldModify and ModifyToken)
// that rewrites URLs within HTML resources to use a specified proxy URL.
// <img src='/relative_path'> -> <img src='/https://proxiedsite.com/relative_path'>
type HTMLTokenURLRewriter struct {
	baseURL  *url.URL
	proxyURL string // ladder URL, not proxied site URL
}

// NewHTMLTokenURLRewriter returns an HTMLTokenURLRewriter holding the given
// base URL and proxy URL.
// baseURL might be https://medium.com/foobar
// proxyURL is http://localhost:8080
func NewHTMLTokenURLRewriter(baseURL *url.URL, proxyURL string) *HTMLTokenURLRewriter {
	rewriter := new(HTMLTokenURLRewriter)
	rewriter.baseURL = baseURL
	rewriter.proxyURL = proxyURL
	return rewriter
}
// ShouldModify reports whether token is a start tag or a self-closing tag
// that carries at least one attribute. Tokens without attributes, and all
// other token types, are skipped.
func (r *HTMLTokenURLRewriter) ShouldModify(token *html.Token) bool {
	if len(token.Attr) == 0 {
		return false
	}
	switch token.Type {
	case html.StartTagToken, html.SelfClosingTagToken:
		return true
	default:
		return false
	}
}
// uriScheme returns the lower-cased scheme of val (the text before the first
// ':'), or "" when val contains no ':' at all. A value without a colon is a
// relative reference and therefore has no scheme.
func uriScheme(val string) string {
	i := strings.IndexByte(val, ':')
	if i < 0 {
		return ""
	}
	// URI schemes are case-insensitive, and leading whitespace in an
	// attribute value does not change which scheme it names.
	return strings.ToLower(strings.TrimSpace(val[:i]))
}

// ModifyToken rewrites, in place, every URL-bearing attribute of token so
// that it points at the proxy. Attributes are left untouched when the
// tag/attribute pair is not listed in rewriteAttrs, when the value uses a
// scheme listed in schemeBlacklist, or when the value already looks rewritten.
// Attributes listed in specialRewriteAttrs are delegated to handleSpecialAttr.
//
// It always returns two empty strings: this rewriter only edits attributes.
func (r *HTMLTokenURLRewriter) ModifyToken(token *html.Token) (string, string) {
	for i := range token.Attr {
		attr := &token.Attr[i]
		switch {
		// don't touch tag/attributes that don't contain URIs
		case !rewriteAttrs[token.Data][attr.Key]:
			continue
		// don't touch attributes with special URIs (like data:).
		// Only values that actually contain a scheme are considered, so a
		// relative link such as href="about" is still rewritten.
		case schemeBlacklist[uriScheme(attr.Val)]:
			continue
		// don't double-overwrite the url
		case strings.HasPrefix(attr.Val, r.proxyURL):
			continue
		case strings.HasPrefix(attr.Val, "/http://"):
			continue
		case strings.HasPrefix(attr.Val, "/https://"):
			continue
		// handle special rewrites
		case specialRewriteAttrs[token.Data][attr.Key]:
			r.handleSpecialAttr(token, attr, r.baseURL)
			continue
		default:
			// rewrite url
			handleURLPart(attr, r.baseURL)
		}
	}
	return "", ""
}
// handleURLPart picks the rewrite routine that matches the shape of attr.Val:
// protocol-relative ("//..."), root-relative ("/..."), absolute
// ("http://" or "https://"), or, failing all of those, document-relative.
func handleURLPart(attr *html.Attribute, baseURL *url.URL) {
	val := attr.Val

	// "//" must be tested before "/" because it shares that prefix.
	if strings.HasPrefix(val, "//") {
		handleProtocolRelativePath(attr, baseURL)
		return
	}
	if strings.HasPrefix(val, "/") {
		handleRootRelativePath(attr, baseURL)
		return
	}
	if strings.HasPrefix(val, "https://") || strings.HasPrefix(val, "http://") {
		handleAbsolutePath(attr, baseURL)
		return
	}
	handleDocumentRelativePath(attr, baseURL)
}
// handleProtocolRelativePath rewrites a protocol-relative URL. Such URLs
// start with "//", name their own host, and use the same protocol (http or
// https) as the current page.
//
// The value is completed with baseURL's scheme and then encoded the same way
// as any other absolute URL, e.g. with an https base:
//
//	//cdn.example.com/app.js -> /https://cdn.example.com/app.js
//
// The host named in the value is preserved; it is not replaced by baseURL's host.
func handleProtocolRelativePath(attr *html.Attribute, baseURL *url.URL) {
	hostAndPath := strings.TrimPrefix(attr.Val, "//")
	attr.Val = "/" + escape(baseURL.Scheme+"://"+hostAndPath)
	log.Printf("proto rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
// handleRootRelativePath rewrites a root-relative URL, i.e. one that starts
// with "/" and is resolved against the root of the site. The value is
// completed with baseURL's scheme and host, escaped, and prefixed with "/".
// Values that already start with "/http://" or "/https://" are left alone,
// as are values that do not parse as a URL path.
func handleRootRelativePath(attr *html.Attribute, baseURL *url.URL) {
	// Nothing to do when the value is already in the rewritten form.
	for _, rewrittenPrefix := range []string{"/http://", "/https://"} {
		if strings.HasPrefix(attr.Val, rewrittenPrefix) {
			return
		}
	}

	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)

	// Sanity check: the value must parse when appended to a dummy origin.
	if _, parseErr := url.Parse("http://localhost.com" + attr.Val); parseErr != nil {
		log.Println(parseErr)
		return
	}

	absolute := fmt.Sprintf(
		"%s://%s/%s",
		baseURL.Scheme,
		baseURL.Host,
		strings.TrimPrefix(attr.Val, "/"),
	)
	attr.Val = "/" + escape(absolute)

	log.Printf("root rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
// handleDocumentRelativePath rewrites a document-relative URL, i.e. one that
// does not start with "/" and is resolved against the directory of the
// current document. Pure fragment references ("#...") are left untouched.
//
// The directory is taken from baseURL.EscapedPath() with its last segment
// removed, so with a base of https://example.com/blog/post.html:
//
//	img/a.png -> /https://example.com/blog/img/a.png
//
// URL.RawPath is deliberately not used: it is only populated when the path
// needs a non-default encoding and is empty for ordinary URLs.
func handleDocumentRelativePath(attr *html.Attribute, baseURL *url.URL) {
	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)

	if strings.HasPrefix(attr.Val, "#") {
		return
	}

	// Directory of the base document: everything up to the last "/".
	baseDir := baseURL.EscapedPath()
	if i := strings.LastIndex(baseDir, "/"); i >= 0 {
		baseDir = baseDir[:i]
	} else {
		baseDir = ""
	}

	// Joining onto a rooted path makes path.Join drop any ".." segments
	// that would otherwise climb above the site root.
	joined := path.Join("/", baseDir, strings.Trim(attr.Val, "/"))
	relativePath := strings.TrimPrefix(joined, "/")

	attr.Val = fmt.Sprintf(
		"%s://%s/%s",
		baseURL.Scheme,
		strings.Trim(baseURL.Host, "/"),
		relativePath,
	)
	attr.Val = escape(attr.Val)
	attr.Val = fmt.Sprintf("/%s", attr.Val)

	log.Printf("doc rel url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
// handleAbsolutePath rewrites a full http(s) URL by escaping it and
// prefixing it with "/". Values that fail to parse, or whose scheme is
// neither "http" nor "https", are left unchanged. The base URL is not needed.
func handleAbsolutePath(attr *html.Attribute, _ *url.URL) {
	log.Printf("PROCESSING: key: %s val: %s\n", attr.Key, attr.Val)

	parsed, err := url.Parse(attr.Val)
	if err != nil {
		return
	}

	switch parsed.Scheme {
	case "http", "https":
		// supported, carry on
	default:
		return
	}

	trimmed := strings.TrimPrefix(attr.Val, "/")
	attr.Val = fmt.Sprintf("/%s", escape(trimmed))

	log.Printf("abs url rewritten-> '%s'='%s'", attr.Key, attr.Val)
}
// metaRefreshContentPattern matches the content of a meta refresh directive:
// a number of seconds followed by ";url=". It is compiled once at package
// scope instead of on every attribute that is inspected.
var metaRefreshContentPattern = regexp.MustCompile(`^\d+;url=`)

// handleSpecialAttr rewrites attributes whose value is not a single plain URL:
//
//   - srcset on <img> and <source>: a comma-separated list of URLs, each
//     optionally followed by a descriptor (width, pixel density, ...).
//   - content on <meta> when it holds a refresh directive such as
//     content="5;url=http://example.com/".
//
// Any other tag/attribute combination is left untouched.
func (r *HTMLTokenURLRewriter) handleSpecialAttr(token *html.Token, attr *html.Attribute, baseURL *url.URL) {
	switch {
	case attr.Key == "srcset" && (token.DataAtom == atom.Img || token.DataAtom == atom.Source):
		handleSrcSet(attr, baseURL)
	case attr.Key == "content" && token.DataAtom == atom.Meta && metaRefreshContentPattern.MatchString(attr.Val):
		handleMetaRefresh(attr, baseURL)
	}
}
// handleMetaRefresh rewrites the URL inside a meta refresh directive of the
// form "<seconds>;url=<target>", keeping the delay and replacing the target
// with its rewritten form.
//
// The value is split on the first ";url=" only, so a target that itself
// contains ";url=" is kept whole. A value without the separator is left
// unchanged.
func handleMetaRefresh(attr *html.Attribute, baseURL *url.URL) {
	parts := strings.SplitN(attr.Val, ";url=", 2)
	if len(parts) != 2 {
		return
	}
	delay, target := parts[0], parts[1]

	// Reuse the generic dispatcher by wrapping the target in a scratch attribute.
	scratch := &html.Attribute{Val: target, Key: "src"}
	handleURLPart(scratch, baseURL)

	attr.Val = fmt.Sprintf("%s;url=%s", delay, scratch.Val)
}
// handleSrcSet rewrites every URL of a srcset attribute. The value is split
// on commas; in each entry the first whitespace-separated field is treated as
// the URL and rewritten, and any remaining fields (descriptors such as "2x"
// or "480w") are kept as they are. Entries are re-joined with ", ".
//
// Empty entries are dropped. The separator is written only once something
// has already been emitted, so an empty leading entry cannot produce a value
// that starts with ", ".
func handleSrcSet(attr *html.Attribute, baseURL *url.URL) {
	var rebuilt strings.Builder

	for _, candidate := range strings.Split(attr.Val, ",") {
		fields := strings.Fields(candidate)
		if len(fields) == 0 {
			continue
		}

		// Reuse the generic dispatcher by wrapping the URL in a scratch attribute.
		scratch := &html.Attribute{Val: fields[0], Key: "src"}
		handleURLPart(scratch, baseURL)

		if rebuilt.Len() > 0 {
			rebuilt.WriteString(", ")
		}
		rebuilt.WriteString(scratch.Val)
		if len(fields) > 1 {
			rebuilt.WriteString(" ")
			rebuilt.WriteString(strings.Join(fields[1:], " "))
		}
	}

	attr.Val = rebuilt.String()
}
// escape percent-encodes str with url.PathEscape and then turns every
// encoded slash ("%2F") back into a literal "/", so path separators survive.
func escape(str string) string {
	encoded := url.PathEscape(str)
	segments := strings.Split(encoded, "%2F")
	return strings.Join(segments, "/")
}

View File

@@ -0,0 +1,92 @@
package rewriters
import (
_ "embed"
"fmt"
"sort"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// ScriptInjectorRewriter implements HTMLTokenRewriter.
// It injects a JavaScript snippet into the page: ShouldModify selects the
// opening <head> tag and ModifyToken returns the <script> markup to emit.
type ScriptInjectorRewriter struct {
// execTime selects how ModifyToken wraps the script (see ScriptExecTime).
execTime ScriptExecTime
// script is the JavaScript source placed inside the emitted <script> element.
script string
}
// ScriptExecTime selects how ScriptInjectorRewriter.ModifyToken wraps the
// injected script, which determines when the script runs.
type ScriptExecTime int
const (
// BeforeDOMContentLoaded emits the script in a plain <script> element.
BeforeDOMContentLoaded ScriptExecTime = iota
// AfterDOMContentLoaded wraps the script in a 'DOMContentLoaded' event listener.
AfterDOMContentLoaded
// AfterDOMIdle substitutes the script into the embedded
// after_dom_idle_script_injector.js template.
AfterDOMIdle
)
// ShouldModify reports whether token is the opening <head> tag, which is
// the only token this rewriter acts on.
func (r *ScriptInjectorRewriter) ShouldModify(token *html.Token) bool {
	if token.DataAtom != atom.Head {
		return false
	}
	return token.Type == html.StartTagToken
}
// afterDomIdleScriptInjector holds the contents of
// after_dom_idle_script_injector.js, embedded at build time. ModifyToken uses
// it as a template, replacing its '{{AFTER_DOM_IDLE_SCRIPT}}' placeholder
// with the script to inject.
//
//go:embed after_dom_idle_script_injector.js
var afterDomIdleScriptInjector string
// ModifyToken builds the <script> markup for the configured script. The
// first return value is always empty; the second carries the markup, whose
// shape depends on r.execTime:
//
//   - BeforeDOMContentLoaded: the script in a plain <script> element.
//   - AfterDOMContentLoaded: the script wrapped in a DOMContentLoaded listener.
//   - AfterDOMIdle: the embedded template with its first
//     '{{AFTER_DOM_IDLE_SCRIPT}}' placeholder replaced by the script.
//
// An unknown execTime yields two empty strings.
func (r *ScriptInjectorRewriter) ModifyToken(_ *html.Token) (string, string) {
	var injected string

	switch r.execTime {
	case BeforeDOMContentLoaded:
		injected = fmt.Sprintf("\n<script>\n%s\n</script>\n", r.script)
	case AfterDOMContentLoaded:
		injected = fmt.Sprintf("\n<script>\ndocument.addEventListener('DOMContentLoaded', () => { %s });\n</script>", r.script)
	case AfterDOMIdle:
		body := strings.Replace(afterDomIdleScriptInjector, `'{{AFTER_DOM_IDLE_SCRIPT}}'`, r.script, 1)
		injected = fmt.Sprintf("\n<script>\n%s\n</script>\n", body)
	}

	return "", injected
}
// applyParams substitutes parameters into r.script by plain string
// replacement: every occurrence of each key in params is replaced with the
// corresponding value.
//
// Keys are applied longest first so that a key which is a prefix or substring
// of a longer key cannot clobber it. Keys of equal length are applied in
// lexicographic order, which keeps the result independent of Go's randomized
// map iteration. Empty keys are skipped, because replacing the empty string
// would insert the value between every character of the script.
func (r *ScriptInjectorRewriter) applyParams(params map[string]string) {
	keys := make([]string, 0, len(params))
	for key := range params {
		if key == "" {
			continue
		}
		keys = append(keys, key)
	}

	sort.Slice(keys, func(i, j int) bool {
		if len(keys[i]) != len(keys[j]) {
			return len(keys[i]) > len(keys[j])
		}
		return keys[i] < keys[j]
	})

	for _, key := range keys {
		r.script = strings.ReplaceAll(r.script, key, params[key])
	}
}
// NewScriptInjectorRewriter returns a ScriptInjectorRewriter that injects
// script into the page, wrapped according to execTime.
func NewScriptInjectorRewriter(script string, execTime ScriptExecTime) *ScriptInjectorRewriter {
	injector := new(ScriptInjectorRewriter)
	injector.script = script
	injector.execTime = execTime
	return injector
}
// NewScriptInjectorRewriterWithParams returns a ScriptInjectorRewriter that
// injects script into the page, wrapped according to execTime, after
// substituting parameters into the script.
//
// params maps placeholder text to its replacement: every occurrence of a key
// in script is replaced with that key's value by plain string replacement.
func NewScriptInjectorRewriterWithParams(script string, execTime ScriptExecTime, params map[string]string) *ScriptInjectorRewriter {
	injector := NewScriptInjectorRewriter(script, execTime)
	injector.applyParams(params)
	return injector
}