Compare commits
1 Commits
main
...
add-versio
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5392992350 |
46
.air.toml
46
.air.toml
@@ -1,46 +0,0 @@
|
||||
root = "./"
|
||||
testdata_dir = "testdata"
|
||||
tmp_dir = "tmp"
|
||||
|
||||
[build]
|
||||
args_bin = []
|
||||
bin = "./tmp/main"
|
||||
cmd = "go build -o ./tmp/main ./cmd"
|
||||
delay = 1000
|
||||
exclude_dir = ["assets", "tmp", "vendor", "testdata"]
|
||||
exclude_file = []
|
||||
exclude_regex = ["_test.go"]
|
||||
exclude_unchanged = false
|
||||
follow_symlink = false
|
||||
full_bin = "RULESET=./ruleset.yaml ./tmp/main"
|
||||
include_dir = []
|
||||
include_ext = ["go", "tpl", "tmpl", "yaml", "html"]
|
||||
include_file = []
|
||||
kill_delay = "0s"
|
||||
log = "build-errors.log"
|
||||
poll = false
|
||||
poll_interval = 0
|
||||
post_cmd = []
|
||||
pre_cmd = ["echo 'dev' > handlers/VERSION"]
|
||||
rerun = false
|
||||
rerun_delay = 500
|
||||
send_interrupt = false
|
||||
stop_on_error = false
|
||||
|
||||
[color]
|
||||
app = ""
|
||||
build = "yellow"
|
||||
main = "magenta"
|
||||
runner = "green"
|
||||
watcher = "cyan"
|
||||
|
||||
[log]
|
||||
main_only = false
|
||||
time = false
|
||||
|
||||
[misc]
|
||||
clean_on_exit = true
|
||||
|
||||
[screen]
|
||||
clear_on_rebuild = true
|
||||
keep_scroll = true
|
||||
6
.github/workflows/release-binaries.yaml
vendored
6
.github/workflows/release-binaries.yaml
vendored
@@ -22,7 +22,11 @@ jobs:
|
||||
-
|
||||
name: Set version
|
||||
run: |
|
||||
echo -n $(git describe --tags --abbrev=0) > handlers/VERSION
|
||||
VERSION=$(git describe --tags --abbrev=0)
|
||||
echo -n $VERSION > handlers/VERSION
|
||||
sed -i 's\VERSION\${VERSION}\g' handlers/form.html
|
||||
echo handlers/form.html >> .gitignore
|
||||
echo .gitignore >> .gitignore
|
||||
-
|
||||
name: Set up Go
|
||||
uses: actions/setup-go@v3
|
||||
|
||||
6
.github/workflows/release-docker.yaml
vendored
6
.github/workflows/release-docker.yaml
vendored
@@ -42,7 +42,11 @@ jobs:
|
||||
- name: Set version
|
||||
id: version
|
||||
run: |
|
||||
echo ${GITHUB_REF#refs/tags/v} > handlers/VERSION
|
||||
VERSION=$(git describe --tags --abbrev=0)
|
||||
echo -n $VERSION > handlers/VERSION
|
||||
sed -i 's\VERSION\${VERSION}\g' handlers/form.html
|
||||
echo handlers/form.html >> .gitignore
|
||||
echo .gitignore >> .gitignore
|
||||
|
||||
# Install the cosign tool except on PR
|
||||
# https://github.com/sigstore/cosign-installer
|
||||
|
||||
2
Makefile
2
Makefile
@@ -1,6 +1,6 @@
|
||||
lint:
|
||||
gofumpt -l -w .
|
||||
golangci-lint run -c .golangci-lint.yaml --fix
|
||||
golangci-lint run -c .golangci-lint.yaml
|
||||
|
||||
go mod tidy
|
||||
go clean
|
||||
|
||||
37
README.md
37
README.md
@@ -6,7 +6,7 @@
|
||||
<div><img alt="License" src="https://img.shields.io/github/license/everywall/ladder"> <img alt="go.mod Go version " src="https://img.shields.io/github/go-mod/go-version/everywall/ladder"> <img alt="GitHub tag (with filter)" src="https://img.shields.io/github/v/tag/everywall/ladder"> <img alt="GitHub (Pre-)Release Date" src="https://img.shields.io/github/release-date-pre/everywall/ladder"> <img alt="GitHub Downloads all releases" src="https://img.shields.io/github/downloads/everywall/ladder/total"> <img alt="GitHub Build Status (with event)" src="https://img.shields.io/github/actions/workflow/status/everywall/ladder/release-binaries.yaml"></div>
|
||||
|
||||
|
||||
*Ladder is an HTTP web proxy.* This is a selfhosted version of [1ft.io](https://1ft.io) and [12ft.io](https://12ft.io). It is inspired by [13ft](https://github.com/wasi-master/13ft).
|
||||
*Ladder is a web proxy to help bypass paywalls.* This is a selfhosted version of [1ft.io](https://1ft.io) and [12ft.io](https://12ft.io). It is inspired by [13ft](https://github.com/wasi-master/13ft).
|
||||
|
||||
### Why
|
||||
|
||||
@@ -14,18 +14,6 @@ Freedom of information is an essential pillar of democracy and informed decision
|
||||
|
||||
> **Disclaimer:** This project is intended for educational purposes only. The author does not endorse or encourage any unethical or illegal activity. Use this tool at your own risk.
|
||||
|
||||
### How it works
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
client->>+ladder: GET
|
||||
ladder-->>ladder: apply RequestModifications
|
||||
ladder->>+website: GET
|
||||
website->>-ladder: 200 OK
|
||||
ladder-->>ladder: apply ResultModifications
|
||||
ladder->>-client: 200 OK
|
||||
```
|
||||
|
||||
### Features
|
||||
- [x] Bypass Paywalls
|
||||
- [x] Remove CORS headers from responses, assets, and images ...
|
||||
@@ -60,12 +48,12 @@ Certain sites may display missing images or encounter formatting issues. This ca
|
||||
|
||||
### Binary
|
||||
1) Download binary [here](https://github.com/everywall/ladder/releases/latest)
|
||||
2) Unpack and run the binary `./ladder -r https://t.ly/14PSf`
|
||||
2) Unpack and run the binary `./ladder`
|
||||
3) Open Browser (Default: http://localhost:8080)
|
||||
|
||||
### Docker
|
||||
```bash
|
||||
docker run -p 8080:8080 -d --env RULESET=https://t.ly/14PSf --name ladder ghcr.io/everywall/ladder:latest
|
||||
docker run -p 8080:8080 -d --name ladder ghcr.io/everywall/ladder:latest
|
||||
```
|
||||
|
||||
### Docker Compose
|
||||
@@ -118,7 +106,7 @@ http://localhost:8080/ruleset
|
||||
| `LOG_URLS` | Log fetched URLs | `true` |
|
||||
| `DISABLE_FORM` | Disables URL Form Frontpage | `false` |
|
||||
| `FORM_PATH` | Path to custom Form HTML | `` |
|
||||
| `RULESET` | Path or URL to a ruleset file, accepts local directories | `https://raw.githubusercontent.com/everywall/ladder-rules/main/ruleset.yaml` or `/path/to/my/rules.yaml` or `/path/to/my/rules/` |
|
||||
| `RULESET` | URL to a ruleset file | `https://raw.githubusercontent.com/everywall/ladder/main/ruleset.yaml` or `/path/to/my/rules.yaml` |
|
||||
| `EXPOSE_RULESET` | Make your Ruleset available to other ladders | `true` |
|
||||
| `ALLOWED_DOMAINS` | Comma separated list of allowed domains. Empty = no limitations | `` |
|
||||
| `ALLOWED_DOMAINS_RULESET` | Allow Domains from Ruleset. false = no limitations | `false` |
|
||||
@@ -127,10 +115,9 @@ http://localhost:8080/ruleset
|
||||
|
||||
### Ruleset
|
||||
|
||||
It is possible to apply custom rules to modify the response or the requested URL. This can be used to remove or modify unwanted elements on the page. The ruleset is a YAML file, a directory with YAML files, or a URL to a YAML file that contains a list of rules for each domain. These rules are loaded on startup.
|
||||
|
||||
There is a basic ruleset available in a separate repository [ruleset.yaml](https://raw.githubusercontent.com/everywall/ladder-rules/main/ruleset.yaml). Feel free to add your own rules and create a pull request.
|
||||
It is possible to apply custom rules to modify the response or the requested URL. This can be used to remove or modify unwanted elements on the page. The ruleset is a YAML file that contains a list of rules for each domain and is loaded on startup.
|
||||
|
||||
See in [ruleset.yaml](ruleset.yaml) for an example.
|
||||
|
||||
```yaml
|
||||
- domain: example.com # Includes all subdomains
|
||||
@@ -189,18 +176,8 @@ There is a basic ruleset available in a separate repository [ruleset.yaml](https
|
||||
To run a development server at http://localhost:8080:
|
||||
|
||||
```bash
|
||||
echo "dev" > handlers/VERSION
|
||||
echo "DEV" > handlers/VERSION
|
||||
RULESET="./ruleset.yaml" go run cmd/main.go
|
||||
```
|
||||
|
||||
### Optional: Live reloading development server with [cosmtrek/air](https://github.com/cosmtrek/air)
|
||||
|
||||
Install air according to the [installation instructions](https://github.com/cosmtrek/air#installation).
|
||||
|
||||
Run a development server at http://localhost:8080:
|
||||
|
||||
```bash
|
||||
air # or the path to air if you haven't added a path alias to your .bashrc or .zshrc
|
||||
```
|
||||
|
||||
This project uses [pnpm](https://pnpm.io/) to build a stylesheet with the [Tailwind CSS](https://tailwindcss.com/) classes. For local development, if you modify styles in `form.html`, run `pnpm build` to generate a new stylesheet.
|
||||
|
||||
53
cmd/main.go
53
cmd/main.go
@@ -8,7 +8,6 @@ import (
|
||||
"strings"
|
||||
|
||||
"ladder/handlers"
|
||||
"ladder/handlers/cli"
|
||||
|
||||
"github.com/akamensky/argparse"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
@@ -18,7 +17,6 @@ import (
|
||||
|
||||
//go:embed favicon.ico
|
||||
var faviconData string
|
||||
|
||||
//go:embed styles.css
|
||||
var cssData embed.FS
|
||||
|
||||
@@ -29,7 +27,6 @@ func main() {
|
||||
if os.Getenv("PORT") == "" {
|
||||
portEnv = "8080"
|
||||
}
|
||||
|
||||
port := parser.String("p", "port", &argparse.Options{
|
||||
Required: false,
|
||||
Default: portEnv,
|
||||
@@ -41,52 +38,11 @@ func main() {
|
||||
Help: "This will spawn multiple processes listening",
|
||||
})
|
||||
|
||||
ruleset := parser.String("r", "ruleset", &argparse.Options{
|
||||
Required: false,
|
||||
Help: "File, Directory or URL to a ruleset.yaml. Overrides RULESET environment variable.",
|
||||
})
|
||||
|
||||
mergeRulesets := parser.Flag("", "merge-rulesets", &argparse.Options{
|
||||
Required: false,
|
||||
Help: "Compiles a directory of yaml files into a single ruleset.yaml. Requires --ruleset arg.",
|
||||
})
|
||||
|
||||
mergeRulesetsGzip := parser.Flag("", "merge-rulesets-gzip", &argparse.Options{
|
||||
Required: false,
|
||||
Help: "Compiles a directory of yaml files into a single ruleset.gz Requires --ruleset arg.",
|
||||
})
|
||||
|
||||
mergeRulesetsOutput := parser.String("", "merge-rulesets-output", &argparse.Options{
|
||||
Required: false,
|
||||
Help: "Specify output file for --merge-rulesets and --merge-rulesets-gzip. Requires --ruleset and --merge-rulesets args.",
|
||||
})
|
||||
|
||||
err := parser.Parse(os.Args)
|
||||
if err != nil {
|
||||
fmt.Print(parser.Usage(err))
|
||||
}
|
||||
|
||||
// utility cli flag to compile ruleset directory into single ruleset.yaml
|
||||
if *mergeRulesets || *mergeRulesetsGzip {
|
||||
output := os.Stdout
|
||||
|
||||
if *mergeRulesetsOutput != "" {
|
||||
output, err = os.Create(*mergeRulesetsOutput)
|
||||
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
err = cli.HandleRulesetMerge(*ruleset, *mergeRulesets, *mergeRulesetsGzip, output)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
os.Exit(1)
|
||||
}
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if os.Getenv("PREFORK") == "true" {
|
||||
*prefork = true
|
||||
}
|
||||
@@ -101,7 +57,6 @@ func main() {
|
||||
userpass := os.Getenv("USERPASS")
|
||||
if userpass != "" {
|
||||
userpass := strings.Split(userpass, ":")
|
||||
|
||||
app.Use(basicauth.New(basicauth.Config{
|
||||
Users: map[string]string{
|
||||
userpass[0]: userpass[1],
|
||||
@@ -117,28 +72,24 @@ func main() {
|
||||
if os.Getenv("NOLOGS") != "true" {
|
||||
app.Use(func(c *fiber.Ctx) error {
|
||||
log.Println(c.Method(), c.Path())
|
||||
|
||||
return c.Next()
|
||||
})
|
||||
}
|
||||
|
||||
app.Get("/", handlers.Form)
|
||||
|
||||
app.Get("/styles.css", func(c *fiber.Ctx) error {
|
||||
cssData, err := cssData.ReadFile("styles.css")
|
||||
if err != nil {
|
||||
return c.Status(fiber.StatusInternalServerError).SendString("Internal Server Error")
|
||||
}
|
||||
|
||||
c.Set("Content-Type", "text/css")
|
||||
|
||||
return c.Send(cssData)
|
||||
})
|
||||
|
||||
app.Get("ruleset", handlers.Ruleset)
|
||||
|
||||
app.Get("raw/*", handlers.Raw)
|
||||
app.Get("api/*", handlers.Api)
|
||||
app.Get("/*", handlers.ProxySite(*ruleset))
|
||||
app.Get("/*", handlers.ProxySite)
|
||||
|
||||
log.Fatal(app.Listen(":" + *port))
|
||||
}
|
||||
|
||||
@@ -9,11 +9,10 @@ services:
|
||||
environment:
|
||||
- PORT=8080
|
||||
- RULESET=/app/ruleset.yaml
|
||||
#- ALLOWED_DOMAINS=example.com,example.org
|
||||
#- ALLOWED_DOMAINS_RULESET=false
|
||||
#- EXPOSE_RULESET=true
|
||||
#- PREFORK=false
|
||||
#- DISABLE_FORM=false
|
||||
#- DISABLE_FORM=false
|
||||
#- FORM_PATH=/app/form.html
|
||||
#- X_FORWARDED_FOR=66.249.66.1
|
||||
#- USER_AGENT=Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
|
||||
|
||||
1
go.mod
1
go.mod
@@ -26,5 +26,4 @@ require (
|
||||
github.com/valyala/tcplisten v1.0.0 // indirect
|
||||
golang.org/x/net v0.18.0 // indirect
|
||||
golang.org/x/sys v0.14.0 // indirect
|
||||
golang.org/x/term v0.14.0
|
||||
)
|
||||
|
||||
2
go.sum
2
go.sum
@@ -68,8 +68,6 @@ golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9sn
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
|
||||
golang.org/x/term v0.14.0 h1:LGK9IlZ8T9jvdy6cTdfKUCltatMFOehAQo9SRC46UQ8=
|
||||
golang.org/x/term v0.14.0/go.mod h1:TySc+nGkYR6qt8km8wUhuFRTVSMIX3XPR58y2lC8vww=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
|
||||
@@ -1,113 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"ladder/pkg/ruleset"
|
||||
|
||||
"golang.org/x/term"
|
||||
)
|
||||
|
||||
// HandleRulesetMerge merges a set of ruleset files, specified by the rulesetPath or RULESET env variable, into either YAML or Gzip format.
|
||||
// Exits the program with an error message if the ruleset path is not provided or if loading the ruleset fails.
|
||||
//
|
||||
// Parameters:
|
||||
// - rulesetPath: Specifies the path to the ruleset file.
|
||||
// - mergeRulesets: Indicates if a merge operation should be performed.
|
||||
// - useGzip: Indicates if the merged rulesets should be gzip-ped.
|
||||
// - output: Specifies the output file. If nil, stdout will be used.
|
||||
//
|
||||
// Returns:
|
||||
// - An error if the ruleset loading or merging process fails, otherwise nil.
|
||||
func HandleRulesetMerge(rulesetPath string, mergeRulesets bool, useGzip bool, output *os.File) error {
|
||||
if !mergeRulesets {
|
||||
return nil
|
||||
}
|
||||
|
||||
if rulesetPath == "" {
|
||||
rulesetPath = os.Getenv("RULESET")
|
||||
}
|
||||
|
||||
if rulesetPath == "" {
|
||||
fmt.Println("error: no ruleset provided. Try again with --ruleset <ruleset.yaml>")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
rs, err := ruleset.NewRuleset(rulesetPath)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if useGzip {
|
||||
return gzipMerge(rs, output)
|
||||
}
|
||||
|
||||
return yamlMerge(rs, output)
|
||||
}
|
||||
|
||||
// gzipMerge takes a RuleSet and an optional output file path pointer. It compresses the RuleSet into Gzip format.
|
||||
// If the output file path is provided, the compressed data is written to this file. Otherwise, it prints a warning
|
||||
// and outputs the binary data to stdout
|
||||
//
|
||||
// Parameters:
|
||||
// - rs: The ruleset.RuleSet to be compressed.
|
||||
// - output: The output for the gzip data. If nil, stdout will be used.
|
||||
//
|
||||
// Returns:
|
||||
// - An error if compression or file writing fails, otherwise nil.
|
||||
func gzipMerge(rs ruleset.RuleSet, output io.Writer) error {
|
||||
gzip, err := rs.GzipYaml()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if output != nil {
|
||||
_, err = io.Copy(output, gzip)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if term.IsTerminal(int(os.Stdout.Fd())) {
|
||||
println("warning: binary output can mess up your terminal. Use '--merge-rulesets-output <ruleset.gz>' or pipe it to a file.")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
_, err = io.Copy(os.Stdout, gzip)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// yamlMerge takes a RuleSet and an optional output file path pointer. It converts the RuleSet into YAML format.
|
||||
// If the output file path is provided, the YAML data is written to this file. If not, the YAML data is printed to stdout.
|
||||
//
|
||||
// Parameters:
|
||||
// - rs: The ruleset.RuleSet to be converted to YAML.
|
||||
// - output: The output for the merged data. If nil, stdout will be used.
|
||||
//
|
||||
// Returns:
|
||||
// - An error if YAML conversion or file writing fails, otherwise nil.
|
||||
func yamlMerge(rs ruleset.RuleSet, output io.Writer) error {
|
||||
yaml, err := rs.Yaml()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if output == nil {
|
||||
fmt.Println(yaml)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
_, err = io.WriteString(output, yaml)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write merged YAML ruleset: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -19,7 +19,7 @@
|
||||
</header>
|
||||
<form id="inputForm" method="get" class="mx-4 relative">
|
||||
<div>
|
||||
<input type="url" id="inputField" placeholder="Proxy Search" name="inputField" class="w-full text-sm leading-6 text-slate-400 rounded-md ring-1 ring-slate-900/10 shadow-sm py-1.5 pl-2 pr-3 hover:ring-slate-300 dark:bg-slate-800 dark:highlight-white/5 dark:hover:bg-slate-700" required autofocus>
|
||||
<input type="text" id="inputField" placeholder="Proxy Search" name="inputField" class="w-full text-sm leading-6 text-slate-400 rounded-md ring-1 ring-slate-900/10 shadow-sm py-1.5 pl-2 pr-3 hover:ring-slate-300 dark:bg-slate-800 dark:highlight-white/5 dark:hover:bg-slate-700" required autofocus>
|
||||
<button id="clearButton" type="button" aria-label="Clear Search" title="Clear Search" class="hidden absolute inset-y-0 right-0 items-center pr-2 hover:text-slate-400 hover:dark:text-slate-300" tabindex="-1">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round""><path d="M18 6 6 18"/><path d="m6 6 12 12"/></svg>
|
||||
</button>
|
||||
@@ -28,8 +28,8 @@
|
||||
<footer class="mt-10 mx-4 text-center text-slate-600 dark:text-slate-400">
|
||||
<p>
|
||||
Code Licensed Under GPL v3.0 |
|
||||
<a href="https://github.com/everywall/ladder" class="hover:text-blue-500 hover:underline underline-offset-2 transition-colors duration-300">View Source</a> |
|
||||
<a href="https://github.com/everywall" class="hover:text-blue-500 hover:underline underline-offset-2 transition-colors duration-300">Everywall</a>
|
||||
<a href="https://github.com/everywall/ladder" class="hover:text-blue-500 hover:underline underline-offset-2 transition-colors duration-300">Source</a> |
|
||||
<a href="https://github.com/everywall/ladder/releases" class="hover:text-blue-500 hover:underline underline-offset-2 transition-colors duration-300">VERSION</a>
|
||||
</p>
|
||||
</footer>
|
||||
</div>
|
||||
@@ -76,4 +76,4 @@
|
||||
</style>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
</html>
|
||||
@@ -8,33 +8,19 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"ladder/pkg/ruleset"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
var (
|
||||
UserAgent = getenv("USER_AGENT", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)")
|
||||
ForwardedFor = getenv("X_FORWARDED_FOR", "66.249.66.1")
|
||||
rulesSet = ruleset.NewRulesetFromEnv()
|
||||
allowedDomains = []string{}
|
||||
defaultTimeout = 15 // in seconds
|
||||
)
|
||||
|
||||
func init() {
|
||||
rulesSet = loadRules()
|
||||
allowedDomains = strings.Split(os.Getenv("ALLOWED_DOMAINS"), ",")
|
||||
if os.Getenv("ALLOWED_DOMAINS_RULESET") == "true" {
|
||||
allowedDomains = append(allowedDomains, rulesSet.Domains()...)
|
||||
}
|
||||
if timeoutStr := os.Getenv("HTTP_TIMEOUT"); timeoutStr != "" {
|
||||
defaultTimeout, _ = strconv.Atoi(timeoutStr)
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
// extracts a URL from the request ctx. If the URL in the request
|
||||
// is a relative path, it reconstructs the full URL using the referer header.
|
||||
@@ -86,58 +72,48 @@ func extractUrl(c *fiber.Ctx) (string, error) {
|
||||
// default behavior:
|
||||
// eg: https://localhost:8080/https://realsite.com/images/foobar.jpg -> https://realsite.com/images/foobar.jpg
|
||||
return urlQuery.String(), nil
|
||||
|
||||
}
|
||||
|
||||
func ProxySite(rulesetPath string) fiber.Handler {
|
||||
if rulesetPath != "" {
|
||||
rs, err := ruleset.NewRuleset(rulesetPath)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
rulesSet = rs
|
||||
func ProxySite(c *fiber.Ctx) error {
|
||||
// Get the url from the URL
|
||||
url, err := extractUrl(c)
|
||||
if err != nil {
|
||||
log.Println("ERROR In URL extraction:", err)
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
// Get the url from the URL
|
||||
url, err := extractUrl(c)
|
||||
if err != nil {
|
||||
log.Println("ERROR In URL extraction:", err)
|
||||
}
|
||||
|
||||
queries := c.Queries()
|
||||
body, _, resp, err := fetchSite(url, queries)
|
||||
if err != nil {
|
||||
log.Println("ERROR:", err)
|
||||
c.SendStatus(fiber.StatusInternalServerError)
|
||||
return c.SendString(err.Error())
|
||||
}
|
||||
|
||||
c.Cookie(&fiber.Cookie{})
|
||||
c.Set("Content-Type", resp.Header.Get("Content-Type"))
|
||||
c.Set("Content-Security-Policy", resp.Header.Get("Content-Security-Policy"))
|
||||
|
||||
return c.SendString(body)
|
||||
queries := c.Queries()
|
||||
body, _, resp, err := fetchSite(url, queries)
|
||||
if err != nil {
|
||||
log.Println("ERROR:", err)
|
||||
c.SendStatus(fiber.StatusInternalServerError)
|
||||
return c.SendString(err.Error())
|
||||
}
|
||||
|
||||
c.Set("Content-Type", resp.Header.Get("Content-Type"))
|
||||
c.Set("Content-Security-Policy", resp.Header.Get("Content-Security-Policy"))
|
||||
|
||||
return c.SendString(body)
|
||||
}
|
||||
|
||||
func modifyURL(uri string, rule ruleset.Rule) (string, error) {
|
||||
func modifyURL(uri string, rule Rule) (string, error) {
|
||||
newUrl, err := url.Parse(uri)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
for _, urlMod := range rule.URLMods.Domain {
|
||||
for _, urlMod := range rule.UrlMods.Domain {
|
||||
re := regexp.MustCompile(urlMod.Match)
|
||||
newUrl.Host = re.ReplaceAllString(newUrl.Host, urlMod.Replace)
|
||||
}
|
||||
|
||||
for _, urlMod := range rule.URLMods.Path {
|
||||
for _, urlMod := range rule.UrlMods.Path {
|
||||
re := regexp.MustCompile(urlMod.Match)
|
||||
newUrl.Path = re.ReplaceAllString(newUrl.Path, urlMod.Replace)
|
||||
}
|
||||
|
||||
v := newUrl.Query()
|
||||
for _, query := range rule.URLMods.Query {
|
||||
for _, query := range rule.UrlMods.Query {
|
||||
if query.Value == "" {
|
||||
v.Del(query.Key)
|
||||
continue
|
||||
@@ -187,9 +163,7 @@ func fetchSite(urlpath string, queries map[string]string) (string, *http.Request
|
||||
}
|
||||
|
||||
// Fetch the site
|
||||
client := &http.Client{
|
||||
Timeout: time.Second * time.Duration(defaultTimeout),
|
||||
}
|
||||
client := &http.Client{}
|
||||
req, _ := http.NewRequest("GET", url, nil)
|
||||
|
||||
if rule.Headers.UserAgent != "" {
|
||||
@@ -230,16 +204,16 @@ func fetchSite(urlpath string, queries map[string]string) (string, *http.Request
|
||||
}
|
||||
|
||||
if rule.Headers.CSP != "" {
|
||||
// log.Println(rule.Headers.CSP)
|
||||
log.Println(rule.Headers.CSP)
|
||||
resp.Header.Set("Content-Security-Policy", rule.Headers.CSP)
|
||||
}
|
||||
|
||||
// log.Print("rule", rule) TODO: Add a debug mode to print the rule
|
||||
//log.Print("rule", rule) TODO: Add a debug mode to print the rule
|
||||
body := rewriteHtml(bodyB, u, rule)
|
||||
return body, req, resp, nil
|
||||
}
|
||||
|
||||
func rewriteHtml(bodyB []byte, u *url.URL, rule ruleset.Rule) string {
|
||||
func rewriteHtml(bodyB []byte, u *url.URL, rule Rule) string {
|
||||
// Rewrite the HTML
|
||||
body := string(bodyB)
|
||||
|
||||
@@ -273,11 +247,63 @@ func getenv(key, fallback string) string {
|
||||
return value
|
||||
}
|
||||
|
||||
func fetchRule(domain string, path string) ruleset.Rule {
|
||||
if len(rulesSet) == 0 {
|
||||
return ruleset.Rule{}
|
||||
func loadRules() RuleSet {
|
||||
rulesUrl := os.Getenv("RULESET")
|
||||
if rulesUrl == "" {
|
||||
RulesList := RuleSet{}
|
||||
return RulesList
|
||||
}
|
||||
rule := ruleset.Rule{}
|
||||
log.Println("Loading rules")
|
||||
|
||||
var ruleSet RuleSet
|
||||
if strings.HasPrefix(rulesUrl, "http") {
|
||||
|
||||
resp, err := http.Get(rulesUrl)
|
||||
if err != nil {
|
||||
log.Println("ERROR:", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
log.Println("ERROR:", resp.StatusCode, rulesUrl)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
log.Println("ERROR:", err)
|
||||
}
|
||||
yaml.Unmarshal(body, &ruleSet)
|
||||
|
||||
if err != nil {
|
||||
log.Println("ERROR:", err)
|
||||
}
|
||||
} else {
|
||||
yamlFile, err := os.ReadFile(rulesUrl)
|
||||
if err != nil {
|
||||
log.Println("ERROR:", err)
|
||||
}
|
||||
yaml.Unmarshal(yamlFile, &ruleSet)
|
||||
}
|
||||
|
||||
domains := []string{}
|
||||
for _, rule := range ruleSet {
|
||||
|
||||
domains = append(domains, rule.Domain)
|
||||
domains = append(domains, rule.Domains...)
|
||||
if os.Getenv("ALLOWED_DOMAINS_RULESET") == "true" {
|
||||
allowedDomains = append(allowedDomains, domains...)
|
||||
}
|
||||
}
|
||||
|
||||
log.Println("Loaded ", len(ruleSet), " rules for", len(domains), "Domains")
|
||||
return ruleSet
|
||||
}
|
||||
|
||||
func fetchRule(domain string, path string) Rule {
|
||||
if len(rulesSet) == 0 {
|
||||
return Rule{}
|
||||
}
|
||||
rule := Rule{}
|
||||
for _, rule := range rulesSet {
|
||||
domains := rule.Domains
|
||||
if rule.Domain != "" {
|
||||
@@ -296,7 +322,7 @@ func fetchRule(domain string, path string) ruleset.Rule {
|
||||
return rule
|
||||
}
|
||||
|
||||
func applyRules(body string, rule ruleset.Rule) string {
|
||||
func applyRules(body string, rule Rule) string {
|
||||
if len(rulesSet) == 0 {
|
||||
return body
|
||||
}
|
||||
|
||||
@@ -7,15 +7,13 @@ import (
|
||||
"net/url"
|
||||
"testing"
|
||||
|
||||
"ladder/pkg/ruleset"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestProxySite(t *testing.T) {
|
||||
app := fiber.New()
|
||||
app.Get("/:url", ProxySite(""))
|
||||
app.Get("/:url", ProxySite)
|
||||
|
||||
req := httptest.NewRequest("GET", "/https://example.com", nil)
|
||||
resp, err := app.Test(req)
|
||||
@@ -53,7 +51,7 @@ func TestRewriteHtml(t *testing.T) {
|
||||
</html>
|
||||
`
|
||||
|
||||
actual := rewriteHtml(bodyB, u, ruleset.Rule{})
|
||||
actual := rewriteHtml(bodyB, u, Rule{})
|
||||
assert.Equal(t, expected, actual)
|
||||
}
|
||||
|
||||
|
||||
40
handlers/types.go
Normal file
40
handlers/types.go
Normal file
@@ -0,0 +1,40 @@
|
||||
package handlers
|
||||
|
||||
// Regex pairs a pattern (Match) with its replacement text (Replace), as read
// from the "match" and "replace" keys of the ruleset YAML.
type Regex struct {
|
||||
Match string `yaml:"match"`
|
||||
Replace string `yaml:"replace"`
|
||||
}
|
||||
// KV is a key/value pair read from the "key" and "value" keys of the ruleset YAML.
type KV struct {
|
||||
Key string `yaml:"key"`
|
||||
Value string `yaml:"value"`
|
||||
}
|
||||
|
||||
// RuleSet is an ordered list of Rule entries.
type RuleSet []Rule
|
||||
|
||||
// Rule is one entry of the ruleset YAML. It names the domain(s) and paths it
// is keyed on and carries header values, regex rewrites, URL modifications
// and injections for them.
type Rule struct {
|
||||
Domain string `yaml:"domain,omitempty"`
|
||||
Domains []string `yaml:"domains,omitempty"`
|
||||
Paths []string `yaml:"paths,omitempty"`
|
||||
// Headers holds header values configured for this rule.
Headers struct {
|
||||
UserAgent string `yaml:"user-agent,omitempty"`
|
||||
XForwardedFor string `yaml:"x-forwarded-for,omitempty"`
|
||||
Referer string `yaml:"referer,omitempty"`
|
||||
Cookie string `yaml:"cookie,omitempty"`
|
||||
CSP string `yaml:"content-security-policy,omitempty"`
|
||||
} `yaml:"headers,omitempty"`
|
||||
GoogleCache bool `yaml:"googleCache,omitempty"`
|
||||
RegexRules []Regex `yaml:"regexRules"`
|
||||
|
||||
// UrlMods groups match/replace rules for the host and path, and key/value
// entries for the query string.
UrlMods struct {
|
||||
Domain []Regex `yaml:"domain"`
|
||||
Path []Regex `yaml:"path"`
|
||||
Query []KV `yaml:"query"`
|
||||
} `yaml:"urlMods"`
|
||||
|
||||
// Injections lists content to append, prepend or replace at a position.
// NOTE(review): Position looks like a selector into the page — confirm against the consumer.
Injections []struct {
|
||||
Position string `yaml:"position"`
|
||||
Append string `yaml:"append"`
|
||||
Prepend string `yaml:"prepend"`
|
||||
Replace string `yaml:"replace"`
|
||||
} `yaml:"injections"`
|
||||
}
|
||||
@@ -1,310 +0,0 @@
|
||||
package ruleset
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Regex pairs a pattern (Match) with its replacement text (Replace), as read
// from the "match" and "replace" keys of the ruleset YAML.
type Regex struct {
|
||||
Match string `yaml:"match"`
|
||||
Replace string `yaml:"replace"`
|
||||
}
|
||||
// KV is a key/value pair read from the "key" and "value" keys of the ruleset YAML.
type KV struct {
|
||||
Key string `yaml:"key"`
|
||||
Value string `yaml:"value"`
|
||||
}
|
||||
|
||||
// RuleSet is an ordered list of Rule entries; the loader methods in this
// package append to it.
type RuleSet []Rule
|
||||
|
||||
// Rule is one entry of the ruleset YAML. It names the domain(s) and paths it
// is keyed on and carries header values, regex rewrites, URL modifications
// and injections for them. Every field is omitted from YAML output when empty.
type Rule struct {
|
||||
Domain string `yaml:"domain,omitempty"`
|
||||
Domains []string `yaml:"domains,omitempty"`
|
||||
Paths []string `yaml:"paths,omitempty"`
|
||||
// Headers holds header values configured for this rule.
Headers struct {
|
||||
UserAgent string `yaml:"user-agent,omitempty"`
|
||||
XForwardedFor string `yaml:"x-forwarded-for,omitempty"`
|
||||
Referer string `yaml:"referer,omitempty"`
|
||||
Cookie string `yaml:"cookie,omitempty"`
|
||||
CSP string `yaml:"content-security-policy,omitempty"`
|
||||
} `yaml:"headers,omitempty"`
|
||||
GoogleCache bool `yaml:"googleCache,omitempty"`
|
||||
RegexRules []Regex `yaml:"regexRules,omitempty"`
|
||||
|
||||
// URLMods groups match/replace rules for the host and path, and key/value
// entries for the query string.
URLMods struct {
|
||||
Domain []Regex `yaml:"domain,omitempty"`
|
||||
Path []Regex `yaml:"path,omitempty"`
|
||||
Query []KV `yaml:"query,omitempty"`
|
||||
} `yaml:"urlMods,omitempty"`
|
||||
|
||||
// Injections lists content to append, prepend or replace at a position.
// NOTE(review): Position looks like a selector into the page — confirm against the consumer.
Injections []struct {
|
||||
Position string `yaml:"position,omitempty"`
|
||||
Append string `yaml:"append,omitempty"`
|
||||
Prepend string `yaml:"prepend,omitempty"`
|
||||
Replace string `yaml:"replace,omitempty"`
|
||||
} `yaml:"injections,omitempty"`
|
||||
}
|
||||
|
||||
// remoteRegex matches strings that start with an http:// or https:// URL.
// NewRuleset uses it to choose between remote and local loading of a rule path.
var remoteRegex = regexp.MustCompile(`^https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()!@:%_\+.~#?&\/\/=]*)`)
|
||||
|
||||
// NewRulesetFromEnv creates a new RuleSet based on the RULESET environment variable.
|
||||
// It logs a warning and returns an empty RuleSet if the RULESET environment variable is not set.
|
||||
// If the RULESET is set but the rules cannot be loaded, it panics.
|
||||
func NewRulesetFromEnv() RuleSet {
|
||||
rulesPath, ok := os.LookupEnv("RULESET")
|
||||
if !ok {
|
||||
log.Printf("WARN: No ruleset specified. Set the `RULESET` environment variable to load one for a better success rate.")
|
||||
return RuleSet{}
|
||||
}
|
||||
|
||||
ruleSet, err := NewRuleset(rulesPath)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
|
||||
return ruleSet
|
||||
}
|
||||
|
||||
// NewRuleset loads a RuleSet from a given string of rule paths, separated by semicolons.
|
||||
// It supports loading rules from both local file paths and remote URLs.
|
||||
// Returns a RuleSet and an error if any issues occur during loading.
|
||||
func NewRuleset(rulePaths string) (RuleSet, error) {
|
||||
var ruleSet RuleSet
|
||||
|
||||
var errs []error
|
||||
|
||||
rp := strings.Split(rulePaths, ";")
|
||||
for _, rule := range rp {
|
||||
var err error
|
||||
|
||||
rulePath := strings.Trim(rule, " ")
|
||||
isRemote := remoteRegex.MatchString(rulePath)
|
||||
|
||||
if isRemote {
|
||||
err = ruleSet.loadRulesFromRemoteFile(rulePath)
|
||||
} else {
|
||||
err = ruleSet.loadRulesFromLocalDir(rulePath)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
e := fmt.Errorf("WARN: failed to load ruleset from '%s'", rulePath)
|
||||
errs = append(errs, errors.Join(e, err))
|
||||
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
e := fmt.Errorf("WARN: failed to load %d rulesets", len(rp))
|
||||
errs = append(errs, e)
|
||||
|
||||
// panic if the user specified a local ruleset, but it wasn't found on disk
|
||||
// don't fail silently
|
||||
for _, err := range errs {
|
||||
if errors.Is(os.ErrNotExist, err) {
|
||||
e := fmt.Errorf("PANIC: ruleset '%s' not found", err)
|
||||
panic(errors.Join(e, err))
|
||||
}
|
||||
}
|
||||
|
||||
// else, bubble up any errors, such as syntax or remote host issues
|
||||
return ruleSet, errors.Join(errs...)
|
||||
}
|
||||
|
||||
ruleSet.PrintStats()
|
||||
|
||||
return ruleSet, nil
|
||||
}
|
||||
|
||||
// ================== RULESET loading logic ===================================
|
||||
|
||||
// loadRulesFromLocalDir loads rules from a local directory specified by the path.
|
||||
// It walks through the directory, loading rules from YAML files.
|
||||
// Returns an error if the directory cannot be accessed
|
||||
// If there is an issue loading any file, it will be skipped
|
||||
func (rs *RuleSet) loadRulesFromLocalDir(path string) error {
|
||||
_, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
yamlRegex := regexp.MustCompile(`.*\.ya?ml`)
|
||||
|
||||
err = filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
if isYaml := yamlRegex.MatchString(path); !isYaml {
|
||||
return nil
|
||||
}
|
||||
|
||||
err = rs.loadRulesFromLocalFile(path)
|
||||
if err != nil {
|
||||
log.Printf("WARN: failed to load directory ruleset '%s': %s, skipping", path, err)
|
||||
return nil
|
||||
}
|
||||
|
||||
log.Printf("INFO: loaded ruleset %s\n", path)
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// loadRulesFromLocalFile loads rules from a local YAML file specified by the path.
|
||||
// Returns an error if the file cannot be read or if there's a syntax error in the YAML.
|
||||
func (rs *RuleSet) loadRulesFromLocalFile(path string) error {
|
||||
yamlFile, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
e := fmt.Errorf("failed to read rules from local file: '%s'", path)
|
||||
return errors.Join(e, err)
|
||||
}
|
||||
|
||||
var r RuleSet
|
||||
err = yaml.Unmarshal(yamlFile, &r)
|
||||
|
||||
if err != nil {
|
||||
e := fmt.Errorf("failed to load rules from local file, possible syntax error in '%s'", path)
|
||||
ee := errors.Join(e, err)
|
||||
|
||||
if _, ok := os.LookupEnv("DEBUG"); ok {
|
||||
debugPrintRule(string(yamlFile), ee)
|
||||
}
|
||||
|
||||
return ee
|
||||
}
|
||||
|
||||
*rs = append(*rs, r...)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// loadRulesFromRemoteFile loads rules from a remote URL.
|
||||
// It supports plain and gzip compressed content.
|
||||
// Returns an error if there's an issue accessing the URL or if there's a syntax error in the YAML.
|
||||
func (rs *RuleSet) loadRulesFromRemoteFile(rulesURL string) error {
|
||||
var r RuleSet
|
||||
|
||||
resp, err := http.Get(rulesURL)
|
||||
if err != nil {
|
||||
e := fmt.Errorf("failed to load rules from remote url '%s'", rulesURL)
|
||||
return errors.Join(e, err)
|
||||
}
|
||||
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
e := fmt.Errorf("failed to load rules from remote url (%s) on '%s'", resp.Status, rulesURL)
|
||||
return errors.Join(e, err)
|
||||
}
|
||||
|
||||
var reader io.Reader
|
||||
|
||||
isGzip := strings.HasSuffix(rulesURL, ".gz") || strings.HasSuffix(rulesURL, ".gzip") || resp.Header.Get("content-encoding") == "gzip"
|
||||
|
||||
if isGzip {
|
||||
reader, err = gzip.NewReader(resp.Body)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create gzip reader for URL '%s' with status code '%s': %w", rulesURL, resp.Status, err)
|
||||
}
|
||||
} else {
|
||||
reader = resp.Body
|
||||
}
|
||||
|
||||
err = yaml.NewDecoder(reader).Decode(&r)
|
||||
|
||||
if err != nil {
|
||||
e := fmt.Errorf("failed to load rules from remote url '%s' with status code '%s' and possible syntax error", rulesURL, resp.Status)
|
||||
ee := errors.Join(e, err)
|
||||
|
||||
return ee
|
||||
}
|
||||
|
||||
*rs = append(*rs, r...)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ================= utility methods ==========================
|
||||
|
||||
// Yaml returns the ruleset as a Yaml string
|
||||
func (rs *RuleSet) Yaml() (string, error) {
|
||||
y, err := yaml.Marshal(rs)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return string(y), nil
|
||||
}
|
||||
|
||||
// GzipYaml returns an io.Reader that streams the Gzip-compressed YAML representation of the RuleSet.
|
||||
func (rs *RuleSet) GzipYaml() (io.Reader, error) {
|
||||
pr, pw := io.Pipe()
|
||||
|
||||
go func() {
|
||||
defer pw.Close()
|
||||
|
||||
gw := gzip.NewWriter(pw)
|
||||
defer gw.Close()
|
||||
|
||||
if err := yaml.NewEncoder(gw).Encode(rs); err != nil {
|
||||
gw.Close() // Ensure to close the gzip writer
|
||||
pw.CloseWithError(err)
|
||||
return
|
||||
}
|
||||
}()
|
||||
|
||||
return pr, nil
|
||||
}
|
||||
|
||||
// Domains extracts and returns a slice of all domains present in the RuleSet.
|
||||
func (rs *RuleSet) Domains() []string {
|
||||
var domains []string
|
||||
for _, rule := range *rs {
|
||||
domains = append(domains, rule.Domain)
|
||||
domains = append(domains, rule.Domains...)
|
||||
}
|
||||
return domains
|
||||
}
|
||||
|
||||
// DomainCount returns the count of unique domains present in the RuleSet.
|
||||
func (rs *RuleSet) DomainCount() int {
|
||||
return len(rs.Domains())
|
||||
}
|
||||
|
||||
// Count returns the total number of rules in the RuleSet.
|
||||
func (rs *RuleSet) Count() int {
|
||||
return len(*rs)
|
||||
}
|
||||
|
||||
// PrintStats logs the number of rules and domains loaded in the RuleSet.
|
||||
func (rs *RuleSet) PrintStats() {
|
||||
log.Printf("INFO: Loaded %d rules for %d domains\n", rs.Count(), rs.DomainCount())
|
||||
}
|
||||
|
||||
// debugPrintRule dumps a rule's raw text and the error associated with it to
// standard output, framed by BEGIN/END marker lines, as a debugging aid.
func debugPrintRule(rule string, err error) {
	const (
		header  = "------------------------------ BEGIN DEBUG RULESET -----------------------------"
		divider = "--------------------------------------------------------------------------------"
		footer  = "------------------------------ END DEBUG RULESET -------------------------------"
	)

	fmt.Println(header)
	fmt.Printf("%s\n", err.Error())
	fmt.Println(divider)
	fmt.Println(rule)
	fmt.Println(footer)
}
|
||||
@@ -1,173 +0,0 @@
|
||||
package ruleset
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
var (
|
||||
validYAML = `
|
||||
- domain: example.com
|
||||
regexRules:
|
||||
- match: "^http:"
|
||||
replace: "https:"`
|
||||
|
||||
invalidYAML = `
|
||||
- domain: [thisIsATestYamlThatIsMeantToFail.example]
|
||||
regexRules:
|
||||
- match: "^http:"
|
||||
replace: "https:"
|
||||
- match: "[incomplete"`
|
||||
)
|
||||
|
||||
func TestLoadRulesFromRemoteFile(t *testing.T) {
|
||||
app := fiber.New()
|
||||
defer app.Shutdown()
|
||||
|
||||
app.Get("/valid-config.yml", func(c *fiber.Ctx) error {
|
||||
c.SendString(validYAML)
|
||||
return nil
|
||||
})
|
||||
|
||||
app.Get("/invalid-config.yml", func(c *fiber.Ctx) error {
|
||||
c.SendString(invalidYAML)
|
||||
return nil
|
||||
})
|
||||
|
||||
app.Get("/valid-config.gz", func(c *fiber.Ctx) error {
|
||||
c.Set("Content-Type", "application/octet-stream")
|
||||
|
||||
rs, err := loadRuleFromString(validYAML)
|
||||
if err != nil {
|
||||
t.Errorf("failed to load valid yaml from string: %s", err.Error())
|
||||
}
|
||||
|
||||
s, err := rs.GzipYaml()
|
||||
if err != nil {
|
||||
t.Errorf("failed to load gzip serialize yaml: %s", err.Error())
|
||||
}
|
||||
|
||||
err = c.SendStream(s)
|
||||
if err != nil {
|
||||
t.Errorf("failed to stream gzip serialized yaml: %s", err.Error())
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
// Start the server in a goroutine
|
||||
go func() {
|
||||
if err := app.Listen("127.0.0.1:9999"); err != nil {
|
||||
t.Errorf("Server failed to start: %s", err.Error())
|
||||
}
|
||||
}()
|
||||
|
||||
// Wait for the server to start
|
||||
time.Sleep(time.Second * 1)
|
||||
|
||||
rs, err := NewRuleset("http://127.0.0.1:9999/valid-config.yml")
|
||||
if err != nil {
|
||||
t.Errorf("failed to load plaintext ruleset from http server: %s", err.Error())
|
||||
}
|
||||
|
||||
assert.Equal(t, rs[0].Domain, "example.com")
|
||||
|
||||
rs, err = NewRuleset("http://127.0.0.1:9999/valid-config.gz")
|
||||
if err != nil {
|
||||
t.Errorf("failed to load gzipped ruleset from http server: %s", err.Error())
|
||||
}
|
||||
|
||||
assert.Equal(t, rs[0].Domain, "example.com")
|
||||
|
||||
os.Setenv("RULESET", "http://127.0.0.1:9999/valid-config.gz")
|
||||
|
||||
rs = NewRulesetFromEnv()
|
||||
if !assert.Equal(t, rs[0].Domain, "example.com") {
|
||||
t.Error("expected no errors loading ruleset from gzip url using environment variable, but got one")
|
||||
}
|
||||
}
|
||||
|
||||
func loadRuleFromString(yaml string) (RuleSet, error) {
|
||||
// Create a temporary file and load it
|
||||
tmpFile, _ := os.CreateTemp("", "ruleset*.yaml")
|
||||
|
||||
defer os.Remove(tmpFile.Name())
|
||||
|
||||
tmpFile.WriteString(yaml)
|
||||
|
||||
rs := RuleSet{}
|
||||
err := rs.loadRulesFromLocalFile(tmpFile.Name())
|
||||
|
||||
return rs, err
|
||||
}
|
||||
|
||||
// TestLoadRulesFromLocalFile tests the loading of rules from a local YAML file.
|
||||
func TestLoadRulesFromLocalFile(t *testing.T) {
|
||||
rs, err := loadRuleFromString(validYAML)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to load rules from valid YAML: %s", err)
|
||||
}
|
||||
|
||||
assert.Equal(t, rs[0].Domain, "example.com")
|
||||
assert.Equal(t, rs[0].RegexRules[0].Match, "^http:")
|
||||
assert.Equal(t, rs[0].RegexRules[0].Replace, "https:")
|
||||
|
||||
_, err = loadRuleFromString(invalidYAML)
|
||||
if err == nil {
|
||||
t.Errorf("Expected an error when loading invalid YAML, but got none")
|
||||
}
|
||||
}
|
||||
|
||||
// TestLoadRulesFromLocalDir tests the loading of rules from a local nested directory full of yaml rulesets
|
||||
func TestLoadRulesFromLocalDir(t *testing.T) {
|
||||
// Create a temporary directory
|
||||
baseDir, err := os.MkdirTemp("", "ruleset_test")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temporary directory: %s", err)
|
||||
}
|
||||
|
||||
defer os.RemoveAll(baseDir)
|
||||
|
||||
// Create a nested subdirectory
|
||||
nestedDir := filepath.Join(baseDir, "nested")
|
||||
err = os.Mkdir(nestedDir, 0o755)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create nested directory: %s", err)
|
||||
}
|
||||
|
||||
// Create a nested subdirectory
|
||||
nestedTwiceDir := filepath.Join(nestedDir, "nestedTwice")
|
||||
err = os.Mkdir(nestedTwiceDir, 0o755)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create twice-nested directory: %s", err)
|
||||
}
|
||||
|
||||
testCases := []string{"test.yaml", "test2.yaml", "test-3.yaml", "test 4.yaml", "1987.test.yaml.yml", "foobar.example.com.yaml", "foobar.com.yml"}
|
||||
for _, fileName := range testCases {
|
||||
filePath := filepath.Join(nestedDir, "2x-"+fileName)
|
||||
os.WriteFile(filePath, []byte(validYAML), 0o644)
|
||||
|
||||
filePath = filepath.Join(nestedDir, fileName)
|
||||
os.WriteFile(filePath, []byte(validYAML), 0o644)
|
||||
|
||||
filePath = filepath.Join(baseDir, "base-"+fileName)
|
||||
os.WriteFile(filePath, []byte(validYAML), 0o644)
|
||||
}
|
||||
|
||||
rs := RuleSet{}
|
||||
err = rs.loadRulesFromLocalDir(baseDir)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, rs.Count(), len(testCases)*3)
|
||||
|
||||
for _, rule := range rs {
|
||||
assert.Equal(t, rule.Domain, "example.com")
|
||||
assert.Equal(t, rule.RegexRules[0].Match, "^http:")
|
||||
assert.Equal(t, rule.RegexRules[0].Replace, "https:")
|
||||
}
|
||||
}
|
||||
173
ruleset.yaml
173
ruleset.yaml
@@ -20,4 +20,175 @@
|
||||
</script>
|
||||
- position: h1
|
||||
replace: |
|
||||
<h1>An example with a ladder ;-)</h1>
|
||||
<h1>An example with a ladder ;-)</h1>
|
||||
- domain: www.americanbanker.com
|
||||
paths:
|
||||
- /news
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const inlineGate = document.querySelector('.inline-gate');
|
||||
if (inlineGate) {
|
||||
inlineGate.classList.remove('inline-gate');
|
||||
const inlineGated = document.querySelectorAll('.inline-gated');
|
||||
for (const elem of inlineGated) { elem.classList.remove('inline-gated'); }
|
||||
}
|
||||
});
|
||||
</script>
|
||||
- domain: www.nzz.ch
|
||||
paths:
|
||||
- /international
|
||||
- /sport
|
||||
- /wirtschaft
|
||||
- /technologie
|
||||
- /feuilleton
|
||||
- /zuerich
|
||||
- /wissenschaft
|
||||
- /gesellschaft
|
||||
- /panorama
|
||||
- /mobilitaet
|
||||
- /reisen
|
||||
- /meinung
|
||||
- /finanze
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const paywall = document.querySelector('.dynamic-regwall');
|
||||
removeDOMElement(paywall)
|
||||
});
|
||||
</script>
|
||||
- domains:
|
||||
- www.architecturaldigest.com
|
||||
- www.bonappetit.com
|
||||
- www.cntraveler.com
|
||||
- www.epicurious.com
|
||||
- www.gq.com
|
||||
- www.newyorker.com
|
||||
- www.vanityfair.com
|
||||
- www.vogue.com
|
||||
- www.wired.com
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const banners = document.querySelectorAll('.paywall-bar, div[class^="MessageBannerWrapper-"]');
|
||||
banners.forEach(el => { el.remove(); });
|
||||
});
|
||||
</script>
|
||||
- domains:
|
||||
- www.nytimes.com
|
||||
- www.time.com
|
||||
headers:
|
||||
user-agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
|
||||
cookie: nyt-a=; nyt-gdpr=0; nyt-geo=DE; nyt-privacy=1
|
||||
referer: https://www.google.com/
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
window.localStorage.clear();
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const banners = document.querySelectorAll('div[data-testid="inline-message"], div[id^="ad-"], div[id^="leaderboard-"], div.expanded-dock, div.pz-ad-box, div[id="top-wrapper"], div[id="bottom-wrapper"]');
|
||||
banners.forEach(el => { el.remove(); });
|
||||
});
|
||||
</script>
|
||||
- domains:
|
||||
- www.thestar.com
|
||||
- www.niagarafallsreview.ca
|
||||
- www.stcatharinesstandard.ca
|
||||
- www.thepeterboroughexaminer.com
|
||||
- www.therecord.com
|
||||
- www.thespec.com
|
||||
- www.wellandtribune.ca
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
window.localStorage.clear();
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const paywall = document.querySelectorAll('div.subscriber-offers');
|
||||
paywall.forEach(el => { el.remove(); });
|
||||
const subscriber_only = document.querySelectorAll('div.subscriber-only');
|
||||
for (const elem of subscriber_only) {
|
||||
if (elem.classList.contains('encrypted-content') && dompurify_loaded) {
|
||||
const parser = new DOMParser();
|
||||
const doc = parser.parseFromString('<div>' + DOMPurify.sanitize(unscramble(elem.innerText)) + '</div>', 'text/html');
|
||||
const content_new = doc.querySelector('div');
|
||||
elem.parentNode.replaceChild(content_new, elem);
|
||||
}
|
||||
elem.removeAttribute('style');
|
||||
elem.removeAttribute('class');
|
||||
}
|
||||
const banners = document.querySelectorAll('div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container');
|
||||
banners.forEach(el => { el.remove(); });
|
||||
const ads = document.querySelectorAll('div.tnt-ads-container, div[class*="adLabelWrapper"]');
|
||||
ads.forEach(el => { el.remove(); });
|
||||
const recommendations = document.querySelectorAll('div[id^="tncms-region-article"]');
|
||||
recommendations.forEach(el => { el.remove(); });
|
||||
});
|
||||
</script>
|
||||
- domain: www.usatoday.com
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const banners = document.querySelectorAll('div.roadblock-container, .gnt_nb, [aria-label="advertisement"], div[id="main-frame-error"]');
|
||||
banners.forEach(el => { el.remove(); });
|
||||
});
|
||||
</script>
|
||||
- domain: www.washingtonpost.com
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
let paywall = document.querySelectorAll('div[data-qa$="-ad"], div[id="leaderboard-wrapper"], div[data-qa="subscribe-promo"]');
|
||||
paywall.forEach(el => { el.remove(); });
|
||||
const images = document.querySelectorAll('img');
|
||||
images.forEach(image => { image.parentElement.style.filter = ''; });
|
||||
const headimage = document.querySelectorAll('div .aspect-custom');
|
||||
headimage.forEach(image => { image.style.filter = ''; });
|
||||
});
|
||||
</script>
|
||||
- domain: medium.com
|
||||
headers:
|
||||
referer: https://t.co/x?amp=1
|
||||
x-forwarded-for: none
|
||||
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
|
||||
content-security-policy: script-src 'self';
|
||||
cookie:
|
||||
- domain: tagesspiegel.de
|
||||
headers:
|
||||
content-security-policy: script-src 'self';
|
||||
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
|
||||
urlMods:
|
||||
query:
|
||||
- key: amp
|
||||
value: 1
|
||||
- domain: www.ft.com
|
||||
headers:
|
||||
referer: https://t.co/x?amp=1
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const styleTags = document.querySelectorAll('link[rel="stylesheet"]');
|
||||
styleTags.forEach(el => {
|
||||
const href = el.getAttribute('href').substring(1);
|
||||
const updatedHref = href.replace(/(https?:\/\/.+?)\/{2,}/, '$1/');
|
||||
el.setAttribute('href', updatedHref);
|
||||
});
|
||||
setTimeout(() => {
|
||||
const cookie = document.querySelectorAll('.o-cookie-message, .js-article-ribbon, .o-ads, .o-banner, .o-message, .article__content-sign-up');
|
||||
cookie.forEach(el => { el.remove(); });
|
||||
}, 1000);
|
||||
})
|
||||
</script>
|
||||
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
- domains:
|
||||
- www.thestar.com
|
||||
- www.niagarafallsreview.ca
|
||||
- www.stcatharinesstandard.ca
|
||||
- www.thepeterboroughexaminer.com
|
||||
- www.therecord.com
|
||||
- www.thespec.com
|
||||
- www.wellandtribune.ca
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
window.localStorage.clear();
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const paywall = document.querySelectorAll('div.subscriber-offers');
|
||||
paywall.forEach(el => { el.remove(); });
|
||||
const subscriber_only = document.querySelectorAll('div.subscriber-only');
|
||||
for (const elem of subscriber_only) {
|
||||
if (elem.classList.contains('encrypted-content') && dompurify_loaded) {
|
||||
const parser = new DOMParser();
|
||||
const doc = parser.parseFromString('<div>' + DOMPurify.sanitize(unscramble(elem.innerText)) + '</div>', 'text/html');
|
||||
const content_new = doc.querySelector('div');
|
||||
elem.parentNode.replaceChild(content_new, elem);
|
||||
}
|
||||
elem.removeAttribute('style');
|
||||
elem.removeAttribute('class');
|
||||
}
|
||||
const banners = document.querySelectorAll('div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container');
|
||||
banners.forEach(el => { el.remove(); });
|
||||
const ads = document.querySelectorAll('div.tnt-ads-container, div[class*="adLabelWrapper"]');
|
||||
ads.forEach(el => { el.remove(); });
|
||||
const recommendations = document.querySelectorAll('div[id^="tncms-region-article"]');
|
||||
recommendations.forEach(el => { el.remove(); });
|
||||
});
|
||||
</script>
|
||||
@@ -1,24 +0,0 @@
|
||||
- domain: www.nzz.ch
|
||||
paths:
|
||||
- /international
|
||||
- /sport
|
||||
- /wirtschaft
|
||||
- /technologie
|
||||
- /feuilleton
|
||||
- /zuerich
|
||||
- /wissenschaft
|
||||
- /gesellschaft
|
||||
- /panorama
|
||||
- /mobilitaet
|
||||
- /reisen
|
||||
- /meinung
|
||||
- /finanze
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const paywall = document.querySelector('.dynamic-regwall');
|
||||
removeDOMElement(paywall)
|
||||
});
|
||||
</script>
|
||||
@@ -1,9 +0,0 @@
|
||||
# loads amp version of page
|
||||
- domain: tagesspiegel.de
|
||||
headers:
|
||||
content-security-policy: script-src 'self';
|
||||
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
|
||||
urlMods:
|
||||
query:
|
||||
- key: amp
|
||||
value: 1
|
||||
@@ -1,20 +0,0 @@
|
||||
- domain: www.ft.com
|
||||
headers:
|
||||
referer: https://t.co/x?amp=1
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const styleTags = document.querySelectorAll('link[rel="stylesheet"]');
|
||||
styleTags.forEach(el => {
|
||||
const href = el.getAttribute('href').substring(1);
|
||||
const updatedHref = href.replace(/(https?:\/\/.+?)\/{2,}/, '$1/');
|
||||
el.setAttribute('href', updatedHref);
|
||||
});
|
||||
setTimeout(() => {
|
||||
const cookie = document.querySelectorAll('.o-cookie-message, .js-article-ribbon, .o-ads, .o-banner, .o-message, .article__content-sign-up');
|
||||
cookie.forEach(el => { el.remove(); });
|
||||
}, 1000);
|
||||
})
|
||||
</script>
|
||||
@@ -1,19 +0,0 @@
|
||||
- domains:
|
||||
- www.architecturaldigest.com
|
||||
- www.bonappetit.com
|
||||
- www.cntraveler.com
|
||||
- www.epicurious.com
|
||||
- www.gq.com
|
||||
- www.newyorker.com
|
||||
- www.vanityfair.com
|
||||
- www.vogue.com
|
||||
- www.wired.com
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const banners = document.querySelectorAll('.paywall-bar, div[class^="MessageBannerWrapper-"]');
|
||||
banners.forEach(el => { el.remove(); });
|
||||
});
|
||||
</script>
|
||||
@@ -1,16 +0,0 @@
|
||||
- domain: americanbanker.com
|
||||
paths:
|
||||
- /news
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const inlineGate = document.querySelector('.inline-gate');
|
||||
if (inlineGate) {
|
||||
inlineGate.classList.remove('inline-gate');
|
||||
const inlineGated = document.querySelectorAll('.inline-gated');
|
||||
for (const elem of inlineGated) { elem.classList.remove('inline-gated'); }
|
||||
}
|
||||
});
|
||||
</script>
|
||||
@@ -1,7 +0,0 @@
|
||||
- domain: medium.com
|
||||
headers:
|
||||
referer: https://t.co/x?amp=1
|
||||
x-forwarded-for: none
|
||||
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
|
||||
content-security-policy: script-src 'self';
|
||||
cookie:
|
||||
@@ -1,17 +0,0 @@
|
||||
- domains:
|
||||
- www.nytimes.com
|
||||
- www.time.com
|
||||
headers:
|
||||
user-agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
|
||||
cookie: nyt-a=; nyt-gdpr=0; nyt-geo=DE; nyt-privacy=1
|
||||
referer: https://www.google.com/
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
window.localStorage.clear();
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const banners = document.querySelectorAll('div[data-testid="inline-message"], div[id^="ad-"], div[id^="leaderboard-"], div.expanded-dock, div.pz-ad-box, div[id="top-wrapper"], div[id="bottom-wrapper"]');
|
||||
banners.forEach(el => { el.remove(); });
|
||||
});
|
||||
</script>
|
||||
@@ -1,10 +0,0 @@
|
||||
- domain: www.usatoday.com
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const banners = document.querySelectorAll('div.roadblock-container, .gnt_nb, [aria-label="advertisement"], div[id="main-frame-error"]');
|
||||
banners.forEach(el => { el.remove(); });
|
||||
});
|
||||
</script>
|
||||
@@ -1,14 +0,0 @@
|
||||
- domain: www.washingtonpost.com
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
let paywall = document.querySelectorAll('div[data-qa$="-ad"], div[id="leaderboard-wrapper"], div[data-qa="subscribe-promo"]');
|
||||
paywall.forEach(el => { el.remove(); });
|
||||
const images = document.querySelectorAll('img');
|
||||
images.forEach(image => { image.parentElement.style.filter = ''; });
|
||||
const headimage = document.querySelectorAll('div .aspect-custom');
|
||||
headimage.forEach(image => { image.style.filter = ''; });
|
||||
});
|
||||
</script>
|
||||
Reference in New Issue
Block a user