20 Commits

Author SHA1 Message Date
Gianni Carafa
8d1554e10e fix buildjob 2023-11-10 17:31:00 +01:00
Gianni Carafa
ff5bb61891 Merge branch 'main' of github.com:kubero-dev/paywall-ladder 2023-11-10 15:57:48 +01:00
Gianni Carafa
936b418b00 move to new organisation 2023-11-10 15:57:29 +01:00
Gianni C
ac44f12d85 Merge pull request #21 from joncrangle/csp-override
Allow the user to specify the Content Security Policy for a domain
2023-11-10 09:04:54 +01:00
joncrangle
b6f0c644f8 Undo prettier 2023-11-09 22:05:44 -05:00
joncrangle
66c4b3c911 Undo prettier 2023-11-09 22:03:37 -05:00
joncrangle
924696c015 Enable user to define their own content-security-policy 2023-11-09 21:50:46 -05:00
Gianni Carafa
81aa00c2ea include all subdomains 2023-11-09 23:51:36 +01:00
Gianni Carafa
6c1f58e2e7 Add headers field in ruleset. Enable Google Cache. 2023-11-09 23:32:43 +01:00
Gianni Carafa
d3c995df34 unblur main image 2023-11-09 12:09:09 +01:00
Gianni Carafa
6f4a2daeca fix LOG_URLS feature 2023-11-09 09:37:46 +01:00
Gianni Carafa
f728b2c1de fix default port, set to 8080 2023-11-09 09:00:08 +01:00
Gianni C
bc346a3954 Merge pull request #18 from kubero-dev/feature/multiplle-domains
Feature/multiple domains
2023-11-09 08:41:42 +01:00
Gianni Carafa
5442da81b9 allow domain list in rules 2023-11-08 23:36:06 +01:00
Gianni C
73b13914fe Merge pull request #13 from joncrangle/main
Enable multiple domains to apply the same rule and add rules to ruleset
2023-11-08 23:10:15 +01:00
Gianni Carafa
b127f81a9b Add a comment to disable ALLOWED_DOMAINS_RULESET in docker-compose.yaml. 2023-11-08 23:08:48 +01:00
Gianni Carafa
79438a0b59 simplify docker compose 2023-11-08 23:06:45 +01:00
joncrangle
1aa917e0c1 unblur images on washingtonpost 2023-11-08 00:14:10 -05:00
joncrangle
84617b32e3 add conde nytimes torstar usatoday washingtonpost 2023-11-07 22:44:28 -05:00
joncrangle
501dfb106a feat: enable multiple domains to apply same rule 2023-11-07 21:15:24 -05:00
12 changed files with 284 additions and 95 deletions

View File

@@ -37,4 +37,4 @@ jobs:
args: release --clean
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GORELEASER_GITHUB_TOKEN: ${{ secrets.GORELEASER_GITHUB_TOKEN }}
# GORELEASER_GITHUB_TOKEN: ${{ secrets.GORELEASER_GITHUB_TOKEN }}

View File

@@ -33,10 +33,10 @@ changelog:
#brews:
# -
# repository:
# owner: kubero-dev
# owner: everywall
# name: homebrew-ladder
# token: "{{ .Env.GORELEASER_GITHUB_TOKEN }}"
# homepage: "https://www.kubero.dev"
# description: "Manage your kubero applications with the CLI"
# homepage: "https://www.everyladder.dev"
# description: "Manage your everyladder applications modify every website"
# test: |
# system "#{bin}/kubero", "--version"
# system "#{bin}/everyladder", "--version"

View File

@@ -3,7 +3,7 @@
</p>
<h1 align="center">Ladder</h1>
<div><img alt="License" src="https://img.shields.io/github/license/kubero-dev/ladder"> <img alt="go.mod Go version " src="https://img.shields.io/github/go-mod/go-version/kubero-dev/ladder"> <img alt="GitHub tag (with filter)" src="https://img.shields.io/github/v/tag/kubero-dev/ladder"> <img alt="GitHub (Pre-)Release Date" src="https://img.shields.io/github/release-date-pre/kubero-dev/ladder"> <img alt="GitHub Downloads all releases" src="https://img.shields.io/github/downloads/kubero-dev/ladder/total"> <img alt="GitHub Build Status (with event)" src="https://img.shields.io/github/actions/workflow/status/kubero-dev/ladder/release-binaries.yaml"></div>
<div><img alt="License" src="https://img.shields.io/github/license/everywall/ladder"> <img alt="go.mod Go version " src="https://img.shields.io/github/go-mod/go-version/everywall/ladder"> <img alt="GitHub tag (with filter)" src="https://img.shields.io/github/v/tag/everywall/ladder"> <img alt="GitHub (Pre-)Release Date" src="https://img.shields.io/github/release-date-pre/everywall/ladder"> <img alt="GitHub Downloads all releases" src="https://img.shields.io/github/downloads/everywall/ladder/total"> <img alt="GitHub Build Status (with event)" src="https://img.shields.io/github/actions/workflow/status/everywall/ladder/release-binaries.yaml"></div>
*Ladder is a web proxy to help bypass paywalls.* This is a selfhosted version of [1ft.io](https://1ft.io) and [12ft.io](https://12ft.io). It is inspired by [13ft](https://github.com/wasi-master/13ft).
@@ -23,7 +23,7 @@ Freedom of information is an essential pillar of democracy and informed decision
- [x] Fetch RAW HTML
- [x] Custom User Agent
- [x] Custom X-Forwarded-For IP
- [x] [Docker container](https://github.com/kubero-dev/ladder/pkgs/container/ladder) (amd64, arm64)
- [x] [Docker container](https://github.com/everywall/ladder/pkgs/container/ladder) (amd64, arm64)
- [x] Linux binary
- [x] Mac OS binary
- [x] Windows binary (untested)
@@ -47,18 +47,18 @@ Some sites do not expose their content to search engines, which means that the p
> **Warning:** If your instance will be publicly accessible, make sure to enable Basic Auth. This will prevent unauthorized users from using your proxy. If you do not enable Basic Auth, anyone can use your proxy to browse nasty/illegal stuff. And you will be responsible for it.
### Binary
1) Download binary [here](https://github.com/kubero-dev/ladder/releases/latest)
1) Download binary [here](https://github.com/everywall/ladder/releases/latest)
2) Unpack and run the binary `./ladder`
3) Open Browser (Default: http://localhost:8080)
### Docker
```bash
docker run -p 8080:8080 -d --name ladder ghcr.io/kubero-dev/ladder:latest
docker run -p 8080:8080 -d --name ladder ghcr.io/everywall/ladder:latest
```
### Docker Compose
```bash
curl https://raw.githubusercontent.com/kubero-dev/ladder/main/docker-compose.yaml --output docker-compose.yaml
curl https://raw.githubusercontent.com/everywall/ladder/main/docker-compose.yaml --output docker-compose.yaml
docker-compose up -d
```
@@ -106,7 +106,7 @@ http://localhost:8080/ruleset
| `LOG_URLS` | Log fetched URLs | `true` |
| `DISABLE_FORM` | Disables URL Form Frontpage | `false` |
| `FORM_PATH` | Path to custom Form HTML | `` |
| `RULESET` | URL to a ruleset file | `https://raw.githubusercontent.com/kubero-dev/ladder/main/ruleset.yaml` or `/path/to/my/rules.yaml` |
| `RULESET` | URL to a ruleset file | `https://raw.githubusercontent.com/everywall/ladder/main/ruleset.yaml` or `/path/to/my/rules.yaml` |
| `EXPOSE_RULESET` | Make your Ruleset available to other ladders | `true` |
| `ALLOWED_DOMAINS` | Comma separated list of allowed domains. Empty = no limitations | `` |
| `ALLOWED_DOMAINS_RULESET` | Allow Domains from Ruleset. false = no limitations | `false` |
@@ -120,13 +120,22 @@ It is possible to apply custom rules to modify the response. This can be used to
See in [ruleset.yaml](ruleset.yaml) for an example.
```yaml
- domain: www.example.com
- domain: example.com # Includes all subdomains
domains: # Additional domains to apply the rule
- www.example.de
- www.beispiel.de
headers:
x-forwarded-for: none # override X-Forwarded-For header or delete with none
referer: none # override Referer header or delete with none
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
content-security-policy: script-src 'self'; # override response header
cookie: privacy=1
regexRules:
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
replace: <script $1 script="/https://www.example.com/$3"
injections:
- position: head # Position where to inject the code
append: |
append: | # possible keys: append, prepend, replace
<script>
window.localStorage.clear();
console.log("test");
@@ -135,7 +144,7 @@ See in [ruleset.yaml](ruleset.yaml) for an example.
- domain: www.anotherdomain.com # Domain where the rule applies
paths: # Paths where the rule applies
- /article
googleCache: false # Search also in Google Cache
googleCache: false # Use Google Cache to fetch the content
regexRules: # Regex rules to apply
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
replace: <script $1 script="/https://www.example.com/$3"

View File

@@ -22,9 +22,13 @@ func main() {
parser := argparse.NewParser("ladder", "Every Wall needs a Ladder")
portEnv := os.Getenv("PORT")
if os.Getenv("PORT") == "" {
portEnv = "8080"
}
port := parser.String("p", "port", &argparse.Options{
Required: false,
Default: os.Getenv("PORT"),
Default: portEnv,
Help: "Port the webserver will listen on"})
prefork := parser.Flag("P", "prefork", &argparse.Options{
@@ -43,6 +47,7 @@ func main() {
app := fiber.New(
fiber.Config{
Prefork: *prefork,
GETOnly: true,
},
)

View File

@@ -1,15 +1,19 @@
version: '3'
services:
ladder:
image: ghcr.io/kubero-dev/ladder:latest
image: ghcr.io/everywall/ladder:latest
container_name: ladder
build: .
#build: .
#restart: always
#command: sh -c ./ladder
environment:
- PORT=8080
- RULESET=/app/ruleset.yaml
#- PREFORK=true
#- ALLOWED_DOMAINS_RULESET=false
#- EXPOSE_RULESET=true
#- PREFORK=false
#- DISABLE_FORM=false
#- FORM_PATH=/app/form.html
#- X_FORWARDED_FOR=66.249.66.1
#- USER_AGENT=Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
#- USERPASS=foo:bar
@@ -19,11 +23,4 @@ services:
- "8080:8080"
volumes:
- ./ruleset.yaml:/app/ruleset.yaml
deploy:
resources:
limits:
cpus: "0.50"
memory: 512M
reservations:
cpus: "0.25"
memory: 128M
- ./handlers/form.html:/app/form.html

View File

@@ -36,7 +36,7 @@
</style>
</head>
<body>
<a href="https://github.com/kubero-dev/ladder">
<a href="https://github.com/everywall/ladder">
<div class="github-corner" aria-label="View source on GitHub">
<svg
xmlns:svg="http://www.w3.org/2000/svg"

View File

@@ -33,6 +33,8 @@ func ProxySite(c *fiber.Ctx) error {
}
c.Set("Content-Type", resp.Header.Get("Content-Type"))
c.Set("Content-Security-Policy", resp.Header.Get("Content-Security-Policy"))
return c.SendString(body)
}
@@ -56,17 +58,49 @@ func fetchSite(urlpath string, queries map[string]string) (string, *http.Request
return "", nil, nil, fmt.Errorf("domain not allowed. %s not in %s", u.Host, allowedDomains)
}
if os.Getenv("LOG_URLS ") == "true" {
if os.Getenv("LOG_URLS") == "true" {
log.Println(u.String() + urlQuery)
}
rule := fetchRule(u.Host, u.Path)
if rule.GoogleCache {
u, err = url.Parse("https://webcache.googleusercontent.com/search?q=cache:" + u.String())
if err != nil {
return "", nil, nil, err
}
}
// Fetch the site
client := &http.Client{}
req, _ := http.NewRequest("GET", u.String()+urlQuery, nil)
if rule.Headers.UserAgent != "" {
req.Header.Set("User-Agent", rule.Headers.UserAgent)
} else {
req.Header.Set("User-Agent", UserAgent)
}
if rule.Headers.XForwardedFor != "" {
if rule.Headers.XForwardedFor != "none" {
req.Header.Set("X-Forwarded-For", rule.Headers.XForwardedFor)
}
} else {
req.Header.Set("X-Forwarded-For", ForwardedFor)
}
if rule.Headers.Referer != "" {
if rule.Headers.Referer != "none" {
req.Header.Set("Referer", rule.Headers.Referer)
}
} else {
req.Header.Set("Referer", u.String())
req.Header.Set("Host", u.Host)
}
if rule.Headers.Cookie != "" {
req.Header.Set("Cookie", rule.Headers.Cookie)
}
resp, err := client.Do(req)
if err != nil {
@@ -79,11 +113,16 @@ func fetchSite(urlpath string, queries map[string]string) (string, *http.Request
return "", nil, nil, err
}
body := rewriteHtml(bodyB, u)
if rule.Headers.CSP != "" {
resp.Header.Set("Content-Security-Policy", rule.Headers.CSP)
}
log.Print("rule", rule)
body := rewriteHtml(bodyB, u, rule)
return body, req, resp, nil
}
func rewriteHtml(bodyB []byte, u *url.URL) string {
func rewriteHtml(bodyB []byte, u *url.URL, rule Rule) string {
// Rewrite the HTML
body := string(bodyB)
@@ -104,7 +143,7 @@ func rewriteHtml(bodyB []byte, u *url.URL) string {
body = strings.ReplaceAll(body, "href=\"https://"+u.Host, "href=\"/https://"+u.Host+"/")
if os.Getenv("RULESET") != "" {
body = applyRules(u.Host, u.Path, body)
body = applyRules(body, rule)
}
return body
}
@@ -155,29 +194,48 @@ func loadRules() RuleSet {
yaml.Unmarshal(yamlFile, &ruleSet)
}
domains := []string{}
for _, rule := range ruleSet {
//log.Println("Loaded rules for", rule.Domain)
domains = append(domains, rule.Domain)
domains = append(domains, rule.Domains...)
if os.Getenv("ALLOWED_DOMAINS_RULESET") == "true" {
allowedDomains = append(allowedDomains, rule.Domain)
allowedDomains = append(allowedDomains, domains...)
}
}
log.Println("Loaded rules for", len(ruleSet), "Domains")
log.Println("Loaded ", len(ruleSet), " rules for", len(domains), "Domains")
return ruleSet
}
func applyRules(domain string, path string, body string) string {
// fetchRule returns the first rule in rulesSet that applies to the given
// host and path, or an empty Rule when nothing matches (including when the
// ruleset is empty).
//
// A rule applies when its Domain or one of its Domains equals the host or is
// a parent domain of it, and - if the rule lists Paths - StringInSlice
// accepts the path.
func fetchRule(domain string, path string) Rule {
	for _, candidate := range rulesSet {
		// The path restriction does not depend on the domain, so it is
		// evaluated once per rule instead of once per domain.
		if len(candidate.Paths) > 0 && !StringInSlice(path, candidate.Paths) {
			continue
		}
		// Domain and Domains are checked separately rather than appended
		// together, so the slice shared with the global ruleset is never
		// written to.
		if ruleDomainMatches(domain, candidate.Domain) {
			return candidate
		}
		for _, ruleDomain := range candidate.Domains {
			if ruleDomainMatches(domain, ruleDomain) {
				// return first match
				return candidate
			}
		}
	}
	return Rule{}
}

// ruleDomainMatches reports whether host is ruleDomain itself or one of its
// subdomains. The suffix comparison is anchored on a dot so that
// "notexample.com" does not match a rule for "example.com", and an empty
// ruleDomain never matches.
func ruleDomainMatches(host string, ruleDomain string) bool {
	if ruleDomain == "" {
		return false
	}
	return host == ruleDomain || strings.HasSuffix(host, "."+ruleDomain)
}
func applyRules(body string, rule Rule) string {
if len(rulesSet) == 0 {
return body
}
for _, rule := range rulesSet {
if rule.Domain != domain {
continue
}
if len(rule.Paths) > 0 && !StringInSlice(path, rule.Paths) {
continue
}
for _, regexRule := range rule.RegexRules {
re := regexp.MustCompile(regexRule.Match)
body = re.ReplaceAllString(body, regexRule.Replace)
@@ -201,29 +259,10 @@ func applyRules(domain string, path string, body string) string {
log.Fatal(err)
}
}
}
return body
}
type Rule struct {
Match string `yaml:"match"`
Replace string `yaml:"replace"`
}
type RuleSet []struct {
Domain string `yaml:"domain"`
Paths []string `yaml:"paths,omitempty"`
GoogleCache bool `yaml:"googleCache,omitempty"`
RegexRules []Rule `yaml:"regexRules"`
Injections []struct {
Position string `yaml:"position"`
Append string `yaml:"append"`
Prepend string `yaml:"prepend"`
Replace string `yaml:"replace"`
} `yaml:"injections"`
}
func StringInSlice(s string, list []string) bool {
for _, x := range list {
if strings.HasPrefix(s, x) {

View File

@@ -51,7 +51,7 @@ func TestRewriteHtml(t *testing.T) {
</html>
`
actual := rewriteHtml(bodyB, u)
actual := rewriteHtml(bodyB, u, Rule{})
assert.Equal(t, expected, actual)
}

29
handlers/types.go Normal file
View File

@@ -0,0 +1,29 @@
package handlers
// Regex is one search-and-replace rule applied to a fetched page body.
// Match is compiled as a regular expression and every occurrence is
// replaced with Replace.
type Regex struct {
Match string `yaml:"match"`
Replace string `yaml:"replace"`
}
// RuleSet is the ordered list of rules loaded from the ruleset YAML.
type RuleSet []Rule
// Rule describes how requests to one or more domains are fetched and how
// the returned page is modified.
type Rule struct {
// Domain is a single domain the rule applies to; its subdomains match too.
Domain string `yaml:"domain,omitempty"`
// Domains lists additional domains that share this rule.
Domains []string `yaml:"domains,omitempty"`
// Paths optionally restricts the rule to matching request paths; an empty
// list means the rule applies to every path.
Paths []string `yaml:"paths,omitempty"`
// Headers overrides headers for this rule. Empty fields leave the default
// behaviour untouched.
Headers struct {
// UserAgent replaces the default User-Agent request header.
UserAgent string `yaml:"user-agent,omitempty"`
// XForwardedFor replaces the default X-Forwarded-For request header;
// the value "none" omits the header entirely.
XForwardedFor string `yaml:"x-forwarded-for,omitempty"`
// Referer replaces the default Referer request header; the value "none"
// omits the header entirely.
Referer string `yaml:"referer,omitempty"`
// Cookie is sent as the Cookie request header when non-empty.
Cookie string `yaml:"cookie,omitempty"`
// CSP replaces the Content-Security-Policy header of the response.
CSP string `yaml:"content-security-policy,omitempty"`
} `yaml:"headers,omitempty"`
// GoogleCache fetches the page through the Google web cache instead of
// requesting the site directly.
GoogleCache bool `yaml:"googleCache,omitempty"`
// RegexRules are regular-expression replacements applied to the body.
RegexRules []Regex `yaml:"regexRules"`
// Injections describe content to insert into the page.
// NOTE(review): the README describes Position as where the code is
// injected (e.g. "head") and append/prepend/replace as the possible
// actions; the code consuming these fields is not visible here - confirm.
Injections []struct {
Position string `yaml:"position"`
Append string `yaml:"append"`
Prepend string `yaml:"prepend"`
Replace string `yaml:"replace"`
} `yaml:"injections"`
}

View File

@@ -1,6 +1,6 @@
apiVersion: v2
name: ladder
description: A helm chart to deploy kubero-dev/ladder
description: A helm chart to deploy everywall/ladder
type: application
version: "1.0"
appVersion: "v0.0.11"

View File

@@ -1,5 +1,5 @@
image:
RELEASE: ghcr.io/kubero-dev/ladder:v0.0.11
RELEASE: ghcr.io/everywall/ladder:latest
env:
PORT: 8080
@@ -10,7 +10,7 @@ env:
LOG_URLS: "true"
DISABLE_FORM: "false"
FORM_PATH: ""
RULESET: "https://raw.githubusercontent.com/kubero-dev/ladder/main/ruleset.yaml"
RULESET: "https://raw.githubusercontent.com/everywall/ladder/main/ruleset.yaml"
EXPOSE_RULESET: "true"
ALLOWED_DOMAINS: ""
ALLOWED_DOMAINS_RULESET: "false"

View File

@@ -1,4 +1,12 @@
- domain: www.example.com
- domain: example.com
domains:
- www.beispiel.de
googleCache: true
headers:
x-forwarded-for: none
referer: none
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
cookie: privacy=1
regexRules:
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
replace: <script $1 script="/https://www.example.com/$3"
@@ -53,3 +61,105 @@
removeDOMElement(paywall)
});
</script>
- domains:
- www.architecturaldigest.com
- www.bonappetit.com
- www.cntraveler.com
- www.epicurious.com
- www.gq.com
- www.newyorker.com
- www.vanityfair.com
- www.vogue.com
- www.wired.com
injections:
- position: head
append: |
<script>
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll('.paywall-bar, div[class^="MessageBannerWrapper-"]');
banners.forEach(el => { el.remove(); });
});
</script>
- domains:
- www.nytimes.com
- www.time.com
headers:
user-agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
cookie: nyt-a=; nyt-gdpr=0; nyt-geo=DE; nyt-privacy=1
referer: https://www.google.com/
injections:
- position: head
append: |
<script>
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll('div[data-testid="inline-message"], div[id^="ad-"], div[id^="leaderboard-"], div.expanded-dock, div.pz-ad-box, div[id="top-wrapper"], div[id="bottom-wrapper"]');
banners.forEach(el => { el.remove(); });
});
</script>
- domains:
- www.thestar.com
- www.niagarafallsreview.ca
- www.stcatharinesstandard.ca
- www.thepeterboroughexaminer.com
- www.therecord.com
- www.thespec.com
- www.wellandtribune.ca
injections:
- position: head
append: |
<script>
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll('div.subscriber-offers');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll('div.subscriber-only');
for (const elem of subscriber_only) {
if (elem.classList.contains('encrypted-content') && dompurify_loaded) {
const parser = new DOMParser();
const doc = parser.parseFromString('<div>' + DOMPurify.sanitize(unscramble(elem.innerText)) + '</div>', 'text/html');
const content_new = doc.querySelector('div');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute('style');
elem.removeAttribute('class');
}
const banners = document.querySelectorAll('div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll('div.tnt-ads-container, div[class*="adLabelWrapper"]');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll('div[id^="tncms-region-article"]');
recommendations.forEach(el => { el.remove(); });
});
</script>
- domain: www.usatoday.com
injections:
- position: head
append: |
<script>
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll('div.roadblock-container, .gnt_nb, [aria-label="advertisement"], div[id="main-frame-error"]');
banners.forEach(el => { el.remove(); });
});
</script>
- domain: www.washingtonpost.com
injections:
- position: head
append: |
<script>
document.addEventListener("DOMContentLoaded", () => {
let paywall = document.querySelectorAll('div[data-qa$="-ad"], div[id="leaderboard-wrapper"], div[data-qa="subscribe-promo"]');
paywall.forEach(el => { el.remove(); });
const images = document.querySelectorAll('img');
images.forEach(image => { image.parentElement.style.filter = ''; });
const headimage = document.querySelectorAll('div .aspect-custom');
headimage.forEach(image => { image.style.filter = ''; });
});
</script>
- domain: medium.com
headers:
referer: https://t.co/x?amp=1
x-forwarded-for: none
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
content-security-policy: script-src 'self';
cookie: