9 Commits

Author SHA1 Message Date
Gianni Carafa
f728b2c1de fix default port, set to 8080 2023-11-09 09:00:08 +01:00
Gianni C
bc346a3954 Merge pull request #18 from kubero-dev/feature/multiplle-domains
Feature/multiple domains
2023-11-09 08:41:42 +01:00
Gianni Carafa
5442da81b9 allow domain list in rules 2023-11-08 23:36:06 +01:00
Gianni C
73b13914fe Merge pull request #13 from joncrangle/main
Enable multiple domains to apply the same rule and add rules to ruleset
2023-11-08 23:10:15 +01:00
Gianni Carafa
b127f81a9b Add a comment to disable ALLOWED_DOMAINS_RULESET in docker-compose.yaml. 2023-11-08 23:08:48 +01:00
Gianni Carafa
79438a0b59 simplify docker compose 2023-11-08 23:06:45 +01:00
joncrangle
1aa917e0c1 unblur images on washingtonpost 2023-11-08 00:14:10 -05:00
joncrangle
84617b32e3 add conde nytimes torstar usatoday washingtonpost 2023-11-07 22:44:28 -05:00
joncrangle
501dfb106a feat: enable multiple domains to apply same rule 2023-11-07 21:15:24 -05:00
5 changed files with 149 additions and 46 deletions

View File

@@ -121,12 +121,15 @@ See in [ruleset.yaml](ruleset.yaml) for an example.
```yaml ```yaml
- domain: www.example.com - domain: www.example.com
domains: # Additional domains to apply the rule
- www.example.com
- www.beispiel.de
regexRules: regexRules:
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)" - match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
replace: <script $1 script="/https://www.example.com/$3" replace: <script $1 script="/https://www.example.com/$3"
injections: injections:
- position: head # Position where to inject the code - position: head # Position where to inject the code
append: | append: | # possible keys: append, prepend, replace
<script> <script>
window.localStorage.clear(); window.localStorage.clear();
console.log("test"); console.log("test");

View File

@@ -22,9 +22,13 @@ func main() {
parser := argparse.NewParser("ladder", "Every Wall needs a Ladder") parser := argparse.NewParser("ladder", "Every Wall needs a Ladder")
portEnv := os.Getenv("PORT")
if os.Getenv("PORT") == "" {
portEnv = "8080"
}
port := parser.String("p", "port", &argparse.Options{ port := parser.String("p", "port", &argparse.Options{
Required: false, Required: false,
Default: os.Getenv("PORT"), Default: portEnv,
Help: "Port the webserver will listen on"}) Help: "Port the webserver will listen on"})
prefork := parser.Flag("P", "prefork", &argparse.Options{ prefork := parser.Flag("P", "prefork", &argparse.Options{

View File

@@ -3,13 +3,17 @@ services:
ladder: ladder:
image: ghcr.io/kubero-dev/ladder:latest image: ghcr.io/kubero-dev/ladder:latest
container_name: ladder container_name: ladder
build: . #build: .
#restart: always #restart: always
#command: sh -c ./ladder #command: sh -c ./ladder
environment: environment:
- PORT=8080 - PORT=8080
- RULESET=/app/ruleset.yaml - RULESET=/app/ruleset.yaml
#- PREFORK=true #- ALLOWED_DOMAINS_RULESET=false
#- EXPOSE_RULESET=true
#- PREFORK=false
#- DISABLE_FORM=false
#- FORM_PATH=/app/form.html
#- X_FORWARDED_FOR=66.249.66.1 #- X_FORWARDED_FOR=66.249.66.1
#- USER_AGENT=Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) #- USER_AGENT=Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
#- USERPASS=foo:bar #- USERPASS=foo:bar
@@ -19,11 +23,4 @@ services:
- "8080:8080" - "8080:8080"
volumes: volumes:
- ./ruleset.yaml:/app/ruleset.yaml - ./ruleset.yaml:/app/ruleset.yaml
deploy: - ./handlers/form.html:/app/form.html
resources:
limits:
cpus: "0.50"
memory: 512M
reservations:
cpus: "0.25"
memory: 128M

View File

@@ -155,14 +155,17 @@ func loadRules() RuleSet {
yaml.Unmarshal(yamlFile, &ruleSet) yaml.Unmarshal(yamlFile, &ruleSet)
} }
domains := []string{}
for _, rule := range ruleSet { for _, rule := range ruleSet {
//log.Println("Loaded rules for", rule.Domain)
domains = append(domains, rule.Domain)
domains = append(domains, rule.Domains...)
if os.Getenv("ALLOWED_DOMAINS_RULESET") == "true" { if os.Getenv("ALLOWED_DOMAINS_RULESET") == "true" {
allowedDomains = append(allowedDomains, rule.Domain) allowedDomains = append(allowedDomains, domains...)
} }
} }
log.Println("Loaded rules for", len(ruleSet), "Domains") log.Println("Loaded ", len(ruleSet), " rules for", len(domains), "Domains")
return ruleSet return ruleSet
} }
@@ -172,33 +175,37 @@ func applyRules(domain string, path string, body string) string {
} }
for _, rule := range rulesSet { for _, rule := range rulesSet {
if rule.Domain != domain { domains := rule.Domains
continue domains = append(domains, rule.Domain)
} for _, ruleDomain := range domains {
if len(rule.Paths) > 0 && !StringInSlice(path, rule.Paths) { if ruleDomain != domain {
continue continue
}
for _, regexRule := range rule.RegexRules {
re := regexp.MustCompile(regexRule.Match)
body = re.ReplaceAllString(body, regexRule.Replace)
}
for _, injection := range rule.Injections {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
if err != nil {
log.Fatal(err)
} }
if injection.Replace != "" { if len(rule.Paths) > 0 && !StringInSlice(path, rule.Paths) {
doc.Find(injection.Position).ReplaceWithHtml(injection.Replace) continue
} }
if injection.Append != "" { for _, regexRule := range rule.RegexRules {
doc.Find(injection.Position).AppendHtml(injection.Append) re := regexp.MustCompile(regexRule.Match)
body = re.ReplaceAllString(body, regexRule.Replace)
} }
if injection.Prepend != "" { for _, injection := range rule.Injections {
doc.Find(injection.Position).PrependHtml(injection.Prepend) doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
} if err != nil {
body, err = doc.Html() log.Fatal(err)
if err != nil { }
log.Fatal(err) if injection.Replace != "" {
doc.Find(injection.Position).ReplaceWithHtml(injection.Replace)
}
if injection.Append != "" {
doc.Find(injection.Position).AppendHtml(injection.Append)
}
if injection.Prepend != "" {
doc.Find(injection.Position).PrependHtml(injection.Prepend)
}
body, err = doc.Html()
if err != nil {
log.Fatal(err)
}
} }
} }
} }
@@ -213,6 +220,7 @@ type Rule struct {
type RuleSet []struct { type RuleSet []struct {
Domain string `yaml:"domain"` Domain string `yaml:"domain"`
Domains []string `yaml:"domains,omitempty"`
Paths []string `yaml:"paths,omitempty"` Paths []string `yaml:"paths,omitempty"`
GoogleCache bool `yaml:"googleCache,omitempty"` GoogleCache bool `yaml:"googleCache,omitempty"`
RegexRules []Rule `yaml:"regexRules"` RegexRules []Rule `yaml:"regexRules"`

View File

@@ -1,4 +1,6 @@
- domain: www.example.com - domain: www.example.com
domains:
- www.beispiel.com
regexRules: regexRules:
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)" - match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
replace: <script $1 script="/https://www.example.com/$3" replace: <script $1 script="/https://www.example.com/$3"
@@ -53,3 +55,92 @@
removeDOMElement(paywall) removeDOMElement(paywall)
}); });
</script> </script>
- domains:
- www.architecturaldigest.com
- www.bonappetit.com
- www.cntraveler.com
- www.epicurious.com
- www.gq.com
- www.newyorker.com
- www.vanityfair.com
- www.vogue.com
- www.wired.com
injections:
- position: head
append: |
<script>
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll('.paywall-bar, div[class^="MessageBannerWrapper-"]');
banners.forEach(el => { el.remove(); });
});
</script>
- domains:
- www.nytimes.com
- www.time.com
injections:
- position: head
append: |
<script>
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll('div[data-testid="inline-message"], div[id^="ad-"], div[id^="leaderboard-"], div.expanded-dock, div.pz-ad-box, div[id="top-wrapper"], div[id="bottom-wrapper"]');
banners.forEach(el => { el.remove(); });
});
</script>
- domains:
- www.thestar.com
- www.niagarafallsreview.ca
- www.stcatharinesstandard.ca
- www.thepeterboroughexaminer.com
- www.therecord.com
- www.thespec.com
- www.wellandtribune.ca
injections:
- position: head
append: |
<script>
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll('div.subscriber-offers');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll('div.subscriber-only');
for (const elem of subscriber_only) {
if (elem.classList.contains('encrypted-content') && dompurify_loaded) {
const parser = new DOMParser();
const doc = parser.parseFromString('<div>' + DOMPurify.sanitize(unscramble(elem.innerText)) + '</div>', 'text/html');
const content_new = doc.querySelector('div');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute('style');
elem.removeAttribute('class');
}
const banners = document.querySelectorAll('div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll('div.tnt-ads-container, div[class*="adLabelWrapper"]');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll('div[id^="tncms-region-article"]');
recommendations.forEach(el => { el.remove(); });
});
</script>
- domain: www.usatoday.com
injections:
- position: head
append: |
<script>
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll('div.roadblock-container, .gnt_nb, [aria-label="advertisement"], div[id="main-frame-error"]');
banners.forEach(el => { el.remove(); });
});
</script>
- domain: www.washingtonpost.com
injections:
- position: head
append: |
<script>
document.addEventListener("DOMContentLoaded", () => {
let paywall = document.querySelectorAll('div[data-qa$="-ad"], div[id="leaderboard-wrapper"], div[data-qa="subscribe-promo"]');
paywall.forEach(el => { el.remove(); });
const images = document.querySelectorAll('img');
images.forEach(image => { image.parentElement.style.filter = ''; });
});
</script>