begin migrating old v1 rulesets to v2 rulesets

This commit is contained in:
Kevin Pham
2023-12-09 16:35:54 -06:00
parent f165a406f1
commit 82b5a74d8c
5 changed files with 111 additions and 11 deletions

View File

@@ -12,7 +12,7 @@ tmp_dir = "tmp"
exclude_regex = ["_test.go"]
exclude_unchanged = false
follow_symlink = false
full_bin = "./tmp/main --ruleset ./ruleset_v2.yaml"
full_bin = "./tmp/main --ruleset ./rulesets_v2"
include_dir = []
include_ext = ["go", "tpl", "tmpl", "yaml", "html", "js"]
include_file = []

View File

@@ -35,18 +35,15 @@ func (rs *Ruleset) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
yamlRuleset := &AuxRuleset{}
if err := unmarshal(&yamlRuleset); err != nil {
// if there is no top-level rule key, we'll try to unmarshal as if it is just a bare rule
recovered := false
yamlRule := &Rule{}
if err := unmarshal(&yamlRule); err != nil {
yamlRuleset.Rules = append(yamlRuleset.Rules, *yamlRule)
recovered = true
}
if !recovered {
// First, try to unmarshal as AuxRuleset
err := unmarshal(yamlRuleset)
if err != nil {
// If that fails, try to unmarshal directly into a slice of Rules
var directRules []Rule
if err := unmarshal(&directRules); err != nil {
return err
}
yamlRuleset.Rules = directRules
}
rs._rulemap = make(map[string]*Rule)
@@ -224,6 +221,7 @@ func (rs *Ruleset) loadRulesFromLocalDir(path string) error {
if !isYAML {
return nil
}
fmt.Printf("loadRulesFromLocalDir :: loading rule: %s\n", path)
tmpRs := Ruleset{_rulemap: make(map[string]*Rule)}
err = tmpRs.loadRulesFromLocalFile(path)
@@ -268,6 +266,7 @@ func (rs *Ruleset) loadRulesFromLocalFile(path string) error {
e := fmt.Errorf("failed to read rules from local file: '%s'", path)
return errors.Join(e, err)
}
fmt.Printf("loadRulesFromLocalFile :: %s\n", path)
isJSON := strings.HasSuffix(path, ".json")
if isJSON {

View File

@@ -0,0 +1,23 @@
# Ruleset entry for demorgen.be.
# Request modifiers applied: MasqueradeAsGoogleBot, SpoofReferrer (https://news.google.com)
# and two SetOutgoingCookie entries (params "isBot"/"true" and "authId"/"1").
# Response modifiers applied: BypassContentSecurityPolicy, then a script injected after
# DOMContentLoaded that removes every element matching the advertising-script /
# ARTICLE_BOTTOM selectors and finally the first advert-placeholder div.
# NOTE(review): the last script statement calls .remove() directly on the result of
# document.querySelector(); when no element matches, querySelector returns null and the
# call throws a TypeError. It is the final statement, so nothing after it is skipped.
- domains:
- demorgen.be
requestmodifiers:
- name: MasqueradeAsGoogleBot
- name: SpoofReferrer
params: ["https://news.google.com"]
- name: SetOutgoingCookie
params: ["isBot", "true"]
- name: SetOutgoingCookie
params: ["authId", "1"]
responsemodifiers:
- name: BypassContentSecurityPolicy
- name: InjectScriptAfterDOMContentLoaded
params:
- |
let paywall = document.querySelectorAll('script[src*="advertising-cdn.dpgmedia.cloud"], div[data-temptation-position="ARTICLE_BOTTOM"]');
paywall.forEach(el => { el.remove(); });
document.querySelector('div[data-advert-placeholder-collapses]').remove();

View File

@@ -0,0 +1,44 @@
# Two ruleset entries, written as a bare top-level list (no enclosing key).
#
# Entry 1: myprivacy.dpgmedia.be and myprivacy.dpgmedia.nl.
# Request modifiers only: SpoofReferrer (https://news.google.com), two SetOutgoingCookie
# entries (params "isBot"/"true" and "authId"/"1") and SpoofXForwardedFor with param "none".
- domains:
- myprivacy.dpgmedia.be
- myprivacy.dpgmedia.nl
requestmodifiers:
- name: SpoofReferrer
params: ["https://news.google.com"]
- name: SetOutgoingCookie
params: ["isBot", "true"]
- name: SetOutgoingCookie
params: ["authId", "1"]
- name: SpoofXForwardedFor
params: ["none"]
# Entry 2: demorgen.be.
# Same request modifiers as entry 1 with MasqueradeAsGoogleBot added in front; the response
# side applies BypassContentSecurityPolicy and injects a script after DOMContentLoaded that
# removes the matched advertising/paywall elements.
# NOTE(review): a separate ruleset file in the same change also defines a demorgen.be rule
# (without SpoofXForwardedFor) — confirm which definition is meant to take effect.
# NOTE(review): the last script statement calls .remove() on the result of
# document.querySelector() without a null check; it throws a TypeError when nothing matches.
- domains:
- demorgen.be
requestmodifiers:
- name: MasqueradeAsGoogleBot
- name: SpoofReferrer
params: ["https://news.google.com"]
- name: SetOutgoingCookie
params: ["isBot", "true"]
- name: SetOutgoingCookie
params: ["authId", "1"]
- name: SpoofXForwardedFor
params: ["none"]
responsemodifiers:
- name: BypassContentSecurityPolicy
- name: InjectScriptAfterDOMContentLoaded
params:
- |
let paywall = document.querySelectorAll('script[src*="advertising-cdn.dpgmedia.cloud"], div[data-temptation-position="ARTICLE_BOTTOM"]');
paywall.forEach(el => { el.remove(); });
document.querySelector('div[data-advert-placeholder-collapses]').remove();

View File

@@ -0,0 +1,34 @@
# Ruleset for thestar.com and the sibling news domains listed below.
# Response side only: DeleteLocalStorageData, DeleteSessionStorageData, then a script injected
# after DOMContentLoaded that removes subscriber-offer, subscription/overlay, ad and
# article-region elements and strips the style/class attributes from div.subscriber-only.
# NOTE(review): this file wraps its rules in a top-level `rules:` key and names the response
# section `responsemodifications:`, whereas the other ruleset files in the same change are
# bare lists using `requestmodifiers:` / `responsemodifiers:` — confirm which key names the
# loader actually accepts.
# NOTE(review): the injected script references `dompurify_loaded`, `DOMPurify` and
# `unscramble`, none of which it defines; presumably they are expected to exist on the page.
# If they do not, the encrypted-content branch throws a ReferenceError — verify.
rules:
- domains:
- thestar.com
- niagarafallsreview.ca
- stcatharinesstandard.ca
- thepeterboroughexaminer.com
- therecord.com
- thespec.com
- wellandtribune.ca
responsemodifications:
- name: DeleteLocalStorageData
- name: DeleteSessionStorageData
- name: InjectScriptAfterDOMContentLoaded
params:
- |
const paywall = document.querySelectorAll('div.subscriber-offers');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll('div.subscriber-only');
for (const elem of subscriber_only) {
if (elem.classList.contains('encrypted-content') && dompurify_loaded) {
const parser = new DOMParser();
const doc = parser.parseFromString('<div>' + DOMPurify.sanitize(unscramble(elem.innerText)) + '</div>', 'text/html');
const content_new = doc.querySelector('div');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute('style');
elem.removeAttribute('class');
}
const banners = document.querySelectorAll('div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll('div.tnt-ads-container, div[class*="adLabelWrapper"]');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll('div[id^="tncms-region-article"]');
recommendations.forEach(el => { el.remove(); });