diff --git a/README.md b/README.md
index ca348f5..5e13b87 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ Freedom of information is an essential pillar of democracy and informed decision
### Features
- [x] Bypass Paywalls
- [x] Remove CORS headers from responses, assets, and images ...
-- [x] Apply domain based ruleset/code to modify response
+- [x] Apply domain-based ruleset/code to modify the response or the requested URL
- [x] Keep site browsable
- [x] API
- [x] Fetch RAW HTML
@@ -115,7 +115,7 @@ http://localhost:8080/ruleset
### Ruleset
-It is possible to apply custom rules to modify the response. This can be used to remove unwanted or modify elements from the page. The ruleset is a YAML file that contains a list of rules for each domain and is loaded on startup
+It is possible to apply custom rules to modify the response or the requested URL. This can be used to remove or modify unwanted elements on the page. The ruleset is a YAML file that contains a list of rules for each domain and is loaded on startup.
See in [ruleset.yaml](ruleset.yaml) for an example.
@@ -155,4 +155,18 @@ See in [ruleset.yaml](ruleset.yaml) for an example.
- position: .left-content article # Position where to inject the code into DOM
prepend: |
Suptitle
+- domain: tagesspiegel.de
+ headers:
+ content-security-policy: script-src 'self';
+ user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
+ urlMods: # Modify the URL
+ query:
+ - key: amp # (this will append ?amp=1 to the URL)
+ value: 1
+ domain:
+ - match: www # regex to match part of domain
+ replace: amp # (this would modify the domain from www.tagesspiegel.de to amp.tagesspiegel.de)
+ path:
+ - match: ^ # regex to match part of path
+ replace: /amp/ # (modify the url from https://www.tagesspiegel.de/internationales/ to https://www.tagesspiegel.de/amp/internationales/)
```
diff --git a/handlers/proxy.go b/handlers/proxy.go
index 2f89037..e9c3859 100644
--- a/handlers/proxy.go
+++ b/handlers/proxy.go
@@ -40,6 +40,41 @@ func ProxySite(c *fiber.Ctx) error {
return c.SendString(body)
}
+// modifyURL applies the URL modifications configured in rule to uri and
+// returns the rewritten URL as a string.
+//
+// The steps run in a fixed order:
+//  1. every rule.UrlMods.Domain regex is applied to the host,
+//  2. every rule.UrlMods.Path regex is applied to the path,
+//  3. every rule.UrlMods.Query entry sets its key to the given value, or
+//     deletes the key when the value is empty,
+//  4. if rule.GoogleCache is set, the result is wrapped in a Google web cache
+//     lookup URL.
+//
+// An error is returned when uri cannot be parsed or when a match pattern in
+// the rule is not a valid regular expression. The patterns come from the
+// ruleset file, so they are compiled with regexp.Compile: a malformed pattern
+// is reported to the caller instead of panicking while a request is served.
+func modifyURL(uri string, rule Rule) (string, error) {
+	newURL, err := url.Parse(uri)
+	if err != nil {
+		return "", err
+	}
+
+	for _, urlMod := range rule.UrlMods.Domain {
+		re, err := regexp.Compile(urlMod.Match)
+		if err != nil {
+			return "", err
+		}
+		newURL.Host = re.ReplaceAllString(newURL.Host, urlMod.Replace)
+	}
+
+	for _, urlMod := range rule.UrlMods.Path {
+		re, err := regexp.Compile(urlMod.Match)
+		if err != nil {
+			return "", err
+		}
+		newURL.Path = re.ReplaceAllString(newURL.Path, urlMod.Replace)
+	}
+
+	// Rebuild the query string from the parsed values so that added and
+	// removed keys are encoded consistently.
+	v := newURL.Query()
+	for _, query := range rule.UrlMods.Query {
+		if query.Value == "" {
+			// An empty value means "remove this parameter".
+			v.Del(query.Key)
+			continue
+		}
+		v.Set(query.Key, query.Value)
+	}
+	newURL.RawQuery = v.Encode()
+
+	if rule.GoogleCache {
+		// Wrap the already-modified URL; re-parsing validates the result.
+		newURL, err = url.Parse("https://webcache.googleusercontent.com/search?q=cache:" + newURL.String())
+		if err != nil {
+			return "", err
+		}
+	}
+	return newURL.String(), nil
+}
+
func fetchSite(urlpath string, queries map[string]string) (string, *http.Request, *http.Response, error) {
urlQuery := "?"
if len(queries) > 0 {
@@ -63,18 +98,17 @@ func fetchSite(urlpath string, queries map[string]string) (string, *http.Request
log.Println(u.String() + urlQuery)
}
+ // Modify the URI according to ruleset
rule := fetchRule(u.Host, u.Path)
-
- if rule.GoogleCache {
- u, err = url.Parse("https://webcache.googleusercontent.com/search?q=cache:" + u.String())
- if err != nil {
- return "", nil, nil, err
- }
+ url, err := modifyURL(u.String()+urlQuery, rule)
+ if err != nil {
+ return "", nil, nil, err
}
+ log.Println("fetch URI: %", url)
// Fetch the site
client := &http.Client{}
- req, _ := http.NewRequest("GET", u.String()+urlQuery, nil)
+ req, _ := http.NewRequest("GET", url, nil)
if rule.Headers.UserAgent != "" {
req.Header.Set("User-Agent", rule.Headers.UserAgent)
diff --git a/handlers/types.go b/handlers/types.go
index 16e4c49..6c02086 100644
--- a/handlers/types.go
+++ b/handlers/types.go
@@ -4,6 +4,10 @@ type Regex struct {
Match string `yaml:"match"`
Replace string `yaml:"replace"`
}
+// KV is a key/value pair read from the ruleset YAML ("key" and "value").
+// modifyURL uses it for query modifications: a non-empty Value sets the query
+// parameter named Key, an empty Value removes that parameter.
+type KV struct {
+ Key string `yaml:"key"`
+ Value string `yaml:"value"`
+}
type RuleSet []Rule
@@ -20,7 +24,14 @@ type Rule struct {
} `yaml:"headers,omitempty"`
GoogleCache bool `yaml:"googleCache,omitempty"`
RegexRules []Regex `yaml:"regexRules"`
- Injections []struct {
+
+ UrlMods struct {
+ Domain []Regex `yaml:"domain"`
+ Path []Regex `yaml:"path"`
+ Query []KV `yaml:"query"`
+ } `yaml:"urlMods"`
+
+ Injections []struct {
Position string `yaml:"position"`
Append string `yaml:"append"`
Prepend string `yaml:"prepend"`
diff --git a/ruleset.yaml b/ruleset.yaml
index d1c1e9f..834b606 100644
--- a/ruleset.yaml
+++ b/ruleset.yaml
@@ -162,4 +162,12 @@
x-forwarded-for: none
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
content-security-policy: script-src 'self';
- cookie:
\ No newline at end of file
+ cookie:
+- domain: tagesspiegel.de
+ headers:
+ content-security-policy: script-src 'self';
+ user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
+ urlMods:
+ query:
+ - key: amp
+ value: 1