8 Commits

Author SHA1 Message Date
Gianni C
ac44f12d85 Merge pull request #21 from joncrangle/csp-override
Allow the user to specify the Content Security Policy for a domain
2023-11-10 09:04:54 +01:00
joncrangle
b6f0c644f8 Undo prettier 2023-11-09 22:05:44 -05:00
joncrangle
66c4b3c911 Undo prettier 2023-11-09 22:03:37 -05:00
joncrangle
924696c015 Enable user to define their own content-security-policy 2023-11-09 21:50:46 -05:00
Gianni Carafa
81aa00c2ea include all subdomains 2023-11-09 23:51:36 +01:00
Gianni Carafa
6c1f58e2e7 Add headers field in ruleset. Enable Google Cache. 2023-11-09 23:32:43 +01:00
Gianni Carafa
d3c995df34 unblur main image 2023-11-09 12:09:09 +01:00
Gianni Carafa
6f4a2daeca fix LOG_URLS feature 2023-11-09 09:37:46 +01:00
6 changed files with 153 additions and 67 deletions

View File

@@ -120,10 +120,16 @@ It is possible to apply custom rules to modify the response. This can be used to
See in [ruleset.yaml](ruleset.yaml) for an example. See in [ruleset.yaml](ruleset.yaml) for an example.
```yaml ```yaml
- domain: www.example.com - domain: example.com # Includes all subdomains
domains: # Additional domains to apply the rule domains: # Additional domains to apply the rule
- www.example.com - www.example.de
- www.beispiel.de - www.beispiel.de
headers:
x-forwarded-for: none # override X-Forwarded-For header or delete with none
referer: none # override Referer header or delete with none
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
content-security-policy: script-src 'self'; # override response header
cookie: privacy=1
regexRules: regexRules:
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)" - match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
replace: <script $1 script="/https://www.example.com/$3" replace: <script $1 script="/https://www.example.com/$3"
@@ -138,7 +144,7 @@ See in [ruleset.yaml](ruleset.yaml) for an example.
- domain: www.anotherdomain.com # Domain where the rule applies - domain: www.anotherdomain.com # Domain where the rule applies
paths: # Paths where the rule applies paths: # Paths where the rule applies
- /article - /article
googleCache: false # Search also in Google Cache googleCache: false # Use Google Cache to fetch the content
regexRules: # Regex rules to apply regexRules: # Regex rules to apply
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)" - match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
replace: <script $1 script="/https://www.example.com/$3" replace: <script $1 script="/https://www.example.com/$3"

View File

@@ -47,6 +47,7 @@ func main() {
app := fiber.New( app := fiber.New(
fiber.Config{ fiber.Config{
Prefork: *prefork, Prefork: *prefork,
GETOnly: true,
}, },
) )

View File

@@ -33,6 +33,8 @@ func ProxySite(c *fiber.Ctx) error {
} }
c.Set("Content-Type", resp.Header.Get("Content-Type")) c.Set("Content-Type", resp.Header.Get("Content-Type"))
c.Set("Content-Security-Policy", resp.Header.Get("Content-Security-Policy"))
return c.SendString(body) return c.SendString(body)
} }
@@ -60,13 +62,45 @@ func fetchSite(urlpath string, queries map[string]string) (string, *http.Request
log.Println(u.String() + urlQuery) log.Println(u.String() + urlQuery)
} }
rule := fetchRule(u.Host, u.Path)
if rule.GoogleCache {
u, err = url.Parse("https://webcache.googleusercontent.com/search?q=cache:" + u.String())
if err != nil {
return "", nil, nil, err
}
}
// Fetch the site // Fetch the site
client := &http.Client{} client := &http.Client{}
req, _ := http.NewRequest("GET", u.String()+urlQuery, nil) req, _ := http.NewRequest("GET", u.String()+urlQuery, nil)
if rule.Headers.UserAgent != "" {
req.Header.Set("User-Agent", rule.Headers.UserAgent)
} else {
req.Header.Set("User-Agent", UserAgent) req.Header.Set("User-Agent", UserAgent)
}
if rule.Headers.XForwardedFor != "" {
if rule.Headers.XForwardedFor != "none" {
req.Header.Set("X-Forwarded-For", rule.Headers.XForwardedFor)
}
} else {
req.Header.Set("X-Forwarded-For", ForwardedFor) req.Header.Set("X-Forwarded-For", ForwardedFor)
}
if rule.Headers.Referer != "" {
if rule.Headers.Referer != "none" {
req.Header.Set("Referer", rule.Headers.Referer)
}
} else {
req.Header.Set("Referer", u.String()) req.Header.Set("Referer", u.String())
req.Header.Set("Host", u.Host) }
if rule.Headers.Cookie != "" {
req.Header.Set("Cookie", rule.Headers.Cookie)
}
resp, err := client.Do(req) resp, err := client.Do(req)
if err != nil { if err != nil {
@@ -79,11 +113,16 @@ func fetchSite(urlpath string, queries map[string]string) (string, *http.Request
return "", nil, nil, err return "", nil, nil, err
} }
body := rewriteHtml(bodyB, u) if rule.Headers.CSP != "" {
resp.Header.Set("Content-Security-Policy", rule.Headers.CSP)
}
log.Print("rule", rule)
body := rewriteHtml(bodyB, u, rule)
return body, req, resp, nil return body, req, resp, nil
} }
func rewriteHtml(bodyB []byte, u *url.URL) string { func rewriteHtml(bodyB []byte, u *url.URL, rule Rule) string {
// Rewrite the HTML // Rewrite the HTML
body := string(bodyB) body := string(bodyB)
@@ -104,7 +143,7 @@ func rewriteHtml(bodyB []byte, u *url.URL) string {
body = strings.ReplaceAll(body, "href=\"https://"+u.Host, "href=\"/https://"+u.Host+"/") body = strings.ReplaceAll(body, "href=\"https://"+u.Host, "href=\"/https://"+u.Host+"/")
if os.Getenv("RULESET") != "" { if os.Getenv("RULESET") != "" {
body = applyRules(u.Host, u.Path, body) body = applyRules(body, rule)
} }
return body return body
} }
@@ -169,21 +208,34 @@ func loadRules() RuleSet {
return ruleSet return ruleSet
} }
func applyRules(domain string, path string, body string) string { func fetchRule(domain string, path string) Rule {
if len(rulesSet) == 0 {
return Rule{}
}
rule := Rule{}
for _, rule := range rulesSet {
domains := rule.Domains
if rule.Domain != "" {
domains = append(domains, rule.Domain)
}
for _, ruleDomain := range domains {
if ruleDomain == domain || strings.HasSuffix(domain, ruleDomain) {
if len(rule.Paths) > 0 && !StringInSlice(path, rule.Paths) {
continue
}
// return first match
return rule
}
}
}
return rule
}
func applyRules(body string, rule Rule) string {
if len(rulesSet) == 0 { if len(rulesSet) == 0 {
return body return body
} }
for _, rule := range rulesSet {
domains := rule.Domains
domains = append(domains, rule.Domain)
for _, ruleDomain := range domains {
if ruleDomain != domain {
continue
}
if len(rule.Paths) > 0 && !StringInSlice(path, rule.Paths) {
continue
}
for _, regexRule := range rule.RegexRules { for _, regexRule := range rule.RegexRules {
re := regexp.MustCompile(regexRule.Match) re := regexp.MustCompile(regexRule.Match)
body = re.ReplaceAllString(body, regexRule.Replace) body = re.ReplaceAllString(body, regexRule.Replace)
@@ -207,31 +259,10 @@ func applyRules(domain string, path string, body string) string {
log.Fatal(err) log.Fatal(err)
} }
} }
}
}
return body return body
} }
type Rule struct {
Match string `yaml:"match"`
Replace string `yaml:"replace"`
}
type RuleSet []struct {
Domain string `yaml:"domain"`
Domains []string `yaml:"domains,omitempty"`
Paths []string `yaml:"paths,omitempty"`
GoogleCache bool `yaml:"googleCache,omitempty"`
RegexRules []Rule `yaml:"regexRules"`
Injections []struct {
Position string `yaml:"position"`
Append string `yaml:"append"`
Prepend string `yaml:"prepend"`
Replace string `yaml:"replace"`
} `yaml:"injections"`
}
func StringInSlice(s string, list []string) bool { func StringInSlice(s string, list []string) bool {
for _, x := range list { for _, x := range list {
if strings.HasPrefix(s, x) { if strings.HasPrefix(s, x) {

View File

@@ -51,7 +51,7 @@ func TestRewriteHtml(t *testing.T) {
</html> </html>
` `
actual := rewriteHtml(bodyB, u) actual := rewriteHtml(bodyB, u, Rule{})
assert.Equal(t, expected, actual) assert.Equal(t, expected, actual)
} }

29
handlers/types.go Normal file
View File

@@ -0,0 +1,29 @@
package handlers
// Regex is one regular-expression substitution that a rule applies to the
// fetched page body.
type Regex struct {
// Match is the pattern; applyRules compiles it with regexp.MustCompile,
// so an invalid pattern panics at request time.
Match string `yaml:"match"`
// Replace is the replacement text handed to ReplaceAllString, so $1-style
// capture-group references are expanded.
Replace string `yaml:"replace"`
}
// RuleSet is the list of rules returned by loadRules. Order matters:
// fetchRule returns the first rule whose domain and path match.
type RuleSet []Rule
// Rule describes how requests to one or more domains are fetched and how the
// response is modified before it is returned to the client.
type Rule struct {
// Domain is the primary domain of the rule. fetchRule matches it against
// the request host by equality or suffix, so subdomains are included.
Domain string `yaml:"domain,omitempty"`
// Domains lists additional domains the rule applies to, matched the same
// way as Domain.
Domains []string `yaml:"domains,omitempty"`
// Paths optionally restricts the rule to request paths that start with
// one of these prefixes; an empty list matches every path.
Paths []string `yaml:"paths,omitempty"`
// Headers overrides headers on the upstream request (and the CSP header on
// the response). An empty value keeps the default behaviour.
Headers struct {
// UserAgent replaces the default User-Agent request header.
UserAgent string `yaml:"user-agent,omitempty"`
// XForwardedFor replaces the default X-Forwarded-For request header;
// the literal value "none" omits the header entirely.
XForwardedFor string `yaml:"x-forwarded-for,omitempty"`
// Referer replaces the default Referer request header; the literal
// value "none" omits the header entirely.
Referer string `yaml:"referer,omitempty"`
// Cookie, when non-empty, is sent as the Cookie request header.
Cookie string `yaml:"cookie,omitempty"`
// CSP, when non-empty, replaces the Content-Security-Policy header of
// the upstream response before it is forwarded.
CSP string `yaml:"content-security-policy,omitempty"`
} `yaml:"headers,omitempty"`
// GoogleCache makes fetchSite request the page through
// webcache.googleusercontent.com instead of the origin.
GoogleCache bool `yaml:"googleCache,omitempty"`
// RegexRules are substitutions applied to the body by applyRules.
RegexRules []Regex `yaml:"regexRules"`
// Injections describe content to add to the document.
// NOTE(review): the code that consumes these fields is not shown here;
// presumably Position selects the target element and exactly one of
// Append/Prepend/Replace supplies the markup — confirm against applyRules.
Injections []struct {
Position string `yaml:"position"`
Append string `yaml:"append"`
Prepend string `yaml:"prepend"`
Replace string `yaml:"replace"`
} `yaml:"injections"`
}

View File

@@ -1,6 +1,12 @@
- domain: www.example.com - domain: example.com
domains: domains:
- www.beispiel.com - www.beispiel.de
googleCache: true
headers:
x-forwarded-for: none
referer: none
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
cookie: privacy=1
regexRules: regexRules:
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)" - match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
replace: <script $1 script="/https://www.example.com/$3" replace: <script $1 script="/https://www.example.com/$3"
@@ -77,6 +83,10 @@
- domains: - domains:
- www.nytimes.com - www.nytimes.com
- www.time.com - www.time.com
headers:
user-agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
cookie: nyt-a=; nyt-gdpr=0; nyt-geo=DE; nyt-privacy=1
referer: https://www.google.com/
injections: injections:
- position: head - position: head
append: | append: |
@@ -142,5 +152,14 @@
paywall.forEach(el => { el.remove(); }); paywall.forEach(el => { el.remove(); });
const images = document.querySelectorAll('img'); const images = document.querySelectorAll('img');
images.forEach(image => { image.parentElement.style.filter = ''; }); images.forEach(image => { image.parentElement.style.filter = ''; });
const headimage = document.querySelectorAll('div .aspect-custom');
headimage.forEach(image => { image.style.filter = ''; });
}); });
</script> </script>
- domain: medium.com
headers:
referer: https://t.co/x?amp=1
x-forwarded-for: none
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
content-security-policy: script-src 'self';
cookie: