Compare commits
19 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b07d49f230 | ||
|
|
af10efb7f2 | ||
|
|
6bff28e18d | ||
|
|
cdd429e4be | ||
|
|
11ee581fd4 | ||
|
|
4e44a24261 | ||
|
|
0ddb029aae | ||
|
|
a262afe035 | ||
|
|
fdca9d39d9 | ||
|
|
30a6ab501d | ||
|
|
7a51243ff4 | ||
|
|
c8b94dc702 | ||
|
|
fbc9567820 | ||
|
|
4d5c25c148 | ||
|
|
082868af2d | ||
|
|
a4abce78fb | ||
|
|
190de6d9c5 | ||
|
|
bdd19dcbb6 | ||
|
|
02e6b1c090 |
22
README.md
22
README.md
@@ -17,7 +17,7 @@ Freedom of information is an essential pillar of democracy and informed decision
|
||||
### Features
|
||||
- [x] Bypass Paywalls
|
||||
- [x] Remove CORS headers from responses, assets, and images ...
|
||||
- [x] Apply domain based ruleset/code to modify response
|
||||
- [x] Apply domain based ruleset/code to modify response / requested URL
|
||||
- [x] Keep site browsable
|
||||
- [x] API
|
||||
- [x] Fetch RAW HTML
|
||||
@@ -115,12 +115,12 @@ http://localhost:8080/ruleset
|
||||
|
||||
### Ruleset
|
||||
|
||||
It is possible to apply custom rules to modify the response. This can be used to remove unwanted or modify elements from the page. The ruleset is a YAML file that contains a list of rules for each domain and is loaded on startup
|
||||
It is possible to apply custom rules to modify the response or the requested URL. This can be used to remove unwanted or modify elements from the page. The ruleset is a YAML file that contains a list of rules for each domain and is loaded on startup
|
||||
|
||||
See in [ruleset.yaml](ruleset.yaml) for an example.
|
||||
|
||||
```yaml
|
||||
- domain: example.com # Inbcludes all subdomains
|
||||
- domain: example.com # Includes all subdomains
|
||||
domains: # Additional domains to apply the rule
|
||||
- www.example.de
|
||||
- www.beispiel.de
|
||||
@@ -154,5 +154,19 @@ See in [ruleset.yaml](ruleset.yaml) for an example.
|
||||
<h1>My Custom Title</h1>
|
||||
- position: .left-content article # Position where to inject the code into DOM
|
||||
prepend: |
|
||||
<h2>Suptitle</h2>
|
||||
<h2>Subtitle</h2>
|
||||
- domain: demo.com
|
||||
headers:
|
||||
content-security-policy: script-src 'self';
|
||||
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
|
||||
urlMods: # Modify the URL
|
||||
query:
|
||||
- key: amp # (this will append ?amp=1 to the URL)
|
||||
value: 1
|
||||
domain:
|
||||
- match: www # regex to match part of domain
|
||||
replace: amp # (this would modify the domain from www.demo.de to amp.demo.de)
|
||||
path:
|
||||
- match: ^ # regex to match part of path
|
||||
replace: /amp/ # (modify the url from https://www.demo.com/article/ to https://www.demo.de/amp/article/)
|
||||
```
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>ladder</title>
|
||||
<script src="https://cdn.tailwindcss.com"></script>
|
||||
</head>
|
||||
@@ -16,12 +17,15 @@
|
||||
<header>
|
||||
<h1 class="text-center text-3xl sm:text-4xl font-extrabold text-slate-900 tracking-tight dark:text-slate-200">ladddddddder</h1>
|
||||
</header>
|
||||
<form id="inputForm" method="get" class="mx-4">
|
||||
<form id="inputForm" method="get" class="mx-4 relative">
|
||||
<div>
|
||||
<input type="text" id="inputField" placeholder="Proxy Search" name="inputField" class="w-full text-sm leading-6 text-slate-400 rounded-md ring-1 ring-slate-900/10 shadow-sm py-1.5 pl-2 pr-3 hover:ring-slate-300 dark:bg-slate-800 dark:highlight-white/5 dark:hover:bg-slate-700" required autofocus>
|
||||
<button id="clearButton" type="button" aria-label="Clear Search" title="Clear Search" class="hidden absolute inset-y-0 right-0 flex items-center pr-2 hover:text-slate-400 hover:dark:text-slate-300" tabindex="-1">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round""><path d="M18 6 6 18"/><path d="m6 6 12 12"/></svg>
|
||||
</button>
|
||||
</div>
|
||||
</form>
|
||||
<footer class="mt-10 text-center text-slate-600 dark:text-slate-400">
|
||||
<footer class="mt-10 text-center text-slate-600 dark:text-slate-400 mx-4">
|
||||
<p>
|
||||
Code Licensed Under GPL v3.0 |
|
||||
<a href="https://github.com/everywall/ladder" class="hover:text-blue-500 hover:underline underline-offset-2 transition-colors duration-300">View Source</a> |
|
||||
@@ -31,9 +35,6 @@
|
||||
</div>
|
||||
|
||||
<script>
|
||||
document.addEventListener('DOMContentLoaded', function () {
|
||||
M.AutoInit();
|
||||
});
|
||||
document.getElementById('inputForm').addEventListener('submit', function (e) {
|
||||
e.preventDefault();
|
||||
let url = document.getElementById('inputField').value;
|
||||
@@ -43,6 +44,19 @@
|
||||
window.location.href = '/' + url;
|
||||
return false;
|
||||
});
|
||||
document.getElementById('inputField').addEventListener('input', function() {
|
||||
const clearButton = document.getElementById('clearButton');
|
||||
if (this.value.trim().length > 0) {
|
||||
clearButton.style.display = 'block';
|
||||
} else {
|
||||
clearButton.style.display = 'none';
|
||||
}
|
||||
});
|
||||
document.getElementById('clearButton').addEventListener('click', function() {
|
||||
document.getElementById('inputField').value = '';
|
||||
this.style.display = 'none';
|
||||
document.getElementById('inputField').focus();
|
||||
});
|
||||
</script>
|
||||
|
||||
<style>
|
||||
|
||||
@@ -22,9 +22,65 @@ var (
|
||||
allowedDomains = strings.Split(os.Getenv("ALLOWED_DOMAINS"), ",")
|
||||
)
|
||||
|
||||
// extracts a URL from the request ctx. If the URL in the request
|
||||
// is a relative path, it reconstructs the full URL using the referer header.
|
||||
func extractUrl(c *fiber.Ctx) (string, error) {
|
||||
// try to extract url-encoded
|
||||
reqUrl, err := url.QueryUnescape(c.Params("*"))
|
||||
if err != nil {
|
||||
// fallback
|
||||
reqUrl = c.Params("*")
|
||||
}
|
||||
|
||||
// Extract the actual path from req ctx
|
||||
urlQuery, err := url.Parse(reqUrl)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error parsing request URL '%s': %v", reqUrl, err)
|
||||
}
|
||||
|
||||
isRelativePath := urlQuery.Scheme == ""
|
||||
|
||||
// eg: https://localhost:8080/images/foobar.jpg -> https://realsite.com/images/foobar.jpg
|
||||
if isRelativePath {
|
||||
// Parse the referer URL from the request header.
|
||||
refererUrl, err := url.Parse(c.Get("referer"))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error parsing referer URL from req: '%s': %v", reqUrl, err)
|
||||
}
|
||||
|
||||
// Extract the real url from referer path
|
||||
realUrl, err := url.Parse(strings.TrimPrefix(refererUrl.Path, "/"))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error parsing real URL from referer '%s': %v", refererUrl.Path, err)
|
||||
}
|
||||
|
||||
// reconstruct the full URL using the referer's scheme, host, and the relative path / queries
|
||||
fullUrl := &url.URL{
|
||||
Scheme: realUrl.Scheme,
|
||||
Host: realUrl.Host,
|
||||
Path: urlQuery.Path,
|
||||
RawQuery: urlQuery.RawQuery,
|
||||
}
|
||||
|
||||
if os.Getenv("LOG_URLS") == "true" {
|
||||
log.Printf("modified relative URL: '%s' -> '%s'", reqUrl, fullUrl.String())
|
||||
}
|
||||
return fullUrl.String(), nil
|
||||
|
||||
}
|
||||
|
||||
// default behavior:
|
||||
// eg: https://localhost:8080/https://realsite.com/images/foobar.jpg -> https://realsite.com/images/foobar.jpg
|
||||
return urlQuery.String(), nil
|
||||
|
||||
}
|
||||
|
||||
func ProxySite(c *fiber.Ctx) error {
|
||||
// Get the url from the URL
|
||||
url := c.Params("*")
|
||||
url, err := extractUrl(c)
|
||||
if err != nil {
|
||||
log.Println("ERROR In URL extraction:", err)
|
||||
}
|
||||
|
||||
queries := c.Queries()
|
||||
body, _, resp, err := fetchSite(url, queries)
|
||||
@@ -40,6 +96,42 @@ func ProxySite(c *fiber.Ctx) error {
|
||||
return c.SendString(body)
|
||||
}
|
||||
|
||||
func modifyURL(uri string, rule Rule) (string, error) {
|
||||
newUrl, err := url.Parse(uri)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
for _, urlMod := range rule.UrlMods.Domain {
|
||||
re := regexp.MustCompile(urlMod.Match)
|
||||
newUrl.Host = re.ReplaceAllString(newUrl.Host, urlMod.Replace)
|
||||
}
|
||||
|
||||
for _, urlMod := range rule.UrlMods.Path {
|
||||
re := regexp.MustCompile(urlMod.Match)
|
||||
newUrl.Path = re.ReplaceAllString(newUrl.Path, urlMod.Replace)
|
||||
}
|
||||
|
||||
v := newUrl.Query()
|
||||
for _, query := range rule.UrlMods.Query {
|
||||
if query.Value == "" {
|
||||
v.Del(query.Key)
|
||||
continue
|
||||
}
|
||||
v.Set(query.Key, query.Value)
|
||||
}
|
||||
newUrl.RawQuery = v.Encode()
|
||||
|
||||
if rule.GoogleCache {
|
||||
newUrl, err = url.Parse("https://webcache.googleusercontent.com/search?q=cache:" + newUrl.String())
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
return newUrl.String(), nil
|
||||
}
|
||||
|
||||
func fetchSite(urlpath string, queries map[string]string) (string, *http.Request, *http.Response, error) {
|
||||
urlQuery := "?"
|
||||
if len(queries) > 0 {
|
||||
@@ -63,18 +155,16 @@ func fetchSite(urlpath string, queries map[string]string) (string, *http.Request
|
||||
log.Println(u.String() + urlQuery)
|
||||
}
|
||||
|
||||
// Modify the URI according to ruleset
|
||||
rule := fetchRule(u.Host, u.Path)
|
||||
|
||||
if rule.GoogleCache {
|
||||
u, err = url.Parse("https://webcache.googleusercontent.com/search?q=cache:" + u.String())
|
||||
url, err := modifyURL(u.String()+urlQuery, rule)
|
||||
if err != nil {
|
||||
return "", nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch the site
|
||||
client := &http.Client{}
|
||||
req, _ := http.NewRequest("GET", u.String()+urlQuery, nil)
|
||||
req, _ := http.NewRequest("GET", url, nil)
|
||||
|
||||
if rule.Headers.UserAgent != "" {
|
||||
req.Header.Set("User-Agent", rule.Headers.UserAgent)
|
||||
@@ -114,6 +204,7 @@ func fetchSite(urlpath string, queries map[string]string) (string, *http.Request
|
||||
}
|
||||
|
||||
if rule.Headers.CSP != "" {
|
||||
log.Println(rule.Headers.CSP)
|
||||
resp.Header.Set("Content-Security-Policy", rule.Headers.CSP)
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,10 @@ type Regex struct {
|
||||
Match string `yaml:"match"`
|
||||
Replace string `yaml:"replace"`
|
||||
}
|
||||
type KV struct {
|
||||
Key string `yaml:"key"`
|
||||
Value string `yaml:"value"`
|
||||
}
|
||||
|
||||
type RuleSet []Rule
|
||||
|
||||
@@ -20,6 +24,13 @@ type Rule struct {
|
||||
} `yaml:"headers,omitempty"`
|
||||
GoogleCache bool `yaml:"googleCache,omitempty"`
|
||||
RegexRules []Regex `yaml:"regexRules"`
|
||||
|
||||
UrlMods struct {
|
||||
Domain []Regex `yaml:"domain"`
|
||||
Path []Regex `yaml:"path"`
|
||||
Query []KV `yaml:"query"`
|
||||
} `yaml:"urlMods"`
|
||||
|
||||
Injections []struct {
|
||||
Position string `yaml:"position"`
|
||||
Append string `yaml:"append"`
|
||||
|
||||
29
ruleset.yaml
29
ruleset.yaml
@@ -163,3 +163,32 @@
|
||||
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
|
||||
content-security-policy: script-src 'self';
|
||||
cookie:
|
||||
- domain: tagesspiegel.de
|
||||
headers:
|
||||
content-security-policy: script-src 'self';
|
||||
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
|
||||
urlMods:
|
||||
query:
|
||||
- key: amp
|
||||
value: 1
|
||||
- domain: www.ft.com
|
||||
headers:
|
||||
referer: https://t.co/x?amp=1
|
||||
injections:
|
||||
- position: head
|
||||
append: |
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", () => {
|
||||
const styleTags = document.querySelectorAll('link[rel="stylesheet"]');
|
||||
styleTags.forEach(el => {
|
||||
const href = el.getAttribute('href').substring(1);
|
||||
const updatedHref = href.replace(/(https?:\/\/.+?)\/{2,}/, '$1/');
|
||||
el.setAttribute('href', updatedHref);
|
||||
});
|
||||
setTimeout(() => {
|
||||
const cookie = document.querySelectorAll('.o-cookie-message, .js-article-ribbon, .o-ads, .o-banner, .o-message, .article__content-sign-up');
|
||||
cookie.forEach(el => { el.remove(); });
|
||||
}, 1000);
|
||||
})
|
||||
</script>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user