diff --git a/proxychain/proxychain.go b/proxychain/proxychain.go index 1b2053b..f4f36da 100644 --- a/proxychain/proxychain.go +++ b/proxychain/proxychain.go @@ -103,7 +103,7 @@ type ProxyChain struct { Ruleset *ruleset.RuleSet debugMode bool abortErr error - _apiPrefix string + APIPrefix string } // a ProxyStrategy is a pre-built proxychain with purpose-built defaults @@ -172,9 +172,8 @@ func (chain *ProxyChain) AddResponseModifications(mods ...ResponseModification) // WithAPIPath trims the path during URL extraction. // example: using path = "api/outline/", a path like "http://localhost:8080/api/outline/https://example.com" becomes "https://example.com" func (chain *ProxyChain) WithAPIPath(path string) *ProxyChain { - fmt.Println("===================") - fmt.Printf("set path %s\n", path) - chain._apiPrefix = path + chain.APIPrefix = path + chain.APIPrefix = strings.TrimSuffix(chain.APIPrefix, "*") return chain } @@ -268,9 +267,9 @@ func (chain *ProxyChain) extractURL() (*url.URL, error) { fmt.Println("XXXXXXXXXXXXXXXX") fmt.Println(reqURL) - fmt.Println(chain._apiPrefix) + fmt.Println(chain.APIPrefix) - reqURL = strings.TrimPrefix(reqURL, chain._apiPrefix) + reqURL = strings.TrimPrefix(reqURL, chain.APIPrefix) // sometimes client requests doubleroot '//' // there is a bug somewhere else, but this is a workaround until we find it @@ -507,43 +506,12 @@ func (chain *ProxyChain) Execute() error { } // in case api user did not set or forward content-type, we do it for them - /* - if chain.Context.Get("content-type") == "" { - chain.Context.Set("content-type", chain.Response.Header.Get("content-type")) - } - */ + if chain.Context.Get("content-type") == "" { + chain.Context.Set("content-type", chain.Response.Header.Get("content-type")) + } // Return request back to client return chain.Context.SendStream(body) // return chain.Context.SendStream(body) } - -func (chain *ProxyChain) ExecuteForOutline() (string, error) { - defer chain._reset() - body, err := chain._execute() - if err != nil { - log.Println(err) - return "", err - } - if chain.Context == nil { - return "", errors.New("no context set") - } - - // in case api user did not set or forward content-type, we do it for them - /* - if chain.Context.Get("content-type") == "" { - chain.Context.Set("content-type", chain.Response.Header.Get("content-type")) - } - */ - - // Capture the HTML content in a variable - htmlContent, err := io.ReadAll(body) - if err != nil { - log.Println(err) - return "", err - } - - // Return the HTML content to the client - return string(htmlContent), nil -} diff --git a/proxychain/responsemodifers/generate_readable_outline.go b/proxychain/responsemodifers/generate_readable_outline.go index dc621fa..a73e963 100644 --- a/proxychain/responsemodifers/generate_readable_outline.go +++ b/proxychain/responsemodifers/generate_readable_outline.go @@ -4,12 +4,15 @@ import ( "bytes" "embed" "fmt" - "golang.org/x/net/html" - "golang.org/x/net/html/atom" "html/template" "io" "ladder/proxychain" "log" + "net/url" + "strings" + + "golang.org/x/net/html" + "golang.org/x/net/html/atom" //"github.com/go-shiori/dom" "github.com/markusmobius/go-trafilatura" @@ -35,7 +38,7 @@ func GenerateReadableOutline() proxychain.ResponseModification { // 1. extract dom contents using reading mode algo // =========================================================== opts := trafilatura.Options{ - IncludeImages: true, + IncludeImages: false, IncludeLinks: true, FavorRecall: true, Deduplicate: true, @@ -55,6 +58,8 @@ func GenerateReadableOutline() proxychain.ResponseModification { // render DOM to string without H1 title removeFirstH1(extract.ContentNode) + // rewrite all links to stay on /outline/ path + rewriteHrefLinks(extract.ContentNode, chain.Context.BaseURL(), chain.APIPrefix) var b bytes.Buffer html.Render(&b, extract.ContentNode) distilledHTML := b.String() @@ -62,11 +67,10 @@ func GenerateReadableOutline() proxychain.ResponseModification { // populate template parameters data := map[string]interface{}{ "Success": true, - "Footer": extract.Metadata.License, "Image": extract.Metadata.Image, "Description": extract.Metadata.Description, "Hostname": extract.Metadata.Hostname, - "Url": chain.Request.URL, + "Url": "/" + chain.Request.URL.String(), "Title": extract.Metadata.Title, // todo: modify CreateReadableDocument so we don't have