diff --git a/cmd/main.go b/cmd/main.go index 926fd97..e4faf49 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -183,12 +183,12 @@ func main() { RulesetPath: *ruleset, } - app.Get("api/outline/*", handlers.NewAPIOutlineHandler("api/outline/*", proxyOpts)) - app.Get("outline/*", handlers.Outline("outline/*", proxyOpts)) + app.Get("api/content/*", handlers.NewAPIOutlineHandler("api/outline/*", proxyOpts)) - app.Get("/*", handlers.NewProxySiteHandler(proxyOpts)) - app.Post("/*", handlers.NewProxySiteHandler(proxyOpts)) + app.Get("outline/*", handlers.NewOutlineHandler("outline/*", proxyOpts)) - fmt.Println(cli.StartupMessage("1.0.1", *port, *ruleset)) + app.All("/*", handlers.NewProxySiteHandler(proxyOpts)) + + fmt.Println(cli.StartupMessage("1.0.0", *port, *ruleset)) log.Fatal(app.Listen(":" + *port)) } diff --git a/handlers/api_outline.go b/handlers/api_outline.go index f14fe5a..acb4095 100644 --- a/handlers/api_outline.go +++ b/handlers/api_outline.go @@ -34,7 +34,7 @@ func NewAPIOutlineHandler(path string, opts *ProxyOptions) fiber.Handler { AddResponseModifications( tx.DeleteIncomingCookies(), tx.RewriteHTMLResourceURLs(), - tx.APIOutline(), + tx.APIContent(), ). SetFiberCtx(c). Execute() diff --git a/handlers/outline.go b/handlers/outline.go index 6f66c09..ecd46e9 100644 --- a/handlers/outline.go +++ b/handlers/outline.go @@ -4,27 +4,14 @@ import ( "ladder/proxychain" rx "ladder/proxychain/requestmodifers" tx "ladder/proxychain/responsemodifers" - "log" "github.com/gofiber/fiber/v2" ) -func Outline(path string, opts *ProxyOptions) fiber.Handler { - - // TODO: implement ruleset logic - /* - var rs ruleset.RuleSet - if opts.RulesetPath != "" { - r, err := ruleset.NewRuleset(opts.RulesetPath) - if err != nil { - panic(err) - } - rs = r - } - */ - +func NewOutlineHandler(path string, opts *ProxyOptions) fiber.Handler { return func(c *fiber.Ctx) error { - result, err := proxychain. + + return proxychain. NewProxyChain(). WithAPIPath(path). SetDebugLogging(opts.Verbose). 
@@ -36,20 +23,10 @@ func Outline(path string, opts *ProxyOptions) fiber.Handler { AddResponseModifications( tx.DeleteIncomingCookies(), tx.RewriteHTMLResourceURLs(), - tx.APIOutline(), + tx.GenerateReadableOutline(), // <-- this response modification does the outline rendering ). SetFiberCtx(c). - ExecuteForOutline() + Execute() - if err != nil { - log.Fatal(err) - } - - return c.Render("outline", fiber.Map{ - "Success": true, - "Params": c.Params("*"), - "Title": "Outline", - "Body": result, - }) } } diff --git a/proxychain/responsemodifers/outline.go b/proxychain/responsemodifers/api_content.go similarity index 74% rename from proxychain/responsemodifers/outline.go rename to proxychain/responsemodifers/api_content.go index b239229..9e12220 100644 --- a/proxychain/responsemodifers/outline.go +++ b/proxychain/responsemodifers/api_content.go @@ -1,21 +1,17 @@ package responsemodifers import ( - "io" - "strings" - - //"github.com/go-shiori/dom" - "github.com/go-shiori/dom" + "bytes" + "encoding/json" "github.com/markusmobius/go-trafilatura" - - //"golang.org/x/net/html" - + "io" "ladder/proxychain" "ladder/proxychain/responsemodifers/api" ) -// APIOutline creates an JSON representation of the article and returns it as an API response. -func APIOutline() proxychain.ResponseModification { +// APIContent creates a JSON representation of the article and returns it as an API response. 
+func APIContent() proxychain.ResponseModification { + return func(chain *proxychain.ProxyChain) error { // we set content-type twice here, in case another response modifier // tries to forward over the original headers @@ -38,9 +34,14 @@ func APIOutline() proxychain.ResponseModification { return nil } - doc := trafilatura.CreateReadableDocument(result) - reader := io.NopCloser(strings.NewReader(dom.OuterHTML(doc))) - chain.Response.Body = reader + res := api.ExtractResultToAPIResponse(result) + jsonData, err := json.MarshalIndent(res, "", " ") + if err != nil { + return err + } + + chain.Response.Body = io.NopCloser(bytes.NewReader(jsonData)) return nil } + } diff --git a/proxychain/responsemodifers/outline_test.go b/proxychain/responsemodifers/api_content_test.go similarity index 100% rename from proxychain/responsemodifers/outline_test.go rename to proxychain/responsemodifers/api_content_test.go diff --git a/proxychain/responsemodifers/generate_readable_outline.go b/proxychain/responsemodifers/generate_readable_outline.go new file mode 100644 index 0000000..67dae53 --- /dev/null +++ b/proxychain/responsemodifers/generate_readable_outline.go @@ -0,0 +1,84 @@ +package responsemodifers + +import ( + "embed" + "fmt" + "html/template" + "io" + "ladder/proxychain" + "log" + + "github.com/go-shiori/dom" + "github.com/markusmobius/go-trafilatura" +) + +//go:embed generate_readable_outline.html +var templateFS embed.FS + +// GenerateReadableOutline creates a reader-friendly distilled representation of the article. +// This is a reliable way of bypassing soft-paywalled articles, where the content is hidden, but still present in the DOM. 
+func GenerateReadableOutline() proxychain.ResponseModification { + + // get template only once, and reuse for subsequent calls + f := "generate_readable_outline.html" + tmpl, err := template.ParseFS(templateFS, f) + if err != nil { + panic(fmt.Errorf("tx.GenerateReadableOutline Error: %s not found", f)) + } + + return func(chain *proxychain.ProxyChain) error { + + // =========================================================== + // 1. extract dom contents using reading mode algo + // =========================================================== + opts := trafilatura.Options{ + IncludeImages: true, + IncludeLinks: true, + //FavorPrecision: true, + FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go + // implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability" + OriginalURL: chain.Request.URL, + } + + result, err := trafilatura.Extract(chain.Response.Body, opts) + if err != nil { + return err + } + + doc := trafilatura.CreateReadableDocument(result) + distilledHTML := dom.OuterHTML(doc) + + // ============================================================================ + // 2. render generate_readable_outline.html template using metadata from step 1 + // ============================================================================ + data := map[string]interface{}{ + "Success": true, + "Params": chain.Request.URL, + //"Title": result.Metadata.Title, // todo: modify CreateReadableDocument so we don't have

titles duplicated? + "Date": result.Metadata.Date.String(), + "Author": result.Metadata.Author, + "Body": distilledHTML, + } + + // ============================================================================ + // 3. queue sending the response back to the client by replacing the response body + // (the response body will be read as a stream in proxychain.Execute() later on.) + // ============================================================================ + pr, pw := io.Pipe() // pipe io.writer contents into io.reader + + // Use a goroutine for writing to the pipe so we don't deadlock the request + go func() { + defer pw.Close() + + err := tmpl.Execute(pw, data) // <- render template + + if err != nil { + log.Printf("WARN: GenerateReadableOutline template rendering error: %s\n", err) + } + }() + + chain.Context.Set("content-type", "text/html") + chain.Response.Body = pr // <- replace response body reader with our new reader from pipe + return nil + } +} diff --git a/handlers/outline.html b/proxychain/responsemodifers/generate_readable_outline.html similarity index 99% rename from handlers/outline.html rename to proxychain/responsemodifers/generate_readable_outline.html index e0c31d2..ff4b034 100644 --- a/handlers/outline.html +++ b/proxychain/responsemodifers/generate_readable_outline.html @@ -356,7 +356,7 @@
{{ .Params }}
-
{{ unescape .Body }}
+
{{ .Body }}
{{ end }}