package responsemodifers import ( "embed" "fmt" "html/template" "io" "ladder/proxychain" "log" "github.com/go-shiori/dom" "github.com/markusmobius/go-trafilatura" ) //go:embed generate_readable_outline.html var templateFS embed.FS // GenerateReadableOutline creates a reader-friendly distilled representation of the article. // This is a reliable way of bypassing soft-paywalled articles, where the content is hidden, but still present in the DOM. func GenerateReadableOutline() proxychain.ResponseModification { // get template only once, and reuse for subsequent calls f := "generate_readable_outline.html" tmpl, err := template.ParseFS(templateFS, f) if err != nil { panic(fmt.Errorf("tx.GenerateReadableOutline Error: %s not found", f)) } return func(chain *proxychain.ProxyChain) error { // =========================================================== // 1. extract dom contents using reading mode algo // =========================================================== opts := trafilatura.Options{ IncludeImages: true, IncludeLinks: true, //FavorPrecision: true, FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go // implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability" OriginalURL: chain.Request.URL, } result, err := trafilatura.Extract(chain.Response.Body, opts) if err != nil { return err } doc := trafilatura.CreateReadableDocument(result) distilledHTML := dom.OuterHTML(doc) // ============================================================================ // 2. render generate_readable_outline.html template using metadata from step 1 // ============================================================================ data := map[string]interface{}{ "Success": true, "Params": chain.Request.URL, //"Title": result.Metadata.Title, // todo: modify CreateReadableDocument so we don't have