diff --git a/proxychain/responsemodifers/generate_readable_outline.go b/proxychain/responsemodifers/generate_readable_outline.go index 67dae53..dc621fa 100644 --- a/proxychain/responsemodifers/generate_readable_outline.go +++ b/proxychain/responsemodifers/generate_readable_outline.go @@ -1,14 +1,17 @@ package responsemodifers import ( + "bytes" "embed" "fmt" + "golang.org/x/net/html" + "golang.org/x/net/html/atom" "html/template" "io" "ladder/proxychain" "log" - "github.com/go-shiori/dom" + //"github.com/go-shiori/dom" "github.com/markusmobius/go-trafilatura" ) @@ -32,32 +35,42 @@ func GenerateReadableOutline() proxychain.ResponseModification { // 1. extract dom contents using reading mode algo // =========================================================== opts := trafilatura.Options{ - IncludeImages: true, - IncludeLinks: true, - //FavorPrecision: true, + IncludeImages: true, + IncludeLinks: true, + FavorRecall: true, + Deduplicate: true, FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go // implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability" OriginalURL: chain.Request.URL, } - result, err := trafilatura.Extract(chain.Response.Body, opts) + extract, err := trafilatura.Extract(chain.Response.Body, opts) if err != nil { return err } - doc := trafilatura.CreateReadableDocument(result) - distilledHTML := dom.OuterHTML(doc) - // ============================================================================ // 2. render generate_readable_outline.html template using metadata from step 1 // ============================================================================ + + // render DOM to string without H1 title + removeFirstH1(extract.ContentNode) + var b bytes.Buffer + html.Render(&b, extract.ContentNode) + distilledHTML := b.String() + + // populate template parameters data := map[string]interface{}{ - "Success": true, - "Params": chain.Request.URL, - //"Title": result.Metadata.Title, // todo: modify CreateReadableDocument so we don't have