From b7a012d75bdc9f77ba52e7049e13ebf434cc0671 Mon Sep 17 00:00:00 2001 From: Kevin Pham Date: Tue, 28 Nov 2023 20:32:36 -0600 Subject: [PATCH] add outline test --- proxychain/responsemodifers/outline.go | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 proxychain/responsemodifers/outline.go diff --git a/proxychain/responsemodifers/outline.go b/proxychain/responsemodifers/outline.go new file mode 100644 index 0000000..a37c6d7 --- /dev/null +++ b/proxychain/responsemodifers/outline.go @@ -0,0 +1,35 @@ +package responsemodifers + +import ( + "github.com/go-shiori/dom" + "github.com/markusmobius/go-trafilatura" + "io" + "ladder/proxychain" + "strings" +) + +// Outline creates an JSON representation of the article +func Outline() proxychain.ResponseModification { + return func(chain *proxychain.ProxyChain) error { + // Use readability + opts := trafilatura.Options{ + IncludeImages: true, + IncludeLinks: true, + //FavorPrecision: true, + FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go + // implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability" + OriginalURL: chain.Request.URL, + } + + result, err := trafilatura.Extract(chain.Response.Body, opts) + if err != nil { + return err + } + + doc := trafilatura.CreateReadableDocument(result) + reader := io.NopCloser(strings.NewReader(dom.OuterHTML(doc))) + chain.Response.Body = reader + return nil + } + +}