refactor outline api to become a response modifier

This commit is contained in:
Kevin Pham
2023-11-30 15:50:02 -06:00
parent 1c810ad8e2
commit 1ec0d2c09e
7 changed files with 110 additions and 48 deletions

View File

@@ -183,12 +183,12 @@ func main() {
RulesetPath: *ruleset, RulesetPath: *ruleset,
} }
app.Get("api/outline/*", handlers.NewAPIOutlineHandler("api/outline/*", proxyOpts)) app.Get("api/content/*", handlers.NewAPIOutlineHandler("api/outline/*", proxyOpts))
app.Get("outline/*", handlers.Outline("outline/*", proxyOpts))
app.Get("/*", handlers.NewProxySiteHandler(proxyOpts)) app.Get("outline/*", handlers.NewOutlineHandler("outline/*", proxyOpts))
app.Post("/*", handlers.NewProxySiteHandler(proxyOpts))
fmt.Println(cli.StartupMessage("1.0.1", *port, *ruleset)) app.All("/*", handlers.NewProxySiteHandler(proxyOpts))
fmt.Println(cli.StartupMessage("1.0.0", *port, *ruleset))
log.Fatal(app.Listen(":" + *port)) log.Fatal(app.Listen(":" + *port))
} }

View File

@@ -34,7 +34,7 @@ func NewAPIOutlineHandler(path string, opts *ProxyOptions) fiber.Handler {
AddResponseModifications( AddResponseModifications(
tx.DeleteIncomingCookies(), tx.DeleteIncomingCookies(),
tx.RewriteHTMLResourceURLs(), tx.RewriteHTMLResourceURLs(),
tx.APIOutline(), tx.APIContent(),
). ).
SetFiberCtx(c). SetFiberCtx(c).
Execute() Execute()

View File

@@ -4,27 +4,14 @@ import (
"ladder/proxychain" "ladder/proxychain"
rx "ladder/proxychain/requestmodifers" rx "ladder/proxychain/requestmodifers"
tx "ladder/proxychain/responsemodifers" tx "ladder/proxychain/responsemodifers"
"log"
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
) )
func Outline(path string, opts *ProxyOptions) fiber.Handler { func NewOutlineHandler(path string, opts *ProxyOptions) fiber.Handler {
// TODO: implement ruleset logic
/*
var rs ruleset.RuleSet
if opts.RulesetPath != "" {
r, err := ruleset.NewRuleset(opts.RulesetPath)
if err != nil {
panic(err)
}
rs = r
}
*/
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
result, err := proxychain.
return proxychain.
NewProxyChain(). NewProxyChain().
WithAPIPath(path). WithAPIPath(path).
SetDebugLogging(opts.Verbose). SetDebugLogging(opts.Verbose).
@@ -36,20 +23,10 @@ func Outline(path string, opts *ProxyOptions) fiber.Handler {
AddResponseModifications( AddResponseModifications(
tx.DeleteIncomingCookies(), tx.DeleteIncomingCookies(),
tx.RewriteHTMLResourceURLs(), tx.RewriteHTMLResourceURLs(),
tx.APIOutline(), tx.GenerateReadableOutline(), // <-- this response modification does the outline rendering
). ).
SetFiberCtx(c). SetFiberCtx(c).
ExecuteForOutline() Execute()
if err != nil {
log.Fatal(err)
}
return c.Render("outline", fiber.Map{
"Success": true,
"Params": c.Params("*"),
"Title": "Outline",
"Body": result,
})
} }
} }

View File

@@ -1,21 +1,17 @@
package responsemodifers package responsemodifers
import ( import (
"io" "bytes"
"strings" "encoding/json"
//"github.com/go-shiori/dom"
"github.com/go-shiori/dom"
"github.com/markusmobius/go-trafilatura" "github.com/markusmobius/go-trafilatura"
"io"
//"golang.org/x/net/html"
"ladder/proxychain" "ladder/proxychain"
"ladder/proxychain/responsemodifers/api" "ladder/proxychain/responsemodifers/api"
) )
// APIOutline creates an JSON representation of the article and returns it as an API response. // APIContent creates an JSON representation of the article and returns it as an API response.
func APIOutline() proxychain.ResponseModification { func APIContent() proxychain.ResponseModification {
return func(chain *proxychain.ProxyChain) error { return func(chain *proxychain.ProxyChain) error {
// we set content-type twice here, in case another response modifier // we set content-type twice here, in case another response modifier
// tries to forward over the original headers // tries to forward over the original headers
@@ -38,9 +34,14 @@ func APIOutline() proxychain.ResponseModification {
return nil return nil
} }
doc := trafilatura.CreateReadableDocument(result) res := api.ExtractResultToAPIResponse(result)
reader := io.NopCloser(strings.NewReader(dom.OuterHTML(doc))) jsonData, err := json.MarshalIndent(res, "", " ")
chain.Response.Body = reader if err != nil {
return err
}
chain.Response.Body = io.NopCloser(bytes.NewReader(jsonData))
return nil return nil
} }
} }

View File

@@ -0,0 +1,84 @@
package responsemodifers
import (
"embed"
"fmt"
"html/template"
"io"
"ladder/proxychain"
"log"
"github.com/go-shiori/dom"
"github.com/markusmobius/go-trafilatura"
)
//go:embed generate_readable_outline.html
var templateFS embed.FS
// GenerateReadableOutline creates a reader-friendly, distilled representation of the article.
// This is a reliable way of bypassing soft-paywalled articles, where the content is hidden but still present in the DOM.
//
// The embedded generate_readable_outline.html template is parsed once, when the
// modifier is constructed; construction panics if the template cannot be parsed.
// The returned modification extracts the article from the current response body,
// renders it through the template and replaces the response body with the
// rendered page. It returns an error when the extraction fails.
func GenerateReadableOutline() proxychain.ResponseModification {
	// parse the template only once, and reuse it for subsequent calls
	f := "generate_readable_outline.html"
	tmpl, err := template.ParseFS(templateFS, f)
	if err != nil {
		// Keep the underlying cause: ParseFS also fails on template syntax
		// errors, not only when the file is missing.
		panic(fmt.Errorf("tx.GenerateReadableOutline Error: parsing template %s: %w", f, err))
	}

	return func(chain *proxychain.ProxyChain) error {
		// ===========================================================
		// 1. extract dom contents using reading mode algo
		// ===========================================================
		opts := trafilatura.Options{
			IncludeImages: true,
			IncludeLinks:  true,
			//FavorPrecision: true,
			FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go
			// implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability"
			OriginalURL: chain.Request.URL,
		}

		// The original body is swapped out for the rendered page below, so it
		// has to be closed here; otherwise nothing closes it anymore.
		upstream := chain.Response.Body
		defer upstream.Close()

		result, err := trafilatura.Extract(upstream, opts)
		if err != nil {
			return err
		}

		doc := trafilatura.CreateReadableDocument(result)
		distilledHTML := dom.OuterHTML(doc)

		// ============================================================================
		// 2. render generate_readable_outline.html template using metadata from step 1
		// ============================================================================
		data := map[string]interface{}{
			"Success": true,
			"Params":  chain.Request.URL,
			//"Title": result.Metadata.Title, // todo: modify CreateReadableDocument so we don't have <h1> titles duplicated?
			"Date":   result.Metadata.Date.String(),
			"Author": result.Metadata.Author,
			// html/template escapes plain strings, which would show the article
			// markup as literal text. template.HTML marks it as ready-to-emit HTML.
			// NOTE(review): this is markup derived from the upstream page and is
			// emitted verbatim; confirm the extractor output is acceptable to trust.
			"Body": template.HTML(distilledHTML),
		}

		// ============================================================================
		// 3. queue sending the response back to the client by replacing the response body
		// (the response body will be read as a stream in proxychain.Execute() later on.)
		// ============================================================================
		pr, pw := io.Pipe() // pipe io.Writer contents into io.Reader

		// Use a goroutine for writing to the pipe so we don't deadlock the request
		go func() {
			err := tmpl.Execute(pw, data) // <- render template
			if err != nil {
				log.Printf("WARN: GenerateReadableOutline template rendering error: %s\n", err)
			}
			// Hand a render failure to the reader instead of a plain EOF, so a
			// truncated page is not mistaken for a complete one.
			// CloseWithError(nil) is equivalent to Close().
			pw.CloseWithError(err)
		}()

		chain.Context.Set("content-type", "text/html")
		chain.Response.Body = pr // <- replace response body reader with our new reader from pipe
		return nil
	}
}

View File

@@ -356,7 +356,7 @@
<div class="flex flex-col space-y-3"> <div class="flex flex-col space-y-3">
<div>{{ .Params }}</div> <div>{{ .Params }}</div>
<div>{{ unescape .Body }}</div> <div>{{ .Body }}</div>
{{ end }} {{ end }}
<!-- Trick Tailwind into compiling these styles into styles.css --> <!-- Trick Tailwind into compiling these styles into styles.css -->
<!-- <div class="hidden text-xs text-sm text-base text-xl text-2xl text-3xl text-4xl sm:text-3xl sm:text-4xl sm:text-5xl"></div> --> <!-- <div class="hidden text-xs text-sm text-base text-xl text-2xl text-3xl text-4xl sm:text-3xl sm:text-4xl sm:text-5xl"></div> -->