refactor outline api to become a response modifier
This commit is contained in:
10
cmd/main.go
10
cmd/main.go
@@ -183,12 +183,12 @@ func main() {
|
|||||||
RulesetPath: *ruleset,
|
RulesetPath: *ruleset,
|
||||||
}
|
}
|
||||||
|
|
||||||
app.Get("api/outline/*", handlers.NewAPIOutlineHandler("api/outline/*", proxyOpts))
|
app.Get("api/content/*", handlers.NewAPIOutlineHandler("api/outline/*", proxyOpts))
|
||||||
app.Get("outline/*", handlers.Outline("outline/*", proxyOpts))
|
|
||||||
|
|
||||||
app.Get("/*", handlers.NewProxySiteHandler(proxyOpts))
|
app.Get("outline/*", handlers.NewOutlineHandler("outline/*", proxyOpts))
|
||||||
app.Post("/*", handlers.NewProxySiteHandler(proxyOpts))
|
|
||||||
|
|
||||||
fmt.Println(cli.StartupMessage("1.0.1", *port, *ruleset))
|
app.All("/*", handlers.NewProxySiteHandler(proxyOpts))
|
||||||
|
|
||||||
|
fmt.Println(cli.StartupMessage("1.0.0", *port, *ruleset))
|
||||||
log.Fatal(app.Listen(":" + *port))
|
log.Fatal(app.Listen(":" + *port))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ func NewAPIOutlineHandler(path string, opts *ProxyOptions) fiber.Handler {
|
|||||||
AddResponseModifications(
|
AddResponseModifications(
|
||||||
tx.DeleteIncomingCookies(),
|
tx.DeleteIncomingCookies(),
|
||||||
tx.RewriteHTMLResourceURLs(),
|
tx.RewriteHTMLResourceURLs(),
|
||||||
tx.APIOutline(),
|
tx.APIContent(),
|
||||||
).
|
).
|
||||||
SetFiberCtx(c).
|
SetFiberCtx(c).
|
||||||
Execute()
|
Execute()
|
||||||
|
|||||||
@@ -4,27 +4,14 @@ import (
|
|||||||
"ladder/proxychain"
|
"ladder/proxychain"
|
||||||
rx "ladder/proxychain/requestmodifers"
|
rx "ladder/proxychain/requestmodifers"
|
||||||
tx "ladder/proxychain/responsemodifers"
|
tx "ladder/proxychain/responsemodifers"
|
||||||
"log"
|
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Outline(path string, opts *ProxyOptions) fiber.Handler {
|
func NewOutlineHandler(path string, opts *ProxyOptions) fiber.Handler {
|
||||||
|
|
||||||
// TODO: implement ruleset logic
|
|
||||||
/*
|
|
||||||
var rs ruleset.RuleSet
|
|
||||||
if opts.RulesetPath != "" {
|
|
||||||
r, err := ruleset.NewRuleset(opts.RulesetPath)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
rs = r
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
result, err := proxychain.
|
|
||||||
|
return proxychain.
|
||||||
NewProxyChain().
|
NewProxyChain().
|
||||||
WithAPIPath(path).
|
WithAPIPath(path).
|
||||||
SetDebugLogging(opts.Verbose).
|
SetDebugLogging(opts.Verbose).
|
||||||
@@ -36,20 +23,10 @@ func Outline(path string, opts *ProxyOptions) fiber.Handler {
|
|||||||
AddResponseModifications(
|
AddResponseModifications(
|
||||||
tx.DeleteIncomingCookies(),
|
tx.DeleteIncomingCookies(),
|
||||||
tx.RewriteHTMLResourceURLs(),
|
tx.RewriteHTMLResourceURLs(),
|
||||||
tx.APIOutline(),
|
tx.GenerateReadableOutline(), // <-- this response modification does the outline rendering
|
||||||
).
|
).
|
||||||
SetFiberCtx(c).
|
SetFiberCtx(c).
|
||||||
ExecuteForOutline()
|
Execute()
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.Render("outline", fiber.Map{
|
|
||||||
"Success": true,
|
|
||||||
"Params": c.Params("*"),
|
|
||||||
"Title": "Outline",
|
|
||||||
"Body": result,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,21 +1,17 @@
|
|||||||
package responsemodifers
|
package responsemodifers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"io"
|
"bytes"
|
||||||
"strings"
|
"encoding/json"
|
||||||
|
|
||||||
//"github.com/go-shiori/dom"
|
|
||||||
"github.com/go-shiori/dom"
|
|
||||||
"github.com/markusmobius/go-trafilatura"
|
"github.com/markusmobius/go-trafilatura"
|
||||||
|
"io"
|
||||||
//"golang.org/x/net/html"
|
|
||||||
|
|
||||||
"ladder/proxychain"
|
"ladder/proxychain"
|
||||||
"ladder/proxychain/responsemodifers/api"
|
"ladder/proxychain/responsemodifers/api"
|
||||||
)
|
)
|
||||||
|
|
||||||
// APIOutline creates an JSON representation of the article and returns it as an API response.
|
// APIContent creates a JSON representation of the article and returns it as an API response.
|
||||||
func APIOutline() proxychain.ResponseModification {
|
func APIContent() proxychain.ResponseModification {
|
||||||
|
|
||||||
return func(chain *proxychain.ProxyChain) error {
|
return func(chain *proxychain.ProxyChain) error {
|
||||||
// we set content-type twice here, in case another response modifier
|
// we set content-type twice here, in case another response modifier
|
||||||
// tries to forward over the original headers
|
// tries to forward over the original headers
|
||||||
@@ -38,9 +34,14 @@ func APIOutline() proxychain.ResponseModification {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
doc := trafilatura.CreateReadableDocument(result)
|
res := api.ExtractResultToAPIResponse(result)
|
||||||
reader := io.NopCloser(strings.NewReader(dom.OuterHTML(doc)))
|
jsonData, err := json.MarshalIndent(res, "", " ")
|
||||||
chain.Response.Body = reader
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
chain.Response.Body = io.NopCloser(bytes.NewReader(jsonData))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
84
proxychain/responsemodifers/generate_readable_outline.go
Normal file
84
proxychain/responsemodifers/generate_readable_outline.go
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
package responsemodifers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"embed"
|
||||||
|
"fmt"
|
||||||
|
"html/template"
|
||||||
|
"io"
|
||||||
|
"ladder/proxychain"
|
||||||
|
"log"
|
||||||
|
|
||||||
|
"github.com/go-shiori/dom"
|
||||||
|
"github.com/markusmobius/go-trafilatura"
|
||||||
|
)
|
||||||
|
|
||||||
|
//go:embed generate_readable_outline.html
|
||||||
|
var templateFS embed.FS
|
||||||
|
|
||||||
|
// GenerateReadableOutline creates an reader-friendly distilled representation of the article.
|
||||||
|
// This is a reliable way of bypassing soft-paywalled articles, where the content is hidden, but still present in the DOM.
|
||||||
|
func GenerateReadableOutline() proxychain.ResponseModification {
|
||||||
|
|
||||||
|
// get template only once, and resuse for subsequent calls
|
||||||
|
f := "generate_readable_outline.html"
|
||||||
|
tmpl, err := template.ParseFS(templateFS, f)
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Errorf("tx.GenerateReadableOutline Error: %s not found", f))
|
||||||
|
}
|
||||||
|
|
||||||
|
return func(chain *proxychain.ProxyChain) error {
|
||||||
|
|
||||||
|
// ===========================================================
|
||||||
|
// 1. extract dom contents using reading mode algo
|
||||||
|
// ===========================================================
|
||||||
|
opts := trafilatura.Options{
|
||||||
|
IncludeImages: true,
|
||||||
|
IncludeLinks: true,
|
||||||
|
//FavorPrecision: true,
|
||||||
|
FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go
|
||||||
|
// implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability"
|
||||||
|
OriginalURL: chain.Request.URL,
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := trafilatura.Extract(chain.Response.Body, opts)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
doc := trafilatura.CreateReadableDocument(result)
|
||||||
|
distilledHTML := dom.OuterHTML(doc)
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// 2. render generate_readable_outline.html template using metadata from step 1
|
||||||
|
// ============================================================================
|
||||||
|
data := map[string]interface{}{
|
||||||
|
"Success": true,
|
||||||
|
"Params": chain.Request.URL,
|
||||||
|
//"Title": result.Metadata.Title, // todo: modify CreateReadableDocument so we don't have <h1> titles duplicated?
|
||||||
|
"Date": result.Metadata.Date.String(),
|
||||||
|
"Author": result.Metadata.Author,
|
||||||
|
"Body": distilledHTML,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// 3. queue sending the response back to the client by replacing the response body
|
||||||
|
// (the response body will be read as a stream in proxychain.Execute() later on.)
|
||||||
|
// ============================================================================
|
||||||
|
pr, pw := io.Pipe() // pipe io.writer contents into io.reader
|
||||||
|
|
||||||
|
// Use a goroutine for writing to the pipe so we don't deadlock the request
|
||||||
|
go func() {
|
||||||
|
defer pw.Close()
|
||||||
|
|
||||||
|
err := tmpl.Execute(pw, data) // <- render template
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("WARN: GenerateReadableOutline template rendering error: %s\n", err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
chain.Context.Set("content-type", "text/html")
|
||||||
|
chain.Response.Body = pr // <- replace reponse body reader with our new reader from pipe
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -356,7 +356,7 @@
|
|||||||
|
|
||||||
<div class="flex flex-col space-y-3">
|
<div class="flex flex-col space-y-3">
|
||||||
<div>{{ .Params }}</div>
|
<div>{{ .Params }}</div>
|
||||||
<div>{{ unescape .Body }}</div>
|
<div>{{ .Body }}</div>
|
||||||
{{ end }}
|
{{ end }}
|
||||||
<!-- Trick Tailwind into compiling these styles into styles.css -->
|
<!-- Trick Tailwind into compiling these styles into styles.css -->
|
||||||
<!-- <div class="hidden text-xs text-sm text-base text-xl text-2xl text-3xl text-4xl sm:text-3xl sm:text-4xl sm:text-5xl"></div> -->
|
<!-- <div class="hidden text-xs text-sm text-base text-xl text-2xl text-3xl text-4xl sm:text-3xl sm:text-4xl sm:text-5xl"></div> -->
|
||||||
Reference in New Issue
Block a user