tweak outline template rendering

This commit is contained in:
Kevin Pham
2023-11-30 19:06:33 -06:00
parent 1ec0d2c09e
commit cd891d88a8
2 changed files with 54 additions and 27 deletions

View File

@@ -1,14 +1,17 @@
package responsemodifers package responsemodifers
import ( import (
"bytes"
"embed" "embed"
"fmt" "fmt"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
"html/template" "html/template"
"io" "io"
"ladder/proxychain" "ladder/proxychain"
"log" "log"
"github.com/go-shiori/dom" //"github.com/go-shiori/dom"
"github.com/markusmobius/go-trafilatura" "github.com/markusmobius/go-trafilatura"
) )
@@ -32,32 +35,42 @@ func GenerateReadableOutline() proxychain.ResponseModification {
// 1. extract dom contents using reading mode algo // 1. extract dom contents using reading mode algo
// =========================================================== // ===========================================================
opts := trafilatura.Options{ opts := trafilatura.Options{
IncludeImages: true, IncludeImages: true,
IncludeLinks: true, IncludeLinks: true,
//FavorPrecision: true, FavorRecall: true,
Deduplicate: true,
FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go
// implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability" // implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability"
OriginalURL: chain.Request.URL, OriginalURL: chain.Request.URL,
} }
result, err := trafilatura.Extract(chain.Response.Body, opts) extract, err := trafilatura.Extract(chain.Response.Body, opts)
if err != nil { if err != nil {
return err return err
} }
doc := trafilatura.CreateReadableDocument(result)
distilledHTML := dom.OuterHTML(doc)
// ============================================================================ // ============================================================================
// 2. render generate_readable_outline.html template using metadata from step 1 // 2. render generate_readable_outline.html template using metadata from step 1
// ============================================================================ // ============================================================================
// render DOM to string without H1 title
removeFirstH1(extract.ContentNode)
var b bytes.Buffer
html.Render(&b, extract.ContentNode)
distilledHTML := b.String()
// populate template parameters
data := map[string]interface{}{ data := map[string]interface{}{
"Success": true, "Success": true,
"Params": chain.Request.URL, "Footer": extract.Metadata.License,
//"Title": result.Metadata.Title, // todo: modify CreateReadableDocument so we don't have <h1> titles duplicated? "Image": extract.Metadata.Image,
"Date": result.Metadata.Date.String(), "Description": extract.Metadata.Description,
"Author": result.Metadata.Author, "Hostname": extract.Metadata.Hostname,
"Body": distilledHTML, "Url": chain.Request.URL,
"Title": extract.Metadata.Title, // todo: modify CreateReadableDocument so we don't have <h1> titles duplicated?
"Date": extract.Metadata.Date.String(),
"Author": extract.Metadata.Author,
"Body": distilledHTML,
} }
// ============================================================================ // ============================================================================
@@ -82,3 +95,20 @@ func GenerateReadableOutline() proxychain.ResponseModification {
return nil return nil
} }
} }
// removeFirstH1 detaches the first <h1> element found among the descendants
// of n, searching depth-first in document order, and then stops. At most one
// node is removed per call.
//
// The root n itself is never removed: if n is nil or is itself an <h1>, the
// call is a no-op.
//
// The previous implementation signalled "not found" to the caller right after
// removing a heading, so every ancestor kept scanning its remaining children
// and deleted additional <h1> elements from later subtrees. The search below
// reports success upward so the whole traversal unwinds after one removal.
func removeFirstH1(n *html.Node) {
	if n == nil {
		return
	}
	if n.Type == html.ElementNode && n.DataAtom == atom.H1 {
		// The root has no parent within this walk to detach it from.
		return
	}

	// removeIn reports whether an <h1> was removed somewhere below parent.
	var removeIn func(parent *html.Node) bool
	removeIn = func(parent *html.Node) bool {
		for c := parent.FirstChild; c != nil; c = c.NextSibling {
			if c.Type == html.ElementNode && c.DataAtom == atom.H1 {
				// RemoveChild clears c's sibling links, so we must return
				// immediately instead of advancing the loop.
				parent.RemoveChild(c)
				return true
			}
			if removeIn(c) {
				return true // propagate so ancestors stop scanning siblings
			}
		}
		return false
	}
	removeIn(n)
}

View File

@@ -4,6 +4,7 @@
<meta charset="UTF-8" /> <meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<link rel="stylesheet" href="/styles.css" /> <link rel="stylesheet" href="/styles.css" />
<script src="/script.js" defer></script>
<script> <script>
const handleThemeChange = () => { const handleThemeChange = () => {
const theme = localStorage.getItem("theme"); const theme = localStorage.getItem("theme");
@@ -50,7 +51,7 @@
/> />
</svg> </svg>
<span class="text-3xl mr-1 text-[#7AA7D1] leading-8 align-middle" <span class="text-3xl mr-1 text-[#7AA7D1] leading-8 align-middle"
>ladder</span >ladder | {{.Hostname}}</span
> >
</a> </a>
</div> </div>
@@ -327,19 +328,10 @@
</code> </code>
{{else}} {{else}}
<div class="flex flex-col gap-1 mt-3"> <div class="flex flex-col gap-1 mt-3">
<div>
<small class="text-sm font-medium leading-none">
<a
href="{{.Url}}"
class="text-slate-600 dark:text-slate-400 hover:text-blue-500 hover:underline underline-offset-2 transition-colors duration-300"
>{{.Url}}</a
>
</small>
</div>
<h1 <h1
class="text-3xl sm:text-4xl font-extrabold text-slate-900 tracking-tight dark:text-slate-200" class="text-3xl sm:text-4xl font-extrabold text-slate-900 tracking-tight dark:text-slate-200"
> >
{{.Title}} <a href="{{.Url}}"> {{.Title}} </a>
</h1> </h1>
{{ if ne .Date "" }} {{ if ne .Date "" }}
<small <small
@@ -355,7 +347,13 @@
</div> </div>
<div class="flex flex-col space-y-3"> <div class="flex flex-col space-y-3">
<div>{{ .Params }}</div> <div>
<div class="grid grid-cols-1 justify-items-center">
<div><img src="{{.Image}}"/></div>
<div class="text-xs text-gray-800">{{.Description}}</div>
</div>
</div>
<div>{{ .Body }}</div> <div>{{ .Body }}</div>
{{ end }} {{ end }}
<!-- Trick Tailwind into compiling these styles into styles.css --> <!-- Trick Tailwind into compiling these styles into styles.css -->
@@ -382,6 +380,5 @@
<div class="my-2"></div> <div class="my-2"></div>
</div> </div>
<script src="/script.js"></script>
</body> </body>
</html> </html>