tweak outline template rendering
This commit is contained in:
@@ -1,14 +1,17 @@
|
|||||||
package responsemodifers
|
package responsemodifers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"embed"
|
"embed"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"golang.org/x/net/html"
|
||||||
|
"golang.org/x/net/html/atom"
|
||||||
"html/template"
|
"html/template"
|
||||||
"io"
|
"io"
|
||||||
"ladder/proxychain"
|
"ladder/proxychain"
|
||||||
"log"
|
"log"
|
||||||
|
|
||||||
"github.com/go-shiori/dom"
|
//"github.com/go-shiori/dom"
|
||||||
"github.com/markusmobius/go-trafilatura"
|
"github.com/markusmobius/go-trafilatura"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -32,32 +35,42 @@ func GenerateReadableOutline() proxychain.ResponseModification {
|
|||||||
// 1. extract dom contents using reading mode algo
|
// 1. extract dom contents using reading mode algo
|
||||||
// ===========================================================
|
// ===========================================================
|
||||||
opts := trafilatura.Options{
|
opts := trafilatura.Options{
|
||||||
IncludeImages: true,
|
IncludeImages: true,
|
||||||
IncludeLinks: true,
|
IncludeLinks: true,
|
||||||
//FavorPrecision: true,
|
FavorRecall: true,
|
||||||
|
Deduplicate: true,
|
||||||
FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go
|
FallbackCandidates: nil, // TODO: https://github.com/markusmobius/go-trafilatura/blob/main/examples/chained/main.go
|
||||||
// implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability"
|
// implement fallbacks from "github.com/markusmobius/go-domdistiller" and "github.com/go-shiori/go-readability"
|
||||||
OriginalURL: chain.Request.URL,
|
OriginalURL: chain.Request.URL,
|
||||||
}
|
}
|
||||||
|
|
||||||
result, err := trafilatura.Extract(chain.Response.Body, opts)
|
extract, err := trafilatura.Extract(chain.Response.Body, opts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
doc := trafilatura.CreateReadableDocument(result)
|
|
||||||
distilledHTML := dom.OuterHTML(doc)
|
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// 2. render generate_readable_outline.html template using metadata from step 1
|
// 2. render generate_readable_outline.html template using metadata from step 1
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
||||||
|
// render DOM to string without H1 title
|
||||||
|
removeFirstH1(extract.ContentNode)
|
||||||
|
var b bytes.Buffer
|
||||||
|
html.Render(&b, extract.ContentNode)
|
||||||
|
distilledHTML := b.String()
|
||||||
|
|
||||||
|
// populate template parameters
|
||||||
data := map[string]interface{}{
|
data := map[string]interface{}{
|
||||||
"Success": true,
|
"Success": true,
|
||||||
"Params": chain.Request.URL,
|
"Footer": extract.Metadata.License,
|
||||||
//"Title": result.Metadata.Title, // todo: modify CreateReadableDocument so we don't have <h1> titles duplicated?
|
"Image": extract.Metadata.Image,
|
||||||
"Date": result.Metadata.Date.String(),
|
"Description": extract.Metadata.Description,
|
||||||
"Author": result.Metadata.Author,
|
"Hostname": extract.Metadata.Hostname,
|
||||||
"Body": distilledHTML,
|
"Url": chain.Request.URL,
|
||||||
|
"Title": extract.Metadata.Title, // todo: modify CreateReadableDocument so we don't have <h1> titles duplicated?
|
||||||
|
"Date": extract.Metadata.Date.String(),
|
||||||
|
"Author": extract.Metadata.Author,
|
||||||
|
"Body": distilledHTML,
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -82,3 +95,20 @@ func GenerateReadableOutline() proxychain.ResponseModification {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func removeFirstH1(n *html.Node) {
|
||||||
|
var recurse func(*html.Node) bool
|
||||||
|
recurse = func(n *html.Node) bool {
|
||||||
|
if n.Type == html.ElementNode && n.DataAtom == atom.H1 {
|
||||||
|
return true // Found the first H1, return true to stop
|
||||||
|
}
|
||||||
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||||
|
if recurse(c) {
|
||||||
|
n.RemoveChild(c)
|
||||||
|
return false // Removed first H1, no need to continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
recurse(n)
|
||||||
|
}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
<meta charset="UTF-8" />
|
<meta charset="UTF-8" />
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||||
<link rel="stylesheet" href="/styles.css" />
|
<link rel="stylesheet" href="/styles.css" />
|
||||||
|
<script src="/script.js" defer></script>
|
||||||
<script>
|
<script>
|
||||||
const handleThemeChange = () => {
|
const handleThemeChange = () => {
|
||||||
const theme = localStorage.getItem("theme");
|
const theme = localStorage.getItem("theme");
|
||||||
@@ -50,7 +51,7 @@
|
|||||||
/>
|
/>
|
||||||
</svg>
|
</svg>
|
||||||
<span class="text-3xl mr-1 text-[#7AA7D1] leading-8 align-middle"
|
<span class="text-3xl mr-1 text-[#7AA7D1] leading-8 align-middle"
|
||||||
>ladder</span
|
>ladder | {{.Hostname}}</span
|
||||||
>
|
>
|
||||||
</a>
|
</a>
|
||||||
</div>
|
</div>
|
||||||
@@ -327,19 +328,10 @@
|
|||||||
</code>
|
</code>
|
||||||
{{else}}
|
{{else}}
|
||||||
<div class="flex flex-col gap-1 mt-3">
|
<div class="flex flex-col gap-1 mt-3">
|
||||||
<div>
|
|
||||||
<small class="text-sm font-medium leading-none">
|
|
||||||
<a
|
|
||||||
href="{{.Url}}"
|
|
||||||
class="text-slate-600 dark:text-slate-400 hover:text-blue-500 hover:underline underline-offset-2 transition-colors duration-300"
|
|
||||||
>{{.Url}}</a
|
|
||||||
>
|
|
||||||
</small>
|
|
||||||
</div>
|
|
||||||
<h1
|
<h1
|
||||||
class="text-3xl sm:text-4xl font-extrabold text-slate-900 tracking-tight dark:text-slate-200"
|
class="text-3xl sm:text-4xl font-extrabold text-slate-900 tracking-tight dark:text-slate-200"
|
||||||
>
|
>
|
||||||
{{.Title}}
|
<a href="{{.Url}}"> {{.Title}} </a>
|
||||||
</h1>
|
</h1>
|
||||||
{{ if ne .Date "" }}
|
{{ if ne .Date "" }}
|
||||||
<small
|
<small
|
||||||
@@ -355,7 +347,13 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="flex flex-col space-y-3">
|
<div class="flex flex-col space-y-3">
|
||||||
<div>{{ .Params }}</div>
|
<div>
|
||||||
|
<div class="grid grid-cols-1 justify-items-center">
|
||||||
|
<div><img src="{{.Image}}"/></div>
|
||||||
|
<div class="text-xs text-gray-800">{{.Description}}</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div>{{ .Body }}</div>
|
<div>{{ .Body }}</div>
|
||||||
{{ end }}
|
{{ end }}
|
||||||
<!-- Trick Tailwind into compiling these styles into styles.css -->
|
<!-- Trick Tailwind into compiling these styles into styles.css -->
|
||||||
@@ -382,6 +380,5 @@
|
|||||||
<div class="my-2"></div>
|
<div class="my-2"></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<script src="/script.js"></script>
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
Reference in New Issue
Block a user