improve outline api with error handling and proper content-type response
This commit is contained in:
@@ -487,9 +487,11 @@ func (chain *ProxyChain) Execute() error {
|
||||
}
|
||||
|
||||
// in case api user did not set or forward content-type, we do it for them
|
||||
if chain.Context.Get("content-type") == "" {
|
||||
chain.Context.Set("content-type", chain.Response.Header.Get("content-type"))
|
||||
}
|
||||
/*
|
||||
if chain.Context.Get("content-type") == "" {
|
||||
chain.Context.Set("content-type", chain.Response.Header.Get("content-type"))
|
||||
}
|
||||
*/
|
||||
|
||||
// Return request back to client
|
||||
return chain.Context.SendStream(body)
|
||||
|
||||
55
proxychain/responsemodifers/api/error_api.go
Normal file
55
proxychain/responsemodifers/api/error_api.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
// APIError is the JSON envelope sent to API clients when a request fails.
// Success is always false for payloads produced by CreateAPIErrReader.
type APIError struct {
	Success bool         `json:"success"`
	Error   ErrorDetails `json:"error"`
}

// ErrorDetails describes the failure carried by an APIError.
type ErrorDetails struct {
	Message string `json:"message"` // err.Error() of the reported error
	Type    string `json:"type"`    // dynamic Go type of the reported error
	Cause   string `json:"cause"`   // message of the innermost wrapped error
}

// CreateAPIErrReader serializes err into an APIError JSON document and returns
// it as an io.ReadCloser, ready to be used as a response body.
//
// Every payload this function emits, including the nil-error and
// serialization-failure fallbacks, has the APIError shape, so clients can
// always decode the "error" field as an object. For a nil err the message is
// "No error provided" and the type and cause are empty.
func CreateAPIErrReader(err error) io.ReadCloser {
	details := ErrorDetails{Message: "No error provided"}
	if err != nil {
		details = ErrorDetails{
			Message: err.Error(),
			Type:    reflect.TypeOf(err).String(),
			Cause:   getBaseError(err).Error(),
		}
	}

	// Serialize the APIError into JSON.
	jsonData, jsonErr := json.Marshal(APIError{Success: false, Error: details})
	if jsonErr != nil {
		// Static payload that keeps the same object shape as APIError.
		return io.NopCloser(bytes.NewBufferString(
			`{"success":false,"error":{"message":"Failed to serialize error","type":"","cause":""}}`,
		))
	}

	// Return the JSON data as an io.ReadCloser.
	return io.NopCloser(bytes.NewBuffer(jsonData))
}

// getBaseError follows the errors.Unwrap chain starting at err and returns the
// last error in it. An error that wraps nothing is returned unchanged.
func getBaseError(err error) error {
	for next := errors.Unwrap(err); next != nil; next = errors.Unwrap(err) {
		err = next
	}
	return err
}
|
||||
135
proxychain/responsemodifers/api/outline_api.go
Normal file
135
proxychain/responsemodifers/api/outline_api.go
Normal file
@@ -0,0 +1,135 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"github.com/go-shiori/dom"
|
||||
"github.com/markusmobius/go-trafilatura"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// =======================================================================================
|
||||
// credit @joncrangle https://github.com/everywall/ladder/issues/38#issuecomment-1831252934
|
||||
|
||||
// ImageContent is the outline entry produced for an <img> element.
type ImageContent struct {
	Type    string `json:"type"`    // entry kind; parseContent sets "img"
	URL     string `json:"url"`     // value of the element's src attribute
	Alt     string `json:"alt"`     // value of the element's alt attribute
	Caption string `json:"caption"` // value of the element's caption attribute
}
|
||||
|
||||
// LinkContent is the outline entry produced for an <a> element.
type LinkContent struct {
	Type string `json:"type"` // entry kind; parseContent sets "a"
	Href string `json:"href"` // value of the element's href attribute
	Data string `json:"data"` // inner text of the element
}
|
||||
|
||||
// TextContent is the outline entry produced for headings and for every node
// that has no dedicated entry type.
type TextContent struct {
	Type string `json:"type"` // "h1", "h2", "h3", or "p" for everything else
	Data string `json:"data"` // inner text of the node
}
|
||||
|
||||
type JSONDocument struct {
|
||||
Success bool `json:"success"`
|
||||
Error ErrorDetails `json:"error"`
|
||||
Metadata struct {
|
||||
Title string `json:"title"`
|
||||
Author string `json:"author"`
|
||||
URL string `json:"url"`
|
||||
Hostname string `json:"hostname"`
|
||||
Description string `json:"description"`
|
||||
Sitename string `json:"sitename"`
|
||||
Date string `json:"date"`
|
||||
Categories []string `json:"categories"`
|
||||
Tags []string `json:"tags"`
|
||||
License string `json:"license"`
|
||||
} `json:"metadata"`
|
||||
Content []interface{} `json:"content"`
|
||||
Comments string `json:"comments"`
|
||||
}
|
||||
|
||||
func ExtractResultToAPIResponse(extract *trafilatura.ExtractResult) *JSONDocument {
|
||||
jsonDoc := &JSONDocument{}
|
||||
|
||||
// Populate success
|
||||
jsonDoc.Success = true
|
||||
|
||||
// Populate metadata
|
||||
jsonDoc.Metadata.Title = extract.Metadata.Title
|
||||
jsonDoc.Metadata.Author = extract.Metadata.Author
|
||||
jsonDoc.Metadata.URL = extract.Metadata.URL
|
||||
jsonDoc.Metadata.Hostname = extract.Metadata.Hostname
|
||||
jsonDoc.Metadata.Description = extract.Metadata.Description
|
||||
jsonDoc.Metadata.Sitename = extract.Metadata.Sitename
|
||||
jsonDoc.Metadata.Date = extract.Metadata.Date.Format("2006-01-02")
|
||||
jsonDoc.Metadata.Categories = extract.Metadata.Categories
|
||||
jsonDoc.Metadata.Tags = extract.Metadata.Tags
|
||||
jsonDoc.Metadata.License = extract.Metadata.License
|
||||
|
||||
// Populate content
|
||||
if extract.ContentNode != nil {
|
||||
jsonDoc.Content = parseContent(extract.ContentNode)
|
||||
}
|
||||
|
||||
// Populate comments
|
||||
if extract.CommentsNode != nil {
|
||||
jsonDoc.Comments = dom.OuterHTML(extract.CommentsNode)
|
||||
}
|
||||
|
||||
return jsonDoc
|
||||
}
|
||||
|
||||
func parseContent(node *html.Node) []interface{} {
|
||||
var content []interface{}
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
switch child.Data {
|
||||
case "img":
|
||||
image := ImageContent{
|
||||
Type: "img",
|
||||
URL: dom.GetAttribute(child, "src"),
|
||||
Alt: dom.GetAttribute(child, "alt"),
|
||||
Caption: dom.GetAttribute(child, "caption"),
|
||||
}
|
||||
content = append(content, image)
|
||||
|
||||
case "a":
|
||||
link := LinkContent{
|
||||
Type: "a",
|
||||
Href: dom.GetAttribute(child, "href"),
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, link)
|
||||
|
||||
case "h1":
|
||||
text := TextContent{
|
||||
Type: "h1",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
|
||||
case "h2":
|
||||
text := TextContent{
|
||||
Type: "h2",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
|
||||
case "h3":
|
||||
text := TextContent{
|
||||
Type: "h3",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
|
||||
// continue with other tags
|
||||
|
||||
default:
|
||||
text := TextContent{
|
||||
Type: "p",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
}
|
||||
}
|
||||
|
||||
return content
|
||||
}
|
||||
@@ -3,18 +3,23 @@ package responsemodifers
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"github.com/go-shiori/dom"
|
||||
//"github.com/go-shiori/dom"
|
||||
"github.com/markusmobius/go-trafilatura"
|
||||
"golang.org/x/net/html"
|
||||
//"golang.org/x/net/html"
|
||||
"io"
|
||||
"ladder/proxychain"
|
||||
//"strings"
|
||||
"ladder/proxychain/responsemodifers/api"
|
||||
)
|
||||
|
||||
// Outline creates an JSON representation of the article
|
||||
func Outline() proxychain.ResponseModification {
|
||||
// APIOutline creates a JSON representation of the article and returns it as an API response.
|
||||
func APIOutline() proxychain.ResponseModification {
|
||||
return func(chain *proxychain.ProxyChain) error {
|
||||
// Use readability
|
||||
// we set content-type twice here, in case another response modifier
|
||||
// tries to forward over the original headers
|
||||
chain.Context.Set("content-type", "application/json")
|
||||
chain.Response.Header.Set("content-type", "application/json")
|
||||
|
||||
// extract dom contents
|
||||
opts := trafilatura.Options{
|
||||
IncludeImages: true,
|
||||
IncludeLinks: true,
|
||||
@@ -26,151 +31,20 @@ func Outline() proxychain.ResponseModification {
|
||||
|
||||
result, err := trafilatura.Extract(chain.Response.Body, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
chain.Response.Body = api.CreateAPIErrReader(err)
|
||||
return nil
|
||||
}
|
||||
|
||||
doc := createJSONDocument(result)
|
||||
jsonData, err := json.MarshalIndent(doc, "", " ")
|
||||
doc := api.ExtractResultToAPIResponse(result)
|
||||
jsonData, err := json.MarshalIndent(doc, "", "\t")
|
||||
if err != nil {
|
||||
return err
|
||||
chain.Response.Body = api.CreateAPIErrReader(err)
|
||||
return nil
|
||||
}
|
||||
|
||||
buf := bytes.NewBuffer(jsonData)
|
||||
//doc := trafilatura.CreateReadableDocument(result)
|
||||
//reader := io.NopCloser(strings.NewReader(dom.OuterHTML(doc)))
|
||||
chain.Response.Body = io.NopCloser(buf)
|
||||
return nil
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// =======================================================================================
|
||||
// credit @joncrangle https://github.com/everywall/ladder/issues/38#issuecomment-1831252934
|
||||
|
||||
// ImageContent is the outline entry produced for an <img> element.
type ImageContent struct {
	Type    string `json:"type"`    // entry kind; parseContent sets "img"
	URL     string `json:"url"`     // value of the element's src attribute
	Alt     string `json:"alt"`     // value of the element's alt attribute
	Caption string `json:"caption"` // value of the element's caption attribute
}
|
||||
|
||||
// LinkContent is the outline entry produced for an <a> element.
type LinkContent struct {
	Type string `json:"type"` // entry kind; parseContent sets "a"
	Href string `json:"href"` // value of the element's href attribute
	Data string `json:"data"` // inner text of the element
}
|
||||
|
||||
// TextContent is the outline entry produced for headings and for every node
// that has no dedicated entry type.
type TextContent struct {
	Type string `json:"type"` // "h1", "h2", "h3", or "p" for everything else
	Data string `json:"data"` // inner text of the node
}
|
||||
|
||||
// JSONDocument is the JSON body describing an extracted article.
//
// None of the fields use omitempty, so the "error" object is serialized even
// when Success is true.
type JSONDocument struct {
	// Success reports whether the extraction produced a document.
	Success bool `json:"success"`
	// Error carries failure details.
	Error struct {
		Message string `json:"message"`
		Type    string `json:"type"`
		Cause   string `json:"cause"`
	} `json:"error"`
	// Metadata holds the article metadata copied from the extraction result.
	Metadata struct {
		Title       string   `json:"title"`
		Author      string   `json:"author"`
		URL         string   `json:"url"`
		Hostname    string   `json:"hostname"`
		Description string   `json:"description"`
		Sitename    string   `json:"sitename"`
		Date        string   `json:"date"` // formatted as YYYY-MM-DD
		Categories  []string `json:"categories"`
		Tags        []string `json:"tags"`
		License     string   `json:"license"`
	} `json:"metadata"`
	// Content lists ImageContent, LinkContent and TextContent entries in
	// document order.
	Content []interface{} `json:"content"`
	// Comments is the outer HTML of the extracted comments node, if any.
	Comments string `json:"comments"`
}
|
||||
|
||||
func createJSONDocument(extract *trafilatura.ExtractResult) *JSONDocument {
|
||||
jsonDoc := &JSONDocument{}
|
||||
|
||||
// Populate success
|
||||
jsonDoc.Success = true
|
||||
|
||||
// Populate metadata
|
||||
jsonDoc.Metadata.Title = extract.Metadata.Title
|
||||
jsonDoc.Metadata.Author = extract.Metadata.Author
|
||||
jsonDoc.Metadata.URL = extract.Metadata.URL
|
||||
jsonDoc.Metadata.Hostname = extract.Metadata.Hostname
|
||||
jsonDoc.Metadata.Description = extract.Metadata.Description
|
||||
jsonDoc.Metadata.Sitename = extract.Metadata.Sitename
|
||||
jsonDoc.Metadata.Date = extract.Metadata.Date.Format("2006-01-02")
|
||||
jsonDoc.Metadata.Categories = extract.Metadata.Categories
|
||||
jsonDoc.Metadata.Tags = extract.Metadata.Tags
|
||||
jsonDoc.Metadata.License = extract.Metadata.License
|
||||
|
||||
// Populate content
|
||||
if extract.ContentNode != nil {
|
||||
jsonDoc.Content = parseContent(extract.ContentNode)
|
||||
}
|
||||
|
||||
// Populate comments
|
||||
if extract.CommentsNode != nil {
|
||||
jsonDoc.Comments = dom.OuterHTML(extract.CommentsNode)
|
||||
}
|
||||
|
||||
return jsonDoc
|
||||
}
|
||||
|
||||
func parseContent(node *html.Node) []interface{} {
|
||||
var content []interface{}
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
switch child.Data {
|
||||
case "img":
|
||||
image := ImageContent{
|
||||
Type: "img",
|
||||
URL: dom.GetAttribute(child, "src"),
|
||||
Alt: dom.GetAttribute(child, "alt"),
|
||||
Caption: dom.GetAttribute(child, "caption"),
|
||||
}
|
||||
content = append(content, image)
|
||||
|
||||
case "a":
|
||||
link := LinkContent{
|
||||
Type: "a",
|
||||
Href: dom.GetAttribute(child, "href"),
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, link)
|
||||
|
||||
case "h1":
|
||||
text := TextContent{
|
||||
Type: "h1",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
|
||||
case "h2":
|
||||
text := TextContent{
|
||||
Type: "h2",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
|
||||
case "h3":
|
||||
text := TextContent{
|
||||
Type: "h3",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
|
||||
// continue with other tags
|
||||
|
||||
default:
|
||||
text := TextContent{
|
||||
Type: "p",
|
||||
Data: dom.InnerText(child),
|
||||
}
|
||||
content = append(content, text)
|
||||
}
|
||||
}
|
||||
|
||||
return content
|
||||
}
|
||||
|
||||
68
proxychain/responsemodifers/outline_test.go
Normal file
68
proxychain/responsemodifers/outline_test.go
Normal file
@@ -0,0 +1,68 @@
|
||||
package responsemodifers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/url"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCreateAPIErrReader(t *testing.T) {
|
||||
_, baseErr := url.Parse("://this is an invalid url")
|
||||
wrappedErr := fmt.Errorf("wrapped error: %w", baseErr)
|
||||
|
||||
readCloser := CreateAPIErrReader(wrappedErr)
|
||||
defer readCloser.Close()
|
||||
|
||||
// Read and unmarshal the JSON output
|
||||
data, err := io.ReadAll(readCloser)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to read from ReadCloser: %v", err)
|
||||
}
|
||||
fmt.Println(string(data))
|
||||
|
||||
var apiErr APIError
|
||||
err = json.Unmarshal(data, &apiErr)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal JSON: %v", err)
|
||||
}
|
||||
|
||||
// Verify the structure of the APIError
|
||||
if apiErr.Success {
|
||||
t.Errorf("Expected Success to be false, got true")
|
||||
}
|
||||
|
||||
if apiErr.Error.Message != wrappedErr.Error() {
|
||||
t.Errorf("Expected error message to be '%v', got '%v'", wrappedErr.Error(), apiErr.Error.Message)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateAPIErrReader2(t *testing.T) {
|
||||
_, baseErr := url.Parse("://this is an invalid url")
|
||||
|
||||
readCloser := CreateAPIErrReader(baseErr)
|
||||
defer readCloser.Close()
|
||||
|
||||
// Read and unmarshal the JSON output
|
||||
data, err := io.ReadAll(readCloser)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to read from ReadCloser: %v", err)
|
||||
}
|
||||
fmt.Println(string(data))
|
||||
|
||||
var apiErr APIError
|
||||
err = json.Unmarshal(data, &apiErr)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal JSON: %v", err)
|
||||
}
|
||||
|
||||
// Verify the structure of the APIError
|
||||
if apiErr.Success {
|
||||
t.Errorf("Expected Success to be false, got true")
|
||||
}
|
||||
|
||||
if apiErr.Error.Message != baseErr.Error() {
|
||||
t.Errorf("Expected error message to be '%v', got '%v'", baseErr.Error(), apiErr.Error.Message)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user