improve outline api with error handling and proper content-type response

This commit is contained in:
Kevin Pham
2023-11-29 09:50:02 -06:00
parent e5bd5df2c0
commit 909d29230f
8 changed files with 405 additions and 191 deletions

View File

@@ -487,9 +487,11 @@ func (chain *ProxyChain) Execute() error {
}
// in case api user did not set or forward content-type, we do it for them
if chain.Context.Get("content-type") == "" {
chain.Context.Set("content-type", chain.Response.Header.Get("content-type"))
}
/*
if chain.Context.Get("content-type") == "" {
chain.Context.Set("content-type", chain.Response.Header.Get("content-type"))
}
*/
// Return request back to client
return chain.Context.SendStream(body)

View File

@@ -0,0 +1,55 @@
package api
import (
"bytes"
"encoding/json"
"errors"
"io"
"reflect"
)
type APIError struct {
Success bool `json:"success"`
Error ErrorDetails `json:"error"`
}
type ErrorDetails struct {
Message string `json:"message"`
Type string `json:"type"`
Cause string `json:"cause"`
}
func CreateAPIErrReader(err error) io.ReadCloser {
if err == nil {
return io.NopCloser(bytes.NewBufferString(`{"success":false, "error": "No error provided"}`))
}
baseErr := getBaseError(err)
apiErr := APIError{
Success: false,
Error: ErrorDetails{
Message: err.Error(),
Type: reflect.TypeOf(err).String(),
Cause: baseErr.Error(),
},
}
// Serialize the APIError into JSON
jsonData, jsonErr := json.Marshal(apiErr)
if jsonErr != nil {
return io.NopCloser(bytes.NewBufferString(`{"success":false, "error": "Failed to serialize error"}`))
}
// Return the JSON data as an io.ReadCloser
return io.NopCloser(bytes.NewBuffer(jsonData))
}
func getBaseError(err error) error {
for {
unwrapped := errors.Unwrap(err)
if unwrapped == nil {
return err
}
err = unwrapped
}
}

View File

@@ -0,0 +1,135 @@
package api
import (
"github.com/go-shiori/dom"
"github.com/markusmobius/go-trafilatura"
"golang.org/x/net/html"
)
// =======================================================================================
// credit @joncrangle https://github.com/everywall/ladder/issues/38#issuecomment-1831252934
type ImageContent struct {
Type string `json:"type"`
URL string `json:"url"`
Alt string `json:"alt"`
Caption string `json:"caption"`
}
type LinkContent struct {
Type string `json:"type"`
Href string `json:"href"`
Data string `json:"data"`
}
type TextContent struct {
Type string `json:"type"`
Data string `json:"data"`
}
type JSONDocument struct {
Success bool `json:"success"`
Error ErrorDetails `json:"error"`
Metadata struct {
Title string `json:"title"`
Author string `json:"author"`
URL string `json:"url"`
Hostname string `json:"hostname"`
Description string `json:"description"`
Sitename string `json:"sitename"`
Date string `json:"date"`
Categories []string `json:"categories"`
Tags []string `json:"tags"`
License string `json:"license"`
} `json:"metadata"`
Content []interface{} `json:"content"`
Comments string `json:"comments"`
}
func ExtractResultToAPIResponse(extract *trafilatura.ExtractResult) *JSONDocument {
jsonDoc := &JSONDocument{}
// Populate success
jsonDoc.Success = true
// Populate metadata
jsonDoc.Metadata.Title = extract.Metadata.Title
jsonDoc.Metadata.Author = extract.Metadata.Author
jsonDoc.Metadata.URL = extract.Metadata.URL
jsonDoc.Metadata.Hostname = extract.Metadata.Hostname
jsonDoc.Metadata.Description = extract.Metadata.Description
jsonDoc.Metadata.Sitename = extract.Metadata.Sitename
jsonDoc.Metadata.Date = extract.Metadata.Date.Format("2006-01-02")
jsonDoc.Metadata.Categories = extract.Metadata.Categories
jsonDoc.Metadata.Tags = extract.Metadata.Tags
jsonDoc.Metadata.License = extract.Metadata.License
// Populate content
if extract.ContentNode != nil {
jsonDoc.Content = parseContent(extract.ContentNode)
}
// Populate comments
if extract.CommentsNode != nil {
jsonDoc.Comments = dom.OuterHTML(extract.CommentsNode)
}
return jsonDoc
}
func parseContent(node *html.Node) []interface{} {
var content []interface{}
for child := node.FirstChild; child != nil; child = child.NextSibling {
switch child.Data {
case "img":
image := ImageContent{
Type: "img",
URL: dom.GetAttribute(child, "src"),
Alt: dom.GetAttribute(child, "alt"),
Caption: dom.GetAttribute(child, "caption"),
}
content = append(content, image)
case "a":
link := LinkContent{
Type: "a",
Href: dom.GetAttribute(child, "href"),
Data: dom.InnerText(child),
}
content = append(content, link)
case "h1":
text := TextContent{
Type: "h1",
Data: dom.InnerText(child),
}
content = append(content, text)
case "h2":
text := TextContent{
Type: "h2",
Data: dom.InnerText(child),
}
content = append(content, text)
case "h3":
text := TextContent{
Type: "h3",
Data: dom.InnerText(child),
}
content = append(content, text)
// continue with other tags
default:
text := TextContent{
Type: "p",
Data: dom.InnerText(child),
}
content = append(content, text)
}
}
return content
}

View File

@@ -3,18 +3,23 @@ package responsemodifers
import (
"bytes"
"encoding/json"
"github.com/go-shiori/dom"
//"github.com/go-shiori/dom"
"github.com/markusmobius/go-trafilatura"
"golang.org/x/net/html"
//"golang.org/x/net/html"
"io"
"ladder/proxychain"
//"strings"
"ladder/proxychain/responsemodifers/api"
)
// Outline creates an JSON representation of the article
func Outline() proxychain.ResponseModification {
// APIOutline creates an JSON representation of the article and returns it as an API response.
func APIOutline() proxychain.ResponseModification {
return func(chain *proxychain.ProxyChain) error {
// Use readability
// we set content-type twice here, in case another response modifier
// tries to forward over the original headers
chain.Context.Set("content-type", "application/json")
chain.Response.Header.Set("content-type", "application/json")
// extract dom contents
opts := trafilatura.Options{
IncludeImages: true,
IncludeLinks: true,
@@ -26,151 +31,20 @@ func Outline() proxychain.ResponseModification {
result, err := trafilatura.Extract(chain.Response.Body, opts)
if err != nil {
return err
chain.Response.Body = api.CreateAPIErrReader(err)
return nil
}
doc := createJSONDocument(result)
jsonData, err := json.MarshalIndent(doc, "", " ")
doc := api.ExtractResultToAPIResponse(result)
jsonData, err := json.MarshalIndent(doc, "", "\t")
if err != nil {
return err
chain.Response.Body = api.CreateAPIErrReader(err)
return nil
}
buf := bytes.NewBuffer(jsonData)
//doc := trafilatura.CreateReadableDocument(result)
//reader := io.NopCloser(strings.NewReader(dom.OuterHTML(doc)))
chain.Response.Body = io.NopCloser(buf)
return nil
}
}
// =======================================================================================
// credit @joncrangle https://github.com/everywall/ladder/issues/38#issuecomment-1831252934
type ImageContent struct {
Type string `json:"type"`
URL string `json:"url"`
Alt string `json:"alt"`
Caption string `json:"caption"`
}
type LinkContent struct {
Type string `json:"type"`
Href string `json:"href"`
Data string `json:"data"`
}
type TextContent struct {
Type string `json:"type"`
Data string `json:"data"`
}
type JSONDocument struct {
Success bool `json:"success"`
Error struct {
Message string `json:"message"`
Type string `json:"type"`
Cause string `json:"cause"`
} `json:"error"`
Metadata struct {
Title string `json:"title"`
Author string `json:"author"`
URL string `json:"url"`
Hostname string `json:"hostname"`
Description string `json:"description"`
Sitename string `json:"sitename"`
Date string `json:"date"`
Categories []string `json:"categories"`
Tags []string `json:"tags"`
License string `json:"license"`
} `json:"metadata"`
Content []interface{} `json:"content"`
Comments string `json:"comments"`
}
func createJSONDocument(extract *trafilatura.ExtractResult) *JSONDocument {
jsonDoc := &JSONDocument{}
// Populate success
jsonDoc.Success = true
// Populate metadata
jsonDoc.Metadata.Title = extract.Metadata.Title
jsonDoc.Metadata.Author = extract.Metadata.Author
jsonDoc.Metadata.URL = extract.Metadata.URL
jsonDoc.Metadata.Hostname = extract.Metadata.Hostname
jsonDoc.Metadata.Description = extract.Metadata.Description
jsonDoc.Metadata.Sitename = extract.Metadata.Sitename
jsonDoc.Metadata.Date = extract.Metadata.Date.Format("2006-01-02")
jsonDoc.Metadata.Categories = extract.Metadata.Categories
jsonDoc.Metadata.Tags = extract.Metadata.Tags
jsonDoc.Metadata.License = extract.Metadata.License
// Populate content
if extract.ContentNode != nil {
jsonDoc.Content = parseContent(extract.ContentNode)
}
// Populate comments
if extract.CommentsNode != nil {
jsonDoc.Comments = dom.OuterHTML(extract.CommentsNode)
}
return jsonDoc
}
func parseContent(node *html.Node) []interface{} {
var content []interface{}
for child := node.FirstChild; child != nil; child = child.NextSibling {
switch child.Data {
case "img":
image := ImageContent{
Type: "img",
URL: dom.GetAttribute(child, "src"),
Alt: dom.GetAttribute(child, "alt"),
Caption: dom.GetAttribute(child, "caption"),
}
content = append(content, image)
case "a":
link := LinkContent{
Type: "a",
Href: dom.GetAttribute(child, "href"),
Data: dom.InnerText(child),
}
content = append(content, link)
case "h1":
text := TextContent{
Type: "h1",
Data: dom.InnerText(child),
}
content = append(content, text)
case "h2":
text := TextContent{
Type: "h2",
Data: dom.InnerText(child),
}
content = append(content, text)
case "h3":
text := TextContent{
Type: "h3",
Data: dom.InnerText(child),
}
content = append(content, text)
// continue with other tags
default:
text := TextContent{
Type: "p",
Data: dom.InnerText(child),
}
content = append(content, text)
}
}
return content
}

View File

@@ -0,0 +1,68 @@
package responsemodifers
import (
"encoding/json"
"fmt"
"io"
"net/url"
"testing"
)
func TestCreateAPIErrReader(t *testing.T) {
_, baseErr := url.Parse("://this is an invalid url")
wrappedErr := fmt.Errorf("wrapped error: %w", baseErr)
readCloser := CreateAPIErrReader(wrappedErr)
defer readCloser.Close()
// Read and unmarshal the JSON output
data, err := io.ReadAll(readCloser)
if err != nil {
t.Fatalf("Failed to read from ReadCloser: %v", err)
}
fmt.Println(string(data))
var apiErr APIError
err = json.Unmarshal(data, &apiErr)
if err != nil {
t.Fatalf("Failed to unmarshal JSON: %v", err)
}
// Verify the structure of the APIError
if apiErr.Success {
t.Errorf("Expected Success to be false, got true")
}
if apiErr.Error.Message != wrappedErr.Error() {
t.Errorf("Expected error message to be '%v', got '%v'", wrappedErr.Error(), apiErr.Error.Message)
}
}
func TestCreateAPIErrReader2(t *testing.T) {
_, baseErr := url.Parse("://this is an invalid url")
readCloser := CreateAPIErrReader(baseErr)
defer readCloser.Close()
// Read and unmarshal the JSON output
data, err := io.ReadAll(readCloser)
if err != nil {
t.Fatalf("Failed to read from ReadCloser: %v", err)
}
fmt.Println(string(data))
var apiErr APIError
err = json.Unmarshal(data, &apiErr)
if err != nil {
t.Fatalf("Failed to unmarshal JSON: %v", err)
}
// Verify the structure of the APIError
if apiErr.Success {
t.Errorf("Expected Success to be false, got true")
}
if apiErr.Error.Message != baseErr.Error() {
t.Errorf("Expected error message to be '%v', got '%v'", baseErr.Error(), apiErr.Error.Message)
}
}