diff --git a/proxychain/requestmodifers/masquerade_as_trusted_bot.go b/proxychain/requestmodifers/masquerade_as_trusted_bot.go
index 7ef0e2f..4f1c75a 100644
--- a/proxychain/requestmodifers/masquerade_as_trusted_bot.go
+++ b/proxychain/requestmodifers/masquerade_as_trusted_bot.go
@@ -89,11 +89,13 @@ func masqueradeAsTrustedBot(botUA string, botIP string, ja3 string) proxychain.R
 			DeleteRequestHeader("origin"),
 		)
 
-		if ja3 != "" {
-			chain.AddOnceRequestModifications(
-				SpoofJA3fingerprint(ja3, botUA),
-			)
-		}
+		/*
+			if ja3 != "" {
+				chain.AddOnceRequestModifications(
+					SpoofJA3fingerprint(ja3, botUA),
+				)
+			}
+		*/
 
 		return nil
 	}
diff --git a/proxychain/responsemodifers/bypass_cors.go b/proxychain/responsemodifers/bypass_cors.go
index 23dad06..46c0e04 100644
--- a/proxychain/responsemodifers/bypass_cors.go
+++ b/proxychain/responsemodifers/bypass_cors.go
@@ -8,7 +8,7 @@ import (
 // from enforcing any CORS restrictions. This should run at the end of the chain.
 func BypassCORS() proxychain.ResponseModification {
 	return func(chain *proxychain.ProxyChain) error {
-		chain.AddResponseModifications(
+		chain.AddOnceResponseModifications(
 			SetResponseHeader("Access-Control-Allow-Origin", "*"),
 			SetResponseHeader("Access-Control-Expose-Headers", "*"),
 			SetResponseHeader("Access-Control-Allow-Credentials", "true"),
diff --git a/proxychain/responsemodifers/outline.go b/proxychain/responsemodifers/outline.go
index a37c6d7..43fb91c 100644
--- a/proxychain/responsemodifers/outline.go
+++ b/proxychain/responsemodifers/outline.go
@@ -1,11 +1,14 @@
 package responsemodifers
 
 import (
+	"bytes"
+	"encoding/json"
 	"github.com/go-shiori/dom"
 	"github.com/markusmobius/go-trafilatura"
+	"golang.org/x/net/html"
 	"io"
 	"ladder/proxychain"
-	"strings"
+	//"strings"
 )
 
 // Outline creates an JSON representation of the article
@@ -26,10 +29,158 @@ func Outline() proxychain.ResponseModification {
 			return err
 		}
 
-		doc := trafilatura.CreateReadableDocument(result)
-		reader := io.NopCloser(strings.NewReader(dom.OuterHTML(doc)))
-		chain.Response.Body = reader
+		doc := createJSONDocument(result)
+		jsonData, err := json.MarshalIndent(doc, "", " ")
+		if err != nil {
+			return err
+		}
+		buf := bytes.NewBuffer(jsonData)
+		//doc := trafilatura.CreateReadableDocument(result)
+		//reader := io.NopCloser(strings.NewReader(dom.OuterHTML(doc)))
+		chain.Response.Body = io.NopCloser(buf)
 
 		return nil
 	}
 }
+
+// =======================================================================================
+// credit @joncrangle https://github.com/everywall/ladder/issues/38#issuecomment-1831252934
+
+// ImageContent is the JSON element parseContent emits for an "img" node.
+type ImageContent struct {
+	Type    string `json:"type"`
+	URL     string `json:"url"`
+	Alt     string `json:"alt"`
+	Caption string `json:"caption"`
+}
+
+// LinkContent is the JSON element parseContent emits for an "a" node.
+type LinkContent struct {
+	Type string `json:"type"`
+	Href string `json:"href"`
+	Data string `json:"data"`
+}
+
+// TextContent is the JSON element parseContent emits for h1-h3 headings and,
+// with Type "p", as the fallback for every other node.
+type TextContent struct {
+	Type string `json:"type"`
+	Data string `json:"data"`
+}
+
+// JSONDocument is the top-level value that Outline marshals into the
+// response body.
+type JSONDocument struct {
+	Success bool `json:"success"`
+	Error   struct {
+		Message string `json:"message"`
+		Type    string `json:"type"`
+		Cause   string `json:"cause"`
+	} `json:"error"`
+	Metadata struct {
+		Title       string   `json:"title"`
+		Author      string   `json:"author"`
+		URL         string   `json:"url"`
+		Hostname    string   `json:"hostname"`
+		Description string   `json:"description"`
+		Sitename    string   `json:"sitename"`
+		Date        string   `json:"date"`
+		Categories  []string `json:"categories"`
+		Tags        []string `json:"tags"`
+		License     string   `json:"license"`
+	} `json:"metadata"`
+	Content  []interface{} `json:"content"`
+	Comments string        `json:"comments"`
+}
+
+// createJSONDocument copies the metadata of a trafilatura extraction into a
+// JSONDocument, converts the content node with parseContent and stores the
+// comments node as an HTML string. Success is always true; Error is left
+// at its zero value.
+func createJSONDocument(extract *trafilatura.ExtractResult) *JSONDocument {
+	jsonDoc := &JSONDocument{}
+
+	// Populate success
+	jsonDoc.Success = true
+
+	// Populate metadata
+	jsonDoc.Metadata.Title = extract.Metadata.Title
+	jsonDoc.Metadata.Author = extract.Metadata.Author
+	jsonDoc.Metadata.URL = extract.Metadata.URL
+	jsonDoc.Metadata.Hostname = extract.Metadata.Hostname
+	jsonDoc.Metadata.Description = extract.Metadata.Description
+	jsonDoc.Metadata.Sitename = extract.Metadata.Sitename
+	// Leave Date empty when it is unset instead of emitting "0001-01-01".
+	if !extract.Metadata.Date.IsZero() {
+		jsonDoc.Metadata.Date = extract.Metadata.Date.Format("2006-01-02")
+	}
+	jsonDoc.Metadata.Categories = extract.Metadata.Categories
+	jsonDoc.Metadata.Tags = extract.Metadata.Tags
+	jsonDoc.Metadata.License = extract.Metadata.License
+
+	// Populate content
+	if extract.ContentNode != nil {
+		jsonDoc.Content = parseContent(extract.ContentNode)
+	}
+
+	// Populate comments
+	if extract.CommentsNode != nil {
+		jsonDoc.Comments = dom.OuterHTML(extract.CommentsNode)
+	}
+
+	return jsonDoc
+}
+
+// parseContent converts the direct children of node into a list of
+// ImageContent, LinkContent and TextContent values, in sibling order.
+// Only direct children are visited; the text of each child is taken with
+// dom.InnerText.
+func parseContent(node *html.Node) []interface{} {
+	var content []interface{}
+
+	for child := node.FirstChild; child != nil; child = child.NextSibling {
+		// Data is the tag name only for element nodes; for text and comment
+		// nodes it holds their content, which must not be matched as a tag.
+		tag := ""
+		if child.Type == html.ElementNode {
+			tag = child.Data
+		}
+
+		switch tag {
+		case "img":
+			image := ImageContent{
+				Type:    "img",
+				URL:     dom.GetAttribute(child, "src"),
+				Alt:     dom.GetAttribute(child, "alt"),
+				Caption: dom.GetAttribute(child, "caption"),
+			}
+			content = append(content, image)
+
+		case "a":
+			link := LinkContent{
+				Type: "a",
+				Href: dom.GetAttribute(child, "href"),
+				Data: dom.InnerText(child),
+			}
+			content = append(content, link)
+
+		case "h1", "h2", "h3":
+			text := TextContent{
+				Type: tag,
+				Data: dom.InnerText(child),
+			}
+			content = append(content, text)
+
+			// continue with other tags
+
+		default:
+			text := TextContent{
+				Type: "p",
+				Data: dom.InnerText(child),
+			}
+			content = append(content, text)
+		}
+	}
+
+	return content
+}