Files
foldsite/src/rendering/helpers.py
Tanishq Dubey fa46d82874
All checks were successful
Datadog Software Composition Analysis / Datadog SBOM Generation and Upload (push) Successful in 24s
Datadog Secrets Scanning / Datadog Static Analyzer (push) Successful in 39s
Datadog Static Analysis / Datadog Static Analyzer (push) Successful in 25s
exif error handling, release action
2025-03-14 22:31:12 -04:00

259 lines
9.8 KiB
Python

from dataclasses import dataclass
from src.config.config import Configuration
from src.rendering import GENERIC_FILE_MAPPING
from src.rendering.markdown import (
render_markdown,
read_raw_markdown,
rendered_markdown_to_plain_text,
)
from enum import Enum
from PIL import Image, ExifTags
from datetime import datetime
import frontmatter
@dataclass
class ImageMetadata:
    """
    Metadata describing a single image file.

    Attributes:
    ----------
    width : int
        Image width in pixels.
    height : int
        Image height in pixels.
    alt : str
        Alternative text for the image.
    exif : dict
        EXIF data for the image; an empty dict when none is available.
    """

    width: int
    height: int
    alt: str
    exif: dict
@dataclass
class MarkdownMetadata:
    """
    A class to represent metadata for a Markdown file.

    Attributes:
    ----------
    frontmatter : dict
        A dictionary containing the front matter of the Markdown file.
    content : str
        The main (rendered) content of the Markdown file.
    preview : str
        A preview or summary of the Markdown content.
    rawContent : str
        The unrendered Markdown source. Defaults to an empty string.
    rawText : str
        The rendered content reduced to plain text. Defaults to an empty
        string.
    """

    frontmatter: dict
    content: str
    preview: str
    # These two are assigned on instances elsewhere in this module; declaring
    # them (with defaults, so positional construction with three arguments
    # keeps working) makes them part of repr/eq/asdict and guarantees the
    # attributes always exist.
    rawContent: str = ""
    rawText: str = ""
@dataclass
class FileMetadata:
    """
    Metadata wrapper for a non-image file.

    Attributes:
    ----------
    typeMeta : MarkdownMetadata | None
        Type-specific metadata; ``None`` when there is nothing type-specific
        to record for the file.
    """

    typeMeta: MarkdownMetadata | None
@dataclass
class TemplateFile:
    """
    One entry (file or directory) of a content folder, plus its metadata.

    Attributes:
    ----------
    name : str
        The name of the entry, including any extension.
    path : str
        The path of the entry.
    proper_name : str
        The proper (display) name of the entry.
    extension : str
        The file extension.
    categories : list[str]
        Categories associated with the entry.
    date_modified : str
        The date the entry was last modified.
    date_created : str
        The date the entry was created.
    size_kb : int
        The size of the entry in kilobytes.
    metadata : ImageMetadata | FileMetadata | None
        Image metadata, file metadata, or ``None`` when nothing was built.
    dir_item_count : int
        The number of items inside the entry when it is a directory.
    is_dir : bool
        Whether the entry is a directory.
    """

    name: str
    path: str
    proper_name: str
    extension: str
    categories: list[str]
    date_modified: str
    date_created: str
    size_kb: int
    metadata: ImageMetadata | FileMetadata | None
    dir_item_count: int
    is_dir: bool
def format_date(timestamp):
    """
    Format a POSIX timestamp as a ``YYYY-MM-DD`` string in local time.

    Args:
        timestamp: Seconds since the epoch, as accepted by
            ``datetime.fromtimestamp``.

    Returns:
        str: The date portion of the timestamp, e.g. ``"2025-03-14"``.
    """
    moment = datetime.fromtimestamp(timestamp)
    # A format spec on a datetime is handed to strftime, so this is the same
    # "%Y-%m-%d" rendering spelled as an f-string.
    return f"{moment:%Y-%m-%d}"
class TemplateHelpers:
def __init__(self, config: Configuration):
self.config: Configuration = config
def _filter_hidden_files(self, files):
return [f for f in files if not f.name.startswith("___")]
def _build_metadata_for_file(self, path: str, categories: list[str] = []):
file_path = self.config.content_dir / path
for k in categories:
if k == "image":
img = Image.open(file_path)
exif = img._getexif()
# Conver exif to dict
orientation = exif.get(274, 1) if exif else 1
width, height = img.width, img.height
if orientation in [5, 6, 7, 8]:
width, height = height, width
exif = {}
try:
img = Image.open(file_path)
exif_raw = img._getexif()
if exif_raw:
exif = {
ExifTags.TAGS[k]: v
for k, v in exif_raw.items()
if k in ExifTags.TAGS
}
except Exception as e:
print(f"Error processing image {file_path}: {e}")
date_taken = exif.get("DateTimeOriginal")
if not date_taken:
date_taken = format_date(file_path.stat().st_ctime)
return ImageMetadata(
width=width,
height=height,
alt=file_path.name,
exif=exif,
)
elif k == "document":
ret = None
with open(file_path, "r") as fdoc:
ret = FileMetadata(None)
if file_path.suffix[1:].lower() == "md":
ret.typeMeta = MarkdownMetadata({}, "", "")
content, c_frontmatter, obj = render_markdown(file_path)
ret.typeMeta.frontmatter = c_frontmatter
ret.typeMeta.content = content
ret.typeMeta.rawContent = read_raw_markdown(file_path)
ret.typeMeta.rawText = rendered_markdown_to_plain_text(
ret.typeMeta.content
)
ret.typeMeta.preview = ret.typeMeta.rawText[:500] + "..."
return ret
return None
def get_folder_contents(self, path: str = ""):
"""
Retrieve the contents of a folder and return a list of TemplateFile objects.
Args:
path (str): The relative path to the folder within the content directory. Defaults to an empty string,
which refers to the root content directory.
Returns:
list: A list of TemplateFile objects representing the files and directories within the specified folder.
The function performs the following steps:
1. Constructs the full path to the folder by combining the content directory with the provided path.
2. Retrieves all files and directories within the specified folder.
3. Iterates over each file and directory, creating a TemplateFile object with metadata such as name,
path, proper name, extension, categories, date modified, date created, size in KB, metadata, directory
item count, and whether it is a directory.
4. If the item is a file, it assigns categories based on the file extension using a predefined mapping.
5. Builds additional metadata for each file.
6. Filters out hidden files from the list.
7. Returns the list of TemplateFile objects.
"""
search_contnet_path = self.config.content_dir / path
files = search_contnet_path.glob("*")
ret = []
for f in files:
t = TemplateFile(
name=f.name,
path=str(f.relative_to(self.config.content_dir)),
proper_name=f.stem,
extension=f.suffix.lower(),
categories=[],
date_modified=format_date(f.stat().st_mtime),
date_created=format_date(f.stat().st_ctime),
size_kb=f.stat().st_size / 1024,
metadata=None,
dir_item_count=len(list(f.glob("*"))) if f.is_dir() else 0,
is_dir=f.is_dir(),
)
if f.is_file():
for k, v in GENERIC_FILE_MAPPING.items():
if f.suffix[1:].lower() in v:
t.categories.append(k)
t.metadata = self._build_metadata_for_file(f, t.categories)
if "image" in t.categories:
# Adjust date_modified and date_created to be the date the image was taken from exif if available
if t.metadata.exif and "DateTimeOriginal" in t.metadata.exif:
t.date_modified = t.metadata.exif["DateTimeOriginal"]
t.date_created = t.metadata.exif["DateTimeOriginal"]
ret.append(t)
ret = self._filter_hidden_files(ret)
return ret
def get_sibling_content_files(self, path: str = ""):
"""
Retrieves a list of sibling content files in the specified directory.
Args:
path (str): The relative path within the content directory to search for files.
Defaults to an empty string, which means the root of the content directory.
Returns:
list: A list of tuples, where each tuple contains the file name and its relative path
to the content directory. Only files that do not start with "___" are included.
"""
search_contnet_path = self.config.content_dir / path
files = search_contnet_path.glob("*")
return [
(file.name, str(file.relative_to(self.config.content_dir)))
for file in files
if file.is_file() and not file.name.startswith("___")
]
def get_text_document_preview(self, path: str):
"""
Generates a preview of the text document located at the given path.
This method reads the first 100 characters from the specified text file
and returns it as a string. The file path is constructed by combining
the content directory from the configuration with the provided path.
Args:
path (str): The relative path to the text document within the content directory.
Returns:
str: A string containing the first 100 characters of the text document.
Raises:
FileNotFoundError: If the file at the specified path does not exist.
IOError: If an I/O error occurs while reading the file.
"""
file_path = self.config.content_dir / path
with open(file_path, "r") as f:
content = f.read(100)
return content
def get_sibling_content_folders(self, path: str = ""):
"""
Retrieves a list of sibling content folders within a specified directory.
Args:
path (str): A relative path from the content directory to search within. Defaults to an empty string,
which means the search will be conducted in the content directory itself.
Returns:
list of tuple: A list of tuples where each tuple contains the folder name and its relative path
to the content directory. Only directories that do not start with "___" are included.
"""
search_contnet_path = self.config.content_dir / path
files = search_contnet_path.glob("*")
return [
(file.name, str(file.relative_to(self.config.content_dir)))
for file in files
if file.is_dir() and not file.name.startswith("___")
]