From c99bced56e3e640b1f10b8f4a420199f4420b642 Mon Sep 17 00:00:00 2001 From: Tanishq Dubey Date: Wed, 9 Jul 2025 18:28:27 -0400 Subject: [PATCH] Many fixes? --- src/config/config.py | 48 +++++++-- src/rendering/helpers.py | 74 ++++++++------ src/rendering/image.py | 4 +- src/rendering/renderer.py | 32 +++--- src/routes/routes.py | 202 +++++++++++++++++++++++--------------- src/server/server.py | 72 +++++++++++++- 6 files changed, 300 insertions(+), 132 deletions(-) diff --git a/src/config/config.py b/src/config/config.py index 44674ba..07ee16b 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -6,6 +6,34 @@ TEMPLATES_DIR = None STYLES_DIR = None class Configuration: + """ + Configuration class for loading and validating application settings from a TOML file. + This class encapsulates the logic for reading configuration data from a specified TOML file, + validating the presence of required sections and keys, and exposing configuration values as + instance attributes. The configuration file is expected to contain at least two sections: + 'paths' (with 'content_dir', 'templates_dir', and 'styles_dir') and 'server' (with optional + server-related settings). + Attributes: + config_path (str or Path): Path to the TOML configuration file. + content_dir (Path): Directory containing content files (required). + templates_dir (Path): Directory containing template files (required). + styles_dir (Path): Directory containing style files (required). + listen_address (str): Address for the server to listen on (default: "127.0.0.1"). + listen_port (int): Port for the server to listen on (default: 8080). + debug (bool): Enable or disable debug mode (default: False). + access_log (bool): Enable or disable access logging (default: True). + max_threads (int): Maximum number of server threads (default: 4). + admin_browser (bool): Enable or disable admin browser access (default: False). + admin_password (str): Password for admin access (optional). 
+ Methods: + load_config(): + Loads and validates configuration data from the TOML file specified by `config_path`. + Raises FileNotFoundError if the file does not exist, tomllib.TOMLDecodeError if the file + is not valid TOML, or ValueError if required sections or keys are missing. + Note: + set_globals() is no longer provided; read content_dir, templates_dir, and styles_dir from the + Configuration instance rather than from module-level globals. + """ def __init__(self, config_path): self.config_path = config_path @@ -23,6 +51,19 @@ class Configuration: self.admin_password: str = None def load_config(self): + """ + Loads and validates configuration data from a TOML file specified by `self.config_path`. + This method reads the configuration file, parses its contents, and sets various instance attributes + based on the configuration values. It expects the configuration file to contain at least two sections: + 'paths' and 'server'. The 'paths' section must include 'content_dir', 'templates_dir', and 'styles_dir'. + The 'server' section may include 'listen_address', 'listen_port', 'debug', 'access_log', 'max_threads', + 'admin_browser', and 'admin_password'. If any required section or key is missing, or if the file is + not found or is invalid TOML, an appropriate exception is raised. + Raises: + FileNotFoundError: If the configuration file does not exist. + tomllib.TOMLDecodeError: If the configuration file is not valid TOML. + ValueError: If required sections or keys are missing in the configuration file. 
+ """ try: with open(self.config_path, "rb") as f: self.config_data = tomllib.load(f) @@ -61,11 +102,4 @@ class Configuration: self.max_threads = server.get("max_threads", self.max_threads) self.admin_browser = server.get("admin_browser", self.admin_browser) self.admin_password = server.get("admin_password", self.admin_password) - - def set_globals(self): - global CONTENT_DIR, TEMPLATES_DIR, STYLES_DIR - CONTENT_DIR = self.content_dir - TEMPLATES_DIR = self.templates_dir - STYLES_DIR = self.styles_dir - diff --git a/src/rendering/helpers.py b/src/rendering/helpers.py index 887f74b..3d7e328 100644 --- a/src/rendering/helpers.py +++ b/src/rendering/helpers.py @@ -90,39 +90,55 @@ class TemplateHelpers: return [f for f in files if not f.name.startswith("___")] def _build_metadata_for_file(self, path: str, categories: list[str] = []): + """ + Builds and returns metadata for a given file based on specified categories. + + Args: + path (str): The relative path to the file within the content directory. + categories (list[str], optional): A list of category strings to determine the type of metadata to extract. + Supported categories include "image" and "document". + + Returns: + ImageMetadata | FileMetadata | None: + - If "image" is in categories and the file is a valid image, returns an ImageMetadata object containing + width, height, alt text, and EXIF data. + - If "document" is in categories and the file is a document (e.g., Markdown), returns a FileMetadata object + with type-specific metadata such as frontmatter, content, raw content, plain text, and preview. + - Returns None if the file cannot be processed or if no supported category matches. + + Notes: + - For images, EXIF orientation is handled to ensure correct width and height. + - For Markdown documents, frontmatter and content are extracted and a text preview is generated. + - Prints an error message and returns None if image processing fails. 
+ """ file_path = self.config.content_dir / path for k in categories: if k == "image": - img = Image.open(file_path) - exif = img._getexif() - # Conver exif to dict - orientation = exif.get(274, 1) if exif else 1 - width, height = img.width, img.height - if orientation in [5, 6, 7, 8]: - width, height = height, width - - exif = {} try: - img = Image.open(file_path) - exif_raw = img._getexif() - if exif_raw: - exif = { - ExifTags.TAGS[k]: v - for k, v in exif_raw.items() - if k in ExifTags.TAGS - } + with Image.open(file_path) as img: + width, height = img.width, img.height + exif_raw = img._getexif() + + exif = {} + if exif_raw: + orientation = exif_raw.get(0x0112, 1) + if orientation in [5, 6, 7, 8]: + width, height = height, width + exif = { + ExifTags.TAGS[k]: v + for k, v in exif_raw.items() + if k in ExifTags.TAGS + } + + return ImageMetadata( + width=width, + height=height, + alt=file_path.name, + exif=exif, + ) except Exception as e: print(f"Error processing image {file_path}: {e}") - - date_taken = exif.get("DateTimeOriginal") - if not date_taken: - date_taken = format_date(file_path.stat().st_ctime) - return ImageMetadata( - width=width, - height=height, - alt=file_path.name, - exif=exif, - ) + return None elif k == "document": ret = None with open(file_path, "r") as fdoc: @@ -174,7 +190,7 @@ class TemplateHelpers: categories=[], date_modified=format_date(f.stat().st_mtime), date_created=format_date(f.stat().st_ctime), - size_kb=f.stat().st_size / 1024, + size_kb=int(f.stat().st_size / 1024), metadata=None, dir_item_count=len(list(f.glob("*"))) if f.is_dir() else 0, is_dir=f.is_dir(), @@ -233,7 +249,7 @@ class TemplateHelpers: IOError: If an I/O error occurs while reading the file. 
""" file_path = self.config.content_dir / path - with open(file_path, "r") as f: + with open(file_path, "r", encoding="utf-8") as f: content = f.read(100) return content diff --git a/src/rendering/image.py b/src/rendering/image.py index 9d9e74b..c833a4e 100644 --- a/src/rendering/image.py +++ b/src/rendering/image.py @@ -34,9 +34,9 @@ def generate_thumbnail(image_path, resize_percent, min_width, max_width): if orientation == 3: img = img.rotate(180, expand=True) elif orientation == 6: - img = img.rotate(90, expand=True) - elif orientation == 8: img = img.rotate(270, expand=True) + elif orientation == 8: + img = img.rotate(90, expand=True) except (AttributeError, KeyError, IndexError): # cases: image don't have getexif exif = b"" diff --git a/src/rendering/renderer.py b/src/rendering/renderer.py index 6dd19ad..9fe78b7 100644 --- a/src/rendering/renderer.py +++ b/src/rendering/renderer.py @@ -203,22 +203,32 @@ def render_page( ) content = "" + c_frontmatter = None if "document" in category and type == "file": content, c_frontmatter, obj = render_markdown(target_file) if not (template_path / "base.html").exists(): raise Exception("Base template not found") - templates.append(template_path / "base.html") - # Filter templates to only those that exist - for template in templates: - content = render_template_string( - template.read_text(), - content=content, - styles=styles, - currentPath=str(relative_path), - metadata=c_frontmatter if "document" in category and type == "file" else None, - ) + # The first found template is the most specific one for the content. + page_template_path = templates[0] - return content + template_vars = { + "content": content, + "styles": styles, + "currentPath": str(relative_path), + "metadata": c_frontmatter if "document" in category and type == "file" else None, + } + + # First, render the specific page template. 
+ final_content = render_template_string( + page_template_path.read_text(), **template_vars + ) + + # Now, render the base template, providing the result of the page + # template as the 'content' variable. + template_vars["content"] = final_content + return render_template_string( + (template_path / "base.html").read_text(), **template_vars + ) diff --git a/src/routes/routes.py b/src/routes/routes.py index 24254e6..70e909a 100644 --- a/src/routes/routes.py +++ b/src/routes/routes.py @@ -7,70 +7,105 @@ import os class RouteManager: + """ + RouteManager is responsible for handling and validating file system paths for serving content, styles, and static files in a web application. It ensures that all requested paths are securely resolved within configured base directories, prevents path traversal attacks, and restricts access to hidden files or folders. + + Args: + config (Configuration): The configuration object containing directory paths for content, templates, and styles. + + Methods: + _validate_and_sanitize_path(base_dir, requested_path_str): + Validates and sanitizes a requested path to ensure it is within the specified base directory and not a hidden file/folder. Returns a resolved Path object or None if invalid. + + _ensure_route(path): + Ensures the given path is valid and returns the corresponding Path object. Raises an Exception if the path is illegal. + + default_route(path): + Handles the default route for serving content files. Returns a rendered page or an error page if the path is invalid or not found. + + get_style(path): + Serves style files from the styles directory. Returns the file or an error page if the path is invalid or not found. + + get_static(path): + Serves static files from the content directory. If the file is an image, generates and returns a thumbnail. Returns the file or an error page if the path is invalid or not found. 
+ """ + def __init__(self, config: Configuration): self.config = config - def _validate_and_sanitize_path(self, base_dir, requested_path): + def _validate_and_sanitize_path(self, base_dir, requested_path_str: str): """ - Validate and sanitize the requested path to ensure it does not traverse above the base directory. + Validates and sanitizes a requested file system path to ensure it is safe and allowed. - :param base_dir: The base directory that the requested path should be within. - :param requested_path: The requested file path to validate. - :return: A secure version of the requested path if valid, otherwise None. + This method resolves the requested path relative to a given base directory, ensuring: + - The resolved path exists. + - The resolved path is within the base directory (prevents directory traversal attacks). + - The path does not access hidden files or directories (those starting with '___'). + + Args: + base_dir (str or Path): The base directory against which the requested path is resolved. + requested_path_str (str): The user-supplied path to validate and sanitize. + + Returns: + Path or None: The resolved and validated Path object if the path is safe and allowed; + otherwise, None if the path is invalid, does not exist, attempts traversal, + or accesses hidden files/directories. """ - # Normalize both paths - base_dir = Path(base_dir) - requested_path: Path = base_dir / requested_path + try: + base_dir = Path(base_dir).resolve(strict=True) + # a requested path of "" or "." should resolve to the base directory + if not requested_path_str: + requested_path_str = "." + secure_path = (base_dir / requested_path_str).resolve(strict=True) + except FileNotFoundError: + return None # Path does not exist - # Check if the requested path is within the base directory - if requested_path < base_dir: + # The most important check: ensure the resolved path is inside the base directory. 
+ if not secure_path.is_relative_to(base_dir): + print(f"Illegal path traversal attempt: {requested_path_str}") return None - # Ensure the path does not contain any '..' or '.' components - secure_path = os.path.relpath(requested_path, base_dir) - secure_path_parts = secure_path.split(os.sep) - - for part in secure_path_parts: - if part == "." or part == "..": - print("Illegal path nice try") - return None - - # Reconstruct the secure path - secure_path = os.path.join(base_dir, *secure_path_parts) - secure_path = Path(secure_path) - - # Check if path exists - if not secure_path.exists(): - raise Exception("Illegal path") - - for part in secure_path.parts: - if part.startswith("___"): - print("hidden file") - raise Exception("Illegal path") + # Check for hidden files/folders (starting with '___') + relative_parts = secure_path.relative_to(base_dir).parts + # Also check the final component for the case where path is the base_dir itself. + if any( + part.startswith("___") for part in relative_parts + ) or secure_path.name.startswith("___"): + print(f"Illegal access to hidden path: {requested_path_str}") + return None return secure_path def _ensure_route(self, path: str): - file_path: Path = self.config.content_dir / (path if path else "index.md") - if file_path < self.config.content_dir: - raise Exception("Illegal path") - - if not self._validate_and_sanitize_path( - self.config.content_dir, str(file_path) - ): + file_path = self._validate_and_sanitize_path(self.config.content_dir, path) + if not file_path: raise Exception("Illegal path") + return file_path def default_route(self, path: str): + """ + Handles the default route for serving content pages. + + Attempts to resolve the given path to a file within the content directory. + If the path is empty, defaults to "index.md". If the file is not found or an error occurs, + renders a 404 error page. Otherwise, renders the requested page using the specified + template and style directories. 
+ + Args: + path (str): The requested path to resolve and serve. + + Returns: + Response: The rendered page or an error page if the file is not found. + """ try: - self._ensure_route(path) - except Exception as e: + file_path = self._ensure_route(path if path else "index.md") + except Exception as _: return render_error_page( 404, "Not Found", "The requested resource was not found on this server.", self.config.templates_dir, ) - file_path: Path = self.config.content_dir / (path if path else "index.md") return render_page( file_path, base_path=self.config.content_dir, @@ -79,19 +114,45 @@ class RouteManager: ) def get_style(self, path: str): - try: - self._validate_and_sanitize_path(self.config.styles_dir, path) - except Exception as e: + """ + Retrieves and serves a style file from the configured styles directory. + + Args: + path (str): The relative path to the requested style file. + + Returns: + Response: A Flask response object containing the requested file if found, + or an error page with a 404 status code if the file does not exist. + """ + file_path = self._validate_and_sanitize_path(self.config.styles_dir, path) + if not file_path: return render_error_page( 404, "Not Found", - f"The requested resource was not found on this server. {e}", + "The requested resource was not found on this server.", self.config.templates_dir, ) - file_path: Path = self.config.styles_dir / path - if file_path.exists(): - return send_file(file_path) - else: + return send_file(file_path) + + def get_static(self, path: str): + """ + Serves static files from the configured content directory. + + If the requested file is an image (JPEG, PNG, or GIF), generates and returns a thumbnail + with a maximum width specified by the 'max_width' query parameter (default: 2048). + Otherwise, serves the file as-is. + + Args: + path (str): The relative path to the requested static file. + + Returns: + Response: + - If the file is not found or invalid, returns a rendered 404 error page. 
+ - If the file is an image, returns the thumbnail bytes with appropriate headers. + - Otherwise, returns the file using Flask's send_file. + """ + file_path = self._validate_and_sanitize_path(self.config.content_dir, path) + if not file_path: return render_error_page( 404, "Not Found", @@ -99,35 +160,18 @@ class RouteManager: self.config.templates_dir, ) - def get_static(self, path: str): - try: - self._validate_and_sanitize_path(self.config.content_dir, path) - except Exception as e: - return render_error_page( - 404, - "Not Found", - "The requested resource was not found on this server.", - self.config.templates_dir, + # Check to see if the file is an image, if it is, render a thumbnail + if file_path.suffix.lower() in [".jpg", ".jpeg", ".png", ".gif"]: + max_width = request.args.get("max_width", default=2048, type=int) + thumbnail_bytes, img_format = generate_thumbnail( + str(file_path), 10, 2048, max_width ) - file_path: Path = self.config.content_dir / path - if file_path.exists(): - # Check to see if the file is an image, if it is, render a thumbnail - if file_path.suffix.lower() in [".jpg", ".jpeg", ".png", ".gif"]: - max_width = request.args.get("max_width", default=2048, type=int) - thumbnail_bytes, img_format = generate_thumbnail( - str(file_path), 10, 2048, max_width - ) - return ( - thumbnail_bytes, - 200, - {"Content-Type": f"image/{img_format.lower()}", - "cache-control": "public, max-age=31536000"}, - ) - return send_file(file_path) - else: - return render_error_page( - 404, - "Not Found", - "The requested resource was not found on this server.", - self.config.templates_dir, + return ( + thumbnail_bytes, + 200, + { + "Content-Type": f"image/{img_format.lower()}", + "cache-control": "public, max-age=31536000", + }, ) + return send_file(file_path) diff --git a/src/server/server.py b/src/server/server.py index da7ed25..eaaf561 100644 --- a/src/server/server.py +++ b/src/server/server.py @@ -6,7 +6,29 @@ import multiprocessing class 
Server(BaseApplication): - + """ + Server class for managing a Flask web application with Gunicorn integration. + This class extends BaseApplication to provide a configurable server environment + for Flask applications. It supports custom template functions, dynamic worker/thread + configuration, and flexible server options. + Attributes: + debug (bool): Enables or disables debug mode for the Flask app. + host (str): The hostname or IP address to bind the server to. + port (int): The port number to listen on. + app (Flask): The Flask application instance. + application (Flask): Alias for the Flask application instance. + options (dict): Gunicorn server options such as bind address, reload, threads, and access log. + Methods: + __init__(self, debug=True, host="0.0.0.0", port=8080, template_functions=None, workers=..., access_log=True, options=None): + Initializes the Server instance with the specified configuration and registers template functions. + register_template_function(self, name, func): + Registers a Python function to be available in Jinja2 templates. + load_config(self): + Loads configuration options from self.options into the Gunicorn config object. + load(self): + Returns the Flask application instance managed by the server. + register_route(self, route, func, defaults=None): Registers a new route (URL rule) with the Flask application. + """ def __init__( self, debug: bool = True, @@ -32,17 +54,42 @@ class Server(BaseApplication): "threads": workers, "accesslog": "-" if access_log else None, } - for name, func in template_functions.items(): - self.register_template_function(name, func) super().__init__() + for name, func in template_functions.items(): self.register_template_function(name, func) - super(Server, self).__init__() def register_template_function(self, name, func): + """ + Register a function to be available in Jinja2 templates. + + This method adds a Python function to the Jinja2 environment's globals, + making it available for use in all templates rendered by the application. 
+ + Parameters: + ---------- + name : str + The name under which the function will be accessible in templates + func : callable + The Python function to register + + Examples: + -------- + >>> server.register_template_function('format_date', lambda d: d.strftime('%Y-%m-%d')) + >>> # In template: {{ format_date(some_date) }} + """ self.app.jinja_env.globals.update({name: func}) def load_config(self): + """ + Loads configuration options from self.options into self.cfg. + + This method filters out options that are not in self.cfg.settings or have None values. + The filtered options are then set in the configuration object (self.cfg) with lowercase keys. + + Returns: + None + """ config = { key: value for key, value in self.options.items() @@ -52,7 +99,24 @@ class Server(BaseApplication): self.cfg.set(key.lower(), value) def load(self): + """ + Returns the application instance associated with the server. + + Returns: + Application: The application object managed by the server. + """ return self.application def register_route(self, route, func, defaults=None): + """ + Registers a new route with the Flask application. + + Args: + route (str): The URL route to register. + func (callable): The view function to associate with the route. + defaults (dict, optional): A dictionary of default values for the route variables. Defaults to None. + + Returns: + None + """ self.app.add_url_rule(route, func.__name__, func, defaults=defaults)