#!/usr/bin/env -S uv run --script
# /// script
# dependencies = [
# "numpy",
# "scipy",
# "Pillow",
# "imageio",
# "rawpy",
# "colour-science",
# "torch",
# "warp-lang",
# "scikit-image",
# ]
# [tool.uv.sources]
# torch = [{ index = "pytorch-cu128" }]
# [[tool.uv.index]]
# name = "pytorch-cu128"
# url = "https://download.pytorch.org/whl/cu128"
# explicit = true
# ///
import argparse
import json
import math
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import List, Optional
import colour
import imageio.v3 as iio
import numpy as np
import rawpy
import torch
import torch.nn.functional as F
import warp as wp
from colour.colorimetry import SDS_ILLUMINANTS
from scipy.integrate import quad
from scipy.interpolate import interp1d
from scipy.ndimage import (gaussian_filter, gaussian_filter1d, # For LUTs
map_coordinates)
from scipy.signal.windows import gaussian # For creating Gaussian kernel
# --- Configuration ---
EPSILON = 1e-10
LUT_SIZE = 65
GLOBAL_DEBUG = True
RUN_TIMESTAMP = datetime.now().strftime("%Y-%m-%d_%H%M%S")
DEBUG_OUTPUT_DIR = Path(f"debug_outputs/{RUN_TIMESTAMP}")
def save_debug_image(image: np.ndarray, tag: str, output_format: str = "jpeg"):
"""
Saves a debug image at any point in the processing pipeline.
The function takes a NumPy array (assumed to be float data in the [0.0, 1.0] range),
a descriptive tag, and an optional format. It saves the image to a timestamped
sub-directory within a `debug_outputs` folder.
Args:
image (np.ndarray): The image data to save. Expected to be a floating-point
array with values scaled between 0.0 and 1.0.
tag (str): A descriptive name for this processing stage (e.g.,
"after_exposure_lut", "final_linear_image"). This will be
part of the filename.
output_format (str, optional): The desired output format. Can be 'jpeg' (or 'jpg')
for 8-bit JPEG, or 'tiff' (or 'tif') for 16-bit
TIFF. Defaults to 'jpeg'.
"""
global GLOBAL_DEBUG, DEBUG_OUTPUT_DIR
if not GLOBAL_DEBUG:
# If global debug is off, do nothing
return
try:
# 1. Ensure the debug directory for this run exists.
DEBUG_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# 2. Sanitize the tag to create a safe filename.
# Removes invalid characters and replaces spaces with underscores.
safe_tag = (
"".join(c for c in tag if c.isalnum() or c in (" ", "_", "-"))
.rstrip()
.replace(" ", "_")
)
# 3. Create a unique filename with a precise timestamp.
image_timestamp = datetime.now().strftime("%H%M%S_%f")
# 4. Determine file extension and prepare the final image data.
# Clip the float image data to the [0.0, 1.0] range to prevent wrap-around
# errors or artifacts when converting to integer types.
clipped_image = np.clip(image, 0.0, 1.0)
if "tif" in output_format.lower():
# Convert to 16-bit for TIFF
output_image = (clipped_image * 65535.0).astype(np.uint16)
extension = "tiff"
else:
# Convert to 8-bit for JPEG
output_image = (clipped_image * 255.0).astype(np.uint8)
extension = "jpg"
filename = f"{image_timestamp}_{safe_tag}.{extension}"
filepath = DEBUG_OUTPUT_DIR / filename
# 5. Save the image and print a confirmation message.
iio.imwrite(filepath, output_image)
print(f"✅ DEBUG: Saved '{tag}' to '{filepath}'")
except Exception as e:
# If anything goes wrong, print a warning but don't crash the main script.
print(
f"⚠️ DEBUG WARNING: Could not save debug image for tag '{tag}'.\n Reason: {e}"
)
class Info:
name: str
description: str
format_mm: int
version: str
def __init__(
self, name: str, description: str, format_mm: int, version: str
) -> None:
self.name, self.description, self.format_mm, self.version = (
name,
description,
format_mm,
version,
)
class Balance:
r_shift: float
g_shift: float
b_shift: float
def __init__(self, r_shift: float, g_shift: float, b_shift: float) -> None:
self.r_shift, self.g_shift, self.b_shift = r_shift, g_shift, b_shift
class Gamma:
r_factor: float
g_factor: float
b_factor: float
def __init__(self, r_factor: float, g_factor: float, b_factor: float) -> None:
self.r_factor, self.g_factor, self.b_factor = r_factor, g_factor, b_factor
class Processing:
gamma: Gamma
balance: Balance
def __init__(self, gamma: Gamma, balance: Balance) -> None:
self.gamma, self.balance = gamma, balance
class Couplers:
saturation_amount: float
dir_amount_rgb: List[float]
dir_diffusion_um: float
dir_diffusion_interlayer: float
def __init__(
self,
saturation_amount: float,
dir_amount_rgb: List[float],
dir_diffusion_um: float,
dir_diffusion_interlayer: float,
) -> None:
(
self.saturation_amount,
self.dir_amount_rgb,
self.dir_diffusion_um,
self.dir_diffusion_interlayer,
) = (
saturation_amount,
dir_amount_rgb,
dir_diffusion_um,
dir_diffusion_interlayer,
)
class HDCurvePoint:
d: Optional[float]
r: float
g: float
b: float
def __init__(self, d: Optional[float], r: float, g: float, b: float) -> None:
self.d, self.r, self.g, self.b = d, r, g, b
class SpectralSensitivityCurvePoint:
wavelength: float
y: float
m: float
c: float
def __init__(self, wavelength: float, y: float, m: float, c: float) -> None:
self.wavelength, self.y, self.m, self.c = wavelength, y, m, c
class RGBValue:
r: float
g: float
b: float
def __init__(self, r: float, g: float, b: float) -> None:
self.r, self.g, self.b = r, g, b
class Curves:
hd: List[HDCurvePoint]
spectral_sensitivity: List[SpectralSensitivityCurvePoint]
def __init__(
self,
hd: List[HDCurvePoint],
spectral_sensitivity: List[SpectralSensitivityCurvePoint],
) -> None:
self.hd, self.spectral_sensitivity = hd, spectral_sensitivity
class Halation:
strength: RGBValue
size_um: RGBValue
def __init__(self, strength: RGBValue, size_um: RGBValue) -> None:
self.strength, self.size_um = strength, size_um
class Interlayer:
diffusion_um: float
def __init__(self, diffusion_um: float) -> None:
self.diffusion_um = diffusion_um
class Calibration:
iso: int
middle_gray_logE: float
def __init__(self, iso: int, middle_gray_logE: float) -> None:
self.iso, self.middle_gray_logE = iso, middle_gray_logE
class Properties:
halation: Halation
couplers: Couplers
interlayer: Interlayer
curves: Curves
calibration: Calibration
def __init__(
self,
halation: Halation,
couplers: Couplers,
interlayer: Interlayer,
curves: Curves,
calibration: Calibration,
) -> None:
self.halation, self.couplers, self.interlayer, self.curves, self.calibration = (
halation,
couplers,
interlayer,
curves,
calibration,
)
class FilmDatasheet:
info: Info
processing: Processing
properties: Properties
def __init__(
self, info: Info, processing: Processing, properties: Properties
) -> None:
self.info, self.processing, self.properties = info, processing, properties
def _gpu_separable_gaussian_blur(
image_tensor: torch.Tensor, sigma: float, device: str = "cpu"
) -> torch.Tensor:
"""
Applies a fast, separable Gaussian blur to a tensor on a specified device.
Args:
image_tensor (torch.Tensor): The input image tensor, shape (B, C, H, W).
sigma (float): The standard deviation of the Gaussian kernel.
device (str): The device to run on ('cpu', 'cuda', 'mps').
Returns:
torch.Tensor: The blurred image tensor.
"""
kernel_radius = int(math.ceil(3 * sigma))
kernel_size = 2 * kernel_radius + 1
# Create a 1D Gaussian kernel
x = torch.arange(
-kernel_radius, kernel_radius + 1, dtype=torch.float32, device=device
)
kernel_1d = torch.exp(-0.5 * (x / sigma) ** 2)
kernel_1d /= kernel_1d.sum()
# Get tensor shape
B, C, H, W = image_tensor.shape
# Prepare kernels for 2D convolution
# To blur horizontally, the kernel shape is (C, 1, 1, kernel_size)
kernel_h = kernel_1d.view(1, 1, 1, kernel_size).repeat(C, 1, 1, 1)
# To blur vertically, the kernel shape is (C, 1, kernel_size, 1)
kernel_v = kernel_1d.view(1, 1, kernel_size, 1).repeat(C, 1, 1, 1)
    # Apply horizontal blur with the (1, kernel_size) kernel.
    # F.conv2d padding is (pad_height, pad_width), so pad the width here.
    padding_h = (0, kernel_size // 2)
    blurred_tensor = F.conv2d(image_tensor, kernel_h, padding=padding_h, groups=C)
    # Apply vertical blur with the (kernel_size, 1) kernel: pad the height.
    padding_v = (kernel_size // 2, 0)
    blurred_tensor = F.conv2d(blurred_tensor, kernel_v, padding=padding_v, groups=C)
return blurred_tensor
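# Illustrative sketch (not called by the pipeline): because a 2-D Gaussian is
# separable, blurring rows then columns with the 1-D kernel above matches a
# full 2-D convolution at O(2k) instead of O(k^2) taps per pixel. The shapes
# and values below are arbitrary example inputs.
def _demo_separable_blur():
    img = torch.rand(1, 3, 64, 64)  # (B, C, H, W) example tensor
    blurred = _gpu_separable_gaussian_blur(img, sigma=2.0, device="cpu")
    assert blurred.shape == img.shape  # same-size padding preserves spatial dims
    return blurred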
# --- Datasheet Parsing (unchanged) ---
def parse_datasheet_json(json_filepath) -> FilmDatasheet | None:
    """Parse the film datasheet JSON file into a FilmDatasheet (None on error)."""
if not os.path.exists(json_filepath):
print(f"Error: Datasheet file not found at {json_filepath}", file=sys.stderr)
return None
try:
with open(json_filepath, "r") as jsonfile:
data = json.load(jsonfile)
info = Info(
name=data["info"]["name"],
description=data["info"]["description"],
format_mm=data["info"]["format_mm"],
version=data["info"]["version"],
)
gamma = Gamma(
r_factor=data["processing"]["gamma"]["r_factor"],
g_factor=data["processing"]["gamma"]["g_factor"],
b_factor=data["processing"]["gamma"]["b_factor"],
)
balance = Balance(
r_shift=data["processing"]["balance"]["r_shift"],
g_shift=data["processing"]["balance"]["g_shift"],
b_shift=data["processing"]["balance"]["b_shift"],
)
processing = Processing(gamma=gamma, balance=balance)
halation = Halation(
strength=RGBValue(
r=data["properties"]["halation"]["strength"]["r"],
g=data["properties"]["halation"]["strength"]["g"],
b=data["properties"]["halation"]["strength"]["b"],
),
size_um=RGBValue(
r=data["properties"]["halation"]["size_um"]["r"],
g=data["properties"]["halation"]["size_um"]["g"],
b=data["properties"]["halation"]["size_um"]["b"],
),
)
couplers = Couplers(
saturation_amount=data["properties"]["couplers"]["saturation_amount"],
dir_amount_rgb=data["properties"]["couplers"]["dir_amount_rgb"],
dir_diffusion_um=data["properties"]["couplers"]["dir_diffusion_um"],
dir_diffusion_interlayer=data["properties"]["couplers"][
"dir_diffusion_interlayer"
],
)
interlayer = Interlayer(
diffusion_um=data["properties"]["interlayer"]["diffusion_um"]
)
calibration = Calibration(
iso=data["properties"]["calibration"]["iso"],
middle_gray_logE=data["properties"]["calibration"]["middle_gray_logE"],
)
curves = Curves(
hd=[
HDCurvePoint(d=point["d"], r=point["r"], g=point["g"], b=point["b"])
for point in data["properties"]["curves"]["hd"]
],
spectral_sensitivity=[
SpectralSensitivityCurvePoint(
wavelength=point["wavelength"],
y=point["y"],
m=point["m"],
c=point["c"],
)
for point in data["properties"]["curves"]["spectral_sensitivity"]
],
)
properties = Properties(
calibration=calibration,
halation=halation,
couplers=couplers,
interlayer=interlayer,
curves=curves,
)
return FilmDatasheet(
info=info, processing=processing, properties=properties
)
except Exception as e:
print(f"Error parsing datasheet JSON '{json_filepath}': {e}", file=sys.stderr)
return None
# --- Core Simulation Functions ---
def apply_3d_lut(image: np.ndarray, lut: np.ndarray, device="cpu") -> np.ndarray:
"""
Applies a 3D LUT to an image using PyTorch's grid_sample.
Args:
image: Input image, shape (H, W, 3), values in [0, 1].
lut: 3D LUT, shape (N, N, N, 3).
device: 'cpu' or 'cuda' for GPU acceleration.
Returns:
Output image, shape (H, W, 3).
"""
# Ensure data is float32, which is standard for torch
if image.dtype != np.float32:
image = image.astype(np.float32)
if lut.dtype != np.float32:
lut = lut.astype(np.float32)
# Convert to torch tensors and move to the target device
image_torch = torch.from_numpy(image).to(device)
lut_torch = torch.from_numpy(lut).to(device)
# Prepare image coordinates for grid_sample
# grid_sample expects coordinates in the range [-1, 1]
# The image is used as the grid of sampling coordinates
# We add a batch and depth dimension to match the 5D input requirement
# Shape: (H, W, 3) -> (1, 1, H, W, 3)
    grid = image_torch * 2 - 1  # Scale from [0, 1] to [-1, 1]
    # grid_sample reads the last grid dimension as (x, y, z) mapping to the
    # (W, H, D) axes of the input. After the permute below, the LUT axes are
    # (D, H, W) = (R, G, B), so reverse the channel order so that the R
    # coordinate indexes the depth axis and B the width axis.
    grid = torch.flip(grid, dims=[-1])
    grid = grid.unsqueeze(0).unsqueeze(0)  # Add batch and depth dims
# Prepare LUT for grid_sample
# grid_sample expects the input grid to be (N, C, D_in, H_in, W_in)
# Our LUT is (N, N, N, 3), which is (D, H, W, C)
# Permute to (C, D, H, W) -> (3, N, N, N) and add a batch dimension
# Shape: (N, N, N, 3) -> (1, 3, N, N, N)
lut_torch = lut_torch.permute(3, 0, 1, 2).unsqueeze(0)
# Apply the LUT
# align_corners=True ensures that the corners of the LUT cube map to -1 and 1
result_torch = F.grid_sample(lut_torch, grid, mode="bilinear", align_corners=True)
# Reshape the output and convert back to numpy
# Shape: (1, 3, 1, H, W) -> (H, W, 3)
result_numpy = result_torch.squeeze().permute(1, 2, 0).cpu().numpy()
return result_numpy
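# Illustrative sanity check (not called by the pipeline): an identity LUT,
# where lut[i, j, k] = (r_i, g_j, b_k), should return the input unchanged up
# to interpolation error. The test image here is an arbitrary example input.
def _demo_identity_lut():
    pts = np.linspace(0.0, 1.0, LUT_SIZE, dtype=np.float32)
    r, g, b = np.meshgrid(pts, pts, pts, indexing="ij")
    identity_lut = np.stack([r, g, b], axis=-1)
    test = np.random.rand(16, 16, 3).astype(np.float32)
    out = apply_3d_lut(test, identity_lut)
    assert np.allclose(out, test, atol=1e-3)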
def create_exposure_lut(
spectral_data: list[SpectralSensitivityCurvePoint],
ref_illuminant_spd,
middle_gray_logE: float,
size: int,
) -> np.ndarray:
"""Generates a 3D LUT for the linear sRGB -> Log Exposure conversion."""
print(f"Generating {size}x{size}x{size} exposure LUT...")
# Create a grid of sRGB input values
grid_points = np.linspace(0.0, 1.0, size)
r, g, b = np.meshgrid(grid_points, grid_points, grid_points, indexing="ij")
# Reshape grid into an "image" for batch processing
srgb_grid = np.stack([r, g, b], axis=-1).reshape(
-1, 3
) # Shape: (size*size*size, 3)
srgb_grid = srgb_grid.reshape(size, size * size, 3) # Make it 2D for the function
# Run the full spectral calculation on this grid
lut_data = calculate_film_log_exposure(
srgb_grid, spectral_data, ref_illuminant_spd, middle_gray_logE, EPSILON
)
# Reshape back into a 3D LUT
return lut_data.reshape(size, size, size, 3)
def create_density_lut(
processing: Processing,
hd_curve: List[HDCurvePoint],
middle_gray_logE: float,
log_exposure_range: tuple,
size: int,
) -> np.ndarray:
"""Generates a 3D LUT for the Log Exposure -> Density conversion."""
print(
f"Generating {size}x{size}x{size} density LUT for range {log_exposure_range}..."
)
le_min, le_max = log_exposure_range
grid_points = np.linspace(le_min, le_max, size)
le_r, le_g, le_b = np.meshgrid(grid_points, grid_points, grid_points, indexing="ij")
log_exposure_grid = np.stack([le_r, le_g, le_b], axis=-1)
lut_data = apply_hd_curves(
log_exposure_grid, processing, hd_curve, middle_gray_logE
)
return lut_data
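# Usage note: this LUT is indexed by *normalized* log exposure, so callers
# must first map values through (logE - le_min) / (le_max - le_min), as the
# pipeline below does. Worked example (hypothetical range): with a range of
# (-2.0, 1.0), a logE of -0.5 looks up coordinate (-0.5 - (-2.0)) / 3.0 = 0.5.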
def compute_inhibitor_matrix(
amount_rgb: List[float], diffusion_interlayer: float
) -> np.ndarray:
matrix = np.eye(3)
if diffusion_interlayer > 0:
matrix = gaussian_filter1d(
matrix, sigma=diffusion_interlayer, axis=0, mode="constant", cval=0
)
matrix = gaussian_filter1d(
matrix, sigma=diffusion_interlayer, axis=1, mode="constant", cval=0
)
row_sums = matrix.sum(axis=1)
row_sums[row_sums == 0] = 1 # Avoid division by zero
matrix = matrix / row_sums[:, np.newaxis]
return matrix * np.array(amount_rgb)[:, np.newaxis]
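# Worked example (hypothetical amounts): with no interlayer diffusion the
# inhibitor matrix is simply diag(amount_rgb), i.e. each layer only inhibits
# itself; diffusion > 0 leaks a normalized share of that inhibition into the
# neighbouring rows/columns of the 3x3 matrix.
#
#   compute_inhibitor_matrix([0.2, 0.3, 0.25], 0.0)
#   -> [[0.2, 0.0,  0.0 ],
#       [0.0, 0.3,  0.0 ],
#       [0.0, 0.0,  0.25]]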
def compute_uncoupled_hd_curves(
hd_curve_data: List[HDCurvePoint], inhibitor_matrix: np.ndarray
) -> List[HDCurvePoint]:
log_E_values = np.array([p.d for p in hd_curve_data if p.d is not None])
density_r, density_g, density_b = (
np.array([p.r for p in hd_curve_data]),
np.array([p.g for p in hd_curve_data]),
np.array([p.b for p in hd_curve_data]),
)
source_density_matrix = np.tile(density_g[:, np.newaxis], (1, 3))
inhibitor_effect = source_density_matrix @ inhibitor_matrix
uncoupled_r = np.interp(
log_E_values, log_E_values - inhibitor_effect[:, 0], density_r
)
uncoupled_g = np.interp(
log_E_values, log_E_values - inhibitor_effect[:, 1], density_g
)
uncoupled_b = np.interp(
log_E_values, log_E_values - inhibitor_effect[:, 2], density_b
)
return [
HDCurvePoint(d=log_e, r=uncoupled_r[i], g=uncoupled_g[i], b=uncoupled_b[i])
for i, log_e in enumerate(log_E_values)
]
def apply_dir_coupler_simulation(
log_exposure_rgb,
naive_density_rgb,
inhibitor_matrix,
diffusion_um,
film_format_mm,
image_width_px,
):
diffusion_pixels = um_to_pixels(diffusion_um, image_width_px, film_format_mm)
inhibitor_signal_diffused = (
gaussian_filter(
naive_density_rgb, sigma=(diffusion_pixels, diffusion_pixels, 0)
)
if diffusion_pixels > EPSILON
else naive_density_rgb
)
inhibitor_effect = np.einsum(
"...s, sm -> ...m", inhibitor_signal_diffused, inhibitor_matrix
)
return log_exposure_rgb - inhibitor_effect
def um_to_pixels(sigma_um, image_width_px, film_format_mm):
if film_format_mm <= 0 or image_width_px <= 0:
return 0
microns_per_pixel = (film_format_mm * 1000.0) / image_width_px
return sigma_um / microns_per_pixel
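# Worked example (hypothetical numbers): scanning a 36 mm-wide frame at
# 3600 px gives 36 * 1000 / 3600 = 10 um per pixel, so a 25 um diffusion
# radius becomes um_to_pixels(25, 3600, 36) == 2.5 pixels of Gaussian sigma.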
def apply_hd_curves(
log_exposure_rgb,
processing: Processing,
hd_curve: List[HDCurvePoint],
middle_gray_logE: float,
) -> np.ndarray:
density_rgb, gamma_factors, balance_shifts = (
np.zeros_like(log_exposure_rgb),
[
processing.gamma.r_factor,
processing.gamma.g_factor,
processing.gamma.b_factor,
],
[
processing.balance.r_shift,
processing.balance.g_shift,
processing.balance.b_shift,
],
)
log_e_points = [p.d for p in hd_curve if p.d is not None]
min_logE, max_logE = log_e_points[0], log_e_points[-1]
for i, channel in enumerate(["R", "G", "B"]):
gamma_factor = gamma_factors[i]
log_exposure_adjusted = middle_gray_logE + (
log_exposure_rgb[..., i] - middle_gray_logE
) / (gamma_factor if abs(gamma_factor) > EPSILON else EPSILON)
y_points = (
[p.r for p in hd_curve]
if channel == "R"
else [p.g for p in hd_curve] if channel == "G" else [p.b for p in hd_curve]
)
interp_func = interp1d(
log_e_points,
y_points,
kind="linear",
bounds_error=False,
fill_value=(y_points[0], y_points[-1]), # type: ignore
)
density_rgb[..., i] = np.maximum(
interp_func(np.clip(log_exposure_adjusted, min_logE, max_logE))
+ balance_shifts[i],
0,
)
return density_rgb
def apply_saturation_rgb(image_linear, saturation_factor):
if saturation_factor == 1.0:
return image_linear
luminance = np.einsum(
"...c, c -> ...", image_linear, np.array([0.2126, 0.7152, 0.0722])
)
return np.clip(
np.expand_dims(luminance, axis=-1)
+ saturation_factor * (image_linear - np.expand_dims(luminance, axis=-1)),
0.0,
1.0,
)
def apply_spatial_effects(
image,
film_format_mm,
couplerData: Couplers,
interlayerData: Interlayer,
halationData: Halation,
image_width_px,
):
sigma_pixels_diffusion = um_to_pixels(
interlayerData.diffusion_um, image_width_px, film_format_mm
)
if sigma_pixels_diffusion > EPSILON:
image = gaussian_filter(
image,
sigma=[sigma_pixels_diffusion, sigma_pixels_diffusion, 0],
mode="nearest",
)
halation_applied, blurred_image_halation = False, np.copy(image)
strengths, sizes_um = [
halationData.strength.r,
halationData.strength.g,
halationData.strength.b,
], [halationData.size_um.r, halationData.size_um.g, halationData.size_um.b]
for i in range(3):
sigma_pixels_halation = um_to_pixels(
sizes_um[i], image_width_px, film_format_mm
)
if strengths[i] > EPSILON and sigma_pixels_halation > EPSILON:
halation_applied = True
channel_blurred = gaussian_filter(
image[..., i], sigma=sigma_pixels_halation, mode="nearest"
)
blurred_image_halation[..., i] = (
image[..., i] * (1.0 - strengths[i]) + channel_blurred * strengths[i]
)
return (
np.clip(blurred_image_halation, 0.0, 1.0)
if halation_applied
else np.clip(image, 0.0, 1.0)
)
def calculate_film_log_exposure(
image_linear_srgb,
spectral_data: list[SpectralSensitivityCurvePoint],
ref_illuminant_spd,
middle_gray_logE,
EPSILON,
):
"""
Converts linear sRGB values to per-channel log exposure values that the
film's layers would receive. This version includes per-channel calibration.
"""
common_shape, common_wavelengths = (
colour.SpectralShape(380, 780, 5),
colour.SpectralShape(380, 780, 5).wavelengths,
)
# The order here is critical: C, M, Y dye layers are sensitive to R, G, B light respectively.
sensitivities = np.stack(
[
interp1d(
[p.wavelength for p in spectral_data],
[p.c for p in spectral_data], # Cyan layer is Red-sensitive
bounds_error=False,
fill_value=0,
)(common_wavelengths),
interp1d(
[p.wavelength for p in spectral_data],
[p.m for p in spectral_data], # Magenta layer is Green-sensitive
bounds_error=False,
fill_value=0,
)(common_wavelengths),
interp1d(
[p.wavelength for p in spectral_data],
[p.y for p in spectral_data], # Yellow layer is Blue-sensitive
bounds_error=False,
fill_value=0,
)(common_wavelengths),
],
axis=-1,
)
illuminant_aligned = ref_illuminant_spd.copy().align(common_shape)
mallett_basis_aligned = (
colour.recovery.MSDS_BASIS_FUNCTIONS_sRGB_MALLETT2019.copy().align(common_shape)
)
spectral_reflectance = np.einsum(
"...c, kc -> ...k", image_linear_srgb, mallett_basis_aligned.values
)
light_from_scene = spectral_reflectance * illuminant_aligned.values
film_exposure_values = np.einsum(
"...k, ks -> ...s", light_from_scene, sensitivities
)
gray_reflectance = np.einsum(
"c, kc -> k", np.array([0.184, 0.184, 0.184]), mallett_basis_aligned.values
)
gray_light = gray_reflectance * illuminant_aligned.values
exposure_18_gray_film = np.einsum("k, ks -> s", gray_light, sensitivities)
# --- CORRECTED LOGIC ---
# Instead of a single scalar shift based on the green channel, we calculate
# a vector of three shifts, one for each channel (R, G, B). This ensures
# each layer is independently calibrated against its own response to gray.
log_shift_per_channel = middle_gray_logE - np.log10(exposure_18_gray_film + EPSILON)
# Apply the per-channel shift to the per-channel exposure values.
return np.log10(film_exposure_values + EPSILON) + log_shift_per_channel
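# Illustrative check (not called by the pipeline): by construction, feeding an
# 18% gray patch through this function should land every channel exactly on
# the datasheet's middle_gray_logE, since the per-channel shift is derived
# from that same gray patch.
def _demo_middle_gray_calibration(spectral_data, ref_illuminant_spd, middle_gray_logE):
    gray_patch = np.full((1, 1, 3), 0.184)
    log_e = calculate_film_log_exposure(
        gray_patch, spectral_data, ref_illuminant_spd, middle_gray_logE, EPSILON
    )
    assert np.allclose(log_e, middle_gray_logE, atol=1e-6)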
def apply_spatial_effects_new(
image: np.ndarray,
film_format_mm: int,
interlayerData: Interlayer,
halationData: Halation,
image_width_px: int,
device: str = "cpu",
halation_threshold: float = 0.8,
) -> np.ndarray:
"""
Applies realistic, performant spatial effects (interlayer diffusion and halation).
Args:
image (np.ndarray): Input linear image array (H, W, 3).
film_format_mm (int): Width of the film format in mm.
interlayerData (Interlayer): Object with interlayer diffusion data.
halationData (Halation): Object with halation strength and size data.
image_width_px (int): Width of the input image in pixels.
device (str): Device for torch operations ('cpu', 'cuda', 'mps').
halation_threshold (float): Linear brightness value above which halation occurs.
Returns:
np.ndarray: Image with spatial effects applied.
"""
# 1. Setup: Convert numpy image to a torch tensor for GPU processing
# Tensor shape convention is (Batch, Channels, Height, Width)
image_tensor = (
torch.from_numpy(image.astype(np.float32))
.permute(2, 0, 1)
.unsqueeze(0)
.to(device)
)
# 2. Interlayer Diffusion: A subtle, overall blur simulating light scatter.
sigma_pixels_diffusion = um_to_pixels(
interlayerData.diffusion_um, image_width_px, film_format_mm
)
if sigma_pixels_diffusion > EPSILON:
diffused_tensor = _gpu_separable_gaussian_blur(
image_tensor, sigma_pixels_diffusion, device
)
else:
diffused_tensor = image_tensor
# 3. Halation: A more physically-based model
# a. Create a "halation source" mask from the brightest parts of the image.
    luminance = torch.einsum(
        "bchw, c -> bhw",
        diffused_tensor,
        torch.tensor([0.2126, 0.7152, 0.0722], device=device),
    ).unsqueeze(1)
# Apply threshold: only bright areas contribute to the glow.
halation_source = torch.relu(luminance - halation_threshold)
    # b. Create the colored glow by blurring the thresholded source mask.
    halation_strengths = torch.tensor(
        [halationData.strength.r, halationData.strength.g, halationData.strength.b],
        device=device,
    ).view(1, 3, 1, 1)
    # Per-channel halation radii in pixels. Note: the pyramid below currently
    # uses a fixed per-level sigma as a fast approximation of one large blur,
    # so these per-channel sizes are computed but not applied individually yet.
    halation_sizes_px = [
        um_to_pixels(halationData.size_um.r, image_width_px, film_format_mm),
        um_to_pixels(halationData.size_um.g, image_width_px, film_format_mm),
        um_to_pixels(halationData.size_um.b, image_width_px, film_format_mm),
    ]
    # Use a downsample/blur/upsample pyramid for a fast, high-quality large blur.
    # This is much faster than a single large convolution.
halation_glow = torch.zeros_like(diffused_tensor)
# Initial source for the pyramid
mip_source = halation_source
for i in range(4): # 4 levels of downsampling for a wide, soft glow
# Blur the current mip level
mip_blurred = _gpu_separable_gaussian_blur(
mip_source, 2.0, device
) # Small blur at each level
# Upsample to full size to be added to the final glow
# Note: 'bilinear' upsampling ensures a smooth result
upsampled_glow = F.interpolate(
mip_blurred,
size=diffused_tensor.shape[2:],
mode="bilinear",
align_corners=False,
)
# Add this layer's contribution to the total glow
halation_glow += upsampled_glow
# Downsample for the next iteration, if not the last level
if i < 3:
mip_source = F.max_pool2d(mip_source, kernel_size=2, stride=2)
# c. Apply color tint and strength, then add the final glow to the image
# The glow is tinted by the halation strength colors and added to the diffused image
final_image_tensor = diffused_tensor + (halation_glow * halation_strengths)
# 4. Finalize: Clip values and convert back to a numpy array
final_image_tensor = torch.clamp(final_image_tensor, 0.0, 1.0)
result_numpy = final_image_tensor.squeeze(0).permute(1, 2, 0).cpu().numpy()
return result_numpy
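# Worked sketch of the pyramid above (hypothetical numbers): each max_pool2d
# halves the resolution, so the fixed sigma-2 blur at level i acts like a
# sigma of roughly 2 * 2**i pixels at full resolution. Summing the four
# upsampled levels therefore approximates one very wide glow (effective
# sigmas of ~2, 4, 8 and 16 px) while touching far fewer pixels than a
# single sigma-16 convolution would.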
# --- Physically-Based Scanner Data ---
# This data represents a more physical model of scanner components. The spectral
# data is representative and designed to create physically-plausible results.
# Define a common spectral shape for all our operations
COMMON_SPECTRAL_SHAPE = colour.SpectralShape(380, 780, 10)
# 1. Scanner Light Source Spectra (SPD - Spectral Power Distribution)
# Modeled based on known lamp types for these scanners.
SDS_SCANNER_LIGHT_SOURCES = {
# Hasselblad/Flextight: Simulates a Cold Cathode Fluorescent Lamp (CCFL)
# Characterized by broad but spiky emission, especially in blue/green.
"hasselblad": colour.sd_multi_leds(
[450, 540, 610],
half_spectral_widths=[20, 30, 25] # half spectral widths (FWHM/2)
).align(COMMON_SPECTRAL_SHAPE),
# Fuji Frontier: Simulates a set of narrow-band, high-intensity LEDs.
# This gives the characteristic color separation and vibrancy.
"frontier": colour.sd_multi_leds(
[465, 535, 625], # R, G, B LED peaks
        half_spectral_widths=[20, 25, 20] # half spectral widths (FWHM/2), very narrow
).align(COMMON_SPECTRAL_SHAPE),
# Noritsu: Also LED-based, but often with slightly different peaks and
# calibration leading to a different color rendering.
"noritsu": colour.sd_multi_leds(
[460, 545, 630],
half_spectral_widths=[22, 28, 22]
).align(COMMON_SPECTRAL_SHAPE),
}
ADVANCED_SCANNER_PRESETS = {
"hasselblad": {
# Hasselblad/Flextight: Renowned for its neutrality and high fidelity.
# Its model aims for a "pure" rendition of the film, close to a perfect observer.
"sensitivities": {
# Based on a high-quality fluorescent light source and accurate sensor,
# approximating the CIE 1931 standard observer for maximum neutrality.
"primaries": np.array([
[0.7347, 0.2653], [0.2738, 0.7174], [0.1666, 0.0089]
]),
"whitepoint": colour.CCS_ILLUMINANTS['CIE 1931 2 Degree Standard Observer']['D65'],
},
"tone_curve_params": {
"black_point": 0.005, "white_point": 0.998, "contrast": 0.25, "shoulder": 0.1
},
"color_matrix": np.identity(3), # No additional color shift; aims for accuracy.
"saturation": 1.0,
"vibrance": 0.05, # A very slight boost to color without over-saturating.
},
"frontier": {
# Fuji Frontier: The classic lab scanner look. Famous for its handling of greens and skin tones.
"sensitivities": {
# Model mimics a narrow-band LED system. The green primary is shifted slightly
# towards cyan, and the red primary is shifted slightly towards orange,
# contributing to Fuji's signature color rendering, especially in foliage and skin.
"primaries": np.array([
[0.685, 0.315], [0.250, 0.725], [0.155, 0.045]
]),
"whitepoint": colour.CCS_ILLUMINANTS['CIE 1931 2 Degree Standard Observer']['D65'],
},
"tone_curve_params": {
"black_point": 0.01, "white_point": 0.995, "contrast": 0.40, "shoulder": 0.3
},
"color_matrix": np.array([
[1.0, -0.05, 0.0],
[-0.04, 1.0, -0.04],
[0.0, 0.05, 1.0]
]), # The classic Frontier color science matrix from the previous model.
"saturation": 1.05,
"vibrance": 0.15, # Higher vibrance gives it that well-known "pop".
},
"noritsu": {
# Noritsu: Known for rich, warm, and often high-contrast scans.
"sensitivities": {
# Models a different LED array with broader spectral responses. The red primary is wider,
# enhancing warmth, and the blue is very strong, creating deep, rich blues in skies.
"primaries": np.array([
[0.690, 0.310], [0.280, 0.690], [0.150, 0.050]
]),
"whitepoint": colour.CCS_ILLUMINANTS['CIE 1931 2 Degree Standard Observer']['D65'],
},
"tone_curve_params": {
"black_point": 0.015, "white_point": 0.990, "contrast": 0.50, "shoulder": 0.2
},
"color_matrix": np.array([
[1.02, 0.0, -0.02],
[-0.02, 1.02, 0.0],
[-0.02, -0.02, 1.02]
]), # Stronger matrix for color separation.
"saturation": 1.1,
"vibrance": 0.1, # Boosts saturation, contributing to the rich look.
}
}
def apply_parametric_s_curve(image, black_point, white_point, contrast, shoulder):
"""Applies a flexible, non-linear S-curve for contrast."""
# 1. Linear re-scale based on black/white points
leveled = (image - black_point) / (white_point - black_point)
x = np.clip(leveled, 0.0, 1.0)
# 2. Base smoothstep curve
s_curve = 3 * x**2 - 2 * x**3
# 3. Shoulder compression using a power function
# A higher 'shoulder' value compresses highlights more, protecting them.
shoulder_curve = x ** (1.0 + shoulder)
# 4. Blend the curves based on the contrast parameter
# A contrast of 0 is linear, 1 is full s-curve + shoulder.
final_curve = (
x * (1 - contrast) +
(s_curve * (1 - shoulder) + shoulder_curve * shoulder) * contrast
)
return np.clip(final_curve, 0.0, 1.0)
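# Worked example (hypothetical values): with black_point=0, white_point=1 and
# contrast=0 the curve is the identity, and the smoothstep term alone maps the
# midpoint to itself (3 * 0.5**2 - 2 * 0.5**3 = 0.5), so raising `contrast`
# only reshapes values away from the midpoint:
#
#   apply_parametric_s_curve(0.5, 0.0, 1.0, contrast=0.0, shoulder=0.0) -> 0.5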
def apply_vibrance(image, amount):
"""Selectively boosts saturation of less-saturated colors."""
if amount == 0:
return image
# Calculate per-pixel saturation (max(R,G,B) - min(R,G,B))
pixel_sat = np.max(image, axis=-1) - np.min(image, axis=-1)
# Create a weight map: less saturated pixels get a higher weight.
weight = 1.0 - np.clip(pixel_sat * 1.5, 0, 1) # Multiplier controls falloff
# Calculate luminance for blending
luminance = np.einsum("...c, c -> ...", image, np.array([0.2126, 0.7152, 0.0722]))
luminance = np.expand_dims(luminance, axis=-1)
# Create a fully saturated version of the image
saturated_version = np.clip(luminance + (image - luminance) * 2.0, 0.0, 1.0)
# Expand weight to match image dimensions (H,W) -> (H,W,1)
weight = np.expand_dims(weight, axis=-1)
# Blend the original with the saturated version based on the weight map and amount
vibrant_image = (
image * (1 - weight * amount) +
saturated_version * (weight * amount)
)
return np.clip(vibrant_image, 0.0, 1.0)
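# Worked example (hypothetical pixels): a near-gray pixel like (0.5, 0.5, 0.45)
# has pixel_sat = 0.05, so weight = 1 - 0.075 = 0.925 and it receives almost
# the full vibrance boost, while an already vivid pixel such as (0.9, 0.2, 0.1)
# has pixel_sat = 0.8, weight = 0, and is left untouched.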
def scan_film(
image_to_scan: np.ndarray,
film_base_color_linear_rgb: np.ndarray,
scanner_type: str,
) -> np.ndarray:
"""
Simulates the physical scanning process using a spectral sensitivity model
and advanced tone/color processing.
Args:
image_to_scan: Linear sRGB data of the transmitted light.
film_base_color_linear_rgb: Linear sRGB color of the film base.
scanner_type: The scanner model to emulate.
Returns:
A linear sRGB image representing the final scanned positive.
"""
print(f"--- Starting Advanced '{scanner_type}' scanner simulation ---")
if scanner_type not in ADVANCED_SCANNER_PRESETS:
print(f"Warning: Scanner type '{scanner_type}' not found. Returning original image.")
return image_to_scan
params = ADVANCED_SCANNER_PRESETS[scanner_type]
srgb_cs = colour.models.RGB_COLOURSPACE_sRGB
# 1. Define the Scanner's Native Color Space
scanner_cs = colour.RGB_Colourspace(
name=f"Scanner - {scanner_type}",
primaries=params['sensitivities']['primaries'],
whitepoint=params['sensitivities']['whitepoint']
)
print(" - Step 1: Defined scanner's unique spectral sensitivity model.")
# 2. Re-render Image into Scanner's Native Space
image_in_scanner_space = colour.RGB_to_RGB(
image_to_scan, srgb_cs, scanner_cs
)
base_in_scanner_space = colour.RGB_to_RGB(
film_base_color_linear_rgb, srgb_cs, scanner_cs
)
save_debug_image(np.clip(image_in_scanner_space, 0.0, 1.0), f"10a_scanner_native_capture_{scanner_type}_RGB")
print(" - Step 2: Re-rendered image into scanner's native color space.")
# 3. Film Base Removal and Inversion (in scanner space)
masked_removed = image_in_scanner_space / (base_in_scanner_space + EPSILON)
inverted_image = 1.0 / (masked_removed + EPSILON)
print(" - Step 3: Performed negative inversion in scanner space.")
# --- FIX: ADD AUTO-EXPOSURE NORMALIZATION ---
# This step is crucial. It mimics the scanner setting its white point from the
# brightest part of the inverted image (Dmin), scaling the raw inverted data
# into a usable range before applying the tone curve.
white_point_percentile = 99.9 # Use a high percentile to be robust against outliers
white_point = np.percentile(inverted_image, white_point_percentile, axis=(0, 1))
# Protect against division by zero if a channel is all black
white_point[white_point < EPSILON] = 1.0
normalized_image = inverted_image / white_point
print(f" - Step 3a: Auto-exposed image (set {white_point_percentile}% white point).")
save_debug_image(np.clip(normalized_image, 0.0, 1.0), f"10aa_scanner_normalized_{scanner_type}_RGB")
# 4. Apply Scanner Tone Curve
# The input is now the normalized_image, which is correctly scaled.
tone_params = params['tone_curve_params']
tone_curved_image = apply_parametric_s_curve(
normalized_image, **tone_params
)
save_debug_image(tone_curved_image, f"10b_scanner_tone_curve_{scanner_type}_RGB")
print(f" - Step 4: Applied scanner's tone curve. (Contrast: {tone_params['contrast']})")
# 5. Apply Scanner Color Science
color_corrected_image = np.einsum(
'hwj,ij->hwi', tone_curved_image, np.array(params['color_matrix'])
)
saturated_image = apply_saturation_rgb(color_corrected_image, params['saturation'])
final_look_image = apply_vibrance(saturated_image, params['vibrance'])
save_debug_image(final_look_image, f"10c_scanner_color_science_{scanner_type}_RGB")
print(f" - Step 5: Applied color matrix, saturation, and vibrance.")
# 6. Convert Image back to Standard Linear sRGB
final_image_srgb = colour.RGB_to_RGB(
final_look_image, scanner_cs, srgb_cs
)
print(" - Step 6: Converted final image back to standard sRGB linear.")
print(f"--- Finished Advanced '{scanner_type}' scanner simulation ---")
return np.clip(final_image_srgb, 0.0, 1.0)
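# Illustrative usage (hypothetical variable names, not called here):
#
#   positive = scan_film(linear_negative, film_base_color_linear_rgb, "frontier")
#
# takes the transmitted-light negative plus the film base colour computed in
# main() and returns a linear sRGB positive with the preset's tone curve,
# matrix, saturation and vibrance applied.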
def chromatic_adaptation_white_balance(
image_linear_srgb: np.ndarray,
target_illuminant_name: str = "D65",
source_estimation_p_norm: float = 6.0,
source_estimation_clip_percentile: float = 2.0,
adaptation_method: str = "Von Kries",
) -> np.ndarray:
"""
Performs white balance using chromatic adaptation to a target illuminant,
while preserving the original image's perceived luminance.
"""
print(f"Applying Chromatic Adaptation White Balance to {target_illuminant_name} (luminance preserving)...")
if np.all(image_linear_srgb < 1e-6):
print(" - Image is black, skipping WB.")
return image_linear_srgb
# 1. Estimate Source Illuminant (adapted from your apply_shades_of_grey_wb)
img_for_estimation = image_linear_srgb.copy()
epsilon = 1e-10
if source_estimation_clip_percentile > 0:
lower = np.percentile(img_for_estimation, source_estimation_clip_percentile, axis=(0,1))
upper = np.percentile(img_for_estimation, 100 - source_estimation_clip_percentile, axis=(0,1))
# Ensure lower is not greater than upper to avoid issues with uniform images
lower = np.minimum(lower, upper - epsilon)
img_for_estimation = np.clip(img_for_estimation, lower, upper)
r_est = img_for_estimation[..., 0]
g_est = img_for_estimation[..., 1]
b_est = img_for_estimation[..., 2]
if source_estimation_p_norm == float("inf"):
illum_r_rgb = np.max(r_est) + epsilon
illum_g_rgb = np.max(g_est) + epsilon
illum_b_rgb = np.max(b_est) + epsilon
else:
illum_r_rgb = np.power(np.mean(np.power(r_est, source_estimation_p_norm)), 1 / source_estimation_p_norm) + epsilon
illum_g_rgb = np.power(np.mean(np.power(g_est, source_estimation_p_norm)), 1 / source_estimation_p_norm) + epsilon
illum_b_rgb = np.power(np.mean(np.power(b_est, source_estimation_p_norm)), 1 / source_estimation_p_norm) + epsilon
source_illuminant_RGB = np.array([illum_r_rgb, illum_g_rgb, illum_b_rgb])
# print(f" - Estimated Source Illuminant (RGB): {np.round(source_illuminant_RGB, 4)}") # Optional debug
srgb_colourspace = colour.models.RGB_COLOURSPACE_sRGB
source_illuminant_XYZ = colour.RGB_to_XYZ(source_illuminant_RGB,
srgb_colourspace,
srgb_colourspace.whitepoint,
adaptation_method)
print(f" - Estimated Source Illuminant (XYZ): {np.round(source_illuminant_XYZ, 4)}")
# 2. Define Target Illuminant and Normalize its Luminance (Y usually to 1.0 for reference)
try:
target_illuminant_XYZ_ref = colour.xy_to_XYZ(srgb_colourspace.whitepoint) # Default to sRGB's D65
if target_illuminant_name:
target_illuminant_spd = SDS_ILLUMINANTS.get(target_illuminant_name)
if target_illuminant_spd:
# sd_to_XYZ often returns Y=100, so normalize to Y=1
                # Note: sd_to_XYZ's `illuminant` parameter expects a spectral
                # distribution; the SPD here *is* the illuminant, so no extra
                # illuminant argument is needed.
                xyz_from_sd = colour.sd_to_XYZ(target_illuminant_spd)
if xyz_from_sd[1] != 0:
target_illuminant_XYZ_ref = xyz_from_sd / xyz_from_sd[1] # Normalize Y to 1.0
else: # Should not happen for standard illuminants
target_illuminant_XYZ_ref = xyz_from_sd
else:
print(f" - Warning: Target illuminant '{target_illuminant_name}' not found. Using sRGB D65.")
# Ensure Y is around 1.0 after potential direct XYZ fetching or other sources
if target_illuminant_XYZ_ref[1] != 0 and not np.isclose(target_illuminant_XYZ_ref[1], 1.0):
target_illuminant_XYZ_ref = target_illuminant_XYZ_ref / target_illuminant_XYZ_ref[1]
except Exception as e:
print(f" - Error defining target illuminant '{target_illuminant_name}', defaulting to sRGB D65 (Y=1). Error: {e}")
target_illuminant_XYZ_ref = colour.xy_to_XYZ(srgb_colourspace.whitepoint)
if target_illuminant_XYZ_ref[1] != 0: # Ensure D65 default is Y=1
target_illuminant_XYZ_ref = target_illuminant_XYZ_ref / target_illuminant_XYZ_ref[1]
print(f" - Reference Target Illuminant {target_illuminant_name} (XYZ, Y normalized to 1): {np.round(target_illuminant_XYZ_ref, 4)}")
# 3. Check for effective no-op
# Compare chromaticities of source and reference target
# Using a small epsilon to avoid division by zero in XYZ_to_xy if XYZ is black
xy_source = colour.XYZ_to_xy(source_illuminant_XYZ + epsilon)
xy_target_ref = colour.XYZ_to_xy(target_illuminant_XYZ_ref + epsilon)
if np.allclose(xy_source, xy_target_ref, atol=1e-3):
print(" - Source and target illuminants have very similar chromaticity. Skipping CAT to preserve luminance.")
return np.clip(image_linear_srgb, 0.0, 1.0)
# 4. Create a version of the target illuminant that has the *same luminance* as the source illuminant.
# This ensures the CAT primarily adjusts chromaticity.
if source_illuminant_XYZ[1] > epsilon: # Check Y component of source is not effectively zero
# Scale the (Y=1 normalized) target illuminant's chromaticity to match the source's luminance
XYZ_target_for_CAT = target_illuminant_XYZ_ref * source_illuminant_XYZ[1]
else:
# Source illuminant is effectively black or has zero luminance.
# Meaningful adaptation isn't possible without massive, undefined gain.
print(" - Source illuminant luminance is effectively zero. Skipping adaptation.")
return np.clip(image_linear_srgb, 0.0, 1.0)
print(f" - Target for CAT (D65 chromaticity, source luminance) (XYZ): {np.round(XYZ_target_for_CAT, 4)}")
# 5. Perform Chromatic Adaptation
adapted_image_linear_srgb = colour.adaptation.chromatic_adaptation(
image_linear_srgb,
source_illuminant_XYZ, # Original estimated source illuminant
XYZ_target_for_CAT, # Target illuminant with D65 chromaticity but matched to source's luminance
method=adaptation_method,
transform_kwargs={'incomplete': True, 'inverse_transform_kwargs': {'incomplete': True}}
)
# 6. Clip and return
final_image = np.clip(adapted_image_linear_srgb, 0.0, 1.0)
print(" - Luminance-Preserving Chromatic Adaptation White Balance applied.")
return final_image
def auto_exposure_and_contrast(
image: np.ndarray,
black_point_percentile: float = 0.1,
white_point_percentile: float = 99.9,
middle_gray_target: float = 0.18,
gamma_clamp: tuple = (0.4, 2.5)
) -> np.ndarray:
"""
Performs a more robust automatic exposure and contrast adjustment.
This function improves upon simple linear scaling by:
1. Setting black and white points based on percentiles to enhance contrast
and remove haze. This is done per-channel to help correct color casts.
2. Applying a non-linear gamma (power-law) correction to adjust mid-tones,
which preserves highlight and shadow detail far better than linear scaling.
3. Using the median luminance for its exposure calculation, which is more
robust to high-key or low-key images than a simple average.
4. Clamping the gamma adjustment to prevent extreme, unnatural changes.
Args:
image (np.ndarray): The input linear sRGB image with values in [0, 1].
black_point_percentile (float): The percentile for setting the black point.
white_point_percentile (float): The percentile for setting the white point.
middle_gray_target (float): The target linear luminance for the image's median.
gamma_clamp (tuple): A (min, max) tuple to constrain the gamma correction factor.
Returns:
np.ndarray: The adjusted image, clipped to the [0, 1] range.
"""
print("Applying intelligent exposure and contrast adjustment...")
# Return immediately if the image is all black to avoid errors.
if np.all(image < 1e-6):
print(" - Image is black, skipping adjustment.")
return image
# --- 1. Automatic Black & White Point Adjustment (Levels) ---
# This enhances contrast by stretching the dynamic range of the image.
# It is performed per-channel to help correct for color casts in the shadows/highlights.
black_points = np.percentile(image, black_point_percentile, axis=(0, 1))
white_points = np.percentile(image, white_point_percentile, axis=(0, 1))
# Ensure black points are not higher than white points to avoid inversion.
# Add a small epsilon for numerical stability.
black_points = np.minimum(black_points, white_points - 1e-6)
print(f" - Calculated Black Points (R,G,B): {np.round(black_points, 4)}")
print(f" - Calculated White Points (R,G,B): {np.round(white_points, 4)}")
# Apply the levels adjustment to stretch the contrast.
contrast_adjusted_image = (image - black_points) / (white_points - black_points)
contrast_adjusted_image = np.clip(contrast_adjusted_image, 0.0, 1.0)
save_debug_image(contrast_adjusted_image, "12a_contrast_adjusted_image_RGB")
# --- 2. Gamma-based Exposure Correction (Mid-tone Brightness) ---
# This adjusts the mid-tones without re-clipping the black/white points.
# We use a luminance approximation (BT.709) for perceptual relevance.
luminance = np.einsum("...c, c -> ...", contrast_adjusted_image, np.array([0.2126, 0.7152, 0.0722]))
# Calculate the median of the luminance. The median is more robust to outliers
# (like specular highlights or deep shadows) than the mean.
current_median_lum = np.median(luminance)
print(f" - Current median luminance: {current_median_lum:.4f}")
print(f" - Target median luminance: {middle_gray_target:.4f}")
# Avoid division by zero or log(0) for very dark images.
if current_median_lum < 1e-6:
print(" - Median luminance is zero, skipping gamma correction.")
return contrast_adjusted_image
# Calculate the required gamma correction using the formula: power = log(target) / log(current).
# This formula finds the power `p` such that `current^p = target`.
gamma = np.log(middle_gray_target) / np.log(current_median_lum)
# Clamp the gamma to a reasonable range to prevent extreme adjustments.
min_gamma, max_gamma = gamma_clamp
clamped_gamma = np.clip(gamma, min_gamma, max_gamma)
print(f" - Calculated Gamma: {gamma:.4f} (Clamped to: {clamped_gamma:.4f})")
# Apply the gamma correction to the contrast-adjusted image.
corrected_image = contrast_adjusted_image ** clamped_gamma
final_image = np.clip(corrected_image, 0.0, 1.0)
return final_image
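# Worked example (hypothetical numbers): an underexposed frame whose median
# luminance is 0.09, against the default 0.18 target, gives
# gamma = log(0.18) / log(0.09) ~= 0.712, and indeed 0.09 ** 0.712 ~= 0.18,
# lifting mid-tones while leaving pure black and white fixed.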
def apply_shades_of_grey_wb(final_image_to_save):
print("Applying Shades of Gray white balance...")
# Parameters for the white balance algorithm
p = 6 # Minkowski norm parameter (p=1 is Gray World, p=∞ is White Patch)
clip_percentile = 5 # Percentage of pixels to clip (for robustness)
epsilon = 1e-10 # Small value to avoid division by zero
    # Helper functions for sRGB linearization (defined for reference; the
    # algorithm below operates on the already-linear input directly)
def _linearize_srgb(x):
# Convert from sRGB to linear RGB
return np.where(x <= 0.04045, x / 12.92, ((x + 0.055) / 1.055) ** 2.4)
def _delinearize_srgb(x):
# Convert from linear RGB to sRGB
return np.where(x <= 0.0031308, 12.92 * x, 1.055 * (x ** (1 / 2.4)) - 0.055)
# Work with a copy of the image
img_float = final_image_to_save.copy()
# Extract RGB channels
r = img_float[..., 0]
g = img_float[..., 1]
b = img_float[..., 2]
# Handle clipping of dark and bright pixels for robustness
if clip_percentile > 0:
lower = np.percentile(img_float, clip_percentile)
upper = np.percentile(img_float, 100 - clip_percentile)
img_float = np.clip(img_float, lower, upper)
r = img_float[..., 0]
g = img_float[..., 1]
b = img_float[..., 2]
# Calculate illuminant estimate using the Minkowski p-norm
if p == float("inf"):
# White Patch (max RGB) case
illum_r = np.max(r) + epsilon
illum_g = np.max(g) + epsilon
illum_b = np.max(b) + epsilon
else:
# Standard Minkowski norm
illum_r = np.power(np.mean(np.power(r, p)), 1 / p) + epsilon
illum_g = np.power(np.mean(np.power(g, p)), 1 / p) + epsilon
illum_b = np.power(np.mean(np.power(b, p)), 1 / p) + epsilon
# The illuminant is the estimated color of the light source
illuminant = np.array([illum_r, illum_g, illum_b])
# The "gray" value is typically the average of the illuminant channels
gray_val = np.mean(illuminant)
# Calculate scaling factors
scale_r = gray_val / illum_r
scale_g = gray_val / illum_g
scale_b = gray_val / illum_b
# Apply scaling factors to the original (non-clipped) image
r_orig = final_image_to_save[..., 0]
g_orig = final_image_to_save[..., 1]
b_orig = final_image_to_save[..., 2]
# Apply the white balance correction
balanced_r = r_orig * scale_r
balanced_g = g_orig * scale_g
balanced_b = b_orig * scale_b
# Merge channels
balanced_img = np.stack([balanced_r, balanced_g, balanced_b], axis=-1)
# Clip to valid range
final_image_to_save = np.clip(balanced_img, 0.0, 1.0)
return final_image_to_save
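# Illustrative sketch (not called by the pipeline): the Minkowski p-norm used
# above interpolates between the two classic illuminant estimators, computed
# here for a single channel of hypothetical pixel data.
def _demo_minkowski_norm(channel: np.ndarray, p: float) -> float:
    if p == float("inf"):
        return float(np.max(channel))  # White Patch limit (max RGB)
    return float(np.mean(channel**p) ** (1 / p))  # p = 1 is Gray World (mean)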
def main():
parser = argparse.ArgumentParser(
description="Simulate film stock color characteristics using a datasheet JSON."
)
parser.add_argument(
"input_image",
help="Path to the input RGB image (e.g., PNG, TIFF). Assumed linear RGB.",
)
parser.add_argument("datasheet_json", help="Path to the film datasheet JSON file.")
parser.add_argument("output_image", help="Path to save the output emulated image.")
parser.add_argument(
"--no-cache",
action="store_true",
help="Force regeneration of LUTs and do not save them.",
)
parser.add_argument(
"--force-d65",
action="store_true",
help="Force white balance when loading RAW files.",
)
    # --- Border option ---
parser.add_argument(
"--border",
action="store_true",
help="Add a 10%% border with the film base color and stock info.",
)
parser.add_argument(
"--gpu", action="store_true", help="Use GPU for LUT processing if available."
)
parser.add_argument(
"--print-film-base-color",
help="Outputs the film base color determined from HD curve as a CIEXYZ value (D65 illuminant).",
action="store_true",
)
parser.add_argument(
"--perform-negative-correction",
action="store_true",
help="Apply negative film correction based on the datasheet base color.",
)
parser.add_argument(
"--perform-white-balance",
action="store_true",
help="Apply white balance to the final result. This is only effective when also using --perform-negative-correction.",
)
parser.add_argument(
"--perform-exposure-correction",
action="store_true",
help="Apply exposure correction to the final result. This is only effective when also using --perform-negative-correction. Exposure correction is applied before white balance (if white balancing).",
)
parser.add_argument(
"--raw-auto-exposure",
action="store_true",
help="Automatically adjust exposure for RAW images on load.",
)
parser.add_argument(
"--simulate-grain",
action="store_true",
help="Simulate film grain in the output image.",
)
parser.add_argument(
"--mono-grain",
action="store_true",
help="Simulate monochrome film grain in the output image.",
)
parser.add_argument(
"--scanner-type",
type=str.lower,
choices=["none", "hasselblad", "frontier", "noritsu"],
default="none",
help="Simulate the color science of a specific scanner during negative conversion. "
"Set to 'none' to use the simple inversion. "
"Requires --perform-negative-correction."
)
args = parser.parse_args()
datasheet: FilmDatasheet | None = parse_datasheet_json(args.datasheet_json)
if datasheet is None:
sys.exit(1)
print(
f"Simulating: {datasheet.info.name} ({datasheet.info.format_mm}mm) (v{datasheet.info.version})\n\t{datasheet.info.description}"
)
lut_device = "cuda" if args.gpu and torch.cuda.is_available() else "cpu"
    # Fall back to Apple's MPS backend when CUDA is unavailable
    if args.gpu and lut_device == "cpu" and torch.backends.mps.is_available():
        print("Using Apple MPS backend for GPU acceleration.")
        lut_device = "mps"
# --- LUT Generation and Pre-computation ---
cache_dir = Path.home() / ".filmsim"
# Sanitize datasheet name for use in a filename
safe_name = "".join(
c for c in datasheet.info.name if c.isalnum() or c in (" ", "_")
).rstrip()
base_filename = (
f"{safe_name.replace(' ', '_')}_v{datasheet.info.version}_size{LUT_SIZE}"
)
exposure_lut_path = cache_dir / f"{base_filename}_exposure.cube"
naive_density_lut_path = cache_dir / f"{base_filename}_naive_density.cube"
uncoupled_density_lut_path = cache_dir / f"{base_filename}_uncoupled_density.cube"
user_consent_to_save = None
middle_gray_logE = float(datasheet.properties.calibration.middle_gray_logE)
exposure_lut: Optional[np.ndarray] = None # Initialize exposure_lut
if not args.no_cache and exposure_lut_path.exists():
try:
print(f"Loading exposure LUT from cache: {exposure_lut_path}")
loaded_obj = colour.read_LUT(str(exposure_lut_path))
if isinstance(loaded_obj, colour.LUT3D):
exposure_lut = loaded_obj.table
print(f"Successfully loaded exposure LUT from {exposure_lut_path}")
else:
# Log a warning and fall through to regeneration
print(
f"Warning: Cached exposure LUT {exposure_lut_path} is not a LUT3D (type: {type(loaded_obj)}). Will regenerate."
)
except Exception as e:
# Log a warning and fall through to regeneration
print(
f"Warning: Error loading exposure LUT from {exposure_lut_path}: {e}. Will regenerate."
)
# If LUT wasn't loaded from cache (or cache was disabled, or loading failed), generate it
if exposure_lut is None:
print("Generating new exposure LUT...")
generated_exposure_lut_table = create_exposure_lut(
datasheet.properties.curves.spectral_sensitivity,
SDS_ILLUMINANTS["D65"],
middle_gray_logE,
size=LUT_SIZE,
)
exposure_lut = (
generated_exposure_lut_table # Assign to the main variable used later
)
# Save the newly generated LUT if caching is enabled
if not args.no_cache:
if (
user_consent_to_save is None
): # Ask for consent only once per run if needed
response = input(
f"Save generated LUTs to {cache_dir} for future use? [Y/n]: "
).lower()
user_consent_to_save = response in ("y", "yes", "")
if user_consent_to_save:
try:
cache_dir.mkdir(parents=True, exist_ok=True)
colour.write_LUT(
colour.LUT3D(
table=generated_exposure_lut_table,
name=f"{base_filename}_exposure",
),
str(exposure_lut_path),
)
print(f"Saved exposure LUT to {exposure_lut_path}")
except Exception as e:
print(
f"Error saving exposure LUT to {exposure_lut_path}: {e}",
file=sys.stderr,
)
# Ensure exposure_lut is now populated. If not, something went wrong.
if exposure_lut is None:
# This case should ideally not be reached if create_exposure_lut always returns a valid LUT or raises an error.
print(
"Critical error: Exposure LUT could not be loaded or generated. Exiting.",
file=sys.stderr,
)
sys.exit(1)
# For density LUTs, we need to know the typical range of log exposure values
# We can estimate this from the H&D curve data itself.
log_e_points = [p.d for p in datasheet.properties.curves.hd if p.d is not None]
log_exposure_range = (log_e_points[0], log_e_points[-1])
density_lut_naive: Optional[np.ndarray] = None # Initialize naive density LUT
if not args.no_cache and naive_density_lut_path.exists():
try:
print(f"Loading naive density LUT from cache: {naive_density_lut_path}")
loaded_obj = colour.read_LUT(str(naive_density_lut_path))
if isinstance(loaded_obj, colour.LUT3D):
density_lut_naive = loaded_obj.table
print(
f"Successfully loaded naive density LUT from {naive_density_lut_path}"
)
else:
# Log a warning and fall through to regeneration
print(
f"Warning: Cached naive density LUT {naive_density_lut_path} is not a LUT3D (type: {type(loaded_obj)}). Will regenerate."
)
except Exception as e:
# Log a warning and fall through to regeneration
print(
f"Warning: Error loading naive density LUT from {naive_density_lut_path}: {e}. Will regenerate."
)
if density_lut_naive is None:
print("Generating new naive density LUT...")
density_lut_naive = create_density_lut(
datasheet.processing,
datasheet.properties.curves.hd,
middle_gray_logE,
log_exposure_range,
size=LUT_SIZE,
)
if not args.no_cache:
if user_consent_to_save is None:
response = input(
f"Save generated naive density LUT to {cache_dir} for future use? [Y/n]: "
).lower()
user_consent_to_save = response in ("y", "yes", "")
if user_consent_to_save:
try:
cache_dir.mkdir(parents=True, exist_ok=True)
colour.write_LUT(
colour.LUT3D(
table=density_lut_naive,
name=f"{base_filename}_naive_density",
),
str(naive_density_lut_path),
)
print(f"Saved naive density LUT to {naive_density_lut_path}")
except Exception as e:
print(
f"Error saving naive density LUT to {naive_density_lut_path}: {e}",
file=sys.stderr,
)
if density_lut_naive is None:
# This case should ideally not be reached if create_density_lut always returns a valid LUT or raises an error.
print(
"Critical error: Naive density LUT could not be loaded or generated. Exiting.",
file=sys.stderr,
)
sys.exit(1)
inhibitor_matrix = compute_inhibitor_matrix(
datasheet.properties.couplers.dir_amount_rgb,
datasheet.properties.couplers.dir_diffusion_interlayer,
)
density_lut_uncoupled: Optional[np.ndarray] = None # Initialize
if not args.no_cache and uncoupled_density_lut_path.exists():
try:
print(
f"Loading uncoupled density LUT from cache: {uncoupled_density_lut_path}"
)
loaded_obj = colour.read_LUT(str(uncoupled_density_lut_path))
if isinstance(loaded_obj, colour.LUT3D):
density_lut_uncoupled = loaded_obj.table
print(
f"Successfully loaded uncoupled density LUT from {uncoupled_density_lut_path}"
)
else:
# Log a warning and fall through to regeneration
print(
f"Warning: Cached uncoupled density LUT {uncoupled_density_lut_path} is not a LUT3D (type: {type(loaded_obj)}). Will regenerate."
)
except Exception as e:
# Log a warning and fall through to regeneration
print(
f"Warning: Error loading uncoupled density LUT from {uncoupled_density_lut_path}: {e}. Will regenerate."
)
if density_lut_uncoupled is None:
uncoupled_hd_curve = compute_uncoupled_hd_curves(
datasheet.properties.curves.hd, inhibitor_matrix
)
density_lut_uncoupled = create_density_lut(
datasheet.processing,
uncoupled_hd_curve,
middle_gray_logE,
log_exposure_range,
size=LUT_SIZE,
)
if not args.no_cache:
if user_consent_to_save is None:
response = input(
f"Save generated uncoupled density LUT to {cache_dir} for future use? [Y/n]: "
).lower()
user_consent_to_save = response in ("y", "yes", "")
if user_consent_to_save:
try:
cache_dir.mkdir(parents=True, exist_ok=True)
colour.write_LUT(
colour.LUT3D(
table=density_lut_uncoupled,
name=f"{base_filename}_uncoupled_density",
),
str(uncoupled_density_lut_path),
)
print(
f"Saved uncoupled density LUT to {uncoupled_density_lut_path}"
)
except Exception as e:
print(
f"Error saving uncoupled density LUT to {uncoupled_density_lut_path}: {e}",
file=sys.stderr,
)
if density_lut_uncoupled is None:
# This case should ideally not be reached if create_density_lut always returns a valid LUT or raises an error.
print(
"Critical error: Uncoupled density LUT could not be loaded or generated. Exiting.",
file=sys.stderr,
)
sys.exit(1)
# --- Load and Prepare Input Image ---
try:
if any(
args.input_image.lower().endswith(ext) for ext in [".dng", ".raw", ".arw"]
):
with rawpy.imread(args.input_image) as raw:
wb_params = {}
if args.force_d65:
# Use a neutral white balance if forcing D65, as post-process will handle it.
# Note: rawpy's 'camera' wb is often the best starting point. Forcing D65 might
# be better handled by a different method if colors seem off.
wb_params = {"user_wb": (1.0, 1.0, 1.0, 1.0)}
else:
wb_params = {"use_camera_wb": True}
image_raw = raw.postprocess(
demosaic_algorithm=rawpy.DemosaicAlgorithm.AHD, # type: ignore
output_bps=16,
gamma=(1, 1), # Linear gamma
                    no_auto_bright=not args.raw_auto_exposure,  # type: ignore
output_color=rawpy.ColorSpace.sRGB, # type: ignore
**wb_params,
)
image_linear = image_raw.astype(np.float64) / 65535.0
print("RAW file processed to linear floating point directly.")
else:
image_raw = iio.imread(args.input_image)
image_float = image_raw.astype(np.float64) / (
65535.0 if image_raw.dtype == np.uint16 else 255.0
)
image_linear = (
colour.cctf_decoding(image_float, function="sRGB")
if image_raw.dtype in (np.uint8, np.uint16)
else image_float
)
            if args.force_d65:
                print("Note: --force-d65 only applies to RAW input; ignoring it for this image.")
except Exception as e:
print(f"Error reading input image: {e}", file=sys.stderr)
sys.exit(1)
if image_linear.shape[-1] > 3:
image_linear = image_linear[..., :3]
    image_linear = np.maximum(image_linear, 0.0)
    image_width_px = image_linear.shape[1]
save_debug_image(image_linear, "01_input_linear_sRGB")
# --- Pipeline Steps ---
### PERFORMANCE ###
# 1. Convert Linear RGB to Log Exposure using the LUT
print("Applying exposure LUT...")
log_exposure_rgb = apply_3d_lut(image_linear, exposure_lut, device=lut_device)
save_debug_image(log_exposure_rgb, "02_log_exposure_RGB")
# 2. Apply DIR Coupler Simulation
print("Applying DIR coupler simulation...")
# 2a. Calculate "naive" density using its LUT
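    # The density LUT is indexed over [0, 1], so remap log exposure from
    # [logE_min, logE_max] onto the unit cube before the lookup.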
naive_density_rgb = apply_3d_lut(
(log_exposure_rgb - log_exposure_range[0])
/ (log_exposure_range[1] - log_exposure_range[0]),
density_lut_naive,
device=lut_device,
)
save_debug_image(naive_density_rgb, "03_naive_density_RGB")
# naive_density_rgb = apply_hd_curves(log_exposure_rgb, datasheet.processing, datasheet.properties.curves.hd, middle_gray_logE) # Apply H&D curves to naive density
# 2b. Use this density to modify log exposure
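    # Inhibitors also diffuse laterally; dir_diffusion_um is presumably
    # converted to a pixel-space blur radius inside the call below using the
    # physical format width (format_mm) and the image width in pixels.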
modified_log_exposure_rgb = apply_dir_coupler_simulation(
log_exposure_rgb,
naive_density_rgb,
inhibitor_matrix,
datasheet.properties.couplers.dir_diffusion_um,
datasheet.info.format_mm,
image_width_px,
)
save_debug_image(modified_log_exposure_rgb, "04_modified_log_exposure_RGB")
# 3. Apply the *uncoupled* density LUT to the *modified* exposure
print("Applying uncoupled density LUT...")
# Normalize the modified log exposure to the [0,1] range for LUT lookup
norm_log_exposure = (modified_log_exposure_rgb - log_exposure_range[0]) / (
log_exposure_range[1] - log_exposure_range[0]
)
density_rgb = apply_3d_lut(
np.clip(norm_log_exposure, 0, 1), density_lut_uncoupled, device=lut_device
)
save_debug_image(density_rgb, "05_uncoupled_density_RGB")
# (Rest of the pipeline is unchanged)
print("Converting density to linear transmittance...")
linear_transmittance = np.clip(10.0 ** (-density_rgb), 0.0, 1.0)
save_debug_image(linear_transmittance, "06_linear_transmittance_RGB")
print("Applying spatial effects (diffusion, halation)...")
linear_post_spatial = apply_spatial_effects_new(
linear_transmittance,
datasheet.info.format_mm,
datasheet.properties.interlayer,
datasheet.properties.halation,
image_width_px,
)
save_debug_image(linear_post_spatial, "07_linear_post_spatial_RGB")
print("Applying saturation adjustment...")
linear_post_saturation = apply_saturation_rgb(
linear_post_spatial, datasheet.properties.couplers.saturation_amount
)
save_debug_image(linear_post_saturation, "08_linear_post_saturation_RGB")
final_image_to_save = linear_post_saturation
# 1. Get Dmin (minimum density) from the first point of the H&D curve.
# This represents the density of the unexposed film base (the orange mask).
dmin_r = datasheet.properties.curves.hd[0].r
dmin_g = datasheet.properties.curves.hd[0].g
dmin_b = datasheet.properties.curves.hd[0].b
# 2. The final density is also affected by the balance shifts.
dmin_r += datasheet.processing.balance.r_shift
dmin_g += datasheet.processing.balance.g_shift
dmin_b += datasheet.processing.balance.b_shift
base_density = np.array([dmin_r, dmin_g, dmin_b])
# 3. Convert this final base density to a linear color value.
film_base_color_linear_rgb = 10.0 ** (-base_density)
    if args.border or args.print_film_base_color:
        base_color_linear = film_base_color_linear_rgb
if args.print_film_base_color:
# Convert the base color to CIEXYZ using D65 illuminant
base_color_xyz = colour.RGB_to_XYZ(
base_color_linear,
colour.models.RGB_COLOURSPACE_sRGB,
colour.models.RGB_COLOURSPACE_sRGB.whitepoint,
)
print(f"Film base color (D65): {base_color_xyz} (XYZ)")
if args.border:
print("Adding film base colored border...")
# 4. Create the new image with a 2.25% border on all sides.
h, w, _ = final_image_to_save.shape
border_h = int(h * 0.0225)
border_w = int(w * 0.0225)
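        # e.g. a 6000x4000 px frame gets 135 px left/right and 90 px
        # top/bottom borders.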
bordered_image = np.full(
(h + 2 * border_h, w + 2 * border_w, 3),
base_color_linear,
dtype=final_image_to_save.dtype,
)
# 5. Place the original rendered image in the center.
bordered_image[border_h : h + border_h, border_w : w + border_w, :] = (
final_image_to_save
)
try:
from PIL import Image, ImageDraw, ImageFont
# Convert the float image (0.0-1.0) to a uint8 image (0-255) for Pillow
bordered_image_uint8 = (np.clip(bordered_image, 0.0, 1.0) * 255).astype(
np.uint8
)
pil_image = Image.fromarray(bordered_image_uint8)
draw = ImageDraw.Draw(pil_image)
# Prepare the text
text_to_draw = f"{datasheet.info.name.upper()} {datasheet.properties.calibration.iso}"
text_to_repeat = f" {text_to_draw} "
# Find a suitable monospaced font, with size relative to image height
            font_size = max(
                4, int(border_h * 0.75)
            )  # At least 4 px; otherwise 75% of the border height
font = None
font_paths = [
"Menlo.ttc",
"Consolas.ttf",
"DejaVuSansMono.ttf",
"cour.ttf",
]
for path in font_paths:
try:
font = ImageFont.truetype(path, size=font_size)
break
except IOError:
continue
            if font is None:
                print(
                    "Warning: no common monospaced font found. Using Pillow's default font."
                )
try:
font = ImageFont.load_default(size=font_size)
except AttributeError: # For older Pillow versions
font = ImageFont.load_default()
            # Measure the text; textbbox returns (left, top, right, bottom).
            left, top, right, bottom = draw.textbbox(
                (0, 0), text_to_repeat, font=font
            )
            text_width, text_height = right - left, bottom - top
# Calculate vertical positions to center text in the border
y_top = (border_h - text_height) // 2
y_bottom = (h + border_h) + (border_h - text_height) // 2
# Start drawing with a 2% margin
start_x = int(w * 0.02)
# Repeat text across top and bottom borders
for y_pos in [y_top, y_bottom]:
x = start_x
while x < pil_image.width:
draw.text(
(x, y_pos), text_to_repeat, font=font, fill=(200, 200, 200)
) # Light gray fill
x += text_width
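            # Note: this round-trip through 8-bit quantizes the bordered
            # image; fine for 8-bit output, slightly lossy for 16-bit TIFF.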
# Convert back to float numpy array for saving
final_image_to_save = np.array(pil_image).astype(np.float64) / 255.0
save_debug_image(final_image_to_save, "09_bordered_image_RGB")
except ImportError:
print(
"Warning: Pillow library not found. Skipping text drawing on border.",
file=sys.stderr,
)
final_image_to_save = bordered_image
except Exception as e:
print(f"An error occurred during text drawing: {e}", file=sys.stderr)
final_image_to_save = bordered_image
# Apply Film Negative Correction if requested
if args.perform_negative_correction:
if args.scanner_type != "none":
final_image_to_save = scan_film(
final_image_to_save,
film_base_color_linear_rgb,
args.scanner_type
)
save_debug_image(final_image_to_save, f"10_scanned_image_{args.scanner_type}_RGB")
else:
print("Applying simple film negative inversion...")
print("Film base color:", film_base_color_linear_rgb)
masked_removed = final_image_to_save / (film_base_color_linear_rgb + EPSILON)
inverted_image = 1.0 / (masked_removed + EPSILON) # Avoid division by zero
max_val = np.percentile(inverted_image, 99.9)
final_image_to_save = np.clip(inverted_image / max_val, 0.0, 1.0)
save_debug_image(final_image_to_save, "10_negative_corrected_image_RGB")
# Apply Exposure Correction
    if args.perform_exposure_correction:
        if not args.perform_negative_correction:
            print(
                "Warning: Exposure correction is only effective when using --perform-negative-correction. Ignoring flag."
            )
        else:
            print("Applying exposure correction...")
            final_image_to_save = auto_exposure_and_contrast(final_image_to_save)
            save_debug_image(final_image_to_save, "11_exposure_corrected_image_RGB")
# Apply White Balance Correction
if args.perform_white_balance:
if not args.perform_negative_correction:
print(
"Warning: White balance correction is only effective when using --perform-negative-correction. Ignoring flag."
)
        else:
            print("Applying white balance correction...")
            final_image_to_save = chromatic_adaptation_white_balance(
                final_image_to_save
            )
            save_debug_image(
                final_image_to_save, "12_white_balance_corrected_image_RGB"
            )
    # Apply Film Grain
if args.simulate_grain:
print("Simulating film grain...")
        # warp is already imported at module level; wp.init() can still fail
        # at runtime, and the grain kernels below assume a CUDA device, so
        # fall back to the CPU path on any error.
        try:
            wp.init()
            wp_available = True
            print("Using NVIDIA Warp for GPU-accelerated film grain simulation")
        except Exception:
            wp_available = False
            print("Warp initialization failed. Using CPU for film grain (slower)")
# Get ISO from film datasheet
iso = datasheet.properties.calibration.iso
height, width = final_image_to_save.shape[:2]
# Function to get grain parameters based on image dimensions and ISO
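        # The sqrt(pixel-count) scaler keeps the grain's apparent physical
        # size constant across output resolutions, while the power-law ISO
        # scalers grow the mean grain radius and filter blur sublinearly
        # with film speed.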
def get_grain_parameters(width, height, iso):
# --- Baseline Parameters (calibrated for a 24MP image @ ISO 400) ---
PIXELS_BASE = 24_000_000.0
ISO_BASE = 400.0
MU_R_BASE = 0.075
SIGMA_BASE = 0.25
# --- Scaling Exponents (Artistically chosen for a natural feel) ---
ISO_EXPONENT_MU = 0.4
ISO_EXPONENT_SIGMA = 0.3
# Clamp ISO to a reasonable range
iso = max(64.0, min(iso, 8000.0))
# Calculate the total number of pixels in the actual image
pixels_actual = float(width * height)
# Calculate the resolution scaler
resolution_scaler = math.sqrt(pixels_actual / PIXELS_BASE)
print(
f"Resolution scaler: {resolution_scaler:.4f} (for {width}x{height} image)"
)
# Calculate the ISO scaler
iso_ratio = iso / ISO_BASE
iso_scaler_mu = iso_ratio**ISO_EXPONENT_MU
iso_scaler_sigma = iso_ratio**ISO_EXPONENT_SIGMA
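            # e.g. ISO 1600 gives iso_ratio = 4, so mu scales by 4**0.4 ~ 1.74
            # and sigma by 4**0.3 ~ 1.52.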
print(
f"ISO scaler: μ = {iso_scaler_mu:.4f}, σ = {iso_scaler_sigma:.4f} (for ISO {iso})"
)
# Calculate the final parameters by applying both scalers
final_mu_r = MU_R_BASE * resolution_scaler * iso_scaler_mu
final_sigma = SIGMA_BASE * resolution_scaler * iso_scaler_sigma
return (final_mu_r, final_sigma)
if wp_available:
# Define Warp functions and kernels
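            # w_func(x) below is the area of intersection of two unit-radius
            # discs whose centers are x apart, normalized by the disc area pi;
            # it drives the grain-overlap covariance in the Boolean model.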
@wp.func
def w_func(x: float):
if x >= 2.0:
return 0.0
elif x < 0.0:
return 1.0
else:
acos_arg = x / 2.0
if acos_arg > 1.0:
acos_arg = 1.0
if acos_arg < -1.0:
acos_arg = -1.0
sqrt_term_arg = 1.0 - acos_arg * acos_arg
if sqrt_term_arg < 0.0:
sqrt_term_arg = 0.0
overlap_area_over_pi = (
2.0 * wp.acos(acos_arg) - x * wp.sqrt(sqrt_term_arg)
) / wp.pi
return overlap_area_over_pi
@wp.func
def CB_const_radius_unit(u_pixel: float, x: float):
safe_u = wp.min(u_pixel, 0.99999)
if safe_u < 0.0:
safe_u = 0.0
one_minus_u = 1.0 - safe_u
if one_minus_u <= 1e-9:
return 0.0
wx = w_func(x)
exponent = 2.0 - wx
term1 = wp.pow(one_minus_u, exponent)
term2 = one_minus_u * one_minus_u
return term1 - term2
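            # generate_noise_kernel draws per-pixel Gaussian noise whose
            # variance is (mu_r^2 / (2 * sigma_filter^2)) * LUT(u), with u the
            # pixel's transmittance and LUT the precomputed variance integral.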
@wp.kernel
def generate_noise_kernel(
u_image: wp.array2d(dtype=float),
variance_lut: wp.array(dtype=float),
noise_out: wp.array2d(dtype=float),
mu_r: float,
sigma_filter: float,
seed: int,
):
ix, iy = wp.tid()
height = u_image.shape[0]
width = u_image.shape[1]
if ix >= height or iy >= width:
return
lut_size = variance_lut.shape[0]
u_val = u_image[ix, iy]
lut_pos = u_val * float(lut_size - 1)
lut_index0 = int(lut_pos)
lut_index0 = wp.min(wp.max(lut_index0, 0), lut_size - 2)
lut_index1 = lut_index0 + 1
t = lut_pos - float(lut_index0)
if t < 0.0:
t = 0.0
if t > 1.0:
t = 1.0
integral_val = wp.lerp(
variance_lut[lut_index0], variance_lut[lut_index1], t
)
var_bp = 0.0
if sigma_filter > 1e-6 and mu_r > 1e-6:
var_bp = wp.max(
0.0,
(mu_r * mu_r)
/ (2.0 * sigma_filter * sigma_filter)
* integral_val,
)
std_dev = wp.sqrt(var_bp)
state = wp.rand_init(seed, ix * width + iy + seed)
noise_sample = wp.randn(state) * std_dev
noise_out[ix, iy] = noise_sample
@wp.kernel
def convolve_2d_kernel(
input_array: wp.array2d(dtype=float),
kernel: wp.array(dtype=float),
kernel_radius: int,
output_array: wp.array2d(dtype=float),
):
ix, iy = wp.tid()
height = input_array.shape[0]
width = input_array.shape[1]
if ix >= height or iy >= width:
return
kernel_dim = 2 * kernel_radius + 1
accum = float(0.0)
for ky_offset in range(kernel_dim):
for kx_offset in range(kernel_dim):
k_idx = ky_offset * kernel_dim + kx_offset
weight = kernel[k_idx]
# Image coordinates to sample from
read_row = ix + (ky_offset - kernel_radius)
read_col = iy + (kx_offset - kernel_radius)
clamped_row = wp.max(0, wp.min(read_row, height - 1))
clamped_col = wp.max(0, wp.min(read_col, width - 1))
sample_val = input_array[clamped_row, clamped_col]
accum += weight * sample_val
output_array[ix, iy] = accum
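            # convolve_2d_kernel is a brute-force 2D convolution with
            # clamp-to-edge sampling; it spatially correlates the white noise
            # so the grain takes on the Gaussian filter's footprint.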
@wp.kernel
def add_rgb_noise_and_clip_kernel(
r_in: wp.array2d(dtype=float),
g_in: wp.array2d(dtype=float),
b_in: wp.array2d(dtype=float),
noise_r: wp.array2d(dtype=float),
noise_g: wp.array2d(dtype=float),
noise_b: wp.array2d(dtype=float),
r_out: wp.array2d(dtype=float),
g_out: wp.array2d(dtype=float),
b_out: wp.array2d(dtype=float),
):
ix, iy = wp.tid()
height = r_in.shape[0]
width = r_in.shape[1]
if ix >= height or iy >= width:
return
r_out[ix, iy] = wp.clamp(r_in[ix, iy] + noise_r[ix, iy], 0.0, 1.0)
g_out[ix, iy] = wp.clamp(g_in[ix, iy] + noise_g[ix, iy], 0.0, 1.0)
b_out[ix, iy] = wp.clamp(b_in[ix, iy] + noise_b[ix, iy], 0.0, 1.0)
def integrand_variance(x, u_pixel):
if x < 0:
return 0.0
if x >= 2.0:
return 0.0
safe_u = np.clip(u_pixel, 0.0, 0.99999)
one_minus_u = 1.0 - safe_u
if one_minus_u <= 1e-9:
return 0.0
acos_arg = x / 2.0
if acos_arg > 1.0:
acos_arg = 1.0
if acos_arg < -1.0:
acos_arg = -1.0
sqrt_term_arg = 1.0 - acos_arg * acos_arg
if sqrt_term_arg < 0.0:
sqrt_term_arg = 0.0
wx = (2.0 * np.arccos(acos_arg) - x * np.sqrt(sqrt_term_arg)) / np.pi
cb = np.power(one_minus_u, 2.0 - wx) - np.power(one_minus_u, 2.0)
return cb * x
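            # Each LUT entry is the integral over x in [0, 2] of
            # CB(u, x) * x dx, i.e. the grain noise variance at transmittance
            # u up to the mu_r^2 / (2 * sigma_filter^2) factor applied in
            # generate_noise_kernel.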
def precompute_variance_lut(num_u_samples=256):
print(f"Precomputing variance LUT with {num_u_samples+1} entries...")
u_values_for_lut = np.linspace(
0.0, 1.0, num_u_samples + 1, endpoint=True
)
lut = np.zeros(num_u_samples + 1, dtype=np.float32)
for i, u in enumerate(u_values_for_lut):
result, error = quad(
integrand_variance, 0, 2, args=(u,), epsabs=1e-6, limit=100
)
if result < 0:
result = 0.0
lut[i] = result
if i % ((num_u_samples + 1) // 10) == 0:
print(f" LUT progress: {i}/{num_u_samples+1}")
print("Variance LUT computed.")
return lut
def create_gaussian_kernel_2d(sigma, radius):
kernel_size = 2 * radius + 1
g = gaussian(kernel_size, sigma, sym=True)
kernel_2d = np.outer(g, g)
sum_sq = np.sum(kernel_2d**2)
if sum_sq > 1e-9:
kernel_2d /= np.sqrt(sum_sq)
return kernel_2d.flatten().astype(np.float32)
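            # Normalizing by the root of the summed squared weights makes the
            # filter preserve noise variance (not mean), so the LUT-derived
            # standard deviation survives the blur.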
# Calculate grain parameters
mu_r, sigma_filter = get_grain_parameters(width, height, iso)
print(f"Film grain parameters: μr = {mu_r}, σ_filter = {sigma_filter}")
# Use 256 u_samples for LUT
variance_lut_np = precompute_variance_lut(num_u_samples=256)
variance_lut_wp = wp.array(variance_lut_np, dtype=float, device="cuda")
kernel_radius = max(1, int(np.ceil(3 * sigma_filter)))
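            # A radius of 3 * sigma captures ~99.7% of the Gaussian's mass.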
kernel_np = create_gaussian_kernel_2d(sigma_filter, kernel_radius)
kernel_wp = wp.array(kernel_np, dtype=float, device="cuda")
print(
f"Using Gaussian filter with sigma={sigma_filter}, radius={kernel_radius}"
)
# Prepare image arrays on GPU
img_r = final_image_to_save[:, :, 0]
img_g = final_image_to_save[:, :, 1]
img_b = final_image_to_save[:, :, 2]
r_original_wp = wp.array(img_r, dtype=float, device="cuda")
g_original_wp = wp.array(img_g, dtype=float, device="cuda")
b_original_wp = wp.array(img_b, dtype=float, device="cuda")
# Allocate noise arrays
noise_r_unfiltered_wp = wp.empty_like(r_original_wp)
noise_g_unfiltered_wp = wp.empty_like(g_original_wp)
noise_b_unfiltered_wp = wp.empty_like(b_original_wp)
noise_r_filtered_wp = wp.empty_like(r_original_wp)
noise_g_filtered_wp = wp.empty_like(g_original_wp)
noise_b_filtered_wp = wp.empty_like(b_original_wp)
# Output arrays
r_output_wp = wp.empty_like(r_original_wp)
g_output_wp = wp.empty_like(g_original_wp)
b_output_wp = wp.empty_like(b_original_wp)
# Use a random seed based on the ISO value for consistency
seed = args.seed if hasattr(args, "seed") else int(iso)
# Generate and apply noise for each channel
if args.mono_grain:
# Create luminance image
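                # (0.299, 0.587, 0.114) are the Rec. 601 luma weights.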
img_gray_np = 0.299 * img_r + 0.587 * img_g + 0.114 * img_b
u_gray_wp = wp.array(img_gray_np, dtype=float, device="cuda")
noise_image_wp = wp.empty_like(u_gray_wp)
print("Generating monochromatic noise...")
wp.launch(
kernel=generate_noise_kernel,
dim=(height, width),
inputs=[
u_gray_wp,
variance_lut_wp,
noise_image_wp,
mu_r,
sigma_filter,
seed,
],
device="cuda",
)
noise_filtered_wp = wp.empty_like(u_gray_wp)
wp.launch(
kernel=convolve_2d_kernel,
dim=(height, width),
inputs=[
noise_image_wp,
kernel_wp,
kernel_radius,
noise_filtered_wp,
],
device="cuda",
)
# Copy the same noise to all channels
noise_r_filtered_wp.assign(noise_filtered_wp)
noise_g_filtered_wp.assign(noise_filtered_wp)
noise_b_filtered_wp.assign(noise_filtered_wp)
else:
# Process each channel separately
print("Processing R channel...")
wp.launch(
kernel=generate_noise_kernel,
dim=(height, width),
inputs=[
r_original_wp,
variance_lut_wp,
noise_r_unfiltered_wp,
mu_r,
sigma_filter,
seed,
],
device="cuda",
)
wp.launch(
kernel=convolve_2d_kernel,
dim=(height, width),
inputs=[
noise_r_unfiltered_wp,
kernel_wp,
kernel_radius,
noise_r_filtered_wp,
],
device="cuda",
)
print("Processing G channel...")
wp.launch(
kernel=generate_noise_kernel,
dim=(height, width),
inputs=[
g_original_wp,
variance_lut_wp,
noise_g_unfiltered_wp,
mu_r,
sigma_filter,
seed + 1,
],
device="cuda",
)
wp.launch(
kernel=convolve_2d_kernel,
dim=(height, width),
inputs=[
noise_g_unfiltered_wp,
kernel_wp,
kernel_radius,
noise_g_filtered_wp,
],
device="cuda",
)
print("Processing B channel...")
wp.launch(
kernel=generate_noise_kernel,
dim=(height, width),
inputs=[
b_original_wp,
variance_lut_wp,
noise_b_unfiltered_wp,
mu_r,
sigma_filter,
seed + 2,
],
device="cuda",
)
wp.launch(
kernel=convolve_2d_kernel,
dim=(height, width),
inputs=[
noise_b_unfiltered_wp,
kernel_wp,
kernel_radius,
noise_b_filtered_wp,
],
device="cuda",
)
# Add the noise to the original image
print("Adding noise to image...")
wp.launch(
kernel=add_rgb_noise_and_clip_kernel,
dim=(height, width),
inputs=[
r_original_wp,
g_original_wp,
b_original_wp,
noise_r_filtered_wp,
noise_g_filtered_wp,
noise_b_filtered_wp,
r_output_wp,
g_output_wp,
b_output_wp,
],
device="cuda",
)
# Copy results back to CPU
img_with_grain = np.zeros((height, width, 3), dtype=np.float32)
img_with_grain[:, :, 0] = r_output_wp.numpy()
img_with_grain[:, :, 1] = g_output_wp.numpy()
img_with_grain[:, :, 2] = b_output_wp.numpy()
final_image_to_save = img_with_grain
else:
# CPU fallback implementation (simplified)
print("Using CPU for film grain simulation (this will be slower)")
print("Consider installing NVIDIA Warp for GPU acceleration")
# Calculate grain parameters
mu_r, sigma_filter = get_grain_parameters(width, height, iso)
# Simple noise generation for CPU fallback
rng = np.random.RandomState(
args.seed if hasattr(args, "seed") else int(iso)
)
# Generate random noise for each channel
noise_strength = mu_r * 0.15 # Simplified approximation
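            # The 0.15 factor is a rough stand-in for the Boolean-model
            # variance integral used on the GPU path; it approximates the
            # grain amplitude rather than reproducing it exactly.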
if args.mono_grain:
# Generate one noise channel and apply to all
noise = rng.normal(0, noise_strength, (height, width))
noise = gaussian_filter(noise, sigma=sigma_filter)
# Apply same noise to all channels
for c in range(3):
final_image_to_save[:, :, c] = np.clip(
final_image_to_save[:, :, c] + noise, 0.0, 1.0
)
else:
# Generate separate noise for each channel
for c in range(3):
noise = rng.normal(0, noise_strength, (height, width))
noise = gaussian_filter(noise, sigma=sigma_filter)
final_image_to_save[:, :, c] = np.clip(
final_image_to_save[:, :, c] + noise, 0.0, 1.0
)
save_debug_image(final_image_to_save, "13_final_image_with_grain_RGB")
print("Converting to output format and saving...")
    # The DNG files we process have the top 64 rows black (likely masked
    # sensor rows), so crop them out.
    if args.input_image.lower().endswith(".dng"):
        final_image_to_save = final_image_to_save[64:, :, :]
# Apply Linear to Output Color Space Conversion
# final_image_to_save = colour.cctf_encoding(final_image_to_save, function="sRGB")
    is_16bit = args.output_image.lower().endswith((".tiff", ".tif"))
    output_image = (
        np.clip(final_image_to_save, 0.0, 1.0) * (65535.0 if is_16bit else 255.0)
    ).astype(np.uint16 if is_16bit else np.uint8)
iio.imwrite(args.output_image, output_image)
print("Done.")
if __name__ == "__main__":
main()