# filmsim/testbench.py — testbench runner for the 'filmcolor' script.
#!/usr/bin/env python3
import os
import sys
import argparse
import itertools
import subprocess
from multiprocessing import Pool
from functools import partial
# --- Configuration ---
# Maps an abbreviation (used in output filenames) to the full command-line
# flag it stands for, making it easy to add or remove flags later.
ARGS_MAP = {
    # 'fd': '--force-d65',
    # 'pnc': '--perform-negative-correction',
    # 'pwb': '--perform-white-balance',
    # 'pec': '--perform-exposure-correction',
    # 'rae': '--raw-auto-exposure',
}

# Mutually exclusive argument groups: at most one abbreviation from each
# group may appear in any generated combination, which keeps invalid flag
# pairings (e.g. two scanner types) out of the test matrix.
ONEOF_GROUPS = [
    {
        'smf': ['--scanner-type', 'frontier'],
        'smh': ['--scanner-type', 'hasselblad'],
        'smn': ['--scanner-type', 'noritsu'],
    },
    {
        'sg': '--simulate-grain',
        'mg': '--mono-grain',
    },
]
# --- Worker Function for Multiprocessing ---
def run_filmcolor_command(job_info, filmcolor_path, dry_run=False):
    """Execute a single filmcolor command and return a status string.

    Designed to be called by a multiprocessing Pool worker, so all outcomes
    (success, failure, missing executable) are reported via the return
    value rather than by raising.

    Args:
        job_info: Tuple of (input_file, datasheet, output_file, flags).
            Each item in ``flags`` is either a bare flag string
            (e.g. '--simulate-grain') or a [flag, value] list
            (e.g. ['--scanner-type', 'frontier']).
        filmcolor_path: Path to the 'filmcolor' executable.
        dry_run: If True, return the command line without executing it.

    Returns:
        A human-readable, emoji-prefixed status string.
    """
    input_file, datasheet, output_file, flags = job_info
    command = [filmcolor_path, input_file, datasheet, output_file]
    for flag in flags:
        if isinstance(flag, list):
            # Arguments with values, e.g. ['--scanner-type', 'frontier']
            command.extend(flag)
        else:
            # Simple flags, e.g. '--simulate-grain'
            command.append(flag)
    command_str = " ".join(command)
    print(f"🚀 Starting job: {os.path.basename(output_file)}")
    if dry_run:
        return f"🔍 DRY RUN: {command_str} (not executed)"
    try:
        # capture_output=True keeps stdout/stderr from cluttering the display;
        # check=True raises CalledProcessError on a non-zero exit code.
        # The captured output itself is not needed, only the exit status.
        subprocess.run(
            command,
            check=True,
            capture_output=True,
            text=True,
            encoding='utf-8'
        )
        return f"✅ SUCCESS: Created {output_file}"
    except FileNotFoundError:
        return f"❌ ERROR: filmcolor executable not found at '{filmcolor_path}'"
    except subprocess.CalledProcessError as e:
        # The command ran but returned a non-zero exit code.
        return (
            f"❌ FAILURE: Could not process {os.path.basename(input_file)} with {os.path.basename(datasheet)}\n"
            f" Command: {command_str}\n"
            f" Exit Code: {e.returncode}\n"
            f" Stderr: {e.stderr.strip()}"
        )
    except Exception as e:
        # Last-resort catch so one bad job cannot kill the pool worker.
        return f"❌ UNEXPECTED ERROR: {e}"
# --- Main Script Logic ---
def _find_raw_files(input_dir):
    """Recursively collect RAW (.arw/.dng) files under *input_dir*."""
    raw_files = []
    for root, _, files in os.walk(input_dir):
        for file in files:
            if file.lower().endswith(('.arw', '.dng')):
                raw_files.append(os.path.join(root, file))
    return raw_files


def _find_datasheet_files(datasheet_dir):
    """Collect datasheet (.json) files directly inside *datasheet_dir*.

    Exits the program with status 1 if the directory does not exist.
    """
    try:
        return [
            os.path.join(datasheet_dir, file)
            for file in os.listdir(datasheet_dir)
            if file.lower().endswith('.json')
        ]
    except FileNotFoundError:
        print(f"❌ Datasheet directory not found at '{datasheet_dir}'. Exiting.")
        sys.exit(1)


def _generate_arg_combos():
    """Return every unique sorted combination of argument abbreviations.

    Each subset of ARGS_MAP keys is combined with at most one key from
    each ONEOF_GROUPS group (choosing none from a group is also allowed).
    """
    standalone_args = list(ARGS_MAP.keys())
    standalone_arg_combos = []
    for i in range(len(standalone_args) + 1):
        for combo in itertools.combinations(standalone_args, i):
            standalone_arg_combos.append(sorted(combo))
    # Each oneof group contributes one of its keys, or None (no option).
    oneof_options = [[None] + list(group.keys()) for group in ONEOF_GROUPS]
    all_arg_combos = []
    for oneof_combo in itertools.product(*oneof_options):
        chosen = [abbr for abbr in oneof_combo if abbr is not None]
        for standalone_combo in standalone_arg_combos:
            # Sort for consistent output-filename naming.
            all_arg_combos.append(sorted(standalone_combo + chosen))
    # De-duplicate: different products can yield the same combination.
    return [list(combo) for combo in set(map(tuple, all_arg_combos))]


def _flags_for(arg_combo_abbrs):
    """Translate abbreviation keys into concrete filmcolor flags."""
    flags = []
    for abbr in arg_combo_abbrs:
        # Oneof-group entries take precedence over ARGS_MAP entries.
        for group in ONEOF_GROUPS:
            if abbr in group:
                flags.append(group[abbr])
                break
        else:
            if abbr in ARGS_MAP:
                flags.append(ARGS_MAP[abbr])
    # Flags required for every testbench run.
    flags.extend(['--perform-negative-correction', "--perform-white-balance", '--perform-exposure-correction'])
    return flags


def _build_jobs(raw_files, datasheet_files, all_arg_combos, refresh):
    """Cross every RAW file, datasheet, and flag combo into job tuples.

    Returns (jobs_to_run, skipped_jobs). Outputs that already exist are
    skipped unless *refresh* is true. Each job tuple matches the shape
    expected by run_filmcolor_command: (input, datasheet, output, flags).
    """
    jobs_to_run = []
    skipped_jobs = 0
    for raw_file_path in raw_files:
        input_dir = os.path.dirname(raw_file_path)
        input_filename = os.path.basename(raw_file_path)
        for datasheet_path in datasheet_files:
            datasheet_name = os.path.splitext(os.path.basename(datasheet_path))[0]
            for arg_combo_abbrs in all_arg_combos:
                arg_suffix = "-".join(arg_combo_abbrs)
                # Avoid a trailing hyphen when there are no abbreviations.
                if arg_suffix:
                    output_name = f"{input_filename}-{datasheet_name}-{arg_suffix}.jpg"
                else:
                    output_name = f"{input_filename}-{datasheet_name}.jpg"
                output_path = os.path.join(input_dir, output_name)
                if os.path.exists(output_path) and not refresh:
                    skipped_jobs += 1
                    continue
                jobs_to_run.append(
                    (raw_file_path, datasheet_path, output_path, _flags_for(arg_combo_abbrs))
                )
    return jobs_to_run, skipped_jobs


def main():
    """Parse arguments, enumerate all jobs, confirm, and run the pool."""
    parser = argparse.ArgumentParser(
        description="A testbench runner for the 'filmcolor' script.",
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument(
        "input_dir",
        help="The root directory containing subfolders with RAW images (ARW, DNG)."
    )
    parser.add_argument(
        "datasheet_dir",
        help="The directory containing the film datasheet JSON files."
    )
    parser.add_argument(
        "filmcolor_path",
        help="The path to the 'filmcolor' executable script."
    )
    parser.add_argument(
        "-j", "--jobs",
        type=int,
        default=3,
        help="Number of parallel jobs to run. (Default: 3)"
    )
    parser.add_argument(
        "--dry-run",
        action='store_true',
        help="If set, will only print the commands without executing them."
    )
    parser.add_argument(
        "--refresh",
        action='store_true',
        help="If set, will reprocess existing output files. Otherwise, skips files that already exist."
    )
    args = parser.parse_args()

    # 1. Find all input RAW files
    print(f"🔎 Scanning for RAW files in '{args.input_dir}'...")
    raw_files = _find_raw_files(args.input_dir)
    if not raw_files:
        print("❌ No RAW (.ARW or .DNG) files found. Exiting.")
        sys.exit(1)
    print(f" Found {len(raw_files)} RAW files.")

    # 2. Find all datasheet JSON files
    print(f"🔎 Scanning for JSON files in '{args.datasheet_dir}'...")
    datasheet_files = _find_datasheet_files(args.datasheet_dir)
    if not datasheet_files:
        print("❌ No datasheet (.json) files found. Exiting.")
        sys.exit(1)
    print(f" Found {len(datasheet_files)} datasheet files.")

    # 3. Generate all argument combinations
    all_arg_combos = _generate_arg_combos()

    # 4. Create the full list of jobs to run
    jobs_to_run, skipped_jobs = _build_jobs(
        raw_files, datasheet_files, all_arg_combos, args.refresh
    )
    total_jobs = len(jobs_to_run)
    print(f"\n✨ Generated {total_jobs} total jobs to run.")
    if skipped_jobs > 0:
        print(f"⏭️ Skipped {skipped_jobs} existing output files. Use --refresh to reprocess them.")
    if total_jobs == 0:
        print("Nothing to do. Exiting.")
        sys.exit(0)

    # Ask for confirmation before starting a large number of jobs.
    try:
        confirm = input(f"Proceed with running {total_jobs} jobs using {args.jobs} parallel processes? (y/N): ")
        if confirm.lower() != 'y':
            print("Aborted by user.")
            sys.exit(0)
    except KeyboardInterrupt:
        print("\nAborted by user.")
        sys.exit(0)

    # 5. Run the jobs in a multiprocessing pool
    print("\n--- Starting Testbench ---\n")
    # `partial` pre-fills the fixed arguments of the worker function.
    worker_func = partial(run_filmcolor_command, filmcolor_path=args.filmcolor_path, dry_run=args.dry_run)
    with Pool(processes=args.jobs) as pool:
        # imap_unordered yields results as they complete, giving
        # real-time feedback without waiting for all jobs to finish.
        for i, result in enumerate(pool.imap_unordered(worker_func, jobs_to_run), 1):
            print(f"[{i}/{total_jobs}] {result}")
    print("\n--- Testbench Finished ---")


if __name__ == "__main__":
    main()