# ale/main.py

import argparse
import structlog
from functools import partial
from pathlib import Path
import sys
import hashlib
import random
import multiprocessing
import concurrent.futures

import numpy as np

from src.mediautils.audio import extract_audio_from_video
from src.editors.amplitude.editor import AmplitudeEditor
from src.editors.sentiment.editor import SentimentEditor
from src.math.distribution import create_distribution

# Module-wide structured logger shared by main() and the script entry point.
log = structlog.get_logger()

# Registry of available editing algorithms, keyed by the sub-command name
# given on the command line; main() looks the editor class up here.
EDITORS = {
    "amplitude": AmplitudeEditor,
    "sentiment": SentimentEditor,
}

def _sha1_of_file(path, buf_size=1655360):
    """Return the hex SHA-1 digest of the file at *path*, read in *buf_size*-byte chunks."""
    digest = hashlib.sha1()
    with open(path, 'rb') as f:
        # iter(callable, sentinel) keeps calling read() until it returns b'' (EOF)
        for chunk in iter(lambda: f.read(buf_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def main(args):
    """Run the editing pipeline for one parsed command line.

    Validates the input path, hashes the file to derive a temp/cache name,
    extracts (or reuses) the audio track, builds the selected editor, and then
    calls ``editor.edit`` once per (large window, small window) pair on a
    thread pool, logging every returned value.

    Args:
        args: Parsed ``argparse.Namespace``. ``file``, ``editor`` and
            ``parallelism`` are read directly; the whole namespace is also
            passed to the editor as a dict (``vars(args)``).

    Raises:
        SystemExit: with status -1 when ``args.file`` is not an existing file.
        Any exception raised by ``editor.edit`` propagates via ``future.result()``.
    """
    # Check video existence
    input_file = args.file
    in_vid_path = Path(input_file)
    if not in_vid_path.is_file():
        log.error("the specified input path does not exist", path=input_file)
        sys.exit(-1)
    log.info("preparing video", input_video=input_file)

    # Hash the video, we use this to see if we have processed this video before
    # and as a simple way to generate temp file names
    file_hash = _sha1_of_file(in_vid_path)
    log.info("hash computed", hash=file_hash)
    temp_file_name = f"ale-{file_hash}"

    # Resolve once; both the audio extraction and the editor take the absolute path
    video_path = str(in_vid_path.resolve())
    # vars() returns the namespace's own dict, so one lookup serves every consumer
    options = vars(args)

    # Prepare the input video
    audio_path, audio_cached = extract_audio_from_video(video_path, temp_file_name)
    if audio_cached:
        log.info("using cached audio file", cache_path=audio_path)
    else:
        log.info("extracted audio", cache_path=audio_path)

    # Initialize Editor
    log.info("initializing editor", editor=args.editor)
    editor = EDITORS[args.editor](video_path, audio_path, options)
    log.info("initialized editor", editor=args.editor)

    # Generate center of large window and small window size
    large_window_center = random.uniform(30, 50)
    small_window_center = random.uniform(5, 15)

    # The spread multiplier, or epsilon, slowly decays as we approach the center of the gradient
    spread_multiplier = random.uniform(0.15, 0.18)

    # The decay rate, or how quickly our spread multiplier decreases as we approach the center of the gradient
    # NOTE: currently only logged; it is not passed to create_distribution
    spread_decay = random.uniform(0.0001, 0.001)

    log.info("creating distributions", large_start=large_window_center, small_start=small_window_center, spread=spread_multiplier, decay=spread_decay)
    # Create distribution of large and small
    # presumably the third argument is the number of samples to draw - confirm in src.math.distribution
    parallelism = args.parallelism
    large_distribution = create_distribution(large_window_center, spread_multiplier, parallelism)
    np.random.shuffle(large_distribution)
    small_distribution = create_distribution(small_window_center, spread_multiplier, parallelism)
    np.random.shuffle(small_distribution)

    # Size the pool from --parallelism instead of the executor's default;
    # ThreadPoolExecutor raises ValueError for max_workers <= 0, hence the floor of 1.
    workers = max(1, parallelism)
    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [
            executor.submit(editor.edit, large, small, options)
            for large, small in zip(large_distribution, small_distribution)
        ]
        for future in concurrent.futures.as_completed(futures):
            log.info("got val", val=future.result())

    # NOTE: args.duration (the target edit length) is not consumed by this function yet.


def _build_parser():
    """Build the ALE command-line parser: positionals, editor sub-commands and shared options."""
    parser = argparse.ArgumentParser(
        prog="ALE", description="ALE: Automatic Linear Editor.",
        formatter_class=partial(argparse.HelpFormatter, width=100)
    )
    parser.add_argument('file', help='Path to the video file to edit')
    parser.add_argument('duration', help='Target length of the edit, in seconds', type=int)
    parser.add_argument('destination', help='Edited video save location')
    # The chosen sub-command name is stored in args.editor and used as the EDITORS key
    subparsers = parser.add_subparsers(dest='editor', help='The editing algorithm to use')

    parser_amplitude = subparsers.add_parser('amplitude', help='The amplitude editor uses audio amplitude moving averages to find swings from relatively quiet moments to loud moments. This is useful in videos where long moments of quiet are interspersed with loud action filled moments.')
    parser_amplitude.add_argument(
        "-f",
        "--factor",
        default=16000,
        help="Subsampling factor",
        dest="factor",
        type=int,
    )

    # The sentiment editor takes no editor-specific options
    subparsers.add_parser('sentiment', help='The sentiment editor transcribes the speech in a video and runs sentiment analysis on the resulting text. Using moving averages, large swings in sentiment can be correlated to controversial or exciting moments.')

    parser.add_argument("-d", "--dryrun", dest="drun", action="store_true")
    # Clamp the default to 1 so machines with two or fewer cores do not get 0 or a negative value
    parser.add_argument("-p", "--parallelism", dest="parallelism", type=int, default=max(1, multiprocessing.cpu_count() - 2), help="The number of cores to use, defaults to N - 2 cores (at least 1).")

    parser.add_argument(
        "-i",
        "--minduration",
        default=8,
        help="Minimum clip duration",
        dest="mindur",
        type=int,
    )
    parser.add_argument(
        "-m",
        "--maxduration",
        default=15,
        help="Maximum clip duration",
        dest="maxdur",
        type=int,
    )
    return parser


if __name__ == "__main__":
    parser = _build_parser()
    args = parser.parse_args()
    # Sub-commands are optional by default in argparse; without this check a
    # missing editor surfaces later as a KeyError on EDITORS[None].
    if args.editor is None:
        parser.error(f"an editor must be specified (choose from: {', '.join(EDITORS)})")
    try:
        main(args)
    except Exception:
        # Top-level boundary: log the traceback and signal failure to the shell
        log.exception("uncaught error!")
        sys.exit(-2)
    sys.exit(0)