diff --git a/main.py b/main.py
index 2e987b2..e118185 100644
--- a/main.py
+++ b/main.py
@@ -7,12 +7,15 @@
 import hashlib
 import random
 import multiprocessing
 import concurrent.futures
+import time
 import numpy as np
 
 from src.mediautils.audio import extract_audio_from_video
+from src.mediautils.video import render_moments
 from src.editors.amplitude.editor import AmplitudeEditor
 from src.editors.sentiment.editor import SentimentEditor
+from src.math.cost import quadratic_loss
 from src.math.distribution import create_distribution
 
 log = structlog.get_logger()
@@ -22,6 +25,10 @@ EDITORS = {
     'sentiment': SentimentEditor
 }
 
+ERROR_FUNCS = {
+    'quadratic': quadratic_loss
+}
+
 def main(args):
     # Check video existance
     input_file = args.file
@@ -56,6 +63,8 @@ def main(args):
     log.info("initializing editor", editor=args.editor)
     editor = EDITORS[args.editor](str(in_vid_path.resolve()), audio_path, vars(args))
     log.info("initialized editor", editor=args.editor)
+    costfunc = ERROR_FUNCS[args.cost]
+    desired = args.duration
 
     # Generate center of large window and small window size
     large_window_center = random.uniform(30, 50)
@@ -67,32 +76,83 @@ def main(args):
     # The decay rate, or how quickly our spread multiplier decreases as we approach the center of the gradient
     spread_decay = random.uniform(0.0001, 0.001)
-    log.info("creating distributions", large_start=large_window_center, small_start=small_window_center, spread=spread_multiplier, decay=spread_decay)
-    # Create distribution of large and small
     parallelism = args.parallelism
-    large_distribution = create_distribution(large_window_center, spread_multiplier, parallelism)
-    np.random.shuffle(large_distribution)
-    small_distribution = create_distribution(small_window_center, spread_multiplier, parallelism)
-    np.random.shuffle(small_distribution)
-    with concurrent.futures.ThreadPoolExecutor() as executor:
-        futures = []
-        pairs = list(zip(large_distribution, small_distribution))
-        for pair in pairs:
-            futures.append(
-                executor.submit(
-                    editor.edit,
-                    pair[0],
-                    pair[1],
-                    vars(args)
+    # The main loop of the program starts here
+    # we first create distributions
+    # use workers to simultaneously create many possible edits
+    # find the best edit of the lot -> this is determined by lowest "cost"
+    # if the best fits within our desired time range, output, otherwise
+    # reset the distributions using the best as the new center, then repeat
+    # Create distribution of large and small
+
+    complete = False
+    iterations = 0
+    while not complete:
+        log.info("creating distributions", large_start=large_window_center, small_start=small_window_center, spread=spread_multiplier, decay=spread_decay)
+        large_distribution = create_distribution(large_window_center, spread_multiplier, parallelism)
+        np.random.shuffle(large_distribution)
+        small_distribution = create_distribution(small_window_center, spread_multiplier, parallelism)
+        np.random.shuffle(small_distribution)
+
+        # Fire off workers to generate edits
+        moment_results = []
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            futures = []
+            pairs = list(zip(large_distribution, small_distribution))
+            for pair in pairs:
+                futures.append(
+                    executor.submit(
+                        editor.edit,
+                        pair[0],
+                        pair[1],
+                        vars(args)
+                    )
                 )
-            )
-        for future in concurrent.futures.as_completed(futures):
-            value = future.result()
-            log.info("got val", val=value)
+            for future in concurrent.futures.as_completed(futures):
+                try:
+                    moment_results.append(future.result())
+                except Exception:
+                    log.exception("error during editing")
+                    sys.exit(-2)
+        moment_results
+        costs = []
+        durations = []
+        for result in moment_results:
+            total_duration = 0
+            for moment in result[0]:
+                total_duration = total_duration + moment.get_duration()
+            costs.append(costfunc(desired, total_duration))
+            durations.append(total_duration)
+        index_min = min(range(len(costs)), key=costs.__getitem__)
+        large_window_center = moment_results[index_min][1]
+        small_window_center = moment_results[index_min][2]
+        log.info("batch complete", best_large=large_window_center, best_small=small_window_center, duration=durations[index_min])
+        if durations[index_min] > desired * 0.95 and desired * 1.05 > durations[index_min]:
+            log.info("found edit within target duration", target=desired, duration=durations[index_min])
+            out_path = Path(args.destination)
+            log.info("rendering...")
+            start = time.time()
+            render_moments(moment_results[index_min][0], str(in_vid_path.resolve()), str(out_path.resolve()))
+            log.info("render complete", duration=time.time() - start, output=str(out_path.resolve()))
+            sys.exit(0)
+        iterations = iterations + parallelism
+        if iterations > 50000:
+            log.error("could not find a viable edit in the target duration, try other params", target=desired)
+            sys.exit(-4)
+        spread_multiplier = spread_multiplier - spread_decay
+        if spread_multiplier < 0:
+            log.warn("spread reached 0, resetting")
+            large_window_center = random.uniform(30, 50)
+            small_window_center = random.uniform(5, 15)
+            spread_multiplier = random.uniform(0.15, 0.18)
+            spread_decay = random.uniform(0.0001, 0.001)
+
+
+
+
 
-    desired = args.duration
 
 
 if __name__ == "__main__":
@@ -115,13 +175,12 @@ if __name__ == "__main__":
         type=int,
     )
 
-    parser_audio_amp = subparsers.add_parser('sentiment', help='The sentiment editor transcribes the speech in a video and runs sentiment analysis on the resulting text. Using moving averages, large swings in sentiment can be correlated to controversial or exciting moments.')
+    parser_audio_amp = subparsers.add_parser('sentiment', help='The sentiment editor transcribes the speech in a video and runs sentiment analysis on the resulting text. Using moving averages, large swings in sentiment can be correlated to controversial or exciting moments. A GPU with CUDA is recommended for fast results.')
 
-    parser.add_argument("-d", "--dryrun", dest="drun", action="store_true")
     parser.add_argument("-p", "--parallelism", dest="parallelism", type=int, default=multiprocessing.cpu_count() - 2, help="The number of cores to use, defaults to N - 2 cores.")
+    parser.add_argument("--cost-function", dest="cost", choices=ERROR_FUNCS.keys(), default='quadratic')
     parser.add_argument(
-        "-i",
         "--minduration",
         default=8,
         help="Minimum clip duration",
@@ -129,7 +188,6 @@ if __name__ == "__main__":
         type=int,
     )
     parser.add_argument(
-        "-m",
         "--maxduration",
         default=15,
         help="Maximum clip duration",
diff --git a/src/editors/amplitude/editor.py b/src/editors/amplitude/editor.py
index 11a1fd2..6b9d19b 100644
--- a/src/editors/amplitude/editor.py
+++ b/src/editors/amplitude/editor.py
@@ -25,5 +25,5 @@ class AmplitudeEditor:
 
         long_ma = np_moving_average(self.squared_subsample, large_window * window_factor)
         short_ma = np_moving_average(self.squared_subsample, small_window * window_factor)
         highlights = find_moving_average_highlights(short_ma, long_ma, self.factor / self.bitrate)
-        return highlights
+        return highlights, large_window, small_window
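For context on the new `--cost-function` flag: `quadratic_loss` is imported from `src.math.cost`, but its body is not part of this diff. Judging only from the call site `costfunc(desired, total_duration)`, a minimal sketch of what such a quadratic cost could look like follows; the signature and docstring are assumptions, not taken from the repository.

```python
# Hypothetical sketch of src/math/cost.py; the actual implementation is not shown in this diff.
def quadratic_loss(target: float, actual: float) -> float:
    """Return the squared deviation of the candidate duration from the target.

    Squaring means an edit that misses the target by 10 seconds costs 100x
    more than one that misses by 1 second, so ranking by this value strongly
    prefers candidates close to the desired length.
    """
    return (target - actual) ** 2
```

Note that the cost only ranks candidates within a batch; acceptance is decided separately by the plus/minus 5% window check (`durations[index_min] > desired * 0.95` and `desired * 1.05 > durations[index_min]`).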
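Similarly, `create_distribution(center, spread, n)` is only visible through its call sites. One plausible, purely illustrative implementation consistent with how it is used (a batch of `parallelism` window sizes scattered around a center, with the scatter shrinking as `spread_multiplier` decays) might be:

```python
import numpy as np

# Illustrative guess at src/math/distribution.py; the real function may differ.
def create_distribution(center: float, spread: float, size: int) -> np.ndarray:
    """Sample `size` candidate window sizes around `center`.

    The standard deviation scales with both the center and the spread
    multiplier, so each batch tightens around the best-known value as the
    spread decays toward zero in the main loop.
    """
    samples = np.random.normal(loc=center, scale=center * spread, size=size)
    return np.clip(samples, 1e-3, None)  # window sizes must stay positive
```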