From 174a828988fec4ea00fad345095e3c29495527d8 Mon Sep 17 00:00:00 2001
From: Tanishq Dubey <tdubey@clearstreet.io>
Date: Tue, 7 Mar 2023 19:35:23 -0500
Subject: [PATCH] refactor editors to move core logic into the editors
 themselves

---
 main.py                         | 145 +++++++-------------------------
 src/editors/amplitude/editor.py | 101 ++++++++++++++++++++++
 src/editors/sentiment/editor.py | 101 ++++++++++++++++++++++
 3 files changed, 233 insertions(+), 114 deletions(-)

diff --git a/main.py b/main.py
index 11928f2..049d30b 100644
--- a/main.py
+++ b/main.py
@@ -1,26 +1,22 @@
 import argparse
-import concurrent.futures
 import hashlib
 import multiprocessing
-import random
 import sys
 import time
 from functools import partial
 from pathlib import Path
 
-import numpy as np
 import structlog
 
-from src.utils.prereq import check_ffmpeg, install_ffmpeg
+from src.utils.prereq import check_ffmpeg
 
 check_ffmpeg()
 
 from src.editors.amplitude.editor import AmplitudeEditor
 from src.editors.sentiment.editor import SentimentEditor
 from src.math.cost import quadratic_loss
-from src.math.distribution import create_distribution
 from src.mediautils.audio import extract_audio_from_video
-from src.mediautils.video import filter_moments, render_moments
+from src.mediautils.video import render_moments
 
 log = structlog.get_logger()
 
@@ -90,116 +86,37 @@ def main(args):
     costfunc = ERROR_FUNCS[args.cost]
     desired = args.duration
 
-    # Generate center of large window and small window size
-    large_window_center = random.uniform(30, 50)
-    small_window_center = random.uniform(5, 15)
+    result = []
+    try:
+        result = editor.full_edit(costfunc, desired, vars(args))
+    except Exception as e:
+        log.fatal("there was an error during editing the video", error=e)
+        sys.exit(-1)
 
-    # The spread multiplier, or epsilon, slowly decays as we approach the center of the gradient
-    spread_multiplier = random.uniform(0.15, 0.18)
+    if len(result) == 0:
+        log.fatal("no viable edit was found for the provided parameters, please try again with different values")
+        sys.exit(-2)
 
-    # The decay rate, or how quickly our spread multiplier decreases as we approach the center of the gradient
-    spread_decay = random.uniform(0.000001, 0.0001)
-
-    parallelism = args.parallelism
-
-    # The main loop of the program starts here
-    #   we first create distributions
-    #   use workers to simultanously create many possible edits
-    #   find the best edit of the lot -> this is determined by lowest "cost"
-    #   if the best fits within our desitred time range, output, otherwise
-    #       reset the distributions using the best as the new center, then repeat
-    # Create distribution of large and small
-
-    complete = False
-    iterations = 0
-    while not complete:
-        large_distribution = create_distribution(
-            large_window_center, spread_multiplier, parallelism
-        )
-        np.random.shuffle(large_distribution)
-        small_distribution = create_distribution(
-            small_window_center, spread_multiplier, parallelism
-        )
-        np.random.shuffle(small_distribution)
-
-        # Fire off workers to generate edits
-        moment_results = []
-        with concurrent.futures.ThreadPoolExecutor() as executor:
-            futures = []
-            pairs = list(zip(large_distribution, small_distribution))
-            for pair in pairs:
-                futures.append(
-                    executor.submit(
-                        editor.edit,
-                        pair[0] if pair[0] > pair[1] else pair[1],
-                        pair[1] if pair[0] > pair[1] else pair[0],
-                        vars(args),
-                    )
-                )
-            for future in concurrent.futures.as_completed(futures):
-                try:
-                    moment_results.append(list(future.result()))
-                except Exception:
-                    log.exception("error during editing")
-                    sys.exit(-2)
-        moment_results
-        costs = []
-        durations = []
-        for result in moment_results:
-            total_duration = 0
-            result[0] = filter_moments(result[0], args.mindur, args.maxdur)
-            for moment in result[0]:
-                total_duration = total_duration + moment.get_duration()
-            costs.append(costfunc(desired, total_duration))
-            durations.append(total_duration)
-        index_min = min(range(len(costs)), key=costs.__getitem__)
-        large_window_center = moment_results[index_min][1]
-        small_window_center = moment_results[index_min][2]
-        log.info(
-            "batch complete",
-            best_large=large_window_center,
-            best_small=small_window_center,
-            duration=durations[index_min],
-        )
-        if (
-            durations[index_min] > desired * 0.95
-            and desired * 1.05 > durations[index_min]
-        ):
-            log.info(
-                "found edit within target duration",
-                target=desired,
-                duration=durations[index_min],
-            )
-            out_path = Path(args.destination)
-            log.info("rendering...")
-            start = time.time()
-            render_moments(
-                moment_results[index_min][0],
-                str(in_vid_path.resolve()),
-                str(out_path.resolve()),
-                intro_path=intro_file,
-                parallelism=args.parallelism,
-            )
-            log.info(
-                "render complete",
-                duration=time.time() - start,
-                output=str(out_path.resolve()),
-            )
-            sys.exit(0)
-        iterations = iterations + parallelism
-        if iterations > 50000:
-            log.error(
-                "could not find a viable edit in the target duration, try other params",
-                target=desired,
-            )
-            sys.exit(-4)
-        spread_multiplier = spread_multiplier - spread_decay
-        if spread_multiplier < 0:
-            log.warn("spread reached 0, resetting")
-            large_window_center = random.uniform(30, 50)
-            small_window_center = random.uniform(5, 15)
-            spread_multiplier = random.uniform(0.15, 0.18)
-            spread_decay = random.uniform(0.0001, 0.001)
+    log.info(
+        "found edit within target duration",
+        target=desired,
+    )
+    out_path = Path(args.destination)
+    log.info("rendering...")
+    start = time.time()
+    render_moments(
+        result,
+        str(in_vid_path.resolve()),
+        str(out_path.resolve()),
+        intro_path=intro_file,
+        parallelism=args.parallelism,
+    )
+    log.info(
+        "render complete",
+        duration=time.time() - start,
+        output=str(out_path.resolve()),
+    )
+    sys.exit(0)
 
 
 if __name__ == "__main__":
diff --git a/src/editors/amplitude/editor.py b/src/editors/amplitude/editor.py
index c7e9ae9..011af36 100644
--- a/src/editors/amplitude/editor.py
+++ b/src/editors/amplitude/editor.py
@@ -1,9 +1,13 @@
 import numpy as np
 import structlog
+import random
+import concurrent.futures
 
 from ...math.average import np_moving_average
+from ...math.distribution import create_distribution
 from ...mediautils.audio import process_audio, resample
 from ..common import find_moving_average_highlights
+from ...mediautils.video import filter_moments
 
 
 class AmplitudeEditor:
@@ -33,3 +37,100 @@ class AmplitudeEditor:
             short_ma, long_ma, self.factor / self.bitrate
         )
         return highlights, large_window, small_window
+
+    def full_edit(self, costfunc, desired_time, params):
+        """Search window sizes until an edit lands within 5% of ``desired_time``.
+
+        Each round builds large/small window distributions around the current
+        centers, runs ``self.edit`` on every pair in a thread pool, filters the
+        moments by ``params['mindur']``/``params['maxdur']`` and re-centers on
+        the pair with the lowest ``costfunc(desired_time, total_duration)``.
+
+        Returns the filtered moments of the first round-best edit inside that
+        band, or ``[]`` once the 50000-iteration budget is exceeded. If a worker
+        raises, the round is drained and the last exception seen is re-raised.
+        """
+        desired = desired_time
+        # Generate center of large window and small window size
+        large_window_center = random.uniform(30, 50)
+        small_window_center = random.uniform(5, 15)
+
+        # The spread multiplier, or epsilon, slowly decays as we approach the center of the gradient
+        spread_multiplier = random.uniform(0.15, 0.18)
+
+        # The decay rate, or how quickly our spread multiplier decreases as we approach the center of the gradient
+        spread_decay = random.uniform(0.000001, 0.0001)
+
+        parallelism = params['parallelism']
+        iterations = 0
+        while True:
+            large_distribution = create_distribution(
+                large_window_center, spread_multiplier, parallelism
+            )
+            np.random.shuffle(large_distribution)
+            small_distribution = create_distribution(
+                small_window_center, spread_multiplier, parallelism
+            )
+            np.random.shuffle(small_distribution)
+
+            # Fire off workers to simultaneously generate many possible edits
+            moment_results = []
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                futures = []
+                pairs = list(zip(large_distribution, small_distribution))
+                for pair in pairs:
+                    futures.append(
+                        executor.submit(
+                            self.edit,
+                            pair[0] if pair[0] > pair[1] else pair[1],
+                            pair[1] if pair[0] > pair[1] else pair[0],
+                            params,  # already a dict; vars() on it raises TypeError
+                        )
+                    )
+                failed = None
+                for future in concurrent.futures.as_completed(futures):
+                    try:
+                        moment_results.append(list(future.result()))
+                    except Exception as e:
+                        self.logger.exception("error during editing", error=e)
+                        failed = e
+                if failed is not None:
+                    raise failed
+            costs = []
+            durations = []
+            for result in moment_results:
+                total_duration = 0
+                result[0] = filter_moments(result[0], params['mindur'], params['maxdur'])
+                for moment in result[0]:
+                    total_duration = total_duration + moment.get_duration()
+                costs.append(costfunc(desired, total_duration))
+                durations.append(total_duration)
+            index_min = min(range(len(costs)), key=costs.__getitem__)
+            large_window_center = moment_results[index_min][1]
+            small_window_center = moment_results[index_min][2]
+            self.logger.info(
+                "batch complete",
+                best_large=large_window_center,
+                best_small=small_window_center,
+                duration=durations[index_min],
+            )
+            if (
+                durations[index_min] > desired * 0.95
+                and desired * 1.05 > durations[index_min]
+            ):
+                return moment_results[index_min][0]
+
+            iterations = iterations + parallelism
+            if iterations > 50000:
+                self.logger.warn(
+                    "could not find a viable edit in the target duration, try other params",
+                    target=desired,
+                )
+                return []
+            spread_multiplier = spread_multiplier - spread_decay
+            if spread_multiplier < 0:
+                self.logger.warn("spread reached 0, resetting")
+                large_window_center = random.uniform(30, 50)
+                small_window_center = random.uniform(5, 15)
+                spread_multiplier = random.uniform(0.15, 0.18)
+                spread_decay = random.uniform(0.0001, 0.001)
diff --git a/src/editors/sentiment/editor.py b/src/editors/sentiment/editor.py
index 16d1b9b..00e0f28 100644
--- a/src/editors/sentiment/editor.py
+++ b/src/editors/sentiment/editor.py
@@ -2,6 +2,9 @@ import json
 import tempfile
 from dataclasses import dataclass
 from pathlib import Path
+import random
+import concurrent.futures
+from ...math.distribution import create_distribution
 
 import numpy as np
 import structlog
@@ -11,6 +14,7 @@ from flair.models import TextClassifier
 
 from ...math.average import np_moving_average
 from ..common import find_moving_average_highlights
+from ...mediautils.video import filter_moments
 
 
 @dataclass
@@ -69,3 +73,100 @@ class SentimentEditor:
             short_ma, long_ma, 1.0 / window_factor
         )
         return highlights, large_window, small_window
+
+    def full_edit(self, costfunc, desired_time, params):
+        desired = desired_time
+        # Iteratively search window sizes for an edit whose duration is within 5% of desired_time.
+        # Generate center of large window and small window size
+        large_window_center = random.uniform(30, 50)
+        small_window_center = random.uniform(5, 15)
+
+        # The spread multiplier, or epsilon, slowly decays as we approach the center of the gradient
+        spread_multiplier = random.uniform(0.15, 0.18)
+
+        # The decay rate, or how quickly our spread multiplier decreases as we approach the center of the gradient
+        spread_decay = random.uniform(0.000001, 0.0001)
+
+        parallelism = params['parallelism']
+
+        # The main loop of the program starts here
+        #   we first create distributions
+        #   use workers to simultaneously create many possible edits
+        #   find the best edit of the lot -> this is determined by lowest "cost"
+        #   if the best fits within our desired time range, output, otherwise
+        #       reset the distributions using the best as the new center, then repeat
+        # Create distribution of large and small
+
+        complete = False
+        iterations = 0
+        while not complete:
+            large_distribution = create_distribution(
+                large_window_center, spread_multiplier, parallelism
+            )
+            np.random.shuffle(large_distribution)
+            small_distribution = create_distribution(
+                small_window_center, spread_multiplier, parallelism
+            )
+            np.random.shuffle(small_distribution)
+
+            # Fire off workers to generate edits
+            moment_results = []
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                futures = []
+                pairs = list(zip(large_distribution, small_distribution))
+                for pair in pairs:
+                    futures.append(
+                        executor.submit(
+                            self.edit,
+                            pair[0] if pair[0] > pair[1] else pair[1],  # larger of the pair goes first
+                            pair[1] if pair[0] > pair[1] else pair[0],  # smaller of the pair goes second
+                            params,
+                        )
+                    )
+                failed = None
+                for future in concurrent.futures.as_completed(futures):
+                    try:
+                        moment_results.append(list(future.result()))
+                    except Exception as e:
+                        self.logger.exception("error during editing", error=e)
+                        failed = e  # remember the failure; re-raised once every future has completed
+                if failed is not None:
+                    raise failed
+            costs = []
+            durations = []
+            for result in moment_results:
+                total_duration = 0
+                # result is [highlights, large_window, small_window] as returned by self.edit
+                result[0] = filter_moments(result[0], params['mindur'], params['maxdur'])
+                for moment in result[0]:
+                    total_duration = total_duration + moment.get_duration()
+                costs.append(costfunc(desired, total_duration))
+                durations.append(total_duration)
+            index_min = min(range(len(costs)), key=costs.__getitem__)  # index of the lowest-cost edit
+            large_window_center = moment_results[index_min][1]
+            small_window_center = moment_results[index_min][2]
+            self.logger.info(
+                "batch complete",
+                best_large=large_window_center,
+                best_small=small_window_center,
+                duration=durations[index_min],
+            )
+            if (
+                durations[index_min] > desired * 0.95
+                and desired * 1.05 > durations[index_min]
+            ):
+                return moment_results[index_min][0]
+
+            iterations = iterations + parallelism  # advances by the batch parallelism, not by 1
+            if iterations > 50000:
+                self.logger.warn(
+                    "could not find a viable edit in the target duration, try other params",
+                    target=desired,
+                )
+                return []  # empty list signals that no viable edit was found
+            spread_multiplier = spread_multiplier - spread_decay
+            if spread_multiplier < 0:
+                self.logger.warn("spread reached 0, resetting")
+                large_window_center = random.uniform(30, 50)
+                small_window_center = random.uniform(5, 15)
+                spread_multiplier = random.uniform(0.15, 0.18)
+                # NOTE(review): reset decay range differs from the initial (0.000001, 0.0001) — confirm intended
+                spread_decay = random.uniform(0.0001, 0.001)