single iteration working

This commit is contained in:
Tanishq Dubey 2022-10-20 23:43:07 -04:00
commit 6e69bf6b67
18 changed files with 418 additions and 0 deletions

160
.gitignore vendored Normal file
View File

@@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

4
README.md Normal file
View File

@@ -0,0 +1,4 @@
# ALE
## Automatic Linear Editor
Give ALE a video and a target length, and ALE will automatically create a highlight reel for you.

0
__init__.py Normal file
View File

146
main.py Normal file
View File

@@ -0,0 +1,146 @@
import argparse
import structlog
from functools import partial
from pathlib import Path
import sys
import hashlib
import random
import multiprocessing
import concurrent.futures
import numpy as np
from src.mediautils.audio import extract_audio_from_video
from src.editors.amplitude.editor import AmplitudeEditor
from src.editors.sentiment.editor import SentimentEditor
from src.math.distribution import create_distribution
log = structlog.get_logger()
EDITORS = {
'amplitude': AmplitudeEditor,
'sentiment': SentimentEditor
}
def main(args):
    """Run one editing pass over the video described by the parsed CLI `args`.

    Validates the input path, hashes the video to derive cache-friendly temp
    file names, extracts (or reuses) the audio track, builds the selected
    editor, and runs one `edit` call per sampled window pair in parallel.

    Exits the process with -1 when the input file does not exist.
    """
    # Check that the input video exists before doing any work.
    input_file = args.file
    in_vid_path = Path(input_file)
    if not in_vid_path.is_file():
        log.error("the specified input path does not exist", path=input_file)
        sys.exit(-1)
    log.info("preparing video", input_video=input_file)

    # Hash the video; the digest detects previously-processed videos and
    # doubles as a simple, collision-resistant temp file name.
    sha1 = hashlib.sha1()
    BUF_SIZE = 1655360  # read in ~1.6 MB chunks to bound memory use
    with open(in_vid_path, 'rb') as f:
        while True:
            data = f.read(BUF_SIZE)
            if not data:
                break
            sha1.update(data)
    digest = sha1.hexdigest()
    log.info("hash computed", hash=digest)
    temp_file_name = f"ale-{digest}"

    # Prepare the input video: extract the audio track, reusing a cached copy
    # from a previous run on the same video when available.
    audio_path, audio_cached = extract_audio_from_video(str(in_vid_path.resolve()), temp_file_name)
    if audio_cached:
        log.info("using cached audio file", cache_path=audio_path)
    else:
        log.info("extracted audio", cache_path=audio_path)

    # Initialize the requested editor.
    log.info("initializing editor", editor=args.editor)
    editor = EDITORS[args.editor](str(in_vid_path.resolve()), audio_path, vars(args))
    log.info("initialized editor", editor=args.editor)

    # Randomly seed the centers (seconds) of the long and short moving-average
    # windows that the editor compares.
    large_window_center = random.uniform(30, 50)
    small_window_center = random.uniform(5, 15)
    # The spread multiplier, or epsilon, slowly decays as we approach the center of the gradient.
    spread_multiplier = random.uniform(0.15, 0.18)
    # The decay rate, or how quickly our spread multiplier decreases as we
    # approach the center of the gradient (only logged in this iteration).
    spread_decay = random.uniform(0.0001, 0.001)
    log.info("creating distributions", large_start=large_window_center, small_start=small_window_center, spread=spread_multiplier, decay=spread_decay)

    # Draw one (large, small) window pair per worker and shuffle so pairs are
    # uncorrelated.
    parallelism = args.parallelism
    large_distribution = create_distribution(large_window_center, spread_multiplier, parallelism)
    np.random.shuffle(large_distribution)
    small_distribution = create_distribution(small_window_center, spread_multiplier, parallelism)
    np.random.shuffle(small_distribution)

    # Run one edit per window pair in parallel and log each result as it lands.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(editor.edit, large, small, vars(args))
            for large, small in zip(large_distribution, small_distribution)
        ]
        for future in concurrent.futures.as_completed(futures):
            log.info("got val", val=future.result())
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="ALE", description="ALE: Automatic Linear Editor.",
        formatter_class=partial(argparse.HelpFormatter, width=100)
    )
    parser.add_argument('file', help='Path to the video file to edit')
    parser.add_argument('duration', help='Target length of the edit, in seconds', type=int)
    parser.add_argument('destination', help='Edited video save location')
    # required=True makes argparse reject a missing editor subcommand up front
    # instead of letting EDITORS[None] raise a KeyError deep inside main().
    subparsers = parser.add_subparsers(dest='editor', required=True, help='The editing algorithm to use')
    parser_amplitude = subparsers.add_parser('amplitude', help='The amplitude editor uses audio amplitude moving averages to find swings from relatively quiet moments to loud moments. This is useful in videos where long moments of quiet are interspersed with loud action filled moments.')
    parser_amplitude.add_argument(
        "-f",
        "--factor",
        default=16000,
        help="Subsampling factor",
        dest="factor",
        type=int,
    )
    # Note: the sentiment parser currently takes no editor-specific options.
    parser_sentiment = subparsers.add_parser('sentiment', help='The sentiment editor transcribes the speech in a video and runs sentiment analysis on the resulting text. Using moving averages, large swings in sentiment can be correlated to controversial or exciting moments.')
    parser.add_argument("-d", "--dryrun", dest="drun", action="store_true")
    parser.add_argument("-p", "--parallelism", dest="parallelism", type=int, default=multiprocessing.cpu_count() - 2, help="The number of cores to use, defaults to N - 2 cores.")
    parser.add_argument(
        "-i",
        "--minduration",
        default=8,
        help="Minimum clip duration",
        dest="mindur",
        type=int,
    )
    parser.add_argument(
        "-m",
        "--maxduration",
        default=15,
        help="Maximum clip duration",
        dest="maxdur",
        type=int,
    )
    args = parser.parse_args()
    # Top-level boundary: log any uncaught error with a traceback, then exit
    # non-zero so callers/scripts can detect failure.
    try:
        main(args)
    except Exception:
        log.exception("uncaught error!")
        sys.exit(-2)
    sys.exit(0)

0
src/__init__.py Normal file
View File

0
src/editors/__init__.py Normal file
View File

View File

View File

@@ -0,0 +1,29 @@
from ...mediautils.audio import process_audio, resample
from ...math.average import np_moving_average
from ...models.moment import Moment
from ..common import find_moving_average_highlights
import numpy as np
import structlog
class AmplitudeEditor:
    """Editor that finds highlights by comparing short- vs long-window
    moving averages of squared (subsampled) audio amplitude."""

    def __init__(self, video_path, audio_path, params):
        self.logger = structlog.get_logger("amplitude")
        self.video_path = video_path
        self.audio_path = audio_path
        self.logger.info("processing audio", path=self.audio_path)
        # Load the WAV once; keep raw samples plus duration/bitrate metadata.
        audio_data, audio_duration, audio_bitrate = process_audio(self.audio_path)
        self.data = audio_data
        self.duration = audio_duration
        self.bitrate = audio_bitrate
        self.factor = params["factor"]
        self.logger.info("resampling audio", factor=self.factor)
        # Subsample by `factor`, then square so louder moments dominate.
        self.subsampled_data = resample(self.data, self.factor)
        self.squared_subsample = np.square(self.subsampled_data)

    def edit(self, large_window, small_window, params):
        """Return highlight Moments for the given window sizes (in seconds)."""
        # Windows arrive in seconds; convert to subsampled-sample counts.
        samples_per_second = self.bitrate / self.factor
        long_ma = np_moving_average(self.squared_subsample, large_window * samples_per_second)
        short_ma = np_moving_average(self.squared_subsample, small_window * samples_per_second)
        # Crossings of the short MA over the long MA mark clip boundaries;
        # the last argument converts indices back to seconds.
        return find_moving_average_highlights(short_ma, long_ma, self.factor / self.bitrate)

25
src/editors/common.py Normal file
View File

@@ -0,0 +1,25 @@
from ..models.moment import Moment
def find_moving_average_highlights(short_ma, long_ma, scaling_factor=1):
    """Find intervals where the short moving average crosses above the long one.

    A clip opens at an upward crossing (short MA moves from below to above the
    long MA) and closes at the next downward crossing. `scaling_factor`
    converts array indices into seconds.

    Returns a list of Moment objects.

    Fix: a clip still open when the data runs out was previously discarded;
    it is now closed at the final index so trailing highlights survive.
    """
    in_a_clip = False
    current = None
    moments = []
    for t in range(1, len(long_ma)):
        if (
            not in_a_clip
            and (short_ma[t - 1] < long_ma[t - 1])
            and (short_ma[t] > long_ma[t])
        ):
            # Upward crossing: open a new clip (stop filled in later).
            in_a_clip = True
            current = Moment(t * scaling_factor, 0)
        elif (
            in_a_clip
            and (short_ma[t - 1] > long_ma[t - 1])
            and (short_ma[t] < long_ma[t])
        ):
            # Downward crossing: close and record the clip.
            in_a_clip = False
            current.stop = t * scaling_factor
            moments.append(current)
            current = None
    if in_a_clip and current is not None:
        # Signal ended mid-clip; close it at the last sample.
        current.stop = (len(long_ma) - 1) * scaling_factor
        moments.append(current)
    return moments

View File

View File

@@ -0,0 +1,3 @@
class SentimentEditor:
    """Editor selected by the 'sentiment' CLI subcommand; implementation pending.

    Per the CLI help text, this will transcribe speech and run sentiment
    analysis, using moving-average swings to locate notable moments.
    """
    pass

0
src/math/__init__.py Normal file
View File

4
src/math/average.py Normal file
View File

@@ -0,0 +1,4 @@
import numpy as np
def np_moving_average(data: np.ndarray, window: float) -> np.ndarray:
    """Return the simple moving average of `data` ("valid" mode, no padding).

    `window` may be fractional (callers pass seconds * samples-per-second);
    it is truncated to an integer kernel size.

    Fixes: the `data` annotation was wrongly `int`, and the summed window was
    divided by the fractional `window` instead of the actual kernel size,
    biasing the average whenever `window` was not an integer.
    """
    kernel_size = int(window)
    return np.convolve(data, np.ones(kernel_size), "valid") / kernel_size

8
src/math/distribution.py Normal file
View File

@@ -0,0 +1,8 @@
import numpy as np
def create_distribution(center, spread, count):
    """Draw `count` uniform samples from center*(1-spread)..center*(1+spread).

    The lower bound is clamped at zero so window sizes never go negative.
    """
    upper = center * (1.0 + spread)
    lower = max(center - (center * spread), 0)
    return np.random.uniform(lower, upper, count)

View File

30
src/mediautils/audio.py Normal file
View File

@@ -0,0 +1,30 @@
import tempfile
import moviepy.editor as mp
from pathlib import Path
import numpy as np
import scipy.io.wavfile as wav
def extract_audio_from_video(
    video_path: str,
    filename: str
):
    """Extract the audio track of `video_path` into the temp dir as a WAV.

    Returns a tuple `(wav_path, was_cached)`; `was_cached` is True when the
    destination file already exists from a previous run (callers derive
    `filename` from the video's content hash, so a hit means this exact
    video was processed before).

    Fix: the destination name previously ignored the `filename` parameter,
    so every video collided on one shared cache file; it now uses the
    caller-supplied per-video name.
    """
    tempdir = tempfile.gettempdir()
    dest_location = f"{tempdir}/{filename}.wav"
    if Path(dest_location).is_file():
        return dest_location, True
    vid = mp.VideoFileClip(video_path)
    # logger=None silences moviepy's progress bar.
    vid.audio.write_audiofile(dest_location, logger=None)
    vid.close()
    return dest_location, False
def process_audio(source_audio_path):
    """Load a WAV file and return (mono_samples, duration_seconds, sample_rate).

    Multi-channel audio is downmixed to mono by averaging the channels
    (identical to the previous (L + R) / 2 for stereo input).

    Fix: mono WAVs come back from scipy as 1-D arrays, which previously
    crashed the 2-D channel indexing; they are now handled directly.
    """
    rate, data_raw = wav.read(source_audio_path)
    # Widen before summing so adding 16-bit channels cannot overflow.
    data_raw = data_raw.astype(np.int32)
    if data_raw.ndim == 1:
        # Already mono; cast to float to match the downmixed dtype.
        mono = data_raw.astype(np.float64)
    else:
        mono = data_raw.mean(axis=1)
    duration = len(mono) / rate
    return mono, duration, rate
def resample(data: np.ndarray, factor: int) -> np.ndarray:
    """Keep every `factor`-th sample of `data`, returned as an independent copy."""
    subsampled = data[::factor]
    # np.array copies by default, detaching the result from `data`'s buffer.
    return np.array(subsampled)

0
src/models/__init__.py Normal file
View File

9
src/models/moment.py Normal file
View File

@@ -0,0 +1,9 @@
class Moment:
    """A [start, stop] time interval (seconds) marking one candidate clip.

    `stop` may be assigned after construction (callers open a Moment at a
    crossing and fill in the end point later).
    """

    def __init__(self, start, stop):
        self.start = start
        self.stop = stop

    def get_duration(self):
        """Return the interval length in seconds."""
        duration = self.stop - self.start
        return duration