single iteration working

This commit is contained in:
Tanishq Dubey 2022-10-20 23:43:07 -04:00
commit 6e69bf6b67
18 changed files with 418 additions and 0 deletions

160
.gitignore vendored Normal file
View File

@@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

4
README.md Normal file
View File

@@ -0,0 +1,4 @@
# ALE
## Automatic Linear Editor
Give ALE a video and a target length, and ALE will automatically create a highlight reel for you.

0
__init__.py Normal file
View File

146
main.py Normal file
View File

@@ -0,0 +1,146 @@
import argparse
import structlog
from functools import partial
from pathlib import Path
import sys
import hashlib
import random
import multiprocessing
import concurrent.futures
import numpy as np
from src.mediautils.audio import extract_audio_from_video
from src.editors.amplitude.editor import AmplitudeEditor
from src.editors.sentiment.editor import SentimentEditor
from src.math.distribution import create_distribution
log = structlog.get_logger()
EDITORS = {
'amplitude': AmplitudeEditor,
'sentiment': SentimentEditor
}
def main(args):
    """Run one editing pass over the video described by the parsed CLI `args`.

    Validates the input path, hashes the video to derive cache-friendly temp
    file names, extracts (or reuses) the audio track, builds the selected
    editor, and runs one `edit` call per sampled window pair in parallel.

    Exits the process with -1 when the input file does not exist.
    """
    # Check that the input video exists before doing any work.
    input_file = args.file
    in_vid_path = Path(input_file)
    if not in_vid_path.is_file():
        log.error("the specified input path does not exist", path=input_file)
        sys.exit(-1)
    log.info("preparing video", input_video=input_file)

    # Hash the video; the digest detects previously-processed videos and
    # doubles as a simple, collision-resistant temp file name.
    sha1 = hashlib.sha1()
    BUF_SIZE = 1655360  # read in ~1.6 MB chunks to bound memory use
    with open(in_vid_path, 'rb') as f:
        while True:
            data = f.read(BUF_SIZE)
            if not data:
                break
            sha1.update(data)
    digest = sha1.hexdigest()
    log.info("hash computed", hash=digest)
    temp_file_name = f"ale-{digest}"

    # Prepare the input video: extract the audio track, reusing a cached copy
    # from a previous run on the same video when available.
    audio_path, audio_cached = extract_audio_from_video(str(in_vid_path.resolve()), temp_file_name)
    if audio_cached:
        log.info("using cached audio file", cache_path=audio_path)
    else:
        log.info("extracted audio", cache_path=audio_path)

    # Initialize the requested editor.
    log.info("initializing editor", editor=args.editor)
    editor = EDITORS[args.editor](str(in_vid_path.resolve()), audio_path, vars(args))
    log.info("initialized editor", editor=args.editor)

    # Randomly seed the centers (seconds) of the long and short moving-average
    # windows that the editor compares.
    large_window_center = random.uniform(30, 50)
    small_window_center = random.uniform(5, 15)
    # The spread multiplier, or epsilon, slowly decays as we approach the center of the gradient.
    spread_multiplier = random.uniform(0.15, 0.18)
    # The decay rate, or how quickly our spread multiplier decreases as we
    # approach the center of the gradient (only logged in this iteration).
    spread_decay = random.uniform(0.0001, 0.001)
    log.info("creating distributions", large_start=large_window_center, small_start=small_window_center, spread=spread_multiplier, decay=spread_decay)

    # Draw one (large, small) window pair per worker and shuffle so pairs are
    # uncorrelated.
    parallelism = args.parallelism
    large_distribution = create_distribution(large_window_center, spread_multiplier, parallelism)
    np.random.shuffle(large_distribution)
    small_distribution = create_distribution(small_window_center, spread_multiplier, parallelism)
    np.random.shuffle(small_distribution)

    # Run one edit per window pair in parallel and log each result as it lands.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(editor.edit, large, small, vars(args))
            for large, small in zip(large_distribution, small_distribution)
        ]
        for future in concurrent.futures.as_completed(futures):
            log.info("got val", val=future.result())
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="ALE", description="ALE: Automatic Linear Editor.",
        formatter_class=partial(argparse.HelpFormatter, width=100)
    )
    parser.add_argument('file', help='Path to the video file to edit')
    parser.add_argument('duration', help='Target length of the edit, in seconds', type=int)
    parser.add_argument('destination', help='Edited video save location')
    # required=True makes argparse reject a missing editor subcommand up front
    # instead of letting EDITORS[None] raise a KeyError deep inside main().
    subparsers = parser.add_subparsers(dest='editor', required=True, help='The editing algorithm to use')
    parser_amplitude = subparsers.add_parser('amplitude', help='The amplitude editor uses audio amplitude moving averages to find swings from relatively quiet moments to loud moments. This is useful in videos where long moments of quiet are interspersed with loud action filled moments.')
    parser_amplitude.add_argument(
        "-f",
        "--factor",
        default=16000,
        help="Subsampling factor",
        dest="factor",
        type=int,
    )
    # Note: the sentiment parser currently takes no editor-specific options.
    parser_sentiment = subparsers.add_parser('sentiment', help='The sentiment editor transcribes the speech in a video and runs sentiment analysis on the resulting text. Using moving averages, large swings in sentiment can be correlated to controversial or exciting moments.')
    parser.add_argument("-d", "--dryrun", dest="drun", action="store_true")
    parser.add_argument("-p", "--parallelism", dest="parallelism", type=int, default=multiprocessing.cpu_count() - 2, help="The number of cores to use, defaults to N - 2 cores.")
    parser.add_argument(
        "-i",
        "--minduration",
        default=8,
        help="Minimum clip duration",
        dest="mindur",
        type=int,
    )
    parser.add_argument(
        "-m",
        "--maxduration",
        default=15,
        help="Maximum clip duration",
        dest="maxdur",
        type=int,
    )
    args = parser.parse_args()
    # Top-level boundary: log any uncaught error with a traceback, then exit
    # non-zero so callers/scripts can detect failure.
    try:
        main(args)
    except Exception:
        log.exception("uncaught error!")
        sys.exit(-2)
    sys.exit(0)

0
src/__init__.py Normal file
View File

0
src/editors/__init__.py Normal file
View File

View File

View File

@@ -0,0 +1,29 @@
from ...mediautils.audio import process_audio, resample
from ...math.average import np_moving_average
from ...models.moment import Moment
from ..common import find_moving_average_highlights
import numpy as np
import structlog
class AmplitudeEditor:
    """Editor that finds highlights by comparing short- vs long-window
    moving averages of squared (subsampled) audio amplitude."""

    def __init__(self, video_path, audio_path, params):
        self.logger = structlog.get_logger("amplitude")
        self.video_path = video_path
        self.audio_path = audio_path
        self.logger.info("processing audio", path=self.audio_path)
        # Load the WAV once; keep raw samples plus duration/bitrate metadata.
        audio_data, audio_duration, audio_bitrate = process_audio(self.audio_path)
        self.data = audio_data
        self.duration = audio_duration
        self.bitrate = audio_bitrate
        self.factor = params["factor"]
        self.logger.info("resampling audio", factor=self.factor)
        # Subsample by `factor`, then square so louder moments dominate.
        self.subsampled_data = resample(self.data, self.factor)
        self.squared_subsample = np.square(self.subsampled_data)

    def edit(self, large_window, small_window, params):
        """Return highlight Moments for the given window sizes (in seconds)."""
        # Windows arrive in seconds; convert to subsampled-sample counts.
        samples_per_second = self.bitrate / self.factor
        long_ma = np_moving_average(self.squared_subsample, large_window * samples_per_second)
        short_ma = np_moving_average(self.squared_subsample, small_window * samples_per_second)
        # Crossings of the short MA over the long MA mark clip boundaries;
        # the last argument converts indices back to seconds.
        return find_moving_average_highlights(short_ma, long_ma, self.factor / self.bitrate)

25
src/editors/common.py Normal file
View File

@@ -0,0 +1,25 @@
from ..models.moment import Moment
def find_moving_average_highlights(short_ma, long_ma, scaling_factor=1):
    """Find intervals where the short moving average crosses above the long one.

    A clip opens at an upward crossing (short MA moves from below to above the
    long MA) and closes at the next downward crossing. `scaling_factor`
    converts array indices into seconds.

    Returns a list of Moment objects.

    Fix: a clip still open when the data runs out was previously discarded;
    it is now closed at the final index so trailing highlights survive.
    """
    in_a_clip = False
    current = None
    moments = []
    for t in range(1, len(long_ma)):
        if (
            not in_a_clip
            and (short_ma[t - 1] < long_ma[t - 1])
            and (short_ma[t] > long_ma[t])
        ):
            # Upward crossing: open a new clip (stop filled in later).
            in_a_clip = True
            current = Moment(t * scaling_factor, 0)
        elif (
            in_a_clip
            and (short_ma[t - 1] > long_ma[t - 1])
            and (short_ma[t] < long_ma[t])
        ):
            # Downward crossing: close and record the clip.
            in_a_clip = False
            current.stop = t * scaling_factor
            moments.append(current)
            current = None
    if in_a_clip and current is not None:
        # Signal ended mid-clip; close it at the last sample.
        current.stop = (len(long_ma) - 1) * scaling_factor
        moments.append(current)
    return moments

View File

View File

@@ -0,0 +1,3 @@
class SentimentEditor:
    """Editor selected by the 'sentiment' CLI subcommand; implementation pending.

    Per the CLI help text, this will transcribe speech and run sentiment
    analysis, using moving-average swings to locate notable moments.
    """
    pass

0
src/math/__init__.py Normal file
View File

4
src/math/average.py Normal file
View File

@@ -0,0 +1,4 @@
import numpy as np
def np_moving_average(data: np.ndarray, window: float) -> np.ndarray:
    """Return the simple moving average of `data` ("valid" mode, no padding).

    `window` may be fractional (callers pass seconds * samples-per-second);
    it is truncated to an integer kernel size.

    Fixes: the `data` annotation was wrongly `int`, and the summed window was
    divided by the fractional `window` instead of the actual kernel size,
    biasing the average whenever `window` was not an integer.
    """
    kernel_size = int(window)
    return np.convolve(data, np.ones(kernel_size), "valid") / kernel_size

8
src/math/distribution.py Normal file
View File

@@ -0,0 +1,8 @@
import numpy as np
def create_distribution(center, spread, count):
    """Draw `count` uniform samples from center*(1-spread)..center*(1+spread).

    The lower bound is clamped at zero so window sizes never go negative.
    """
    upper = center * (1.0 + spread)
    lower = max(center - (center * spread), 0)
    return np.random.uniform(lower, upper, count)

View File

30
src/mediautils/audio.py Normal file
View File

@@ -0,0 +1,30 @@
import tempfile
import moviepy.editor as mp
from pathlib import Path
import numpy as np
import scipy.io.wavfile as wav
def extract_audio_from_video(
    video_path: str,
    filename: str
):
    """Extract the audio track of `video_path` into the temp dir as a WAV.

    Returns a tuple `(wav_path, was_cached)`; `was_cached` is True when the
    destination file already exists from a previous run (callers derive
    `filename` from the video's content hash, so a hit means this exact
    video was processed before).

    Fix: the destination name previously ignored the `filename` parameter,
    so every video collided on one shared cache file; it now uses the
    caller-supplied per-video name.
    """
    tempdir = tempfile.gettempdir()
    dest_location = f"{tempdir}/{filename}.wav"
    if Path(dest_location).is_file():
        return dest_location, True
    vid = mp.VideoFileClip(video_path)
    # logger=None silences moviepy's progress bar.
    vid.audio.write_audiofile(dest_location, logger=None)
    vid.close()
    return dest_location, False
def process_audio(source_audio_path):
    """Load a WAV file and return (mono_samples, duration_seconds, sample_rate).

    Multi-channel audio is downmixed to mono by averaging the channels
    (identical to the previous (L + R) / 2 for stereo input).

    Fix: mono WAVs come back from scipy as 1-D arrays, which previously
    crashed the 2-D channel indexing; they are now handled directly.
    """
    rate, data_raw = wav.read(source_audio_path)
    # Widen before summing so adding 16-bit channels cannot overflow.
    data_raw = data_raw.astype(np.int32)
    if data_raw.ndim == 1:
        # Already mono; cast to float to match the downmixed dtype.
        mono = data_raw.astype(np.float64)
    else:
        mono = data_raw.mean(axis=1)
    duration = len(mono) / rate
    return mono, duration, rate
def resample(data: np.ndarray, factor: int) -> np.ndarray:
    """Keep every `factor`-th sample of `data`, returned as an independent copy."""
    subsampled = data[::factor]
    # np.array copies by default, detaching the result from `data`'s buffer.
    return np.array(subsampled)

0
src/models/__init__.py Normal file
View File

9
src/models/moment.py Normal file
View File

@@ -0,0 +1,9 @@
class Moment:
    """A [start, stop] time interval (seconds) marking one candidate clip.

    `stop` may be assigned after construction (callers open a Moment at a
    crossing and fill in the end point later).
    """

    def __init__(self, start, stop):
        self.start = start
        self.stop = stop

    def get_duration(self):
        """Return the interval length in seconds."""
        duration = self.stop - self.start
        return duration