import argparse
import concurrent.futures
import hashlib
import multiprocessing
import random
import sys
import time
from functools import partial
from pathlib import Path

import numpy as np
import structlog

from src.utils.prereq import check_ffmpeg, install_ffmpeg

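# Verify ffmpeg is available before pulling in the editor and media modules below,
# which presumably depend on a working ffmpeg install.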
check_ffmpeg()

from src.editors.amplitude.editor import AmplitudeEditor
from src.editors.sentiment.editor import SentimentEditor
from src.math.cost import quadratic_loss
from src.math.distribution import create_distribution
from src.mediautils.audio import extract_audio_from_video
from src.mediautils.video import filter_moments, render_moments

log = structlog.get_logger()

EDITORS = {"amplitude": AmplitudeEditor, "sentiment": SentimentEditor}

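# Maps the --cost-function choice to a loss; quadratic_loss presumably scores a
# candidate by the squared difference between target and actual duration (see src.math.cost).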
ERROR_FUNCS = {"quadratic": quadratic_loss}


def main(args):
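    """Search for a cut of the input video close to the target duration and render it."""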
    # Check that the input video exists
    input_file = args.file
    in_vid_path = Path(input_file)
    if not in_vid_path.is_file():
        log.error("the specified input path does not exist", path=input_file)
        sys.exit(-1)
    log.info("preparing video", input_video=input_file)

    intro_file = args.introvidpath
    if intro_file is not None:
        intro_vid_path = Path(intro_file)
        if not intro_vid_path.is_file():
            log.error(
                "the specified input path does not exist for the intro", path=intro_file
            )
            sys.exit(-1)
        log.info("found intro", input_video=intro_file)

    outro_file = args.outrovidpath
    if outro_file is not None:
        outro_vid_path = Path(outro_file)
        if not outro_vid_path.is_file():
            log.error(
                "the specified input path does not exist for the outro", path=outro_file
            )
            sys.exit(-1)
        log.info("found outro", input_video=outro_file)

    # Hash the video; we use this to see if we have processed this video before
    # and as a simple way to generate temp file names
    sha1 = hashlib.sha1()
    BUF_SIZE = 1655360
    with open(in_vid_path, "rb") as f:
        while True:
            data = f.read(BUF_SIZE)
            if not data:
                break
            sha1.update(data)
    temp_file_name = sha1.hexdigest()
    log.info("hash computed", hash=temp_file_name)
    temp_file_name = f"ale-{temp_file_name}"
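    # The hash-based name doubles as a cache key: extract_audio_from_video can reuse
    # audio it already extracted for this video on an earlier run.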

    # Prepare the input video
    audio_path, audio_cached = extract_audio_from_video(
        str(in_vid_path.resolve()), temp_file_name
    )
    if audio_cached:
        log.info("using cached audio file", cache_path=audio_path)
    else:
        log.info("extracted audio", cache_path=audio_path)
    params = vars(args)
    params["temp_file_name"] = temp_file_name

    # Initialize the editor
    log.info("initializing editor", editor=args.editor)
    editor = EDITORS[args.editor](str(in_vid_path.resolve()), audio_path, params)
    log.info("initialized editor", editor=args.editor)
    costfunc = ERROR_FUNCS[args.cost]
    desired = args.duration

    # Pick random starting centers for the large and small window sizes
    large_window_center = random.uniform(30, 50)
    small_window_center = random.uniform(5, 15)

    # The spread multiplier, or epsilon, slowly decays as we approach the center of the gradient
    spread_multiplier = random.uniform(0.15, 0.18)

    # The decay rate, or how quickly our spread multiplier decreases as we approach the center of the gradient
    spread_decay = random.uniform(0.000001, 0.0001)

    parallelism = args.parallelism

    # The main loop of the program starts here:
    #   - create distributions of large/small window sizes around the current centers
    #   - use workers to simultaneously create many possible edits
    #   - find the best edit of the lot, i.e. the one with the lowest "cost"
    #   - if the best fits within our desired time range, render and exit; otherwise
    #     re-center the distributions on the best result and repeat

    complete = False
    iterations = 0
    while not complete:
        large_distribution = create_distribution(
            large_window_center, spread_multiplier, parallelism
        )
        np.random.shuffle(large_distribution)
        small_distribution = create_distribution(
            small_window_center, spread_multiplier, parallelism
        )
        np.random.shuffle(small_distribution)

        # Fire off workers to generate edits
        moment_results = []
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = []
            pairs = list(zip(large_distribution, small_distribution))
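            # Each (large, small) sample pair parameterizes one candidate edit; the
            # larger of the two values is always passed first, as the large window.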
            for pair in pairs:
                futures.append(
                    executor.submit(
                        editor.edit,
                        pair[0] if pair[0] > pair[1] else pair[1],
                        pair[1] if pair[0] > pair[1] else pair[0],
                        vars(args),
                    )
                )
            for future in concurrent.futures.as_completed(futures):
                try:
                    moment_results.append(list(future.result()))
                except Exception:
                    log.exception("error during editing")
                    sys.exit(-2)

        costs = []
        durations = []
        for result in moment_results:
            total_duration = 0
            result[0] = filter_moments(result[0], args.mindur, args.maxdur)
            for moment in result[0]:
                total_duration = total_duration + moment.get_duration()
            costs.append(costfunc(desired, total_duration))
            durations.append(total_duration)
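
        # Pick the candidate whose total duration is closest to the target (lowest
        # cost) and re-center the window search on its large/small window values.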
        index_min = min(range(len(costs)), key=costs.__getitem__)
        large_window_center = moment_results[index_min][1]
        small_window_center = moment_results[index_min][2]
        log.info(
            "batch complete",
            best_large=large_window_center,
            best_small=small_window_center,
            duration=durations[index_min],
        )
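        # Accept the best candidate if its duration lands within ±5% of the target.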
        if (
            durations[index_min] > desired * 0.95
            and desired * 1.05 > durations[index_min]
        ):
            log.info(
                "found edit within target duration",
                target=desired,
                duration=durations[index_min],
            )
            out_path = Path(args.destination)
            log.info("rendering...")
            start = time.time()
            render_moments(
                moment_results[index_min][0],
                str(in_vid_path.resolve()),
                str(out_path.resolve()),
                intro_path=intro_file,
                parallelism=args.parallelism,
            )
            log.info(
                "render complete",
                duration=time.time() - start,
                output=str(out_path.resolve()),
            )
            sys.exit(0)
        iterations = iterations + parallelism
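        # Each batch tries `parallelism` candidates; give up once the total number of
        # attempts becomes unreasonable.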
        if iterations > 50000:
            log.error(
                "could not find a viable edit in the target duration, try other params",
                target=desired,
            )
            sys.exit(-4)
        spread_multiplier = spread_multiplier - spread_decay
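        # If the spread has fully decayed without finding a fit, restart the search
        # from fresh random centers (a simple random-restart strategy).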
        if spread_multiplier < 0:
            log.warn("spread reached 0, resetting")
            large_window_center = random.uniform(30, 50)
            small_window_center = random.uniform(5, 15)
            spread_multiplier = random.uniform(0.15, 0.18)
            spread_decay = random.uniform(0.0001, 0.001)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="ALE",
        description="ALE: Automatic Linear Editor.",
        formatter_class=partial(argparse.HelpFormatter, width=100),
    )
    parser.add_argument("file", help="Path to the video file to edit")
    parser.add_argument(
        "duration", help="Target length of the edit, in seconds", type=int
    )
    parser.add_argument("destination", help="Edited video save location")
    subparsers = parser.add_subparsers(
        dest="editor", help="The editing algorithm to use"
    )
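    # The chosen subcommand name ("amplitude" or "sentiment") selects the editor
    # class from EDITORS in main().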

    parser_audio_amp = subparsers.add_parser(
        "amplitude",
        help="The amplitude editor uses audio amplitude moving averages to find swings from relatively quiet moments to loud moments. This is useful in videos where long moments of quiet are interspersed with loud, action-filled moments.",
    )
    parser_audio_amp.add_argument(
        "--factor",
        default=16000,
        help="Subsampling factor",
        dest="factor",
        type=int,
    )

    parser_sentiment = subparsers.add_parser(
        "sentiment",
        help="The sentiment editor transcribes the speech in a video and runs sentiment analysis on the resulting text. Using moving averages, large swings in sentiment can be correlated to controversial or exciting moments. A GPU with CUDA is recommended for fast results.",
    )
    parser_sentiment.add_argument(
        "--model",
        default="base",
        help="The size of the sentiment analysis model being used. Larger models increase computation time.",
        dest="model_size",
        choices=["base", "tiny", "small", "medium", "large"],
    )

    parser.add_argument(
        "-p",
        "--parallelism",
        dest="parallelism",
        type=int,
        default=multiprocessing.cpu_count() - 2,
        help="The number of cores to use, defaults to N - 2 cores.",
    )
    parser.add_argument(
        "--cost-function", dest="cost", choices=ERROR_FUNCS.keys(), default="quadratic"
    )

    parser.add_argument(
        "--min-duration",
        default=8,
        help="Minimum clip duration, in seconds",
        dest="mindur",
        type=int,
    )
    parser.add_argument(
        "--max-duration",
        default=15,
        help="Maximum clip duration, in seconds",
        dest="maxdur",
        type=int,
    )
    parser.add_argument(
        "--intro-video",
        default=None,
        help="Path to a video file to use as an intro",
        dest="introvidpath",
        type=str,
    )
    parser.add_argument(
        "--outro-video",
        default=None,
        help="Path to a video file to use as an outro",
        dest="outrovidpath",
        type=str,
    )
    # Title-slide options; parsed but not currently consumed by main()
    parser.add_argument(
        "--title-slide-text",
        default=None,
        help="Text to show during a title slide. The title slide is played after any provided intro. If there is no intro, the title slide is the intro.",
        dest="titleslidetext",
        type=str,
    )
    parser.add_argument(
        "--title-slide-duration",
        default=5,
        help="Time, in seconds, to show a title slide for",
        dest="titleslideduration",
        type=int,
    )

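    # Example invocation (script name and paths are illustrative):
    #   python main.py talk.mp4 90 highlights.mp4 amplitude --factor 8000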
    args = parser.parse_args()
    try:
        main(args)
    except Exception:
        log.exception("uncaught error!")
        sys.exit(-2)
    sys.exit(0)