diff --git a/main.py b/main.py index f15b30e..d8d03cf 100644 --- a/main.py +++ b/main.py @@ -24,14 +24,10 @@ from src.math.distribution import create_distribution log = structlog.get_logger() -EDITORS = { - 'amplitude': AmplitudeEditor, - 'sentiment': SentimentEditor -} +EDITORS = {"amplitude": AmplitudeEditor, "sentiment": SentimentEditor} + +ERROR_FUNCS = {"quadratic": quadratic_loss} -ERROR_FUNCS = { - 'quadratic': quadratic_loss -} def main(args): # Check video existance @@ -46,7 +42,9 @@ def main(args): if intro_file is not None: intro_vid_path = Path(intro_file) if not in_vid_path.is_file(): - log.error("the specified input path does not exist for the intro", path=intro_file) + log.error( + "the specified input path does not exist for the intro", path=intro_file + ) sys.exit(-1) log.info("found intro", input_video=intro_file) @@ -54,7 +52,9 @@ def main(args): if outro_file is not None: outro_vid_path = Path(outro_file) if not outro_vid_path.is_file(): - log.error("the specified input path does not exist for the outro", path=outro_file) + log.error( + "the specified input path does not exist for the outro", path=outro_file + ) sys.exit(-1) log.info("found outro", input_video=outro_file) @@ -62,7 +62,7 @@ def main(args): # and as a simple way to generate temp file names sha1 = hashlib.sha1() BUF_SIZE = 1655360 - with open(in_vid_path, 'rb') as f: + with open(in_vid_path, "rb") as f: while True: data = f.read(BUF_SIZE) if not data: @@ -73,14 +73,16 @@ def main(args): temp_file_name = f"ale-{temp_file_name}" # Prepare the input video - audio_path, audio_cached = extract_audio_from_video(str(in_vid_path.resolve()), temp_file_name) + audio_path, audio_cached = extract_audio_from_video( + str(in_vid_path.resolve()), temp_file_name + ) if audio_cached: log.info("using cached audio file", cache_path=audio_path) else: log.info("extracted audio", cache_path=audio_path) params = vars(args) params["temp_file_name"] = temp_file_name - + # Initalize Editor log.info("initializing editor", editor=args.editor) editor = EDITORS[args.editor](str(in_vid_path.resolve()), audio_path, params) @@ -111,9 +113,13 @@ def main(args): complete = False iterations = 0 while not complete: - large_distribution = create_distribution(large_window_center, spread_multiplier, parallelism) + large_distribution = create_distribution( + large_window_center, spread_multiplier, parallelism + ) np.random.shuffle(large_distribution) - small_distribution = create_distribution(small_window_center, spread_multiplier, parallelism) + small_distribution = create_distribution( + small_window_center, spread_multiplier, parallelism + ) np.random.shuffle(small_distribution) # Fire off workers to generate edits @@ -127,7 +133,7 @@ def main(args): editor.edit, pair[0] if pair[0] > pair[1] else pair[1], pair[1] if pair[0] > pair[1] else pair[0], - vars(args) + vars(args), ) ) for future in concurrent.futures.as_completed(futures): @@ -143,24 +149,49 @@ def main(args): total_duration = 0 result[0] = filter_moments(result[0], args.mindur, args.maxdur) for moment in result[0]: - total_duration = total_duration + moment.get_duration() + total_duration = total_duration + moment.get_duration() costs.append(costfunc(desired, total_duration)) durations.append(total_duration) index_min = min(range(len(costs)), key=costs.__getitem__) large_window_center = moment_results[index_min][1] small_window_center = moment_results[index_min][2] - log.info("batch complete", best_large=large_window_center, best_small=small_window_center, duration=durations[index_min]) - if durations[index_min] > desired * 0.95 and desired * 1.05 > durations[index_min]: - log.info("found edit within target duration", target=desired, duration=durations[index_min]) + log.info( + "batch complete", + best_large=large_window_center, + best_small=small_window_center, + duration=durations[index_min], + ) + if ( + durations[index_min] > desired * 0.95 + and desired * 1.05 > durations[index_min] + ): + log.info( + "found edit within target duration", + target=desired, + duration=durations[index_min], + ) out_path = Path(args.destination) log.info("rendering...") start = time.time() - render_moments(moment_results[index_min][0], str(in_vid_path.resolve()), str(out_path.resolve()), intro_path=intro_file, parallelism=args.parallelism) - log.info("render complete", duration=time.time() - start, output=str(out_path.resolve())) + render_moments( + moment_results[index_min][0], + str(in_vid_path.resolve()), + str(out_path.resolve()), + intro_path=intro_file, + parallelism=args.parallelism, + ) + log.info( + "render complete", + duration=time.time() - start, + output=str(out_path.resolve()), + ) sys.exit(0) iterations = iterations + parallelism if iterations > 50000: - log.error("could not find a viable edit in the target duration, try other params", target=desired) + log.error( + "could not find a viable edit in the target duration, try other params", + target=desired, + ) sys.exit(-4) spread_multiplier = spread_multiplier - spread_decay if spread_multiplier < 0: @@ -173,15 +204,23 @@ def main(args): if __name__ == "__main__": parser = argparse.ArgumentParser( - prog="ALE", description="ALE: Automatic Linear Editor.", - formatter_class=partial(argparse.HelpFormatter, width=100) + prog="ALE", + description="ALE: Automatic Linear Editor.", + formatter_class=partial(argparse.HelpFormatter, width=100), + ) + parser.add_argument("file", help="Path to the video file to edit") + parser.add_argument( + "duration", help="Target length of the edit, in seconds", type=int + ) + parser.add_argument("destination", help="Edited video save location") + subparsers = parser.add_subparsers( + dest="editor", help="The editing algorithm to use" ) - parser.add_argument('file', help='Path to the video file to edit') - parser.add_argument('duration', help='Target length of the edit, in seconds', type=int) - parser.add_argument('destination', help='Edited video save location') - subparsers = parser.add_subparsers(dest='editor', help='The editing algorithm to use') - parser_audio_amp = subparsers.add_parser('amplitude', help='The amplitude editor uses audio amplitude moving averages to find swings from relatively quiet moments to loud moments. This is useful in videos where long moments of quiet are interspersed with loud action filled moments.') + parser_audio_amp = subparsers.add_parser( + "amplitude", + help="The amplitude editor uses audio amplitude moving averages to find swings from relatively quiet moments to loud moments. This is useful in videos where long moments of quiet are interspersed with loud action filled moments.", + ) parser_audio_amp.add_argument( "--factor", default=16000, @@ -190,7 +229,10 @@ if __name__ == "__main__": type=int, ) - parser_sentiment = subparsers.add_parser('sentiment', help='The sentiment editor transcribes the speech in a video and runs sentiment analysis on the resulting text. Using moving averages, large swings in sentiment can be correlated to controversial or exciting moments. A GPU with CUDA is recommended for fast results.') + parser_sentiment = subparsers.add_parser( + "sentiment", + help="The sentiment editor transcribes the speech in a video and runs sentiment analysis on the resulting text. Using moving averages, large swings in sentiment can be correlated to controversial or exciting moments. A GPU with CUDA is recommended for fast results.", + ) parser_sentiment.add_argument( "--model", default="base", @@ -199,8 +241,17 @@ if __name__ == "__main__": choices=["base", "tiny", "small", "medium", "large"], ) - parser.add_argument("-p", "--parallelism", dest="parallelism", type=int, default=multiprocessing.cpu_count() - 2, help="The number of cores to use, defaults to N - 2 cores.") - parser.add_argument("--cost-function", dest="cost", choices=ERROR_FUNCS.keys(), default='quadratic') + parser.add_argument( + "-p", + "--parallelism", + dest="parallelism", + type=int, + default=multiprocessing.cpu_count() - 2, + help="The number of cores to use, defaults to N - 2 cores.", + ) + parser.add_argument( + "--cost-function", dest="cost", choices=ERROR_FUNCS.keys(), default="quadratic" + ) parser.add_argument( "--min-duration", diff --git a/src/editors/amplitude/editor.py b/src/editors/amplitude/editor.py index 407bfe2..d69760a 100644 --- a/src/editors/amplitude/editor.py +++ b/src/editors/amplitude/editor.py @@ -4,6 +4,7 @@ from ..common import find_moving_average_highlights import numpy as np import structlog + class AmplitudeEditor: def __init__(self, video_path, audio_path, params): self.logger = structlog.get_logger("amplitude") @@ -21,8 +22,13 @@ class AmplitudeEditor: def edit(self, large_window, small_window, params): window_factor = self.bitrate / self.factor - long_ma = np_moving_average(self.squared_subsample, large_window * window_factor) - short_ma = np_moving_average(self.squared_subsample, small_window * window_factor) - highlights = find_moving_average_highlights(short_ma, long_ma, self.factor / self.bitrate) + long_ma = np_moving_average( + self.squared_subsample, large_window * window_factor + ) + short_ma = np_moving_average( + self.squared_subsample, small_window * window_factor + ) + highlights = find_moving_average_highlights( + short_ma, long_ma, self.factor / self.bitrate + ) return highlights, large_window, small_window - diff --git a/src/editors/common.py b/src/editors/common.py index cf6960c..0d0c68d 100644 --- a/src/editors/common.py +++ b/src/editors/common.py @@ -1,5 +1,6 @@ from ..models.moment import Moment + def find_moving_average_highlights(short_ma, long_ma, scaling_factor=1): in_a_clip = False m = None diff --git a/src/editors/sentiment/editor.py b/src/editors/sentiment/editor.py index 62bbdc8..6b87c33 100644 --- a/src/editors/sentiment/editor.py +++ b/src/editors/sentiment/editor.py @@ -12,12 +12,14 @@ from flair.data import Sentence from ...math.average import np_moving_average from ..common import find_moving_average_highlights + @dataclass class TextGlob: - start:float - stop:float - text:str - sentiment:float + start: float + stop: float + text: str + sentiment: float + class SentimentEditor: def __init__(self, video_path, audio_path, params): @@ -30,20 +32,24 @@ class SentimentEditor: self.logger.info("transcribing audio", path=audio_path) self.result = self.model.transcribe(audio_path) - with open(dest_location, 'w') as fp: + with open(dest_location, "w") as fp: json.dump(self.result, fp) else: self.logger.info("cached transcription found", path=dest_location) - with open(dest_location, 'r') as f: - self.result = json.load(f) + with open(dest_location, "r") as f: + self.result = json.load(f) self.segments = [] - for segment in self.result['segments']: - self.segments.append(TextGlob(segment['start'], segment['end'], segment['text'], 0)) - classifier = TextClassifier.load('en-sentiment') + for segment in self.result["segments"]: + self.segments.append( + TextGlob(segment["start"], segment["end"], segment["text"], 0) + ) + classifier = TextClassifier.load("en-sentiment") self.sentiments = [] - self.logger.info("calculating sentiment on segments", segments=len(self.segments)) + self.logger.info( + "calculating sentiment on segments", segments=len(self.segments) + ) for segment in self.segments: sentence = Sentence(segment.text) classifier.predict(sentence) @@ -53,11 +59,13 @@ class SentimentEditor: segment.sentiment = sentsum self.sentiments.append(sentsum) self.sentiments = np.array(self.sentiments) - + def edit(self, large_window, small_window, params): end_time = self.segments[-1].stop window_factor = len(self.sentiments) / end_time long_ma = np_moving_average(self.sentiments, large_window) short_ma = np_moving_average(self.sentiments, small_window) - highlights = find_moving_average_highlights(short_ma, long_ma, 1.0 / window_factor) + highlights = find_moving_average_highlights( + short_ma, long_ma, 1.0 / window_factor + ) return highlights, large_window, small_window diff --git a/src/math/average.py b/src/math/average.py index 6e1ccf2..e56aa6f 100644 --- a/src/math/average.py +++ b/src/math/average.py @@ -1,4 +1,5 @@ import numpy as np + def np_moving_average(data: int, window: int) -> np.ndarray: return np.convolve(data, np.ones(int(window)), "valid") / window diff --git a/src/math/cost.py b/src/math/cost.py index f374a56..1732525 100644 --- a/src/math/cost.py +++ b/src/math/cost.py @@ -3,5 +3,6 @@ Functions in this file should always target for 0 to be the lowest possible error -> smaller values win """ + def quadratic_loss(target, result): - return (target - result)**2.0 + return (target - result) ** 2.0 diff --git a/src/math/distribution.py b/src/math/distribution.py index e419edc..e4876a2 100644 --- a/src/math/distribution.py +++ b/src/math/distribution.py @@ -1,5 +1,6 @@ import numpy as np + def create_distribution(center, spread, count): high = center * (1.0 + spread) low = center - (center * spread) diff --git a/src/mediautils/audio.py b/src/mediautils/audio.py index 31de2c8..f191106 100644 --- a/src/mediautils/audio.py +++ b/src/mediautils/audio.py @@ -4,10 +4,8 @@ from pathlib import Path import numpy as np import scipy.io.wavfile as wav -def extract_audio_from_video( - video_path: str, - filename: str -): + +def extract_audio_from_video(video_path: str, filename: str): tempdir = tempfile.gettempdir() dest_location = f"{tempdir}/{filename}.wav" @@ -19,6 +17,7 @@ def extract_audio_from_video( vid.close() return dest_location, False + def process_audio(source_audio_path): rate, data_raw = wav.read(source_audio_path) data_raw = data_raw.astype(np.int32) @@ -26,5 +25,6 @@ def process_audio(source_audio_path): duration = len(mono) / rate return mono, duration, rate + def resample(data: np.ndarray, factor: int) -> np.ndarray: return data[::factor].copy() diff --git a/src/mediautils/video.py b/src/mediautils/video.py index b2737fc..ead8de0 100644 --- a/src/mediautils/video.py +++ b/src/mediautils/video.py @@ -9,16 +9,27 @@ def get_subclips(source_video_path, moments): return clips, vid -def render_moments(moments, input_video_path, output_path, intro_path=None, outro_path=None, parallelism=1): +def render_moments( + moments, + input_video_path, + output_path, + intro_path=None, + outro_path=None, + parallelism=1, +): clips, _ = get_subclips(input_video_path, moments) if intro_path is not None: size = clips[0].size iclip = mp.VideoFileClip(intro_path) iclip.resize(height=size[1]) clips.insert(0, iclip) - composite = mp.concatenate_videoclips(clips, method='compose') + composite = mp.concatenate_videoclips(clips, method="compose") composite.write_videofile(output_path, logger=None, threads=parallelism) def filter_moments(moments, min_length, max_length): - return [m for m in moments if m.get_duration() > min_length and m.get_duration() < max_length] + return [ + m + for m in moments + if m.get_duration() > min_length and m.get_duration() < max_length + ] diff --git a/src/models/moment.py b/src/models/moment.py index a41e251..2e1ae30 100644 --- a/src/models/moment.py +++ b/src/models/moment.py @@ -1,6 +1,4 @@ - class Moment: - def __init__(self, start, stop): self.start = start self.stop = stop diff --git a/src/utils/prereq.py b/src/utils/prereq.py index e9b9231..1f8c8f2 100644 --- a/src/utils/prereq.py +++ b/src/utils/prereq.py @@ -9,12 +9,18 @@ log = structlog.get_logger() def install_ffmpeg(): - if not click.confirm('Do you want to install ffmpeg? It is required for ALE.', default=False): - log.warn("ffmpeg not installed. Please install it manually or restart ALE. Exiting...") + if not click.confirm( + "Do you want to install ffmpeg? It is required for ALE.", default=False + ): + log.warn( + "ffmpeg not installed. Please install it manually or restart ALE. Exiting..." + ) sys.exit(0) system = platform.system().lower() if system == "linux": - package_manager = "apt-get" if os.path.exists("/etc/apt/sources.list") else "yum" + package_manager = ( + "apt-get" if os.path.exists("/etc/apt/sources.list") else "yum" + ) command = f"sudo {package_manager} install -y ffmpeg" elif system == "darwin": command = "brew install ffmpeg"