From b49b05c4d3f26a9da9fc12264f0e53eed5997476 Mon Sep 17 00:00:00 2001
From: Tanishq Dubey
Date: Thu, 16 Feb 2023 19:34:13 -0500
Subject: [PATCH] more cleanup

---
 main.py                         | 22 +++++++++-------------
 src/editors/sentiment/editor.py | 27 +++++++++++++++++++++------
 src/mediautils/video.py         |  5 ++++-
 3 files changed, 34 insertions(+), 20 deletions(-)

diff --git a/main.py b/main.py
index efd2e24..bcd5683 100644
--- a/main.py
+++ b/main.py
@@ -12,7 +12,7 @@ import time
 import numpy as np
 
 from src.mediautils.audio import extract_audio_from_video
-from src.mediautils.video import render_moments
+from src.mediautils.video import render_moments, filter_moments
 from src.editors.amplitude.editor import AmplitudeEditor
 from src.editors.sentiment.editor import SentimentEditor
 from src.math.cost import quadratic_loss
@@ -58,10 +58,12 @@ def main(args):
         log.info("using cached audio file", cache_path=audio_path)
     else:
         log.info("extracted audio", cache_path=audio_path)
+    params = vars(args)
+    params["temp_file_name"] = temp_file_name
 
     # Initalize Editor
     log.info("initializing editor", editor=args.editor)
-    editor = EDITORS[args.editor](str(in_vid_path.resolve()), audio_path, vars(args))
+    editor = EDITORS[args.editor](str(in_vid_path.resolve()), audio_path, params)
     log.info("initialized editor", editor=args.editor)
     costfunc = ERROR_FUNCS[args.cost]
     desired = args.duration
@@ -74,7 +76,7 @@ def main(args):
     spread_multiplier = random.uniform(0.15, 0.18)
 
     # The decay rate, or how quickly our spread multiplier decreases as we approach the center of the gradient
-    spread_decay = random.uniform(0.0001, 0.001)
+    spread_decay = random.uniform(0.000001, 0.0001)
 
     parallelism = args.parallelism
 
@@ -89,7 +91,6 @@ def main(args):
     complete = False
     iterations = 0
     while not complete:
-        log.info("creating distributions", large_start=large_window_center, small_start=small_window_center, spread=spread_multiplier, decay=spread_decay)
         large_distribution = create_distribution(large_window_center, spread_multiplier, parallelism)
         np.random.shuffle(large_distribution)
         small_distribution = create_distribution(small_window_center, spread_multiplier, parallelism)
@@ -104,14 +105,14 @@ def main(args):
             futures.append(
                 executor.submit(
                     editor.edit,
-                    pair[0],
-                    pair[1],
+                    pair[0] if pair[0] > pair[1] else pair[1],
+                    pair[1] if pair[0] > pair[1] else pair[0],
                     vars(args)
                 )
             )
         for future in concurrent.futures.as_completed(futures):
             try:
-                moment_results.append(future.result())
+                moment_results.append(list(future.result()))
             except Exception:
                 log.exception("error during editing")
                 sys.exit(-2)
@@ -120,6 +121,7 @@ def main(args):
     durations = []
     for result in moment_results:
         total_duration = 0
+        result[0] = filter_moments(result[0], args.mindur, args.maxdur)
         for moment in result[0]:
             total_duration = total_duration + moment.get_duration()
         costs.append(costfunc(desired, total_duration))
@@ -147,12 +149,6 @@ def main(args):
             small_window_center = random.uniform(5, 15)
             spread_multiplier = random.uniform(0.15, 0.18)
             spread_decay = random.uniform(0.0001, 0.001)
-
-
-
-
-
-
 
 
 if __name__ == "__main__":
diff --git a/src/editors/sentiment/editor.py b/src/editors/sentiment/editor.py
index 9c32ba0..62bbdc8 100644
--- a/src/editors/sentiment/editor.py
+++ b/src/editors/sentiment/editor.py
@@ -1,4 +1,7 @@
 import whisper
+import json
+from pathlib import Path
+import tempfile
 import numpy as np
 import structlog
 
@@ -19,10 +22,22 @@ class TextGlob:
 class SentimentEditor:
     def __init__(self, video_path, audio_path, params):
         self.logger = structlog.get_logger("sentiment")
-        self.logger.info("loading whisper model", size=params["model_size"])
-        self.model = whisper.load_model(params["model_size"])
-        self.logger.info("transcribing audio", path=audio_path)
-        self.result = self.model.transcribe(audio_path)
+        tempdir = tempfile.gettempdir()
+        dest_location = f"{tempdir}/{params['temp_file_name']}-{params['model_size']}-sentiment.json"
+        if not Path(dest_location).is_file():
+            self.logger.info("loading whisper model", size=params["model_size"])
+            self.model = whisper.load_model(params["model_size"])
+            self.logger.info("transcribing audio", path=audio_path)
+            self.result = self.model.transcribe(audio_path)
+
+            with open(dest_location, 'w') as fp:
+                json.dump(self.result, fp)
+        else:
+            self.logger.info("cached transcription found", path=dest_location)
+
+        with open(dest_location, 'r') as f:
+            self.result = json.load(f)
+
         self.segments = []
         for segment in self.result['segments']:
             self.segments.append(TextGlob(segment['start'], segment['end'], segment['text'], 0))
@@ -42,7 +57,7 @@ class SentimentEditor:
     def edit(self, large_window, small_window, params):
         end_time = self.segments[-1].stop
         window_factor = len(self.sentiments) / end_time
-        long_ma = np_moving_average(self.squared_subsample, large_window * window_factor)
-        short_ma = np_moving_average(self.squared_subsample, small_window * window_factor)
+        long_ma = np_moving_average(self.sentiments, large_window)
+        short_ma = np_moving_average(self.sentiments, small_window)
         highlights = find_moving_average_highlights(short_ma, long_ma, 1.0 / window_factor)
         return highlights, large_window, small_window
diff --git a/src/mediautils/video.py b/src/mediautils/video.py
index 1e220fa..59d97c7 100644
--- a/src/mediautils/video.py
+++ b/src/mediautils/video.py
@@ -9,5 +9,8 @@ def get_subclips(source_video_path, moments):
 
 def render_moments(moments, input_video_path, output_path):
     clips, vid = get_subclips(input_video_path, moments)
-    to_render = mp.concatenate_videoclips(clips, logger=None)
+    to_render = mp.concatenate_videoclips(clips)
     to_render.write_videofile(output_path, logger=None)
+
+def filter_moments(moments, min_length, max_length):
+    return [m for m in moments if m.get_duration() > min_length and m.get_duration() < max_length]