basic iterations working, need to do duration mapping

This commit is contained in:
Tanishq Dubey 2022-10-21 20:25:46 -04:00
parent 6e69bf6b67
commit 80d8845eda
2 changed files with 84 additions and 26 deletions

76
main.py
View File

@ -7,12 +7,15 @@ import hashlib
import random import random
import multiprocessing import multiprocessing
import concurrent.futures import concurrent.futures
import time
import numpy as np import numpy as np
from src.mediautils.audio import extract_audio_from_video from src.mediautils.audio import extract_audio_from_video
from src.mediautils.video import render_moments
from src.editors.amplitude.editor import AmplitudeEditor from src.editors.amplitude.editor import AmplitudeEditor
from src.editors.sentiment.editor import SentimentEditor from src.editors.sentiment.editor import SentimentEditor
from src.math.cost import quadratic_loss
from src.math.distribution import create_distribution from src.math.distribution import create_distribution
log = structlog.get_logger() log = structlog.get_logger()
@ -22,6 +25,10 @@ EDITORS = {
'sentiment': SentimentEditor 'sentiment': SentimentEditor
} }
ERROR_FUNCS = {
'quadratic': quadratic_loss
}
def main(args): def main(args):
# Check video existence # Check video existence
input_file = args.file input_file = args.file
@ -56,6 +63,8 @@ def main(args):
log.info("initializing editor", editor=args.editor) log.info("initializing editor", editor=args.editor)
editor = EDITORS[args.editor](str(in_vid_path.resolve()), audio_path, vars(args)) editor = EDITORS[args.editor](str(in_vid_path.resolve()), audio_path, vars(args))
log.info("initialized editor", editor=args.editor) log.info("initialized editor", editor=args.editor)
costfunc = ERROR_FUNCS[args.cost]
desired = args.duration
# Generate center of large window and small window size # Generate center of large window and small window size
large_window_center = random.uniform(30, 50) large_window_center = random.uniform(30, 50)
@ -67,14 +76,27 @@ def main(args):
# The decay rate, or how quickly our spread multiplier decreases as we approach the center of the gradient # The decay rate, or how quickly our spread multiplier decreases as we approach the center of the gradient
spread_decay = random.uniform(0.0001, 0.001) spread_decay = random.uniform(0.0001, 0.001)
log.info("creating distributions", large_start=large_window_center, small_start=small_window_center, spread=spread_multiplier, decay=spread_decay)
# Create distribution of large and small
parallelism = args.parallelism parallelism = args.parallelism
# The main loop of the program starts here
# we first create distributions
# use workers to simultaneously create many possible edits
# find the best edit of the lot -> this is determined by lowest "cost"
# if the best fits within our desired time range, output, otherwise
# reset the distributions using the best as the new center, then repeat
# Create distribution of large and small
complete = False
iterations = 0
while not complete:
log.info("creating distributions", large_start=large_window_center, small_start=small_window_center, spread=spread_multiplier, decay=spread_decay)
large_distribution = create_distribution(large_window_center, spread_multiplier, parallelism) large_distribution = create_distribution(large_window_center, spread_multiplier, parallelism)
np.random.shuffle(large_distribution) np.random.shuffle(large_distribution)
small_distribution = create_distribution(small_window_center, spread_multiplier, parallelism) small_distribution = create_distribution(small_window_center, spread_multiplier, parallelism)
np.random.shuffle(small_distribution) np.random.shuffle(small_distribution)
# Fire off workers to generate edits
moment_results = []
with concurrent.futures.ThreadPoolExecutor() as executor: with concurrent.futures.ThreadPoolExecutor() as executor:
futures = [] futures = []
pairs = list(zip(large_distribution, small_distribution)) pairs = list(zip(large_distribution, small_distribution))
@ -88,11 +110,49 @@ def main(args):
) )
) )
for future in concurrent.futures.as_completed(futures): for future in concurrent.futures.as_completed(futures):
value = future.result() try:
log.info("got val", val=value) moment_results.append(future.result())
except Exception:
log.exception("error during editing")
sys.exit(-2)
moment_results
costs = []
durations = []
for result in moment_results:
total_duration = 0
for moment in result[0]:
total_duration = total_duration + moment.get_duration()
costs.append(costfunc(desired, total_duration))
durations.append(total_duration)
index_min = min(range(len(costs)), key=costs.__getitem__)
large_window_center = moment_results[index_min][1]
small_window_center = moment_results[index_min][2]
log.info("batch complete", best_large=large_window_center, best_small=small_window_center, duration=durations[index_min])
if durations[index_min] > desired * 0.95 and desired * 1.05 > durations[index_min]:
log.info("found edit within target duration", target=desired, duration=durations[index_min])
out_path = Path(args.destination)
log.info("rendering...")
start = time.time()
render_moments(moment_results[index_min][0], str(in_vid_path.resolve()), str(out_path.resolve()))
log.info("render complete", duration=time.time() - start, output=str(out_path.resolve()))
sys.exit(0)
iterations = iterations + parallelism
if iterations > 50000:
log.error("could not find a viable edit in the target duration, try other params", target=desired)
sys.exit(-4)
spread_multiplier = spread_multiplier - spread_decay
if spread_multiplier < 0:
log.warn("spread reached 0, resetting")
large_window_center = random.uniform(30, 50)
small_window_center = random.uniform(5, 15)
spread_multiplier = random.uniform(0.15, 0.18)
spread_decay = random.uniform(0.0001, 0.001)
desired = args.duration
if __name__ == "__main__": if __name__ == "__main__":
@ -115,13 +175,12 @@ if __name__ == "__main__":
type=int, type=int,
) )
parser_audio_amp = subparsers.add_parser('sentiment', help='The sentiment editor transcribes the speech in a video and runs sentiment analysis on the resulting text. Using moving averages, large swings in sentiment can be correlated to controversial or exciting moments.') parser_audio_amp = subparsers.add_parser('sentiment', help='The sentiment editor transcribes the speech in a video and runs sentiment analysis on the resulting text. Using moving averages, large swings in sentiment can be correlated to controversial or exciting moments. A GPU with CUDA is recommended for fast results.')
parser.add_argument("-d", "--dryrun", dest="drun", action="store_true")
parser.add_argument("-p", "--parallelism", dest="parallelism", type=int, default=multiprocessing.cpu_count() - 2, help="The number of cores to use, defaults to N - 2 cores.") parser.add_argument("-p", "--parallelism", dest="parallelism", type=int, default=multiprocessing.cpu_count() - 2, help="The number of cores to use, defaults to N - 2 cores.")
parser.add_argument("--cost-function", dest="cost", choices=ERROR_FUNCS.keys(), default='quadratic')
parser.add_argument( parser.add_argument(
"-i",
"--minduration", "--minduration",
default=8, default=8,
help="Minimum clip duration", help="Minimum clip duration",
@ -129,7 +188,6 @@ if __name__ == "__main__":
type=int, type=int,
) )
parser.add_argument( parser.add_argument(
"-m",
"--maxduration", "--maxduration",
default=15, default=15,
help="Maximum clip duration", help="Maximum clip duration",

View File

@ -25,5 +25,5 @@ class AmplitudeEditor:
long_ma = np_moving_average(self.squared_subsample, large_window * window_factor) long_ma = np_moving_average(self.squared_subsample, large_window * window_factor)
short_ma = np_moving_average(self.squared_subsample, small_window * window_factor) short_ma = np_moving_average(self.squared_subsample, small_window * window_factor)
highlights = find_moving_average_highlights(short_ma, long_ma, self.factor / self.bitrate) highlights = find_moving_average_highlights(short_ma, long_ma, self.factor / self.bitrate)
return highlights return highlights, large_window, small_window