2022-03-08 00:25:54 -05:00
|
|
|
import moviepy.editor as mp
|
|
|
|
import numpy as np
|
|
|
|
from matplotlib import pyplot as plt
|
|
|
|
import pandas as pd
|
|
|
|
import scipy.io.wavfile as wav
|
|
|
|
from typing import Tuple
|
|
|
|
from itertools import zip_longest
|
|
|
|
import argparse
|
|
|
|
import os
|
|
|
|
import tempfile
|
|
|
|
|
2022-03-08 00:28:28 -05:00
|
|
|
|
2022-03-08 00:25:54 -05:00
|
|
|
class Moment:
    """A span described by a ``start`` and a ``stop`` position.

    ``duration`` is ``stop - start``. When ``stop`` is None the span is
    open-ended and ``duration`` is 0.
    """

    def __init__(self, start, stop):
        self.start = start
        self.stop = stop
        # An open-ended moment (no stop) keeps a zero duration.
        self.duration = 0 if stop is None else stop - start

    def __repr__(self):
        return f"Start {self.start} \t\t Stop {self.stop} \t\t Duration {self.duration}"

    def __str__(self):
        # The printable form is the same text as the repr.
        return self.__repr__()
|
|
|
|
|
|
|
|
|
|
|
|
def process_audio(source_audio_path: str) -> Tuple[np.ndarray, float, int]:
    """Load a WAV file and reduce it to a single floating-point channel.

    Args:
        source_audio_path: Path of the WAV file to read.

    Returns:
        A ``(mono, duration, rate)`` tuple. ``mono`` is a 1-D float64 array:
        the samples themselves for a single-channel file, otherwise the
        average of the first two channels. ``duration`` is
        ``len(mono) / rate`` and ``rate`` is the sample rate read from the
        file.
    """
    rate, data_raw = wav.read(source_audio_path)

    # Widen to float64 before mixing: adding two channels in the file's own
    # integer type could overflow, and casting float samples to an integer
    # type would truncate them.
    samples = data_raw.astype(np.float64)

    if samples.ndim == 1:
        # Single-channel files are read as a 1-D array; there is nothing to mix.
        mono = samples
    else:
        mono = (samples[:, 0] + samples[:, 1]) / 2

    duration = len(mono) / rate
    return mono, duration, rate
|
|
|
|
|
|
|
|
|
2022-03-08 00:28:28 -05:00
|
|
|
def convert_video_to_audio(
    source_video_path: str, destination_audio_location=None
) -> str:
    """Write the audio track of a video to a file and return that file's path.

    Args:
        source_video_path: Path of the video to read.
        destination_audio_location: Where to write the audio. When None, the
            audio goes to ``<tempdir>/<source_video_path>.wav`` where
            ``<tempdir>`` is ``tempfile.gettempdir()``.

    Returns:
        The path of the audio file. If a file already exists at that path it
        is returned as-is and the video is not opened.

    Raises:
        ValueError: If the video has no audio track.
    """
    use_default_location = destination_audio_location is None
    if use_default_location:
        tdir = tempfile.gettempdir()
        dest_location = f"{tdir}/{source_video_path}.wav"
    else:
        dest_location = destination_audio_location

    # Report the path that is actually checked, i.e. after any override.
    print(f"checking to see if {dest_location} exists")
    if os.path.isfile(dest_location):
        print(f"{dest_location} exists, using cached")
        return dest_location

    if use_default_location:
        # source_video_path may contain directories, so the default location
        # can point into a sub-directory of the temp dir that does not exist.
        parent = os.path.dirname(dest_location)
        if parent:
            os.makedirs(parent, exist_ok=True)

    vid = mp.VideoFileClip(source_video_path)
    try:
        if vid.audio is None:
            raise ValueError(f"{source_video_path} has no audio track")
        vid.audio.write_audiofile(dest_location)
    finally:
        # Release the clip even when extraction fails.
        vid.close()
    return dest_location
|
|
|
|
|
|
|
|
|
|
|
|
def get_subclips(source_video_path: str, moments, min_duration=30, max_duration=None):
    """Cut one sub-clip out of the video for every moment that is long enough.

    Args:
        source_video_path: Path of the video to cut from.
        moments: Iterable of objects exposing ``start``, ``stop`` and
            ``duration`` attributes.
        min_duration: A moment is kept only when its duration is strictly
            greater than this value. The default of 30 matches the
            previously hard-coded limit.
        max_duration: When not None, a moment is kept only when its duration
            is strictly smaller than this value.

    Returns:
        A list of sub-clips in the order of ``moments``. The list is empty,
        and the video is not opened, when no moment qualifies.
    """
    selected = [
        m
        for m in moments
        if m.duration > min_duration
        and (max_duration is None or m.duration < max_duration)
    ]
    if not selected:
        return []

    # The clip is intentionally left open: the sub-clips are cut from it and
    # are returned to the caller for later use.
    vid = mp.VideoFileClip(source_video_path)
    return [vid.subclip(m.start, m.stop) for m in selected]
|
|
|
|
|
|
|
|
|
|
|
|
def sub_resample(data: np.ndarray, factor: int):
    """Return a copy of ``data`` containing every ``factor``-th element.

    The first element is always kept; the result does not share storage
    with ``data``.
    """
    every_nth = slice(None, None, factor)
    picked = data[every_nth]
    return picked.copy()
|
|
|
|
|
|
|
|
|
|
|
|
def moving_average(x, w):
    """Return the mean of every full window of ``w`` consecutive values of ``x``.

    Only windows that fit entirely inside ``x`` are produced ("valid"
    convolution), so the result has ``len(x) - w + 1`` entries.
    """
    window = np.ones(w)
    window_sums = np.convolve(x, window, mode="valid")
    return window_sums / w
|
2022-03-08 00:25:54 -05:00
|
|
|
|
|
|
|
|
|
|
|
def find_highlights(data, threshold, rate, factor):
    """Locate the points where ``data`` rises through ``threshold``.

    An index ``i`` counts when ``data[i]`` is strictly below ``threshold``
    and ``data[i + 1]`` is strictly above it. Each such index is returned
    scaled as ``i * factor / rate``.
    """
    return [
        index * factor / rate
        for index in range(len(data) - 1)
        if data[index] < threshold and threshold < data[index + 1]
    ]
|
|
|
|
|
|
|
|
|
|
|
|
def find_moving_average_highlights(short_ma, long_ma, bitrate, resample_factor):
    """Build moments from the crossings of a short and a long moving average.

    A moment opens at index ``t`` when the short average goes from strictly
    below the long average at ``t - 1`` to strictly above it at ``t``, and
    closes on the opposite crossing. Crossing indices are converted with
    ``t * resample_factor / bitrate`` and paired up, in order, as
    ``Moment(start, stop)``.

    If the data ends while a moment is still open, that last moment is
    created with ``stop=None``.
    """
    crossings = []
    clip_open = False
    for idx in range(1, len(long_ma)):
        before = idx - 1
        if clip_open:
            # Looking for the short average dropping back under the long one.
            crossed = (short_ma[before] > long_ma[before]) and (
                short_ma[idx] < long_ma[idx]
            )
        else:
            # Looking for the short average climbing over the long one.
            crossed = (short_ma[before] < long_ma[before]) and (
                short_ma[idx] > long_ma[idx]
            )
        if crossed:
            clip_open = not clip_open
            crossings.append(idx * resample_factor / bitrate)

    # Consecutive crossings form (start, stop) pairs; an unmatched final
    # start is padded with None by blockwise.
    return [Moment(begin, end) for begin, end in blockwise(crossings)]
|
|
|
|
|
|
|
|
|
|
|
|
def blockwise(t, size=2, fillvalue=None):
    """Group the items of ``t`` into consecutive tuples of ``size`` items.

    The last tuple is padded with ``fillvalue`` when ``t`` runs out early.
    Returns a lazy iterator of tuples.
    """
    shared = iter(t)
    # Every slot pulls from the same iterator, so each tuple consumes
    # ``size`` consecutive items.
    slots = (shared,) * size
    return zip_longest(*slots, fillvalue=fillvalue)
|
2022-03-08 00:25:54 -05:00
|
|
|
|
|
|
|
|
|
|
|
def plot_audio(data):
    """Plot ``data`` against its element index and display the figure."""
    positions = list(range(len(data)))
    plt.plot(positions, data)
    plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
def main(vidfilepath, outfile, res_factor, lw, sw, dry_run, minduration, maxduration):
    """Find the highlight moments of a video and optionally render them.

    The video's audio is extracted, subsampled and squared; a long and a
    short moving average of that signal are compared to find moments. Every
    moment whose duration lies strictly between ``minduration`` and
    ``maxduration`` is printed, followed by the summed duration divided by 60.

    Args:
        vidfilepath: Path of the source video.
        outfile: Path of the rendered video (unused when ``dry_run`` is true).
        res_factor: Keep every ``res_factor``-th audio sample.
        lw: Window length of the long moving average.
        sw: Window length of the short moving average.
        dry_run: When true, only print the report and skip rendering.
        minduration: Exclusive lower bound on a moment's duration.
        maxduration: Exclusive upper bound on a moment's duration.

    Raises:
        ValueError: If ``lw`` is not greater than ``sw``.
    """
    # Validate explicitly: an assert would be stripped under ``python -O``.
    if not lw > sw:
        raise ValueError(
            f"long window ({lw}) must be greater than short window ({sw})"
        )

    audiofile = None
    try:
        audiofile = convert_video_to_audio(vidfilepath)
        data, _, bitrate = process_audio(audiofile)

        subsampled_data = sub_resample(data, res_factor)
        squared_subsample = np.square(subsampled_data)

        long_ma = moving_average(squared_subsample, lw)
        short_ma = moving_average(squared_subsample, sw)

        moments = find_moving_average_highlights(
            short_ma, long_ma, bitrate, res_factor
        )

        # A moment without a stop was still open when the data ended; it has
        # no usable end point, so it is left out.
        selected = [
            m
            for m in moments
            if m.stop is not None and minduration < m.duration < maxduration
        ]

        total_time = 0
        for m in selected:
            print(
                f"Start {round(m.start/60, 2)} \t\t Stop {round(m.stop/60, 2)} \t\t Duration {round(m.duration, 2)}"
            )
            total_time = total_time + m.duration

        print(total_time / 60)

        if not dry_run:
            # Render from the same selection that was reported above.
            # get_subclips additionally drops moments of 30 or less.
            clips = get_subclips(vidfilepath, selected)
            if clips:
                to_render = mp.concatenate_videoclips(clips)
                to_render.write_videofile(outfile)
            else:
                print("no moments matched the duration limits, nothing to render")
    finally:
        # Always remove the extracted audio, but only if it was created;
        # errors are left to propagate instead of being silently swallowed.
        if audiofile is not None and os.path.isfile(audiofile):
            os.remove(audiofile)
|
2022-03-08 00:25:54 -05:00
|
|
|
|
|
|
|
|
|
|
|
def build_parser():
    """Create the command-line parser for autoditor.

    Option names, destinations, types and defaults are those the script has
    always used; the descriptive texts are exposed through ``help`` so they
    show up in ``--help`` instead of distorting the usage line.
    """
    parser = argparse.ArgumentParser(
        prog="autoditor", description="autoditor is an automatic video editor."
    )
    parser.add_argument(
        "-v",
        "--video",
        required=True,
        metavar="PATH",
        dest="vpath",
        help="Video file path",
    )
    parser.add_argument(
        "-f",
        "--factor",
        default=16000,
        metavar="N",
        dest="factor",
        type=int,
        help="Subsampling factor (default: %(default)s)",
    )
    parser.add_argument(
        "-l",
        "--longwindow",
        default=128,
        metavar="N",
        dest="lwindow",
        type=int,
        help="Long moving average time (default: %(default)s)",
    )
    parser.add_argument(
        "-s",
        "--shortwindow",
        default=64,
        metavar="N",
        dest="swindow",
        type=int,
        help="Short moving average time (default: %(default)s)",
    )
    parser.add_argument(
        "-d",
        "--dryrun",
        dest="drun",
        action="store_true",
        help="Only report the moments found; do not render a video",
    )
    parser.add_argument(
        "-o",
        "--output",
        required=True,
        metavar="PATH",
        dest="opath",
        help="Output file location",
    )
    parser.add_argument(
        "-i",
        "--minduration",
        default=30,
        metavar="N",
        dest="mindur",
        type=int,
        help="Minimum clip duration (default: %(default)s)",
    )
    parser.add_argument(
        "-m",
        "--maxduration",
        default=100,
        metavar="N",
        dest="maxdur",
        type=int,
        help="Maximum clip duration (default: %(default)s)",
    )
    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()
    main(
        vidfilepath=args.vpath,
        outfile=args.opath,
        res_factor=args.factor,
        lw=args.lwindow,
        sw=args.swindow,
        dry_run=args.drun,
        minduration=args.mindur,
        maxduration=args.maxdur,
    )
|