add sentiment editor
This commit is contained in:
parent
8cef9fe666
commit
12bba40f0a
10
main.py
10
main.py
@ -167,7 +167,6 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
parser_audio_amp = subparsers.add_parser('amplitude', help='The amplitude editor uses audio amplitude moving averages to find swings from relatively quiet moments to loud moments. This is useful in videos where long moments of quiet are interspersed with loud action filled moments.')
|
parser_audio_amp = subparsers.add_parser('amplitude', help='The amplitude editor uses audio amplitude moving averages to find swings from relatively quiet moments to loud moments. This is useful in videos where long moments of quiet are interspersed with loud action filled moments.')
|
||||||
parser_audio_amp.add_argument(
|
parser_audio_amp.add_argument(
|
||||||
"-f",
|
|
||||||
"--factor",
|
"--factor",
|
||||||
default=16000,
|
default=16000,
|
||||||
help="Subsampling factor",
|
help="Subsampling factor",
|
||||||
@ -175,7 +174,14 @@ if __name__ == "__main__":
|
|||||||
type=int,
|
type=int,
|
||||||
)
|
)
|
||||||
|
|
||||||
parser_audio_amp = subparsers.add_parser('sentiment', help='The sentiment editor transcribes the speech in a video and runs sentiment analysis on the resulting text. Using moving averages, large swings in sentiment can be correlated to controversial or exciting moments. A GPU with CUDA is recommended for fast results.')
|
parser_sentiment = subparsers.add_parser('sentiment', help='The sentiment editor transcribes the speech in a video and runs sentiment analysis on the resulting text. Using moving averages, large swings in sentiment can be correlated to controversial or exciting moments. A GPU with CUDA is recommended for fast results.')
|
||||||
|
parser_sentiment.add_argument(
|
||||||
|
"--model",
|
||||||
|
default="base",
|
||||||
|
help="The size of the sentiment analysis model being used. Larger models increase computation time.",
|
||||||
|
dest="model_size",
|
||||||
|
choices=["base", "tiny", "small", "medium", "large"],
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument("-p", "--parallelism", dest="parallelism", type=int, default=multiprocessing.cpu_count() - 2, help="The number of cores to use, defaults to N - 2 cores.")
|
parser.add_argument("-p", "--parallelism", dest="parallelism", type=int, default=multiprocessing.cpu_count() - 2, help="The number of cores to use, defaults to N - 2 cores.")
|
||||||
parser.add_argument("--cost-function", dest="cost", choices=ERROR_FUNCS.keys(), default='quadratic')
|
parser.add_argument("--cost-function", dest="cost", choices=ERROR_FUNCS.keys(), default='quadratic')
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
from ...mediautils.audio import process_audio, resample
|
from ...mediautils.audio import process_audio, resample
|
||||||
from ...math.average import np_moving_average
|
from ...math.average import np_moving_average
|
||||||
from ...models.moment import Moment
|
|
||||||
from ..common import find_moving_average_highlights
|
from ..common import find_moving_average_highlights
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import structlog
|
import structlog
|
||||||
|
@ -1,3 +1,48 @@
|
|||||||
|
import whisper
|
||||||
|
import numpy as np
|
||||||
|
import structlog
|
||||||
|
|
||||||
|
from flair.models import TextClassifier
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from flair.data import Sentence
|
||||||
|
|
||||||
|
from ...math.average import np_moving_average
|
||||||
|
from ..common import find_moving_average_highlights
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class TextGlob:
|
||||||
|
start:float
|
||||||
|
stop:float
|
||||||
|
text:str
|
||||||
|
sentiment:float
|
||||||
|
|
||||||
class SentimentEditor:
|
class SentimentEditor:
|
||||||
pass
|
def __init__(self, video_path, audio_path, params):
|
||||||
|
self.logger = structlog.get_logger("sentiment")
|
||||||
|
self.logger.info("loading whisper model", size=params["model_size"])
|
||||||
|
self.model = whisper.load_model(params["model_size"])
|
||||||
|
self.logger.info("transcribing audio", path=audio_path)
|
||||||
|
self.result = self.model.transcribe(audio_path)
|
||||||
|
self.segments = []
|
||||||
|
for segment in self.result['segments']:
|
||||||
|
self.segments.append(TextGlob(segment['start'], segment['end'], segment['text'], 0))
|
||||||
|
classifier = TextClassifier.load('en-sentiment')
|
||||||
|
self.sentiments = []
|
||||||
|
self.logger.info("calculating sentiment on segments", segments=len(self.segments))
|
||||||
|
for segment in self.segments:
|
||||||
|
sentence = Sentence(segment.text)
|
||||||
|
classifier.predict(sentence)
|
||||||
|
sentsum = sentence.labels[0].score
|
||||||
|
if sentence.labels[0].value == "NEGATIVE":
|
||||||
|
sentsum = sentsum * -1
|
||||||
|
segment.sentiment = sentsum
|
||||||
|
self.sentiments.append(sentsum)
|
||||||
|
self.sentiments = np.array(self.sentiments)
|
||||||
|
|
||||||
|
def edit(self, large_window, small_window, params):
|
||||||
|
end_time = self.segments[-1].stop
|
||||||
|
window_factor = len(self.sentiments) / end_time
|
||||||
|
long_ma = np_moving_average(self.squared_subsample, large_window * window_factor)
|
||||||
|
short_ma = np_moving_average(self.squared_subsample, small_window * window_factor)
|
||||||
|
highlights = find_moving_average_highlights(short_ma, long_ma, 1.0 / window_factor)
|
||||||
|
return highlights, large_window, small_window
|
||||||
|
Loading…
Reference in New Issue
Block a user