From 6665dec410df70252d045a7bb9584d47de99cc66 Mon Sep 17 00:00:00 2001 From: Tanishq Dubey Date: Tue, 13 Aug 2024 17:18:24 -0400 Subject: [PATCH] Initial commit --- .gitignore | 162 +++++++++++++++++++++++++++++++++++++++++++++++ main.py | 60 ++++++++++++++++++ pyvenv.cfg | 5 ++ requirements.txt | 108 +++++++++++++++++++++++++++++++ subtitle.py | 56 ++++++++++++++++ 5 files changed, 391 insertions(+) create mode 100644 .gitignore create mode 100644 main.py create mode 100644 pyvenv.cfg create mode 100644 requirements.txt create mode 100644 subtitle.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..82f9275 --- /dev/null +++ b/.gitignore @@ -0,0 +1,162 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..3cf24b1
--- /dev/null
+++ b/main.py
@@ -0,0 +1,105 @@
+import json
+import os
+
+import ffmpeg
+import whisper
+from pyannote.audio import Pipeline
+from pyannote.core import Segment
+
+# Name of the environment variable that holds the Hugging Face access token.
+# The token is a credential and must never be committed to the repository.
+HF_TOKEN_ENV_VAR = "HF_TOKEN"
+
+
+def _dominant_speaker(diarization, segment):
+    """Return the label of the diarization turn that best matches *segment*.
+
+    The turn with the longest overlap wins; "unknown" is returned when no
+    turn intersects the segment at all.
+    """
+    best_label = None
+    best_overlap = 0.0
+    for turn, _, speaker_label in diarization.itertracks(yield_label=True):
+        if not turn.intersects(segment):
+            continue
+        overlap = (turn & segment).duration
+        # The first intersecting turn is kept as a fallback (covers
+        # zero-length segments); after that the longest overlap wins.
+        if best_label is None or overlap > best_overlap:
+            best_label = speaker_label
+            best_overlap = overlap
+    return "unknown" if best_label is None else best_label
+
+
+def transcribe_video_to_json(video_path, diarization_pipeline,
+                             output_path="transcription.json",
+                             model_name="base"):
+    """Transcribe a video with Whisper and label each segment's speaker.
+
+    Args:
+        video_path: Path of the video to transcribe.
+        diarization_pipeline: Callable that receives the path of the
+            extracted audio and returns an object exposing
+            ``itertracks(yield_label=True)``.
+        output_path: Destination of the JSON transcription.
+        model_name: Name of the Whisper model to load.
+
+    Side effects: writes the extracted audio to ``audio.wav`` and a JSON
+    list of ``speaker``/``start_time``/``end_time``/``text`` entries to
+    *output_path*.
+    """
+    # Load Whisper model
+    model = whisper.load_model(model_name)
+
+    # Extract audio from video
+    audio_path = "audio.wav"
+    ffmpeg.input(video_path).output(audio_path).run(overwrite_output=True)
+
+    # Perform speaker diarization
+    diarization = diarization_pipeline(audio_path)
+
+    # Transcribe the video
+    print("transcribing")
+    result = model.transcribe(video_path, language="en", verbose=True)
+    print("transcribing done")
+
+    # Prepare the JSON data
+    json_data = [
+        {
+            "speaker": _dominant_speaker(
+                diarization, Segment(segment["start"], segment["end"])
+            ),
+            "start_time": segment["start"],
+            "end_time": segment["end"],
+            "text": segment["text"],
+        }
+        for segment in result["segments"]
+    ]
+
+    # Save the transcription to a JSON file
+    with open(output_path, "w") as json_file:
+        json.dump(json_data, json_file, indent=4)
+
+    print(f"Transcription saved to {output_path}")
+
+
+def main():
+    """Load the diarization pipeline and transcribe ``input.mp4``."""
+    token = os.environ.get(HF_TOKEN_ENV_VAR)
+    if not token:
+        raise SystemExit(
+            f"Set the {HF_TOKEN_ENV_VAR} environment variable to a "
+            "Hugging Face access token before running this script."
+        )
+
+    # Load the diarization pipeline
+    diarization_pipeline = Pipeline.from_pretrained(
+        "pyannote/speaker-diarization-3.1", use_auth_token=token
+    )
+
+    # Run the transcription with diarization
+    transcribe_video_to_json("input.mp4", diarization_pipeline)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyvenv.cfg b/pyvenv.cfg
new file mode 100644
index 0000000..27c7817
--- /dev/null
+++ b/pyvenv.cfg
@@ -0,0 +1,5 @@
+home = /opt/homebrew/opt/python@3.12/bin
+include-system-site-packages = false
+version = 3.12.3
+executable = /opt/homebrew/Cellar/python@3.12/3.12.3/Frameworks/Python.framework/Versions/3.12/bin/python3.12
+command = /opt/homebrew/opt/python@3.12/bin/python3.12 -m venv /Users/tanishqdubey/projects/diarizejson
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..fc5d0f0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,108 @@
+aiohappyeyeballs==2.3.5
+aiohttp==3.10.3
+aiosignal==1.3.1
+alembic==1.13.2
+antlr4-python3-runtime==4.9.3
+asteroid-filterbanks==0.4.0
+attrs==24.2.0
+audioread==3.0.1
+certifi==2024.7.4
+cffi==1.17.0
+charset-normalizer==3.3.2
+click==8.1.7
+colorlog==6.8.2
+contourpy==1.2.1
+cycler==0.12.1
+decorator==4.4.2
+docopt==0.6.2
+einops==0.8.0
+ffmpeg-python==0.2.0
+filelock==3.15.4
+fonttools==4.53.1
+frozenlist==1.4.1
+fsspec==2024.6.1
+future==1.0.0
+huggingface-hub==0.24.5
+HyperPyYAML==1.2.2
+idna==3.7
+imageio==2.35.0
+imageio-ffmpeg==0.5.1
+Jinja2==3.1.4
+joblib==1.4.2
+julius==0.2.7
+kiwisolver==1.4.5
+lazy_loader==0.4
+librosa==0.10.2.post1
+lightning==2.4.0
+lightning-utilities==0.11.6
+llvmlite==0.43.0
+Mako==1.3.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.2
+mdurl==0.1.2
+more-itertools==10.4.0
+moviepy==1.0.3
+mpmath==1.3.0
+msgpack==1.0.8
+multidict==6.0.5
+networkx==3.3
+numba==0.60.0
+numpy==1.26.4
+omegaconf==2.3.0
+openai-whisper==20231117
+optuna==3.6.1
+packaging==24.1
+pandas==2.2.2
+pillow==10.4.0
+platformdirs==4.2.2
+pooch==1.8.2
+primePy==1.3
+proglog==0.1.10
+protobuf==5.27.3
+pyannote.audio==3.3.1
+pyannote.core==5.0.0
+pyannote.database==5.1.0
+pyannote.metrics==3.2.1
+pyannote.pipeline==3.0.1
+pycparser==2.22
+Pygments==2.18.0
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+pytorch-lightning==2.4.0
+pytorch-metric-learning==2.6.0
+pytz==2024.1
+PyYAML==6.0.2
+regex==2024.7.24
+requests==2.32.3
+rich==13.7.1
+ruamel.yaml==0.18.6
+ruamel.yaml.clib==0.2.8
+scikit-learn==1.5.1
+scipy==1.14.0
+semver==3.0.2
+sentencepiece==0.2.0
+setuptools==72.1.0
+shellingham==1.5.4
+six==1.16.0
+sortedcontainers==2.4.0
+soundfile==0.12.1
+soxr==0.4.0
+speechbrain==1.0.0
+SQLAlchemy==2.0.32
+sympy==1.13.2
+tabulate==0.9.0
+tensorboardX==2.6.2.2
+threadpoolctl==3.5.0
+tiktoken==0.7.0
+torch==2.4.0
+torch-audiomentations==0.11.1
+torch-pitch-shift==1.2.4
+torchaudio==2.4.0
+torchmetrics==1.4.1
+tqdm==4.66.5
+typer==0.12.3
+typing_extensions==4.12.2
+tzdata==2024.1
+urllib3==2.2.2
+yarl==1.9.4
diff --git a/subtitle.py b/subtitle.py
new file mode 100644
index 0000000..de6058e
--- /dev/null
+++ b/subtitle.py
@@ -0,0 +1,85 @@
+import json
+from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
+from moviepy.video.tools.subtitles import SubtitlesClip
+from PIL import ImageFont
+
+
+# Function to assign colors to speakers
+def get_speaker_color(speaker, color_map):
+    """Return the color string for *speaker*, assigning one on first use.
+
+    A new speaker gets an ``hsl(...)`` string whose hue is 60 degrees times
+    the number of speakers already in *color_map*, modulo 360. The mapping
+    is stored in *color_map*, which is mutated in place.
+    """
+    if speaker not in color_map:
+        color_map[speaker] = f"hsl({len(color_map) * 60 % 360}, 100%, 50%)"
+    return color_map[speaker]
+
+
+# Function to parse the JSON and create subtitles
+def parse_subtitles(json_file):
+    """Load subtitle entries from *json_file*.
+
+    Each entry must provide ``start_time``, ``end_time``, ``speaker`` and
+    ``text``. Returns a list of ``((start, end), caption, color)`` tuples
+    where the caption is the speaker, a colon, and the trimmed text.
+    """
+    with open(json_file, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+
+    color_map = {}
+    subtitles = []
+    for entry in data:
+        speaker = entry["speaker"]
+        # Trim surrounding whitespace (transcription text may carry a leading
+        # space) so the caption has exactly one space after the colon.
+        caption = f"{speaker}: {entry['text'].strip()}"
+        color = get_speaker_color(speaker, color_map)
+        subtitles.append(
+            ((entry["start_time"], entry["end_time"]), caption, color)
+        )
+
+    return subtitles
+
+
+# Function to generate text clips
+def subtitle_generator(txt, color):
+    """Build the styled ``TextClip`` used for one subtitle line."""
+    return TextClip(txt, fontsize=24, font='Arial', color=color, bg_color='black')
+
+
+# Main function to burn subtitles into the video
+def burn_subtitles(input_video, subtitle_json, output_video):
+    """Overlay the subtitles from *subtitle_json* onto *input_video*.
+
+    The result is encoded with libx264 and written to *output_video*.
+    """
+    subtitles_data = parse_subtitles(subtitle_json)
+
+    video = VideoFileClip(input_video)
+    subtitle_clips = []
+    try:
+        # Create subtitle clips
+        for (start, end), txt, color in subtitles_data:
+            subtitle_clips.append(
+                subtitle_generator(txt, color)
+                .set_position(('center', 'bottom'))
+                .set_start(start)
+                .set_end(end)
+            )
+
+        # Overlay subtitles on the video
+        final = CompositeVideoClip([video] + subtitle_clips)
+        final.write_videofile(output_video, codec="libx264")
+    finally:
+        # Close every clip, even when rendering fails, so the resources
+        # opened for the source video are not leaked.
+        for clip in subtitle_clips:
+            clip.close()
+        video.close()
+
+
+# Example usage; main.py writes its transcription to transcription.json
+if __name__ == "__main__":
+    burn_subtitles('input.mp4', 'transcription.json', 'output.mp4')