Initial commit

This commit is contained in:
Tanishq Dubey 2024-08-13 17:18:24 -04:00
commit 6665dec410
5 changed files with 391 additions and 0 deletions

162
.gitignore vendored Normal file
View File

@@ -0,0 +1,162 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

60
main.py Normal file
View File

@@ -0,0 +1,60 @@
import json
import os

import ffmpeg
import whisper
from pyannote.audio import Pipeline
from pyannote.core import Segment
def transcribe_video_to_json(video_path, diarization_pipeline, output_json='transcription.json'):
    """Transcribe a video with Whisper, attribute speakers via diarization, save JSON.

    Args:
        video_path: Path to the input video file.
        diarization_pipeline: A loaded pyannote speaker-diarization pipeline.
        output_json: Destination path for the JSON transcript
            (defaults to 'transcription.json', the original hard-coded name).

    Writes a list of {"speaker", "start_time", "end_time", "text"} dicts
    to *output_json*.
    """
    # Load Whisper model
    model = whisper.load_model("base")

    # Extract audio from the video; the diarization pipeline needs a plain
    # audio file rather than the video container.
    audio_path = "audio.wav"
    ffmpeg.input(video_path).output(audio_path).run(overwrite_output=True)

    try:
        # Perform speaker diarization on the extracted audio
        diarization = diarization_pipeline(audio_path)

        # Transcribe the video (Whisper extracts audio itself via ffmpeg)
        print("transcribing")
        result = model.transcribe(video_path, language="en", verbose=True)
        print("transcribing done")

        # Materialize the diarization turns once instead of restarting
        # itertracks() for every Whisper segment (was O(segments * turns)
        # iterator restarts).
        turns = list(diarization.itertracks(yield_label=True))

        json_data = []
        for segment in result['segments']:
            start_time = segment['start']
            end_time = segment['end']
            current_segment = Segment(start_time, end_time)

            # First diarization turn overlapping this segment wins.
            speaker = "unknown"  # fallback when no turn overlaps
            for turn, _, speaker_label in turns:
                if turn.intersects(current_segment):
                    speaker = speaker_label
                    break

            json_data.append({
                "speaker": speaker,
                "start_time": start_time,
                "end_time": end_time,
                "text": segment['text']
            })

        # Save the transcription to the requested JSON file
        with open(output_json, 'w') as json_file:
            json.dump(json_data, json_file, indent=4)
        print(f"Transcription saved to {output_json}")
    finally:
        # Remove the temporary extracted audio (was leaked in the original).
        if os.path.exists(audio_path):
            os.remove(audio_path)
# Load the diarization pipeline.
# SECURITY: the Hugging Face access token must come from the environment —
# the original committed a live token ("hf_...") to source control, which
# must be treated as compromised and revoked.
diarization_pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=os.environ.get("HF_TOKEN"),
)

# Run the transcription with diarization
transcribe_video_to_json('input.mp4', diarization_pipeline)

5
pyvenv.cfg Normal file
View File

@@ -0,0 +1,5 @@
home = /opt/homebrew/opt/python@3.12/bin
include-system-site-packages = false
version = 3.12.3
executable = /opt/homebrew/Cellar/python@3.12/3.12.3/Frameworks/Python.framework/Versions/3.12/bin/python3.12
command = /opt/homebrew/opt/python@3.12/bin/python3.12 -m venv /Users/tanishqdubey/projects/diarizejson

108
requirements.txt Normal file
View File

@@ -0,0 +1,108 @@
aiohappyeyeballs==2.3.5
aiohttp==3.10.3
aiosignal==1.3.1
alembic==1.13.2
antlr4-python3-runtime==4.9.3
asteroid-filterbanks==0.4.0
attrs==24.2.0
audioread==3.0.1
certifi==2024.7.4
cffi==1.17.0
charset-normalizer==3.3.2
click==8.1.7
colorlog==6.8.2
contourpy==1.2.1
cycler==0.12.1
decorator==4.4.2
docopt==0.6.2
einops==0.8.0
ffmpeg-python==0.2.0
filelock==3.15.4
fonttools==4.53.1
frozenlist==1.4.1
fsspec==2024.6.1
future==1.0.0
huggingface-hub==0.24.5
HyperPyYAML==1.2.2
idna==3.7
imageio==2.35.0
imageio-ffmpeg==0.5.1
Jinja2==3.1.4
joblib==1.4.2
julius==0.2.7
kiwisolver==1.4.5
lazy_loader==0.4
librosa==0.10.2.post1
lightning==2.4.0
lightning-utilities==0.11.6
llvmlite==0.43.0
Mako==1.3.5
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib==3.9.2
mdurl==0.1.2
more-itertools==10.4.0
moviepy==1.0.3
mpmath==1.3.0
msgpack==1.0.8
multidict==6.0.5
networkx==3.3
numba==0.60.0
numpy==1.26.4
omegaconf==2.3.0
openai-whisper==20231117
optuna==3.6.1
packaging==24.1
pandas==2.2.2
pillow==10.4.0
platformdirs==4.2.2
pooch==1.8.2
primePy==1.3
proglog==0.1.10
protobuf==5.27.3
pyannote.audio==3.3.1
pyannote.core==5.0.0
pyannote.database==5.1.0
pyannote.metrics==3.2.1
pyannote.pipeline==3.0.1
pycparser==2.22
Pygments==2.18.0
pyparsing==3.1.2
python-dateutil==2.9.0.post0
pytorch-lightning==2.4.0
pytorch-metric-learning==2.6.0
pytz==2024.1
PyYAML==6.0.2
regex==2024.7.24
requests==2.32.3
rich==13.7.1
ruamel.yaml==0.18.6
ruamel.yaml.clib==0.2.8
scikit-learn==1.5.1
scipy==1.14.0
semver==3.0.2
sentencepiece==0.2.0
setuptools==72.1.0
shellingham==1.5.4
six==1.16.0
sortedcontainers==2.4.0
soundfile==0.12.1
soxr==0.4.0
speechbrain==1.0.0
SQLAlchemy==2.0.32
sympy==1.13.2
tabulate==0.9.0
tensorboardX==2.6.2.2
threadpoolctl==3.5.0
tiktoken==0.7.0
torch==2.4.0
torch-audiomentations==0.11.1
torch-pitch-shift==1.2.4
torchaudio==2.4.0
torchmetrics==1.4.1
tqdm==4.66.5
typer==0.12.3
typing_extensions==4.12.2
tzdata==2024.1
urllib3==2.2.2
yarl==1.9.4

56
subtitle.py Normal file
View File

@@ -0,0 +1,56 @@
import json
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
from moviepy.video.tools.subtitles import SubtitlesClip
from PIL import ImageFont
# Function to assign colors to speakers
def get_speaker_color(speaker, color_map):
    """Return a stable HSL color string for *speaker*, memoized in *color_map*.

    Hues are spaced 60 degrees apart in order of first appearance, wrapping
    at 360 (so the palette repeats after six distinct speakers).
    """
    if speaker in color_map:
        return color_map[speaker]
    hue = (len(color_map) * 60) % 360
    color = f"hsl({hue}, 100%, 50%)"
    color_map[speaker] = color
    return color
# Function to parse the JSON and create subtitles
def parse_subtitles(json_file):
    """Load the diarized transcript JSON and build subtitle tuples.

    Returns a list of ((start_time, end_time), "speaker: text", color)
    tuples, with one color per speaker assigned via get_speaker_color.
    """
    with open(json_file, 'r') as f:
        entries = json.load(f)

    color_map = {}
    subtitles = []
    for item in entries:
        timing = (item["start_time"], item["end_time"])
        shade = get_speaker_color(item["speaker"], color_map)
        caption = f"{item['speaker']}: {item['text']}"
        subtitles.append((timing, caption, shade))
    return subtitles
# Function to generate text clips
# NOTE(review): this helper is never called anywhere in the file —
# burn_subtitles builds its TextClips inline. Consider wiring it in
# (e.g. via SubtitlesClip) or removing it.
def subtitle_generator(txt, color):
    """Return a 24pt Arial TextClip on a black background in the given color."""
    return TextClip(txt, fontsize=24, font='Arial', color=color, bg_color='black')
# Main function to burn subtitles into the video
def burn_subtitles(input_video, subtitle_json, output_video):
    """Render speaker-colored subtitles from *subtitle_json* onto *input_video*.

    Args:
        input_video: Path to the source video file.
        subtitle_json: JSON file of {speaker, start_time, end_time, text}
            entries (as produced by the transcription step).
        output_video: Path for the rendered H.264 output file.
    """
    video = VideoFileClip(input_video)
    try:
        subtitles_data = parse_subtitles(subtitle_json)

        # One positioned, time-bounded TextClip per subtitle entry.
        subtitle_clips = []
        for ((start, end), txt, color) in subtitles_data:
            subtitle_clip = (TextClip(txt, fontsize=24, color=color, font='Arial', bg_color='black')
                             .set_position(('center', 'bottom'))
                             .set_start(start)
                             .set_end(end))
            subtitle_clips.append(subtitle_clip)

        # Overlay subtitles on the video and render
        final = CompositeVideoClip([video] + subtitle_clips)
        final.write_videofile(output_video, codec="libx264")
    finally:
        # Release the ffmpeg reader (was leaked in the original, which never
        # closed the clip).
        video.close()
# Example usage — runs at import time and expects 'input.mp4' and
# 'subtitles.json' in the current working directory.
# NOTE(review): consider guarding with `if __name__ == "__main__":` so that
# importing this module does not immediately start rendering video.
burn_subtitles('input.mp4', 'subtitles.json', 'output.mp4')