Initial commit

This commit is contained in:
Tanishq Dubey 2024-08-13 17:18:24 -04:00
commit 6665dec410
5 changed files with 391 additions and 0 deletions

162
.gitignore vendored Normal file
View File

@@ -0,0 +1,162 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

60
main.py Normal file
View File

@@ -0,0 +1,60 @@
import json
import os

import ffmpeg
import whisper
from pyannote.audio import Pipeline
from pyannote.core import Segment
def transcribe_video_to_json(video_path, diarization_pipeline, output_json='transcription.json'):
    """Transcribe a video with Whisper, attribute speakers via diarization, save JSON.

    Args:
        video_path: Path to the input video file.
        diarization_pipeline: A loaded pyannote speaker-diarization pipeline.
        output_json: Destination path for the JSON transcript
            (defaults to 'transcription.json', the original hard-coded name).

    Writes a list of {"speaker", "start_time", "end_time", "text"} dicts
    to *output_json*.
    """
    # Load Whisper model
    model = whisper.load_model("base")

    # Extract audio from the video; the diarization pipeline needs a plain
    # audio file rather than the video container.
    audio_path = "audio.wav"
    ffmpeg.input(video_path).output(audio_path).run(overwrite_output=True)

    try:
        # Perform speaker diarization on the extracted audio
        diarization = diarization_pipeline(audio_path)

        # Transcribe the video (Whisper extracts audio itself via ffmpeg)
        print("transcribing")
        result = model.transcribe(video_path, language="en", verbose=True)
        print("transcribing done")

        # Materialize the diarization turns once instead of restarting
        # itertracks() for every Whisper segment (was O(segments * turns)
        # iterator restarts).
        turns = list(diarization.itertracks(yield_label=True))

        json_data = []
        for segment in result['segments']:
            start_time = segment['start']
            end_time = segment['end']
            current_segment = Segment(start_time, end_time)

            # First diarization turn overlapping this segment wins.
            speaker = "unknown"  # fallback when no turn overlaps
            for turn, _, speaker_label in turns:
                if turn.intersects(current_segment):
                    speaker = speaker_label
                    break

            json_data.append({
                "speaker": speaker,
                "start_time": start_time,
                "end_time": end_time,
                "text": segment['text']
            })

        # Save the transcription to the requested JSON file
        with open(output_json, 'w') as json_file:
            json.dump(json_data, json_file, indent=4)
        print(f"Transcription saved to {output_json}")
    finally:
        # Remove the temporary extracted audio (was leaked in the original).
        if os.path.exists(audio_path):
            os.remove(audio_path)
# Load the diarization pipeline.
# SECURITY: the Hugging Face access token must come from the environment —
# the original committed a live token ("hf_...") to source control, which
# must be treated as compromised and revoked.
diarization_pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=os.environ.get("HF_TOKEN"),
)

# Run the transcription with diarization
transcribe_video_to_json('input.mp4', diarization_pipeline)

5
pyvenv.cfg Normal file
View File

@@ -0,0 +1,5 @@
home = /opt/homebrew/opt/python@3.12/bin
include-system-site-packages = false
version = 3.12.3
executable = /opt/homebrew/Cellar/python@3.12/3.12.3/Frameworks/Python.framework/Versions/3.12/bin/python3.12
command = /opt/homebrew/opt/python@3.12/bin/python3.12 -m venv /Users/tanishqdubey/projects/diarizejson

108
requirements.txt Normal file
View File

@@ -0,0 +1,108 @@
aiohappyeyeballs==2.3.5
aiohttp==3.10.3
aiosignal==1.3.1
alembic==1.13.2
antlr4-python3-runtime==4.9.3
asteroid-filterbanks==0.4.0
attrs==24.2.0
audioread==3.0.1
certifi==2024.7.4
cffi==1.17.0
charset-normalizer==3.3.2
click==8.1.7
colorlog==6.8.2
contourpy==1.2.1
cycler==0.12.1
decorator==4.4.2
docopt==0.6.2
einops==0.8.0
ffmpeg-python==0.2.0
filelock==3.15.4
fonttools==4.53.1
frozenlist==1.4.1
fsspec==2024.6.1
future==1.0.0
huggingface-hub==0.24.5
HyperPyYAML==1.2.2
idna==3.7
imageio==2.35.0
imageio-ffmpeg==0.5.1
Jinja2==3.1.4
joblib==1.4.2
julius==0.2.7
kiwisolver==1.4.5
lazy_loader==0.4
librosa==0.10.2.post1
lightning==2.4.0
lightning-utilities==0.11.6
llvmlite==0.43.0
Mako==1.3.5
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib==3.9.2
mdurl==0.1.2
more-itertools==10.4.0
moviepy==1.0.3
mpmath==1.3.0
msgpack==1.0.8
multidict==6.0.5
networkx==3.3
numba==0.60.0
numpy==1.26.4
omegaconf==2.3.0
openai-whisper==20231117
optuna==3.6.1
packaging==24.1
pandas==2.2.2
pillow==10.4.0
platformdirs==4.2.2
pooch==1.8.2
primePy==1.3
proglog==0.1.10
protobuf==5.27.3
pyannote.audio==3.3.1
pyannote.core==5.0.0
pyannote.database==5.1.0
pyannote.metrics==3.2.1
pyannote.pipeline==3.0.1
pycparser==2.22
Pygments==2.18.0
pyparsing==3.1.2
python-dateutil==2.9.0.post0
pytorch-lightning==2.4.0
pytorch-metric-learning==2.6.0
pytz==2024.1
PyYAML==6.0.2
regex==2024.7.24
requests==2.32.3
rich==13.7.1
ruamel.yaml==0.18.6
ruamel.yaml.clib==0.2.8
scikit-learn==1.5.1
scipy==1.14.0
semver==3.0.2
sentencepiece==0.2.0
setuptools==72.1.0
shellingham==1.5.4
six==1.16.0
sortedcontainers==2.4.0
soundfile==0.12.1
soxr==0.4.0
speechbrain==1.0.0
SQLAlchemy==2.0.32
sympy==1.13.2
tabulate==0.9.0
tensorboardX==2.6.2.2
threadpoolctl==3.5.0
tiktoken==0.7.0
torch==2.4.0
torch-audiomentations==0.11.1
torch-pitch-shift==1.2.4
torchaudio==2.4.0
torchmetrics==1.4.1
tqdm==4.66.5
typer==0.12.3
typing_extensions==4.12.2
tzdata==2024.1
urllib3==2.2.2
yarl==1.9.4

56
subtitle.py Normal file
View File

@@ -0,0 +1,56 @@
import json
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
from moviepy.video.tools.subtitles import SubtitlesClip
from PIL import ImageFont
# Function to assign colors to speakers
def get_speaker_color(speaker, color_map):
    """Return a stable HSL color string for *speaker*, memoized in *color_map*.

    Hues are spaced 60 degrees apart in order of first appearance, wrapping
    at 360 (so the palette repeats after six distinct speakers).
    """
    if speaker in color_map:
        return color_map[speaker]
    hue = (len(color_map) * 60) % 360
    color = f"hsl({hue}, 100%, 50%)"
    color_map[speaker] = color
    return color
# Function to parse the JSON and create subtitles
def parse_subtitles(json_file):
    """Load the diarized transcript JSON and build subtitle tuples.

    Returns a list of ((start_time, end_time), "speaker: text", color)
    tuples, with one color per speaker assigned via get_speaker_color.
    """
    with open(json_file, 'r') as f:
        entries = json.load(f)

    color_map = {}
    subtitles = []
    for item in entries:
        timing = (item["start_time"], item["end_time"])
        shade = get_speaker_color(item["speaker"], color_map)
        caption = f"{item['speaker']}: {item['text']}"
        subtitles.append((timing, caption, shade))
    return subtitles
# Function to generate text clips
# NOTE(review): this helper is never called anywhere in the file —
# burn_subtitles builds its TextClips inline. Consider wiring it in
# (e.g. via SubtitlesClip) or removing it.
def subtitle_generator(txt, color):
    """Return a 24pt Arial TextClip on a black background in the given color."""
    return TextClip(txt, fontsize=24, font='Arial', color=color, bg_color='black')
# Main function to burn subtitles into the video
def burn_subtitles(input_video, subtitle_json, output_video):
    """Render speaker-colored subtitles from *subtitle_json* onto *input_video*.

    Args:
        input_video: Path to the source video file.
        subtitle_json: JSON file of {speaker, start_time, end_time, text}
            entries (as produced by the transcription step).
        output_video: Path for the rendered H.264 output file.
    """
    video = VideoFileClip(input_video)
    try:
        subtitles_data = parse_subtitles(subtitle_json)

        # One positioned, time-bounded TextClip per subtitle entry.
        subtitle_clips = []
        for ((start, end), txt, color) in subtitles_data:
            subtitle_clip = (TextClip(txt, fontsize=24, color=color, font='Arial', bg_color='black')
                             .set_position(('center', 'bottom'))
                             .set_start(start)
                             .set_end(end))
            subtitle_clips.append(subtitle_clip)

        # Overlay subtitles on the video and render
        final = CompositeVideoClip([video] + subtitle_clips)
        final.write_videofile(output_video, codec="libx264")
    finally:
        # Release the ffmpeg reader (was leaked in the original, which never
        # closed the clip).
        video.close()
# Example usage — runs at import time and expects 'input.mp4' and
# 'subtitles.json' in the current working directory.
# NOTE(review): consider guarding with `if __name__ == "__main__":` so that
# importing this module does not immediately start rendering video.
burn_subtitles('input.mp4', 'subtitles.json', 'output.mp4')