Initial commit
This commit is contained in:
commit
6665dec410
162
.gitignore
vendored
Normal file
162
.gitignore
vendored
Normal file
@ -0,0 +1,162 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
||||
.pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
60
main.py
Normal file
60
main.py
Normal file
@ -0,0 +1,60 @@
|
||||
import whisper
|
||||
import ffmpeg
|
||||
import json
|
||||
from pyannote.audio import Pipeline
|
||||
from pyannote.core import Segment
|
||||
|
||||
|
||||
def transcribe_video_to_json(video_path, diarization_pipeline):
    """Transcribe a video with Whisper and tag each segment with a speaker.

    The audio track is extracted to ``audio.wav`` with ffmpeg and handed to
    ``diarization_pipeline``. The video is transcribed with the Whisper
    ``"base"`` model (language fixed to English). Each transcribed segment is
    labelled with the diarization turn that overlaps it the most, and the
    result is written to ``transcription.json`` as a list of objects with the
    keys ``speaker``, ``start_time``, ``end_time`` and ``text``.

    Args:
        video_path: Path of the input video; passed to both ffmpeg and Whisper.
        diarization_pipeline: Callable taking an audio file path and returning
            an object that exposes ``itertracks(yield_label=True)``.

    Returns:
        None. The transcription is written to ``transcription.json`` in the
        current working directory.
    """
    # Load Whisper model
    model = whisper.load_model("base")

    # Extract audio from video
    audio_path = "audio.wav"
    ffmpeg.input(video_path).output(audio_path).run(overwrite_output=True)

    # Perform speaker diarization
    diarization = diarization_pipeline(audio_path)

    # Transcribe the video
    print("transcribing")
    result = model.transcribe(video_path, language="en", verbose=True)
    print("transcribing done")

    # Materialise the speaker turns once instead of re-walking the
    # diarization result for every transcribed segment.
    turns = [
        (turn, speaker_label)
        for turn, _, speaker_label in diarization.itertracks(yield_label=True)
    ]

    # Prepare the JSON data
    json_data = []
    for segment in result['segments']:
        start_time = segment['start']
        end_time = segment['end']
        current_segment = Segment(start_time, end_time)

        # Pick the turn sharing the most time with this segment. Taking the
        # first intersecting turn would mislabel a segment that merely
        # touches the tail of the previous speaker's turn.
        speaker = None
        best_overlap = 0.0
        for turn, speaker_label in turns:
            if not turn.intersects(current_segment):
                continue
            overlap = min(turn.end, end_time) - max(turn.start, start_time)
            # Strict '>' keeps the earliest turn on ties.
            if speaker is None or overlap > best_overlap:
                speaker = speaker_label
                best_overlap = overlap

        if speaker is None:
            speaker = "unknown"  # Handle cases where no speaker is found

        json_data.append({
            "speaker": speaker,
            "start_time": start_time,
            "end_time": end_time,
            "text": segment['text'],
        })

    # Save the transcription to a JSON file. The encoding is pinned so the
    # file does not depend on the platform's default locale.
    with open('transcription.json', 'w', encoding='utf-8') as json_file:
        json.dump(json_data, json_file, indent=4)

    print("Transcription saved to transcription.json")
|
||||
|
||||
|
||||
# Script entry point: load the diarization pipeline and transcribe 'input.mp4'.
if __name__ == "__main__":
    import os

    # The Hugging Face access token is read from the environment so that no
    # credential lives in source control. A token that was previously
    # committed to this file remains in the repository history and should be
    # revoked.
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise SystemExit(
            "Set the HF_TOKEN environment variable to a Hugging Face access "
            "token before running this script."
        )

    # Load the diarization pipeline
    diarization_pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1",
        use_auth_token=hf_token,
    )

    # Run the transcription with diarization
    transcribe_video_to_json('input.mp4', diarization_pipeline)
|
5
pyvenv.cfg
Normal file
5
pyvenv.cfg
Normal file
@ -0,0 +1,5 @@
|
||||
home = /opt/homebrew/opt/python@3.12/bin
|
||||
include-system-site-packages = false
|
||||
version = 3.12.3
|
||||
executable = /opt/homebrew/Cellar/python@3.12/3.12.3/Frameworks/Python.framework/Versions/3.12/bin/python3.12
|
||||
command = /opt/homebrew/opt/python@3.12/bin/python3.12 -m venv /Users/tanishqdubey/projects/diarizejson
|
108
requirements.txt
Normal file
108
requirements.txt
Normal file
@ -0,0 +1,108 @@
|
||||
aiohappyeyeballs==2.3.5
|
||||
aiohttp==3.10.3
|
||||
aiosignal==1.3.1
|
||||
alembic==1.13.2
|
||||
antlr4-python3-runtime==4.9.3
|
||||
asteroid-filterbanks==0.4.0
|
||||
attrs==24.2.0
|
||||
audioread==3.0.1
|
||||
certifi==2024.7.4
|
||||
cffi==1.17.0
|
||||
charset-normalizer==3.3.2
|
||||
click==8.1.7
|
||||
colorlog==6.8.2
|
||||
contourpy==1.2.1
|
||||
cycler==0.12.1
|
||||
decorator==4.4.2
|
||||
docopt==0.6.2
|
||||
einops==0.8.0
|
||||
ffmpeg-python==0.2.0
|
||||
filelock==3.15.4
|
||||
fonttools==4.53.1
|
||||
frozenlist==1.4.1
|
||||
fsspec==2024.6.1
|
||||
future==1.0.0
|
||||
huggingface-hub==0.24.5
|
||||
HyperPyYAML==1.2.2
|
||||
idna==3.7
|
||||
imageio==2.35.0
|
||||
imageio-ffmpeg==0.5.1
|
||||
Jinja2==3.1.4
|
||||
joblib==1.4.2
|
||||
julius==0.2.7
|
||||
kiwisolver==1.4.5
|
||||
lazy_loader==0.4
|
||||
librosa==0.10.2.post1
|
||||
lightning==2.4.0
|
||||
lightning-utilities==0.11.6
|
||||
llvmlite==0.43.0
|
||||
Mako==1.3.5
|
||||
markdown-it-py==3.0.0
|
||||
MarkupSafe==2.1.5
|
||||
matplotlib==3.9.2
|
||||
mdurl==0.1.2
|
||||
more-itertools==10.4.0
|
||||
moviepy==1.0.3
|
||||
mpmath==1.3.0
|
||||
msgpack==1.0.8
|
||||
multidict==6.0.5
|
||||
networkx==3.3
|
||||
numba==0.60.0
|
||||
numpy==1.26.4
|
||||
omegaconf==2.3.0
|
||||
openai-whisper==20231117
|
||||
optuna==3.6.1
|
||||
packaging==24.1
|
||||
pandas==2.2.2
|
||||
pillow==10.4.0
|
||||
platformdirs==4.2.2
|
||||
pooch==1.8.2
|
||||
primePy==1.3
|
||||
proglog==0.1.10
|
||||
protobuf==5.27.3
|
||||
pyannote.audio==3.3.1
|
||||
pyannote.core==5.0.0
|
||||
pyannote.database==5.1.0
|
||||
pyannote.metrics==3.2.1
|
||||
pyannote.pipeline==3.0.1
|
||||
pycparser==2.22
|
||||
Pygments==2.18.0
|
||||
pyparsing==3.1.2
|
||||
python-dateutil==2.9.0.post0
|
||||
pytorch-lightning==2.4.0
|
||||
pytorch-metric-learning==2.6.0
|
||||
pytz==2024.1
|
||||
PyYAML==6.0.2
|
||||
regex==2024.7.24
|
||||
requests==2.32.3
|
||||
rich==13.7.1
|
||||
ruamel.yaml==0.18.6
|
||||
ruamel.yaml.clib==0.2.8
|
||||
scikit-learn==1.5.1
|
||||
scipy==1.14.0
|
||||
semver==3.0.2
|
||||
sentencepiece==0.2.0
|
||||
setuptools==72.1.0
|
||||
shellingham==1.5.4
|
||||
six==1.16.0
|
||||
sortedcontainers==2.4.0
|
||||
soundfile==0.12.1
|
||||
soxr==0.4.0
|
||||
speechbrain==1.0.0
|
||||
SQLAlchemy==2.0.32
|
||||
sympy==1.13.2
|
||||
tabulate==0.9.0
|
||||
tensorboardX==2.6.2.2
|
||||
threadpoolctl==3.5.0
|
||||
tiktoken==0.7.0
|
||||
torch==2.4.0
|
||||
torch-audiomentations==0.11.1
|
||||
torch-pitch-shift==1.2.4
|
||||
torchaudio==2.4.0
|
||||
torchmetrics==1.4.1
|
||||
tqdm==4.66.5
|
||||
typer==0.12.3
|
||||
typing_extensions==4.12.2
|
||||
tzdata==2024.1
|
||||
urllib3==2.2.2
|
||||
yarl==1.9.4
|
56
subtitle.py
Normal file
56
subtitle.py
Normal file
@ -0,0 +1,56 @@
|
||||
import json
|
||||
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
|
||||
from moviepy.video.tools.subtitles import SubtitlesClip
|
||||
from PIL import ImageFont
|
||||
|
||||
# Function to assign colors to speakers
|
||||
def get_speaker_color(speaker, color_map):
    """Return the colour assigned to ``speaker``, assigning one on first use.

    ``color_map`` is updated in place. A new speaker receives an HSL colour
    string whose hue advances by 60 degrees per speaker already in the map,
    wrapping at 360; saturation and lightness are fixed at 100% and 50%.

    Args:
        speaker: Key identifying the speaker.
        color_map: Mutable mapping from speaker to colour string.

    Returns:
        The colour string stored in ``color_map`` for ``speaker``.
    """
    # Known speaker: reuse the colour chosen earlier.
    if speaker in color_map:
        return color_map[speaker]

    # New speaker: derive the hue from how many speakers came before.
    hue = (len(color_map) * 60) % 360
    assigned = f"hsl({hue}, 100%, 50%)"
    color_map[speaker] = assigned
    return assigned
|
||||
|
||||
# Function to parse the JSON and create subtitles
|
||||
def parse_subtitles(json_file):
    """Load a transcript JSON file and convert it into subtitle tuples.

    The file is expected to hold an iterable of objects with the keys
    ``start_time``, ``end_time``, ``speaker`` and ``text``.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        A list of ``((start_time, end_time), "<speaker>: <text>", color)``
        tuples in file order, where ``color`` comes from
        ``get_speaker_color`` using one colour map shared by all entries.
    """
    with open(json_file, 'r') as handle:
        entries = json.load(handle)

    # One palette for the whole file so a speaker keeps the same colour.
    palette = {}

    def as_subtitle(item):
        # Read the keys in a fixed order: timing, speaker, then text.
        begin = item["start_time"]
        finish = item["end_time"]
        who = item["speaker"]
        spoken = item["text"]
        shade = get_speaker_color(who, palette)
        return ((begin, finish), f"{who}: {spoken}", shade)

    return [as_subtitle(item) for item in entries]
|
||||
|
||||
# Function to generate text clips
|
||||
def subtitle_generator(txt, color):
    """Create the text clip used to render one subtitle line.

    Args:
        txt: Text to display.
        color: Text colour passed through to ``TextClip``.

    Returns:
        A ``TextClip`` in 24-point Arial on a black background.
    """
    # Styling shared by every subtitle; only the text colour varies.
    style = {
        "fontsize": 24,
        "font": 'Arial',
        "color": color,
        "bg_color": 'black',
    }
    return TextClip(txt, **style)
|
||||
|
||||
# Main function to burn subtitles into the video
|
||||
def burn_subtitles(input_video, subtitle_json, output_video):
    """Overlay speaker-coloured subtitles on a video and write the result.

    Args:
        input_video: Path of the video to read.
        subtitle_json: Path of the JSON transcript, as accepted by
            ``parse_subtitles``.
        output_video: Path of the video file to write (encoded with libx264).

    Returns:
        None. The composited video is written to ``output_video``.
    """
    # Parse first so a bad transcript fails before the video is opened.
    subtitles_data = parse_subtitles(subtitle_json)

    video = VideoFileClip(input_video)
    try:
        # Create subtitle clips, reusing subtitle_generator so the text
        # styling is defined in a single place.
        subtitle_clips = [
            subtitle_generator(txt, color)
            .set_position(('center', 'bottom'))
            .set_start(start)
            .set_end(end)
            for (start, end), txt, color in subtitles_data
        ]

        # Overlay subtitles on the video
        final = CompositeVideoClip([video] + subtitle_clips)
        final.write_videofile(output_video, codec="libx264")
    finally:
        # Release the source clip's resources even if rendering fails.
        video.close()
|
||||
|
||||
# Example usage: burn the transcript produced by main.py into the video.
if __name__ == "__main__":
    # main.py writes its transcript to 'transcription.json', so read that
    # file here; the previous 'subtitles.json' name is not produced by
    # main.py.
    burn_subtitles('input.mp4', 'transcription.json', 'output.mp4')
|
||||
|
Loading…
x
Reference in New Issue
Block a user