Initial commit
This commit is contained in:
commit
6665dec410
162
.gitignore
vendored
Normal file
162
.gitignore
vendored
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
#poetry.lock
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
#pdm.lock
|
||||||
|
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||||
|
# in version control.
|
||||||
|
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
||||||
|
.pdm.toml
|
||||||
|
.pdm-python
|
||||||
|
.pdm-build/
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
#.idea/
|
60
main.py
Normal file
60
main.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
import json
import os

import ffmpeg
import whisper
from pyannote.audio import Pipeline
from pyannote.core import Segment
|
||||||
|
|
||||||
|
|
||||||
|
def _dominant_speaker(turns, start_time, end_time):
    """Return the speaker label that overlaps ``[start_time, end_time]`` longest.

    Args:
        turns: Sequence of ``(turn, track, label)`` tuples as yielded by
            ``diarization.itertracks(yield_label=True)``.
        start_time: Start of the transcript segment, in seconds.
        end_time: End of the transcript segment, in seconds.

    Returns:
        The label with the largest summed overlap, or ``"unknown"`` when no
        turn intersects the segment.
    """
    current_segment = Segment(start_time, end_time)
    overlap_by_speaker = {}
    for turn, _, speaker_label in turns:
        if not turn.intersects(current_segment):
            continue
        # Length of the shared interval between the turn and the segment.
        overlap = min(turn.end, end_time) - max(turn.start, start_time)
        overlap_by_speaker[speaker_label] = (
            overlap_by_speaker.get(speaker_label, 0.0) + overlap
        )

    if not overlap_by_speaker:
        return "unknown"  # Handle cases where no speaker is found
    return max(overlap_by_speaker, key=overlap_by_speaker.get)


def transcribe_video_to_json(
    video_path,
    diarization_pipeline,
    *,
    model_name="base",
    audio_path="audio.wav",
    output_path="transcription.json",
    language="en",
):
    """Transcribe a video with Whisper and tag every segment with a speaker.

    The audio track is extracted to ``audio_path`` with ffmpeg and passed to
    ``diarization_pipeline``. Each Whisper segment is then attributed to the
    speaker whose diarization turns overlap it the longest, and the result is
    written to ``output_path`` as a JSON list of objects with the keys
    ``speaker``, ``start_time``, ``end_time`` and ``text``.

    Args:
        video_path: Path of the video to transcribe.
        diarization_pipeline: Callable that takes an audio path and returns an
            object exposing ``itertracks(yield_label=True)``.
        model_name: Whisper model to load.
        audio_path: Where the extracted audio is written (overwritten if it
            already exists).
        output_path: Where the JSON transcript is written.
        language: Language code handed to Whisper.
    """
    # Load Whisper model
    model = whisper.load_model(model_name)

    # Extract audio from video
    ffmpeg.input(video_path).output(audio_path).run(overwrite_output=True)

    # Perform speaker diarization; read the tracks once instead of
    # re-iterating the annotation for every transcript segment.
    diarization = diarization_pipeline(audio_path)
    turns = list(diarization.itertracks(yield_label=True))

    # Transcribe the video
    print("transcribing")
    result = model.transcribe(video_path, language=language, verbose=True)
    print("transcribing done")

    # Prepare the JSON data
    json_data = []
    for segment in result["segments"]:
        start_time = segment["start"]
        end_time = segment["end"]
        json_data.append({
            "speaker": _dominant_speaker(turns, start_time, end_time),
            "start_time": start_time,
            "end_time": end_time,
            "text": segment["text"],
        })

    # Save the transcription to a JSON file
    with open(output_path, "w", encoding="utf-8") as json_file:
        json.dump(json_data, json_file, indent=4)

    print(f"Transcription saved to {output_path}")
|
||||||
|
|
||||||
|
|
||||||
|
# Load the diarization pipeline.
# The Hugging Face access token is read from the HF_TOKEN environment variable
# so that no credential lives in the repository. A token that was ever
# committed to source control should be treated as leaked and revoked.
# When HF_TOKEN is unset, None is passed — presumably leaving authentication
# to any login cached by huggingface_hub; verify for your setup.
diarization_pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=os.environ.get("HF_TOKEN"),
)

# Run the transcription with diarization
transcribe_video_to_json('input.mp4', diarization_pipeline)
|
5
pyvenv.cfg
Normal file
5
pyvenv.cfg
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
home = /opt/homebrew/opt/python@3.12/bin
|
||||||
|
include-system-site-packages = false
|
||||||
|
version = 3.12.3
|
||||||
|
executable = /opt/homebrew/Cellar/python@3.12/3.12.3/Frameworks/Python.framework/Versions/3.12/bin/python3.12
|
||||||
|
command = /opt/homebrew/opt/python@3.12/bin/python3.12 -m venv /Users/tanishqdubey/projects/diarizejson
|
108
requirements.txt
Normal file
108
requirements.txt
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
aiohappyeyeballs==2.3.5
|
||||||
|
aiohttp==3.10.3
|
||||||
|
aiosignal==1.3.1
|
||||||
|
alembic==1.13.2
|
||||||
|
antlr4-python3-runtime==4.9.3
|
||||||
|
asteroid-filterbanks==0.4.0
|
||||||
|
attrs==24.2.0
|
||||||
|
audioread==3.0.1
|
||||||
|
certifi==2024.7.4
|
||||||
|
cffi==1.17.0
|
||||||
|
charset-normalizer==3.3.2
|
||||||
|
click==8.1.7
|
||||||
|
colorlog==6.8.2
|
||||||
|
contourpy==1.2.1
|
||||||
|
cycler==0.12.1
|
||||||
|
decorator==4.4.2
|
||||||
|
docopt==0.6.2
|
||||||
|
einops==0.8.0
|
||||||
|
ffmpeg-python==0.2.0
|
||||||
|
filelock==3.15.4
|
||||||
|
fonttools==4.53.1
|
||||||
|
frozenlist==1.4.1
|
||||||
|
fsspec==2024.6.1
|
||||||
|
future==1.0.0
|
||||||
|
huggingface-hub==0.24.5
|
||||||
|
HyperPyYAML==1.2.2
|
||||||
|
idna==3.7
|
||||||
|
imageio==2.35.0
|
||||||
|
imageio-ffmpeg==0.5.1
|
||||||
|
Jinja2==3.1.4
|
||||||
|
joblib==1.4.2
|
||||||
|
julius==0.2.7
|
||||||
|
kiwisolver==1.4.5
|
||||||
|
lazy_loader==0.4
|
||||||
|
librosa==0.10.2.post1
|
||||||
|
lightning==2.4.0
|
||||||
|
lightning-utilities==0.11.6
|
||||||
|
llvmlite==0.43.0
|
||||||
|
Mako==1.3.5
|
||||||
|
markdown-it-py==3.0.0
|
||||||
|
MarkupSafe==2.1.5
|
||||||
|
matplotlib==3.9.2
|
||||||
|
mdurl==0.1.2
|
||||||
|
more-itertools==10.4.0
|
||||||
|
moviepy==1.0.3
|
||||||
|
mpmath==1.3.0
|
||||||
|
msgpack==1.0.8
|
||||||
|
multidict==6.0.5
|
||||||
|
networkx==3.3
|
||||||
|
numba==0.60.0
|
||||||
|
numpy==1.26.4
|
||||||
|
omegaconf==2.3.0
|
||||||
|
openai-whisper==20231117
|
||||||
|
optuna==3.6.1
|
||||||
|
packaging==24.1
|
||||||
|
pandas==2.2.2
|
||||||
|
pillow==10.4.0
|
||||||
|
platformdirs==4.2.2
|
||||||
|
pooch==1.8.2
|
||||||
|
primePy==1.3
|
||||||
|
proglog==0.1.10
|
||||||
|
protobuf==5.27.3
|
||||||
|
pyannote.audio==3.3.1
|
||||||
|
pyannote.core==5.0.0
|
||||||
|
pyannote.database==5.1.0
|
||||||
|
pyannote.metrics==3.2.1
|
||||||
|
pyannote.pipeline==3.0.1
|
||||||
|
pycparser==2.22
|
||||||
|
Pygments==2.18.0
|
||||||
|
pyparsing==3.1.2
|
||||||
|
python-dateutil==2.9.0.post0
|
||||||
|
pytorch-lightning==2.4.0
|
||||||
|
pytorch-metric-learning==2.6.0
|
||||||
|
pytz==2024.1
|
||||||
|
PyYAML==6.0.2
|
||||||
|
regex==2024.7.24
|
||||||
|
requests==2.32.3
|
||||||
|
rich==13.7.1
|
||||||
|
ruamel.yaml==0.18.6
|
||||||
|
ruamel.yaml.clib==0.2.8
|
||||||
|
scikit-learn==1.5.1
|
||||||
|
scipy==1.14.0
|
||||||
|
semver==3.0.2
|
||||||
|
sentencepiece==0.2.0
|
||||||
|
setuptools==72.1.0
|
||||||
|
shellingham==1.5.4
|
||||||
|
six==1.16.0
|
||||||
|
sortedcontainers==2.4.0
|
||||||
|
soundfile==0.12.1
|
||||||
|
soxr==0.4.0
|
||||||
|
speechbrain==1.0.0
|
||||||
|
SQLAlchemy==2.0.32
|
||||||
|
sympy==1.13.2
|
||||||
|
tabulate==0.9.0
|
||||||
|
tensorboardX==2.6.2.2
|
||||||
|
threadpoolctl==3.5.0
|
||||||
|
tiktoken==0.7.0
|
||||||
|
torch==2.4.0
|
||||||
|
torch-audiomentations==0.11.1
|
||||||
|
torch-pitch-shift==1.2.4
|
||||||
|
torchaudio==2.4.0
|
||||||
|
torchmetrics==1.4.1
|
||||||
|
tqdm==4.66.5
|
||||||
|
typer==0.12.3
|
||||||
|
typing_extensions==4.12.2
|
||||||
|
tzdata==2024.1
|
||||||
|
urllib3==2.2.2
|
||||||
|
yarl==1.9.4
|
56
subtitle.py
Normal file
56
subtitle.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
import json
|
||||||
|
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
|
||||||
|
from moviepy.video.tools.subtitles import SubtitlesClip
|
||||||
|
from PIL import ImageFont
|
||||||
|
|
||||||
|
# Function to assign colors to speakers
|
||||||
|
def get_speaker_color(speaker, color_map):
    """Return the colour assigned to ``speaker``, assigning one if needed.

    A speaker not yet in ``color_map`` gets an ``hsl(...)`` string whose hue
    is 60 degrees times the number of speakers already registered, wrapped
    at 360. The new entry is stored in ``color_map`` (mutated in place).

    Args:
        speaker: Key identifying the speaker.
        color_map: Mapping of speaker -> colour string, updated in place.

    Returns:
        The colour string for ``speaker``.
    """
    try:
        return color_map[speaker]
    except KeyError:
        hue = (60 * len(color_map)) % 360
        assigned = f"hsl({hue}, 100%, 50%)"
        color_map[speaker] = assigned
        return assigned
|
||||||
|
|
||||||
|
# Function to parse the JSON and create subtitles
|
||||||
|
def parse_subtitles(json_file):
    """Load transcript entries from a JSON file and build subtitle records.

    The file must contain a list of objects with the keys ``start_time``,
    ``end_time``, ``speaker`` and ``text``.

    Args:
        json_file: Path of the JSON transcript.

    Returns:
        A list of ``((start_time, end_time), "<speaker>: <text>", color)``
        tuples in file order, where ``color`` comes from
        ``get_speaker_color`` and is stable per speaker within one call.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    subtitles = []
    color_map = {}

    for entry in data:
        speaker = entry["speaker"]
        # Transcript text may carry leading/trailing whitespace; strip it so
        # the rendered line does not read "SPEAKER:  text".
        text = str(entry["text"]).strip()

        color = get_speaker_color(speaker, color_map)
        subtitles.append(
            ((entry["start_time"], entry["end_time"]), f"{speaker}: {text}", color)
        )

    return subtitles
|
||||||
|
|
||||||
|
# Function to generate text clips
|
||||||
|
def subtitle_generator(txt, color):
    """Build the text clip for a single subtitle line.

    Args:
        txt: Text to render.
        color: Text colour passed through to ``TextClip``.

    Returns:
        A ``TextClip`` for ``txt`` using 24-point Arial on a black background.
    """
    text_style = {
        "fontsize": 24,
        "font": 'Arial',
        "color": color,
        "bg_color": 'black',
    }
    return TextClip(txt, **text_style)
|
||||||
|
|
||||||
|
# Main function to burn subtitles into the video
|
||||||
|
def burn_subtitles(input_video, subtitle_json, output_video):
    """Render the subtitles from ``subtitle_json`` onto ``input_video``.

    Each subtitle is drawn at the bottom centre of the frame for the time
    span given in the JSON file, and the composited result is encoded with
    libx264 to ``output_video``.

    Args:
        input_video: Path of the source video.
        subtitle_json: Path of the JSON transcript read by ``parse_subtitles``.
        output_video: Path of the video file to write.
    """
    # Parse first so a malformed transcript fails before the video is opened.
    subtitles_data = parse_subtitles(subtitle_json)

    video = VideoFileClip(input_video)
    try:
        # Create subtitle clips via the shared helper so styling is defined
        # in exactly one place.
        subtitle_clips = [
            subtitle_generator(txt, color)
            .set_position(('center', 'bottom'))
            .set_start(start)
            .set_end(end)
            for (start, end), txt, color in subtitles_data
        ]

        # Overlay subtitles on the video
        final = CompositeVideoClip([video] + subtitle_clips)
        final.write_videofile(output_video, codec="libx264")
    finally:
        # Release the source clip's reader even if rendering fails.
        video.close()
|
||||||
|
|
||||||
|
# Example usage: burn the subtitles described by subtitles.json into input.mp4.
# NOTE(review): main.py writes its transcript to transcription.json by default;
# confirm that subtitles.json is the intended input here.
burn_subtitles(
    input_video='input.mp4',
    subtitle_json='subtitles.json',
    output_video='output.mp4',
)
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user