Skip to content

Commit

Permalink
feat: Extract CC from youtube video (#24)
Browse files Browse the repository at this point in the history
  • Loading branch information
xmnlab authored Feb 2, 2024
1 parent 935513b commit 37711f2
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 25 deletions.
23 changes: 16 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,22 @@ $ artbox voice text-to-speech \
--lang en-IN
```

Additionally, if you are using edge-tts, you can specify `--rate`, `--volume`,
and `--pitch`, for example:

```bash
$ echo "Do you want some coffee?" > /tmp/artbox/text.md
$ artbox voice text-to-speech \
--title artbox \
--text-path /tmp/artbox/text.md \
--output-path /tmp/artbox/voice.mp3 \
--engine edge-tts \
--lang en \
--rate +10% \
--volume -10% \
--pitch -5Hz
```

### Download a youtube video

If you want to download videos from YouTube, you can use the following
Expand Down Expand Up @@ -152,10 +168,3 @@ If you want to use Python to play your audio files, you can install `playsound`:
```bash
$ pip wheel --use-pep517 "playsound (==1.3.0)"
```

## Troubleshoot

After installing with `poetry install`:

- Patch `pytube` (ref: https://github.com/pytube/pytube/issues/1773):
`sed -i 's/(r"^$\\w+\\W")/(r"^\\w+\\W")/' $CONDA_PREFIX/lib/python3.*/site-packages/pytube/cipher.py`
16 changes: 16 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,22 @@ $ artbox voice text-to-speech \
--lang en-IN
```

Additionally, if you are using edge-tts, you can specify `--rate`, `--volume`,
and `--pitch`, for example:

```bash
$ echo "Do you want some coffee?" > /tmp/artbox/text.md
$ artbox voice text-to-speech \
--title artbox \
--text-path /tmp/artbox/text.md \
--output-path /tmp/artbox/voice.mp3 \
--engine edge-tts \
--lang en \
--rate +10% \
--volume -10% \
--pitch -5Hz
```

### Download a youtube video

If you want to download videos from YouTube, you can use the following
Expand Down
13 changes: 6 additions & 7 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ exclude = [

[tool.poetry.dependencies]
python = ">3.8.1,<3.12"
pytube = ">=15.0.0"
pycairo = ">=1.24.0"
pygobject = ">=3.44.1"
openai = ">=1"
Expand All @@ -32,6 +31,7 @@ gtts = ">=2.3.2"
edge-tts = ">=6.1.8"
numpy = ">=1.20"
typer = ">=0.9.0"
pytubefix = ">=1.13.3"

[tool.poetry.group.dev.dependencies]
pytest = ">=7.3.2"
Expand Down Expand Up @@ -113,6 +113,6 @@ module = [
"noisereduce",
"pydub",
"pydub.generators",
"pytube",
"pytubefix",
]
ignore_missing_imports = true
66 changes: 60 additions & 6 deletions src/artbox/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def main(


@app_voice.command("text-to-speech")
def text_to_speech(
def voice_text_to_speech(
title: Annotated[
str, typer.Option("--title", help="Specify the name of the audio file")
] = "artbox",
Expand All @@ -93,6 +93,18 @@ def text_to_speech(
"--lang", help="Choose the language for audio generation"
),
] = "en",
rate: Annotated[
str,
typer.Option("--rate", help="Decrease/Increase the rate level"),
] = "+0%",
volume: Annotated[
str,
typer.Option("--volume", help="Decrease/Increase the volume level"),
] = "+0%",
pitch: Annotated[
str,
typer.Option("--pitch", help="Decrease/Increase the pitch level"),
] = "+0Hz",
) -> None:
"""Convert text to speech."""
args_dict = {
Expand All @@ -101,14 +113,17 @@ def text_to_speech(
"output-path": output_path,
"engine": engine,
"lang": lang,
"rate": rate,
"volume": volume,
"pitch": pitch,
}

runner = Voice(args_dict)
runner.text_to_speech()


@app_sound.command("notes-to-audio")
def notes_to_audio(
def sound_notes_to_audio(
input_path: Annotated[
str,
typer.Option(
Expand Down Expand Up @@ -138,7 +153,7 @@ def notes_to_audio(


@app_video.command("remove-audio")
def remove_audio(
def video_remove_audio(
input_path: Annotated[
str,
typer.Option(
Expand All @@ -163,7 +178,7 @@ def remove_audio(


@app_video.command("extract-audio")
def extract_audio(
def video_extract_audio(
input_path: Annotated[
str,
typer.Option(
Expand All @@ -189,7 +204,7 @@ def extract_audio(


@app_video.command("combine-video-and-audio")
def combine_audio_and_video(
def video_combine_audio_and_video(
video_path: Annotated[
str,
typer.Option(
Expand Down Expand Up @@ -222,7 +237,7 @@ def combine_audio_and_video(


@app_youtube.command("download")
def download_youtube_video(
def youtube_download(
url: Annotated[
str,
typer.Option(
Expand Down Expand Up @@ -252,3 +267,42 @@ def download_youtube_video(

runner = Youtube(args_dict)
runner.download()


@app_youtube.command("cc")
def youtube_cc(
    url: Annotated[
        str,
        typer.Option(
            "--url", help="Specify the URL of the YouTube video"
        ),
    ] = "",
    output_path: Annotated[
        str,
        typer.Option(
            "--output-path",
            help=(
                "Specify the path to store the downloaded captions file "
                "(.srt, .txt)"
            ),
        ),
    ] = "/tmp/cc.txt",
    lang: Annotated[
        str,
        typer.Option("--lang", help="Set the CC language to be downloaded"),
    ] = "en",
    format: Annotated[
        str,
        typer.Option("--format", help="Set the CC format (srt, text)"),
    ] = "text",
) -> None:
    """Download youtube video CC."""
    # The parameter is named `format` (shadowing the builtin) so that it
    # lines up with the `--format` option; it is only forwarded below.
    #
    # Keys use the dashed CLI spelling ("output-path"), matching the other
    # commands in this module that build an `args_dict` for their runner.
    args_dict = {
        "url": url,
        "output-path": output_path,
        "lang": lang,
        "format": format,
    }

    # Youtube.download_captions reads "url", "lang" and "format" from the
    # dictionary and writes the captions to the configured output path.
    runner = Youtube(args_dict)
    runner.download_captions()
44 changes: 43 additions & 1 deletion src/artbox/videos.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from abc import abstractmethod

from moviepy.editor import AudioFileClip, VideoFileClip
from pytube import YouTube as PyYouTube
from pytubefix import YouTube as PyYouTube

from artbox.base import ArtBox

Expand All @@ -20,6 +20,27 @@ def download(self):
...


def _convert_srt_to_plain_text(srt_text: str) -> str:
"""
Convert an SRT file to plain text by removing timestamps and formatting.
Parameters
----------
srt_file_path (str): Path to the SRT file.
Returns
-------
str: The extracted plain text from the SRT file.
"""
plain_text = []
# Skip lines that are part of SRT formatting (timestamps, etc.)
for line in srt_text.split("\n"):
if line.strip() and not line.strip().isdigit() and "-->" not in line:
plain_text.append(line.strip())

return "\n".join(plain_text)


class Youtube(DownloadBase):
"""Set of tools for handing videos."""

Expand Down Expand Up @@ -52,6 +73,27 @@ def download(self):
except Exception as e:
print(f"Failed to download video: {e}")

def download_captions(self):
    """
    Download the closed captions of a YouTube video.

    The following keys are read from ``self.args``:

    - ``url``: the video URL (default ``""``).
    - ``lang``: the caption language code (default ``"en"``).
    - ``format``: ``"text"`` to strip the SRT formatting, any other
      value keeps the SRT output (default ``"text"``).

    The captions are written to ``self.output_path``. When no caption
    track is found for the language, a message is printed and nothing
    is written.
    """
    video_url = self.args.get("url", "")
    lang = self.args.get("lang", "en")
    # Local name avoids shadowing the builtin `format`.
    cc_format = self.args.get("format", "text")

    yt = PyYouTube(video_url)

    # Try the "a.<lang>" track first (the original behavior; this is
    # presumably the auto-generated track), then fall back to the plain
    # language code so videos that only have authored captions work too.
    caption = yt.captions.get_by_language_code(f"a.{lang}")
    if not caption:
        caption = yt.captions.get_by_language_code(lang)

    if not caption:
        print(f"No captions found for language {lang}.")
        return

    # Build the final content before opening the file, so a failure
    # while generating/converting does not leave a truncated file.
    cc = caption.generate_srt_captions()
    if cc_format == "text":
        cc = _convert_srt_to_plain_text(cc)

    # Captions frequently contain non-ASCII text; do not depend on the
    # platform default encoding.
    with open(str(self.output_path), "w", encoding="utf-8") as f:
        f.write(cc)
    print("Captions downloaded successfully.")


class Video(ArtBox):
"""Set of tools for handing videos."""
Expand Down
8 changes: 6 additions & 2 deletions src/artbox/voices.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ async def async_text_to_speech(self) -> None:
title: str = self.args.get("title", "")
text_path: str = self.args.get("text-path", "")
lang: str = self.args.get("lang", "en")
rate = self.args.get("rate", "+0%")
volume = self.args.get("volume", "+0%")
pitch = self.args.get("pitch", "+0Hz")

if not title:
raise Exception("Argument `title` not given")
Expand All @@ -96,8 +99,9 @@ async def async_text_to_speech(self) -> None:
communicate = edge_tts.Communicate(
text=text,
voice=random.choice(voice_options)["Name"],
rate="+5%",
volume="+0%",
rate=rate,
volume=volume,
pitch=pitch,
)
with open(self.output_path, "wb") as file:
async for chunk in communicate.stream():
Expand Down

0 comments on commit 37711f2

Please sign in to comment.