From 37711f24be3158c6df7f0f353def8fb8f41aa261 Mon Sep 17 00:00:00 2001 From: Ivan Ogasawara Date: Fri, 2 Feb 2024 14:40:59 -0400 Subject: [PATCH] feat: Extract CC from youtube video (#24) --- README.md | 23 ++++++++++----- docs/index.md | 16 +++++++++++ poetry.lock | 13 ++++----- pyproject.toml | 4 +-- src/artbox/cli.py | 66 ++++++++++++++++++++++++++++++++++++++++---- src/artbox/videos.py | 44 ++++++++++++++++++++++++++++- src/artbox/voices.py | 8 ++++-- 7 files changed, 149 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 41629bc..33f3300 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,22 @@ $ artbox voice text-to-speech \ --lang en-IN ``` +Additionally, if you are using edge-tts, you can specify `--rate`, `--volume`, +and `--pitch`, for example: + +```bash +$ echo "Do you want some coffee?" > /tmp/artbox/text.md +$ artbox voice text-to-speech \ + --title artbox \ + --text-path /tmp/artbox/text.md \ + --output-path /tmp/artbox/voice.mp3 \ + --engine edge-tts \ + --lang en \ + --rate +10% \ + --volume -10% \ + --pitch -5Hz +``` + ### Download a youtube video If you want to download videos from the youtube, you can use the following @@ -152,10 +168,3 @@ If you want to use Python to play your audio files, you can install `playsound`: ```bash $ pip wheel --use-pep517 "playsound (==1.3.0)" ``` - -## Troubleshoot - -After installing with `poetry install`: - -- Patch `pytube` (ref: https://github.com/pytube/pytube/issues/1773): - `sed -i 's/(r"^$\\w+\\W")/(r"^\\w+\\W")/' $CONDA_PREFIX/lib/python3.*/site-packages/pytube/cipher.py` diff --git a/docs/index.md b/docs/index.md index 41629bc..2b78971 100644 --- a/docs/index.md +++ b/docs/index.md @@ -70,6 +70,22 @@ $ artbox voice text-to-speech \ --lang en-IN ``` +Additionally, if you are using edge-tts, you can specify `--rate`, `--volume`, +and `--pitch`, for example: + +```bash +$ echo "Do you want some coffee?" 
> /tmp/artbox/text.md +$ artbox voice text-to-speech \ + --title artbox \ + --text-path /tmp/artbox/text.md \ + --output-path /tmp/artbox/voice.mp3 \ + --engine edge-tts \ + --lang en \ + --rate +10% \ + --volume -10% \ + --pitch -5Hz +``` + ### Download a youtube video If you want to download videos from the youtube, you can use the following diff --git a/poetry.lock b/poetry.lock index 7f8b49e..5a97ec4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3718,14 +3718,14 @@ files = [ Levenshtein = "0.23.0" [[package]] -name = "pytube" -version = "15.0.0" -description = "Python 3 library for downloading YouTube Videos." +name = "pytubefix" +version = "1.13.3" +description = "Python3 library for downloading YouTube Videos." optional = false python-versions = ">=3.7" files = [ - {file = "pytube-15.0.0-py3-none-any.whl", hash = "sha256:07b9904749e213485780d7eb606e5e5b8e4341aa4dccf699160876da00e12d78"}, - {file = "pytube-15.0.0.tar.gz", hash = "sha256:076052efe76f390dfa24b1194ff821d4e86c17d41cb5562f3a276a8bcbfc9d1d"}, + {file = "pytubefix-1.13.3-py3-none-any.whl", hash = "sha256:66a3a7cc4035961aeab0174a03d196d5a90797f40956b00c88f0204a6df2c62c"}, + {file = "pytubefix-1.13.3.tar.gz", hash = "sha256:e58528907e6afefc502e31175c5d7c72c85d2952e87d20ae05c84197750046f5"}, ] [[package]] @@ -3802,7 +3802,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -5125,4 +5124,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">3.8.1,<3.12" -content-hash = "178f6645fbd1d29010d5ec1d834fc7e6a9af3105f0524773cfb595f6f734b1e3" +content-hash = "507bab25cdfcdb01af5c0d2869dfc46cfd3276f760ce0f702600b20140bf7d24" diff --git a/pyproject.toml b/pyproject.toml index 2e88e21..7ebd99b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,6 @@ exclude = [ [tool.poetry.dependencies] python = ">3.8.1,<3.12" -pytube = ">=15.0.0" pycairo = ">=1.24.0" pygobject = ">=3.44.1" openai = ">=1" @@ -32,6 +31,7 @@ gtts = ">=2.3.2" edge-tts = ">=6.1.8" numpy = ">=1.20" typer = ">=0.9.0" +pytubefix = ">=1.13.3" [tool.poetry.group.dev.dependencies] pytest = ">=7.3.2" @@ -113,6 +113,6 @@ module = [ "noisereduce", "pydub", "pydub.generators", - "pytube", + "pytubefix", ] ignore_missing_imports = true diff --git a/src/artbox/cli.py b/src/artbox/cli.py index 247624d..26e40a3 100644 --- a/src/artbox/cli.py +++ b/src/artbox/cli.py @@ -66,7 +66,7 @@ def main( @app_voice.command("text-to-speech") -def text_to_speech( +def voice_text_to_speech( title: Annotated[ str, typer.Option("--title", help="Specify the name of the audio file") ] = "artbox", @@ -93,6 +93,18 @@ def text_to_speech( "--lang", help="Choose the language for audio generation" ), ] = "en", + rate: Annotated[ + str, + typer.Option("--rate", help="Decrease/Increase the rate level"), + ] = "+0%", + volume: Annotated[ + str, + typer.Option("--volume", help="Decrease/Increase the volume level"), + ] = "+0%", + pitch: Annotated[ + str, + typer.Option("--pitch", 
help="Decrease/Increase the pitch level"), + ] = "+0Hz", ) -> None: """Convert text to speech.""" args_dict = { @@ -101,6 +113,9 @@ def text_to_speech( "output-path": output_path, "engine": engine, "lang": lang, + "rate": rate, + "volume": volume, + "pitch": pitch, } runner = Voice(args_dict) @@ -108,7 +123,7 @@ def text_to_speech( @app_sound.command("notes-to-audio") -def notes_to_audio( +def sound_notes_to_audio( input_path: Annotated[ str, typer.Option( @@ -138,7 +153,7 @@ def notes_to_audio( @app_video.command("remove-audio") -def remove_audio( +def video_remove_audio( input_path: Annotated[ str, typer.Option( @@ -163,7 +178,7 @@ def remove_audio( @app_video.command("extract-audio") -def extract_audio( +def video_extract_audio( input_path: Annotated[ str, typer.Option( @@ -189,7 +204,7 @@ def extract_audio( @app_video.command("combine-video-and-audio") -def combine_audio_and_video( +def video_combine_audio_and_video( video_path: Annotated[ str, typer.Option( @@ -222,7 +237,7 @@ def combine_audio_and_video( @app_youtube.command("download") -def download_youtube_video( +def youtube_download( url: Annotated[ str, typer.Option( @@ -252,3 +267,42 @@ def download_youtube_video( runner = Youtube(args_dict) runner.download() + + +@app_youtube.command("cc") +def youtube_cc( + url: Annotated[ + str, + typer.Option( + "--url", help="Specify the URL of the YouTube video to download" + ), + ] = "", + output_path: Annotated[ + str, + typer.Option( + "--output-path", + help=( + "Specify the path to store the downloaded video file " + "(.srt, .txt)" + ), + ), + ] = "/tmp/cc.txt", + lang: Annotated[ + str, + typer.Option("--lang", help="Set the CC language to be downloaded"), + ] = "en", + format: Annotated[ + str, + typer.Option("--format", help="Set the CC format (srt, text)"), + ] = "text", +) -> None: + """Download youtube video CC.""" + args_dict = { + "url": url, + "output-path": output_path, + "lang": lang, + "format": format, + } + + runner = Youtube(args_dict) + 
runner.download_captions() diff --git a/src/artbox/videos.py b/src/artbox/videos.py index f1f1b09..a89ffae 100644 --- a/src/artbox/videos.py +++ b/src/artbox/videos.py @@ -6,7 +6,7 @@ from abc import abstractmethod from moviepy.editor import AudioFileClip, VideoFileClip -from pytube import YouTube as PyYouTube +from pytubefix import YouTube as PyYouTube from artbox.base import ArtBox @@ -20,6 +20,27 @@ def download(self): ... +def _convert_srt_to_plain_text(srt_text: str) -> str: + """ + Convert SRT text to plain text by removing sequence numbers and timestamps. + + Parameters + ---------- + srt_text (str): Content of the SRT captions. + + Returns + ------- + str: The extracted plain text from the SRT content. + """ + plain_text = [] + # Skip lines that are part of SRT formatting (timestamps, etc.) + for line in srt_text.split("\n"): + if line.strip() and not line.strip().isdigit() and "-->" not in line: + plain_text.append(line.strip()) + + return "\n".join(plain_text) + + class Youtube(DownloadBase): """Set of tools for handing videos.""" @@ -52,6 +73,27 @@ def download(self): except Exception as e: print(f"Failed to download video: {e}") + def download_captions(self): + """Download the closed captions of a YouTube video in the given language.""" + video_url = self.args.get("url", "") + lang = self.args.get("lang", "en") + format = self.args.get("format", "text") + + yt = PyYouTube(video_url) + caption = yt.captions.get_by_language_code(f"a.{lang}") + + if not caption: + print(f"No captions found for language {lang}.") + return + + # Save the captions to a file + cc = caption.generate_srt_captions() + with open(str(self.output_path), "w") as f: + if format == "text": + cc = _convert_srt_to_plain_text(cc) + f.write(cc) + print("Captions downloaded successfully.") + class Video(ArtBox): """Set of tools for handing videos.""" diff --git a/src/artbox/voices.py b/src/artbox/voices.py index c9ecd7e..420386f 100644 --- a/src/artbox/voices.py +++ b/src/artbox/voices.py @@ -79,6 +79,9 @@ async
def async_text_to_speech(self) -> None: title: str = self.args.get("title", "") text_path: str = self.args.get("text-path", "") lang: str = self.args.get("lang", "en") + rate = self.args.get("rate", "+0%") + volume = self.args.get("volume", "+0%") + pitch = self.args.get("pitch", "+0Hz") if not title: raise Exception("Argument `title` not given") @@ -96,8 +99,9 @@ async def async_text_to_speech(self) -> None: communicate = edge_tts.Communicate( text=text, voice=random.choice(voice_options)["Name"], - rate="+5%", - volume="+0%", + rate=rate, + volume=volume, + pitch=pitch, ) with open(self.output_path, "wb") as file: async for chunk in communicate.stream():