Spaces:
Running
Running
yt-wsp.sh : script to easily transcribe VODs
Browse files
Thanks to
@DaniruKun
ref: https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818
Usage:
cd whisper.cpp
make
./examples/yt-wsp.sh <video-url>
- README.md +1 -0
- examples/yt-wsp.sh +132 -0
README.md
CHANGED
|
@@ -466,5 +466,6 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
|
|
| 466 |
| [examples/whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
|
| 467 |
| [examples/generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
|
| 468 |
| [examples/livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
|
|
|
|
| 469 |
|
| 470 |
## [Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
|
|
|
| 466 |
| [examples/whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
|
| 467 |
| [examples/generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
|
| 468 |
| [examples/livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
|
| 469 |
+
| [examples/yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
|
| 470 |
|
| 471 |
## [Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
examples/yt-wsp.sh
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
# Small shell script to more easily automatically download and transcribe live stream VODs.
|
| 4 |
+
# This uses YT-DLP, ffmpeg and the CPP version of Whisper: https://github.com/ggerganov/whisper.cpp
|
| 5 |
+
# Use `./examples/yt-wsp.sh help` to print help info.
|
| 6 |
+
|
| 7 |
+
# MIT License
|
| 8 |
+
|
| 9 |
+
# Copyright (c) 2022 Daniils Petrovs
|
| 10 |
+
|
| 11 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 12 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 13 |
+
# in the Software without restriction, including without limitation the rights
|
| 14 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 15 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 16 |
+
# furnished to do so, subject to the following conditions:
|
| 17 |
+
|
| 18 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 19 |
+
# copies or substantial portions of the Software.
|
| 20 |
+
|
| 21 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 22 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 23 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 24 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 25 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 26 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 27 |
+
# SOFTWARE.
|
| 28 |
+
|
| 29 |
+
# Strict mode:
#   -e           abort on an unhandled non-zero exit status
#   -u           abort on expansion of an unset variable
#   -o pipefail  a pipeline fails if any of its stages fails
#   -E           ERR traps are inherited by functions and subshells
set -Eeuo pipefail

# Configuration. Each value can be overridden from the environment; the text
# after ":-" is the default used when the variable is unset or empty.
# You can find how to download models in the OG repo: https://github.com/ggerganov/whisper.cpp/#usage
MODEL_PATH="${MODEL_PATH:-models/ggml-base.en.bin}" # Set to a multilingual model if you want to translate from foreign lang to en
WHISPER_EXECUTABLE="${WHISPER_EXECUTABLE:-whisper}" # Where to find the whisper.cpp executable
WHISPER_LANG="${WHISPER_LANG:-en}"                  # Set to desired lang to translate from
|
| 35 |
+
|
| 36 |
+
# Print a diagnostic message to stderr.
# Arguments: $1 - message text (optional); backslash escapes such as \n and \t
#                 are expanded
# Outputs:   the message followed by a newline on stderr; nothing on stdout
msg() {
    # printf '%b' expands escapes the same way `echo -e` does, but a message
    # that looks like an echo option ("-n", "-e") is still printed literally.
    printf '%b\n' "${1-}" >&2
}
|
| 39 |
+
|
| 40 |
+
# Remove the temporary download directory and the intermediate audio and
# subtitle files created in the working directory.
# Globals:   temp_dir (read) - directory holding the downloaded VOD; may be
#            unset or empty, in which case only the intermediate files are removed
# Outputs:   a progress message on stderr (via msg)
cleanup() {
    msg "Cleaning up..."
    # Tolerate an unset/empty temp_dir so the function does not abort under
    # `set -u` and can be installed as a trap handler at any point.
    if [[ -n "${temp_dir:-}" ]]; then
        rm -rf -- "${temp_dir}"
    fi
    rm -rf -- "vod-resampled.wav" "vod-resampled.wav.srt"
}
|
| 44 |
+
|
| 45 |
+
# Print usage information to stdout, one line per item.
# The usage line names the script by the path it is invoked with from the
# whisper.cpp checkout root.
print_help() {
    printf '%s\n' \
        "Usage: ./examples/yt-wsp.sh <video_url>" \
        "See configurable env variables in the script" \
        "This will produce an MP4 muxed file called res.mp4 in the working directory" \
        "Requirements: ffmpeg yt-dlp whisper" \
        "Whisper needs to be built into the main binary with make, then you can rename it to something like 'whisper' and add it to your PATH for convenience." \
        "E.g. in the root of Whisper.cpp, run: 'make && cp ./main /usr/local/bin/whisper'"
}
|
| 53 |
+
|
| 54 |
+
# Verify that every external tool the script needs can be found.
# Globals:   WHISPER_EXECUTABLE (read; rewritten to "./main" when the configured
#            name cannot be found but ./main in the current directory can)
# Outputs:   a "<tool> is required" message on stderr for the first missing tool
# Exits:     with status 1 if ffmpeg, yt-dlp or a whisper executable is missing
check_requirements() {
    if ! command -v ffmpeg &>/dev/null; then
        msg "ffmpeg is required (https://ffmpeg.org)."
        exit 1
    fi

    if ! command -v yt-dlp &>/dev/null; then
        msg "yt-dlp is required (https://github.com/yt-dlp/yt-dlp)."
        exit 1
    fi

    if ! command -v "$WHISPER_EXECUTABLE" &>/dev/null; then
        # Fall back to a binary named "main" in the current directory.
        WHISPER_EXECUTABLE="./main"
        if ! command -v "$WHISPER_EXECUTABLE" &>/dev/null; then
            msg "Whisper is required (https://github.com/ggerganov/whisper.cpp)."
            exit 1
        fi
    fi
}
|
| 73 |
+
|
| 74 |
+
# Argument handling: the script expects one positional argument, the VOD URL.
case "${1-}" in
    "")
        # Missing (or empty) URL is a usage error: print help on stderr and fail.
        print_help >&2
        exit 1
        ;;
    help | -h | --help)
        # Help was asked for explicitly: print it on stdout and succeed.
        print_help
        exit 0
        ;;
esac
|
| 83 |
+
|
| 84 |
+
source_url="$1"

check_requirements

# Download into a freshly created, uniquely named directory under the working
# directory. cleanup removes ${temp_dir} recursively, so a fixed name such as
# "tmp" would delete a pre-existing directory of that name.
temp_dir="$(mktemp -d "./yt-wsp-tmp.XXXXXX")"

# Remove intermediate files on exit, including when a step fails under `set -e`.
trap cleanup EXIT

msg "Downloading VOD..."

# Optionally add --cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER] for members only VODs
# "--" ends option parsing so a URL or video id starting with "-" is not
# mistaken for an option.
yt-dlp \
    -f "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best" \
    --embed-thumbnail \
    --embed-chapters \
    --xattrs \
    -o "${temp_dir}/vod.mp4" \
    -- "${source_url}"

msg "Extracting audio and resampling..."

# Convert the audio track to 16 kHz, mono, signed 16-bit little-endian PCM WAV.
ffmpeg -i "${temp_dir}/vod.mp4" \
    -hide_banner \
    -loglevel error \
    -ar 16000 \
    -ac 1 \
    -c:a pcm_s16le \
    -y "vod-resampled.wav"

msg "Transcribing to subtitle file..."
msg "Whisper specified at: ${WHISPER_EXECUTABLE}"

# Quoted so an executable path containing spaces stays a single word.
# Thread count defaults to 8 and can be overridden with WHISPER_THREADS.
# The subtitle output is expected at vod-resampled.wav.srt (read by the next step).
"${WHISPER_EXECUTABLE}" \
    -m "${MODEL_PATH}" \
    -l "${WHISPER_LANG}" \
    -f "vod-resampled.wav" \
    -t "${WHISPER_THREADS:-8}" \
    -osrt \
    --translate

msg "Embedding subtitle track..."

# Copy the existing streams unchanged and add the subtitles as a mov_text track.
ffmpeg -i "${temp_dir}/vod.mp4" \
    -hide_banner \
    -loglevel error \
    -i "vod-resampled.wav.srt" \
    -c copy \
    -c:s mov_text \
    -y res.mp4

cleanup
# Already cleaned up; drop the trap so cleanup does not run a second time on exit.
trap - EXIT

msg "Done! Your finished file is ready: res.mp4"
|