ggerganov committed on
Commit
a7c58c8
·
unverified ·
1 Parent(s): 8526ad3

yt-wsp.sh : script to easily transcribe VODs

Browse files

Thanks to

@DaniruKun

ref: https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818

Usage:

cd whisper.cpp
make

./examples/yt-wsp.sh <video-url>

Files changed (2) hide show
  1. README.md +1 -0
  2. examples/yt-wsp.sh +132 -0
README.md CHANGED
@@ -466,5 +466,6 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
466
  | [examples/whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
467
  | [examples/generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
468
  | [examples/livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
 
469
 
470
  ## [Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126)
 
466
  | [examples/whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
467
  | [examples/generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
468
  | [examples/livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
469
+ | [examples/yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
470
 
471
  ## [Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126)
examples/yt-wsp.sh ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ # Small shell script that automates downloading and transcribing live stream VODs.
4
+ # This uses YT-DLP, ffmpeg and the CPP version of Whisper: https://github.com/ggerganov/whisper.cpp
5
+ # Use `./examples/yt-wsp.sh help` to print help info.
6
+
7
+ # MIT License
8
+
9
+ # Copyright (c) 2022 Daniils Petrovs
10
+
11
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ # of this software and associated documentation files (the "Software"), to deal
13
+ # in the Software without restriction, including without limitation the rights
14
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ # copies of the Software, and to permit persons to whom the Software is
16
+ # furnished to do so, subject to the following conditions:
17
+
18
+ # The above copyright notice and this permission notice shall be included in all
19
+ # copies or substantial portions of the Software.
20
+
21
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ # SOFTWARE.
28
+
29
# Strict mode:
#   errtrace - ERR traps are inherited by functions and subshells
#   errexit  - exit when a command fails
#   nounset  - treat expansion of an unset variable as an error
#   pipefail - a pipeline fails if any of its stages fails
set -o errtrace -o errexit -o nounset -o pipefail

# The settings below can be overridden from the environment; each default is
# applied only when the variable is unset or empty.
# Model download instructions: https://github.com/ggerganov/whisper.cpp/#usage

# Model file to load. Set to a multilingual model if you want to translate
# from a foreign language to English.
: "${MODEL_PATH:=models/ggml-base.en.bin}"

# Where to find the whisper.cpp executable.
: "${WHISPER_EXECUTABLE:=whisper}"

# Language to translate from.
: "${WHISPER_LANG:=en}"
35
+
36
#######################################
# Print a status message to stderr, keeping stdout free for real output.
# Arguments: $1 - message text (optional; an empty line is printed when
#                 omitted). Backslash escapes such as \n or \t are expanded.
# Outputs:   the message followed by a newline, on stderr
#######################################
msg() {
  # printf '%b' expands escapes like `echo -e` does, but never mistakes the
  # message for an option: `echo -e "-n"` would print nothing at all.
  printf '%b\n' "${1-}" >&2
}
39
+
40
#######################################
# Remove the intermediate files produced by a run.
# Globals:   temp_dir (read) - download directory; skipped when unset/empty
# Outputs:   a progress message on stderr (via msg)
# Removes:   ${temp_dir}, plus vod-resampled.wav and vod-resampled.wav.srt
#            in the current working directory
#######################################
cleanup() {
  msg "Cleaning up..."

  # Guard the expansion: under `set -u` an unset temp_dir would abort the
  # script, and an empty path must never be handed to `rm -rf`. This keeps the
  # function safe to call before temp_dir has been assigned.
  if [[ -n "${temp_dir:-}" ]]; then
    rm -rf -- "${temp_dir}"
  fi

  rm -rf -- "vod-resampled.wav" "vod-resampled.wav.srt"
}
44
+
45
#######################################
# Print usage information.
# Outputs:   help text on stdout
#######################################
print_help() {
  # Show the name the script was actually invoked as, rather than a
  # hard-coded name that does not match this file.
  printf '%s\n' \
    "Usage: ${0} <video_url>" \
    "See configurable env variables in the script:" \
    "  MODEL_PATH          model file to use" \
    "  WHISPER_EXECUTABLE  whisper.cpp executable" \
    "  WHISPER_LANG        language to translate from" \
    "This will produce an MP4 muxed file called res.mp4 in the working directory" \
    "Requirements: ffmpeg yt-dlp whisper" \
    "Whisper needs to be built into the main binary with make, then you can rename it to something like 'whisper' and add it to your PATH for convenience." \
    "E.g. in the root of Whisper.cpp, run: 'make && cp ./main /usr/local/bin/whisper'"
}
53
+
54
#######################################
# Verify that the external tools the script relies on are available.
# Globals:   WHISPER_EXECUTABLE (read; rewritten to "./main" when the
#            configured name cannot be found but ./main can)
# Outputs:   an error message on stderr when a tool is missing
# Exits:     1 when ffmpeg, yt-dlp or a whisper executable is missing
#######################################
check_requirements() {
  # Diagnostics go to stderr so they are not mistaken for regular output.
  if ! command -v ffmpeg &>/dev/null; then
    printf '%s\n' "ffmpeg is required (https://ffmpeg.org)." >&2
    exit 1
  fi

  if ! command -v yt-dlp &>/dev/null; then
    printf '%s\n' "yt-dlp is required (https://github.com/yt-dlp/yt-dlp)." >&2
    exit 1
  fi

  if ! command -v "${WHISPER_EXECUTABLE}" &>/dev/null; then
    # Fall back to a freshly built binary in the current directory.
    WHISPER_EXECUTABLE="./main"
    if ! command -v "${WHISPER_EXECUTABLE}" &>/dev/null; then
      printf '%s\n' "Whisper is required (https://github.com/ggerganov/whisper.cpp)." >&2
      exit 1
    fi
  fi
}
73
+
74
# ---------------------------------------------------------------------------
# Entry point: download the VOD, extract 16 kHz mono audio, transcribe it to
# an SRT file with whisper.cpp, and mux the subtitles into res.mp4.
# Arguments: $1 - video URL, or one of help / -h / --help
# ---------------------------------------------------------------------------

if [[ $# -lt 1 ]]; then
  print_help
  exit 1
fi

case "$1" in
  help | -h | --help)
    print_help
    exit 0
    ;;
esac

source_url="$1"

check_requirements

# Use a unique scratch directory instead of a fixed "tmp": cleanup removes it
# recursively, which would otherwise delete a pre-existing ./tmp. It is created
# inside the working directory, next to the other intermediate files.
temp_dir="$(mktemp -d "${PWD}/yt-wsp.XXXXXX")"

# Under `set -e` any failing step ends the script immediately; the trap makes
# sure the intermediate files are still removed in that case.
trap cleanup EXIT

msg "Downloading VOD..."

# Optionally add --cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER] for members only VODs
# `--` stops option parsing so the URL can never be read as a yt-dlp flag.
yt-dlp \
  -f "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best" \
  --embed-thumbnail \
  --embed-chapters \
  --xattrs \
  -o "${temp_dir}/vod.mp4" \
  -- "${source_url}"

msg "Extracting audio and resampling..."

# 16 kHz, mono, 16-bit PCM WAV.
ffmpeg \
  -hide_banner \
  -loglevel error \
  -i "${temp_dir}/vod.mp4" \
  -ar 16000 \
  -ac 1 \
  -c:a pcm_s16le \
  -y "vod-resampled.wav"

msg "Transcribing to subtitle file..."
msg "Whisper specified at: ${WHISPER_EXECUTABLE}"

# Quoted so an executable path containing spaces still works. The thread
# count defaults to 8 and can be overridden with WHISPER_THREADS.
# -osrt makes whisper write the subtitles next to the input file as
# vod-resampled.wav.srt; --translate is always passed.
"${WHISPER_EXECUTABLE}" \
  -m "${MODEL_PATH}" \
  -l "${WHISPER_LANG}" \
  -f "vod-resampled.wav" \
  -t "${WHISPER_THREADS:-8}" \
  -osrt \
  --translate

msg "Embedding subtitle track..."

# Copy the audio/video streams as-is; only the subtitle stream is encoded
# (as mov_text).
ffmpeg \
  -hide_banner \
  -loglevel error \
  -i "${temp_dir}/vod.mp4" \
  -i "vod-resampled.wav.srt" \
  -c copy \
  -c:s mov_text \
  -y res.mp4

# Clean up explicitly so the message order stays "Cleaning up..." then
# "Done!", and disarm the trap so cleanup does not run a second time on exit.
cleanup
trap - EXIT

msg "Done! Your finished file is ready: res.mp4"