Spaces:
Running
Running
Update README.md and simplify usage
Browse files- Makefile +11 -73
- README.md +22 -6
- download-ggml-model.sh +59 -0
- main.cpp +5 -3
Makefile
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
main: ggml.o main.o
|
| 2 |
g++ -o main ggml.o main.o
|
|
|
|
| 3 |
|
| 4 |
ggml.o: ggml.c ggml.h
|
| 5 |
gcc -O3 -mavx -mavx2 -mfma -mf16c -c ggml.c
|
|
@@ -11,11 +12,7 @@ main.o: main.cpp ggml.h
|
|
| 11 |
clean:
|
| 12 |
rm -f *.o main
|
| 13 |
|
| 14 |
-
#
|
| 15 |
-
run: main
|
| 16 |
-
./main
|
| 17 |
-
|
| 18 |
-
# download the following audio samples into folder "./samples":
|
| 19 |
.PHONY: samples
|
| 20 |
samples:
|
| 21 |
@echo "Downloading samples..."
|
|
@@ -28,79 +25,20 @@ samples:
|
|
| 28 |
@ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
|
| 29 |
@ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
|
| 30 |
|
| 31 |
-
.PHONY: tiny.en
|
| 32 |
-
tiny.en: main
|
| 33 |
-
@echo "Downloading tiny.en (75 MB just once)"
|
| 34 |
-
@mkdir -p models
|
| 35 |
-
@if [ ! -f models/ggml-tiny.en.bin ]; then \
|
| 36 |
-
wget --quiet --show-progress -O models/ggml-tiny.en.bin https://ggml.ggerganov.com/ggml-model-whisper-tiny.en.bin ; \
|
| 37 |
-
fi
|
| 38 |
-
@echo ""
|
| 39 |
-
@echo "==============================================="
|
| 40 |
-
@echo "Running tiny.en on all samples in ./samples ..."
|
| 41 |
-
@echo "==============================================="
|
| 42 |
-
@echo ""
|
| 43 |
-
@for f in samples/*.wav; do \
|
| 44 |
-
echo "----------------------------------------------" ; \
|
| 45 |
-
echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
|
| 46 |
-
echo "----------------------------------------------" ; \
|
| 47 |
-
echo "" ; \
|
| 48 |
-
./main -m models/ggml-tiny.en.bin -f $$f ; \
|
| 49 |
-
echo "" ; \
|
| 50 |
-
done
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
@echo "Downloading base.en (142 MB just once)"
|
| 55 |
-
@mkdir -p models
|
| 56 |
-
@if [ ! -f models/ggml-base.en.bin ]; then \
|
| 57 |
-
wget --quiet --show-progress -O models/ggml-base.en.bin https://ggml.ggerganov.com/ggml-model-whisper-base.en.bin ; \
|
| 58 |
-
fi
|
| 59 |
-
@echo ""
|
| 60 |
-
@echo "==============================================="
|
| 61 |
-
@echo "Running base.en on all samples in ./samples ..."
|
| 62 |
-
@echo "==============================================="
|
| 63 |
-
@echo ""
|
| 64 |
-
@for f in samples/*.wav; do \
|
| 65 |
-
echo "----------------------------------------------" ; \
|
| 66 |
-
echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
|
| 67 |
-
echo "----------------------------------------------" ; \
|
| 68 |
-
echo "" ; \
|
| 69 |
-
./main -m models/ggml-base.en.bin -f $$f ; \
|
| 70 |
-
echo "" ; \
|
| 71 |
-
done
|
| 72 |
|
|
|
|
|
|
|
|
|
|
| 73 |
.PHONY: small.en
|
| 74 |
-
small.en: main
|
| 75 |
-
@echo "Downloading small.en (466 MB just once)"
|
| 76 |
-
@mkdir -p models
|
| 77 |
-
@if [ ! -f models/ggml-small.en.bin ]; then \
|
| 78 |
-
wget --quiet --show-progress -O models/ggml-small.en.bin https://ggml.ggerganov.com/ggml-model-whisper-small.en.bin ; \
|
| 79 |
-
fi
|
| 80 |
-
@echo ""
|
| 81 |
-
@echo "==============================================="
|
| 82 |
-
@echo "Running small.en on all samples in ./samples ..."
|
| 83 |
-
@echo "==============================================="
|
| 84 |
-
@echo ""
|
| 85 |
-
@for f in samples/*.wav; do \
|
| 86 |
-
echo "----------------------------------------------" ; \
|
| 87 |
-
echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
|
| 88 |
-
echo "----------------------------------------------" ; \
|
| 89 |
-
echo "" ; \
|
| 90 |
-
./main -m models/ggml-small.en.bin -f $$f ; \
|
| 91 |
-
echo "" ; \
|
| 92 |
-
done
|
| 93 |
|
| 94 |
-
.
|
| 95 |
-
|
| 96 |
-
@echo "Downloading medium.en (1.5 GB just once)"
|
| 97 |
-
@mkdir -p models
|
| 98 |
-
@if [ ! -f models/ggml-medium.en.bin ]; then \
|
| 99 |
-
wget --quiet --show-progress -O models/ggml-medium.en.bin https://ggml.ggerganov.com/ggml-model-whisper-medium.en.bin ; \
|
| 100 |
-
fi
|
| 101 |
@echo ""
|
| 102 |
@echo "==============================================="
|
| 103 |
-
@echo "Running
|
| 104 |
@echo "==============================================="
|
| 105 |
@echo ""
|
| 106 |
@for f in samples/*.wav; do \
|
|
@@ -108,6 +46,6 @@ medium.en: main
|
|
| 108 |
echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
|
| 109 |
echo "----------------------------------------------" ; \
|
| 110 |
echo "" ; \
|
| 111 |
-
./main -m models/ggml
|
| 112 |
echo "" ; \
|
| 113 |
done
|
|
|
|
| 1 |
main: ggml.o main.o
|
| 2 |
g++ -o main ggml.o main.o
|
| 3 |
+
./main -h
|
| 4 |
|
| 5 |
ggml.o: ggml.c ggml.h
|
| 6 |
gcc -O3 -mavx -mavx2 -mfma -mf16c -c ggml.c
|
|
|
|
| 12 |
clean:
|
| 13 |
rm -f *.o main
|
| 14 |
|
| 15 |
+
# download a few audio samples into folder "./samples":
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
.PHONY: samples
|
| 17 |
samples:
|
| 18 |
@echo "Downloading samples..."
|
|
|
|
| 25 |
@ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
|
| 26 |
@ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
# if not already downloaded, the following targets download the specified model and
|
| 30 |
+
# run it on all samples in the folder "./samples":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
+
.PHONY: tiny.en
|
| 33 |
+
.PHONY: base.en
|
| 34 |
+
.PHONY: medium.en
|
| 35 |
.PHONY: small.en
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
+
tiny.en base.en medium.en small.en: main
|
| 38 |
+
bash ./download-ggml-model.sh $@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
@echo ""
|
| 40 |
@echo "==============================================="
|
| 41 |
+
@echo "Running $@ on all samples in ./samples ..."
|
| 42 |
@echo "==============================================="
|
| 43 |
@echo ""
|
| 44 |
@for f in samples/*.wav; do \
|
|
|
|
| 46 |
echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
|
| 47 |
echo "----------------------------------------------" ; \
|
| 48 |
echo "" ; \
|
| 49 |
+
./main -m models/ggml-$@.bin -f $$f ; \
|
| 50 |
echo "" ; \
|
| 51 |
done
|
README.md
CHANGED
|
@@ -8,6 +8,20 @@ C/C++ port of [OpenAI's Whisper](https://github.com/openai/whisper) speech-to-te
|
|
| 8 |
|
| 9 |
## Usage
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
For a quick demo, simply run `make base.en`:
|
| 12 |
|
| 13 |
```bash
|
|
@@ -97,12 +111,12 @@ ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
|
|
| 97 |
|
| 98 |
## Memory usage
|
| 99 |
|
| 100 |
-
| Model | Mem |
|
| 101 |
-
| --- | --- |
|
| 102 |
-
| tiny.en | ~600 MB |
|
| 103 |
-
| base.en | ~800 MB |
|
| 104 |
-
| small.en | ~1.6 GB |
|
| 105 |
-
| medium.en | ~3.5 GB |
|
| 106 |
|
| 107 |
## ggml format
|
| 108 |
|
|
@@ -113,4 +127,6 @@ The original models are converted to a custom binary format. This allows to pack
|
|
| 113 |
- vocabulary
|
| 114 |
- weights
|
| 115 |
|
|
|
|
|
|
|
| 116 |
For more details, see the conversion script [convert-pt-to-ggml.py](convert-pt-to-ggml.py)
|
|
|
|
| 8 |
|
| 9 |
## Usage
|
| 10 |
|
| 11 |
+
To build the main program, run `make`. You can then transcribe a `.wav` file like this:
|
| 12 |
+
|
| 13 |
+
```bash
|
| 14 |
+
$ ./main -f input.wav
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
Before running the program, make sure to download one of the ggml Whisper models. For example:
|
| 18 |
+
|
| 19 |
+
```bash
|
| 20 |
+
bash ./download-ggml-model.sh base.en
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
For a quick demo, simply run `make base.en`:
|
| 26 |
|
| 27 |
```bash
|
|
|
|
| 111 |
|
| 112 |
## Memory usage
|
| 113 |
|
| 114 |
+
| Model | Disk | Mem |
|
| 115 |
+
| --- | --- | --- |
|
| 116 |
+
| tiny.en | 75 MB | ~600 MB |
|
| 117 |
+
| base.en | 142 MB | ~800 MB |
|
| 118 |
+
| small.en | 466 MB | ~1.6 GB |
|
| 119 |
+
| medium.en | 1.5 GB | ~3.5 GB |
|
| 120 |
|
| 121 |
## ggml format
|
| 122 |
|
|
|
|
| 127 |
- vocabulary
|
| 128 |
- weights
|
| 129 |
|
| 130 |
+
You can download the converted models using the [download-ggml-model.sh](download-ggml-model.sh) script.
|
| 131 |
+
|
| 132 |
For more details, see the conversion script [convert-pt-to-ggml.py](convert-pt-to-ggml.py)
|
download-ggml-model.sh
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
# This script downloads Whisper model files that have already been converted to ggml format.
|
| 4 |
+
# This way you don't have to convert them yourself.
|
| 5 |
+
|
| 6 |
+
ggml_path=$(dirname $(realpath $0))
|
| 7 |
+
|
| 8 |
+
# Whisper models
|
| 9 |
+
models=( "tiny.en" "base.en" "small.en" "medium.en" )
|
| 10 |
+
|
| 11 |
+
# list available models
|
| 12 |
+
function list_models {
|
| 13 |
+
printf "\n"
|
| 14 |
+
printf " Available models:"
|
| 15 |
+
for model in "${models[@]}"; do
|
| 16 |
+
printf " $model"
|
| 17 |
+
done
|
| 18 |
+
printf "\n\n"
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
if [ "$#" -ne 1 ]; then
|
| 22 |
+
printf "Usage: $0 <model>\n"
|
| 23 |
+
list_models
|
| 24 |
+
|
| 25 |
+
exit 1
|
| 26 |
+
fi
|
| 27 |
+
|
| 28 |
+
model=$1
|
| 29 |
+
|
| 30 |
+
if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
|
| 31 |
+
printf "Invalid model: $model\n"
|
| 32 |
+
list_models
|
| 33 |
+
|
| 34 |
+
exit 1
|
| 35 |
+
fi
|
| 36 |
+
|
| 37 |
+
# download ggml model
|
| 38 |
+
|
| 39 |
+
printf "Downloading ggml model $model ...\n"
|
| 40 |
+
|
| 41 |
+
mkdir -p models
|
| 42 |
+
|
| 43 |
+
if [ -f "models/ggml-$model.bin" ]; then
|
| 44 |
+
printf "Model $model already exists. Skipping download.\n"
|
| 45 |
+
exit 0
|
| 46 |
+
fi
|
| 47 |
+
|
| 48 |
+
wget --quiet --show-progress -O models/ggml-$model.bin https://ggml.ggerganov.com/ggml-model-whisper-$model.bin
|
| 49 |
+
|
| 50 |
+
if [ $? -ne 0 ]; then
|
| 51 |
+
printf "Failed to download ggml model $model \n"
|
| 52 |
+
printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
|
| 53 |
+
exit 1
|
| 54 |
+
fi
|
| 55 |
+
|
| 56 |
+
printf "Done! Model '$model' saved in 'models/ggml-$model.bin'\n"
|
| 57 |
+
printf "You can now use it like this:\n\n"
|
| 58 |
+
printf " $ ./main -m models/ggml-$model.bin -f samples/jfk.wav\n"
|
| 59 |
+
printf "\n"
|
main.cpp
CHANGED
|
@@ -117,9 +117,9 @@ struct whisper_params {
|
|
| 117 |
bool verbose = false;
|
| 118 |
bool print_special_tokens = false;
|
| 119 |
|
| 120 |
-
std::string model = "models/
|
| 121 |
|
| 122 |
-
std::string fname_inp = "
|
| 123 |
};
|
| 124 |
|
| 125 |
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
|
@@ -156,6 +156,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|
| 156 |
}
|
| 157 |
|
| 158 |
void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
|
|
|
|
| 159 |
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
| 160 |
fprintf(stderr, "\n");
|
| 161 |
fprintf(stderr, "options:\n");
|
|
@@ -1898,7 +1899,6 @@ int main(int argc, char ** argv) {
|
|
| 1898 |
const int64_t t_main_start_us = ggml_time_us();
|
| 1899 |
|
| 1900 |
whisper_params params;
|
| 1901 |
-
params.model = "models/whisper-tiny.en/ggml-model.bin";
|
| 1902 |
|
| 1903 |
if (whisper_params_parse(argc, argv, params) == false) {
|
| 1904 |
return 1;
|
|
@@ -1927,6 +1927,7 @@ int main(int argc, char ** argv) {
|
|
| 1927 |
|
| 1928 |
if (!whisper_model_load(params.model, model, vocab)) {
|
| 1929 |
fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
|
|
|
|
| 1930 |
return 1;
|
| 1931 |
}
|
| 1932 |
|
|
@@ -1939,6 +1940,7 @@ int main(int argc, char ** argv) {
|
|
| 1939 |
drwav wav;
|
| 1940 |
if (!drwav_init_file(&wav, params.fname_inp.c_str(), NULL)) {
|
| 1941 |
fprintf(stderr, "%s: failed to open WAV file '%s' - check your input\n", argv[0], params.fname_inp.c_str());
|
|
|
|
| 1942 |
return 2;
|
| 1943 |
}
|
| 1944 |
|
|
|
|
| 117 |
bool verbose = false;
|
| 118 |
bool print_special_tokens = false;
|
| 119 |
|
| 120 |
+
std::string model = "models/ggml-base.en.bin"; // model path
|
| 121 |
|
| 122 |
+
std::string fname_inp = "samples/jfk.wav";
|
| 123 |
};
|
| 124 |
|
| 125 |
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
|
|
|
| 156 |
}
|
| 157 |
|
| 158 |
void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
|
| 159 |
+
fprintf(stderr, "\n");
|
| 160 |
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
| 161 |
fprintf(stderr, "\n");
|
| 162 |
fprintf(stderr, "options:\n");
|
|
|
|
| 1899 |
const int64_t t_main_start_us = ggml_time_us();
|
| 1900 |
|
| 1901 |
whisper_params params;
|
|
|
|
| 1902 |
|
| 1903 |
if (whisper_params_parse(argc, argv, params) == false) {
|
| 1904 |
return 1;
|
|
|
|
| 1927 |
|
| 1928 |
if (!whisper_model_load(params.model, model, vocab)) {
|
| 1929 |
fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
|
| 1930 |
+
whisper_print_usage(argc, argv, {});
|
| 1931 |
return 1;
|
| 1932 |
}
|
| 1933 |
|
|
|
|
| 1940 |
drwav wav;
|
| 1941 |
if (!drwav_init_file(&wav, params.fname_inp.c_str(), NULL)) {
|
| 1942 |
fprintf(stderr, "%s: failed to open WAV file '%s' - check your input\n", argv[0], params.fname_inp.c_str());
|
| 1943 |
+
whisper_print_usage(argc, argv, {});
|
| 1944 |
return 2;
|
| 1945 |
}
|
| 1946 |
|