ggerganov commited on
Commit
cfd2dd9
·
unverified ·
1 Parent(s): 65c8446

Update README.md and simplify usage

Browse files
Files changed (4) hide show
  1. Makefile +11 -73
  2. README.md +22 -6
  3. download-ggml-model.sh +59 -0
  4. main.cpp +5 -3
Makefile CHANGED
@@ -1,5 +1,6 @@
1
  main: ggml.o main.o
2
  g++ -o main ggml.o main.o
 
3
 
4
  ggml.o: ggml.c ggml.h
5
  gcc -O3 -mavx -mavx2 -mfma -mf16c -c ggml.c
@@ -11,11 +12,7 @@ main.o: main.cpp ggml.h
11
  clean:
12
  rm -f *.o main
13
 
14
- # run the program
15
- run: main
16
- ./main
17
-
18
- # download the following audio samples into folder "./samples":
19
  .PHONY: samples
20
  samples:
21
  @echo "Downloading samples..."
@@ -28,79 +25,20 @@ samples:
28
  @ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
29
  @ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
30
 
31
- .PHONY: tiny.en
32
- tiny.en: main
33
- @echo "Downloading tiny.en (75 MB just once)"
34
- @mkdir -p models
35
- @if [ ! -f models/ggml-tiny.en.bin ]; then \
36
- wget --quiet --show-progress -O models/ggml-tiny.en.bin https://ggml.ggerganov.com/ggml-model-whisper-tiny.en.bin ; \
37
- fi
38
- @echo ""
39
- @echo "==============================================="
40
- @echo "Running tiny.en on all samples in ./samples ..."
41
- @echo "==============================================="
42
- @echo ""
43
- @for f in samples/*.wav; do \
44
- echo "----------------------------------------------" ; \
45
- echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
46
- echo "----------------------------------------------" ; \
47
- echo "" ; \
48
- ./main -m models/ggml-tiny.en.bin -f $$f ; \
49
- echo "" ; \
50
- done
51
 
52
- .PHONY: base.en
53
- base.en: main
54
- @echo "Downloading base.en (142 MB just once)"
55
- @mkdir -p models
56
- @if [ ! -f models/ggml-base.en.bin ]; then \
57
- wget --quiet --show-progress -O models/ggml-base.en.bin https://ggml.ggerganov.com/ggml-model-whisper-base.en.bin ; \
58
- fi
59
- @echo ""
60
- @echo "==============================================="
61
- @echo "Running base.en on all samples in ./samples ..."
62
- @echo "==============================================="
63
- @echo ""
64
- @for f in samples/*.wav; do \
65
- echo "----------------------------------------------" ; \
66
- echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
67
- echo "----------------------------------------------" ; \
68
- echo "" ; \
69
- ./main -m models/ggml-base.en.bin -f $$f ; \
70
- echo "" ; \
71
- done
72
 
 
 
 
73
  .PHONY: small.en
74
- small.en: main
75
- @echo "Downloading small.en (466 MB just once)"
76
- @mkdir -p models
77
- @if [ ! -f models/ggml-small.en.bin ]; then \
78
- wget --quiet --show-progress -O models/ggml-small.en.bin https://ggml.ggerganov.com/ggml-model-whisper-small.en.bin ; \
79
- fi
80
- @echo ""
81
- @echo "==============================================="
82
- @echo "Running small.en on all samples in ./samples ..."
83
- @echo "==============================================="
84
- @echo ""
85
- @for f in samples/*.wav; do \
86
- echo "----------------------------------------------" ; \
87
- echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
88
- echo "----------------------------------------------" ; \
89
- echo "" ; \
90
- ./main -m models/ggml-small.en.bin -f $$f ; \
91
- echo "" ; \
92
- done
93
 
94
- .PHONY: medium.en
95
- medium.en: main
96
- @echo "Downloading medium.en (1.5 GB just once)"
97
- @mkdir -p models
98
- @if [ ! -f models/ggml-medium.en.bin ]; then \
99
- wget --quiet --show-progress -O models/ggml-medium.en.bin https://ggml.ggerganov.com/ggml-model-whisper-medium.en.bin ; \
100
- fi
101
  @echo ""
102
  @echo "==============================================="
103
- @echo "Running medium.en on all samples in ./samples ..."
104
  @echo "==============================================="
105
  @echo ""
106
  @for f in samples/*.wav; do \
@@ -108,6 +46,6 @@ medium.en: main
108
  echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
109
  echo "----------------------------------------------" ; \
110
  echo "" ; \
111
- ./main -m models/ggml-medium.en.bin -f $$f ; \
112
  echo "" ; \
113
  done
 
1
  main: ggml.o main.o
2
  g++ -o main ggml.o main.o
3
+ ./main -h
4
 
5
  ggml.o: ggml.c ggml.h
6
  gcc -O3 -mavx -mavx2 -mfma -mf16c -c ggml.c
 
12
  clean:
13
  rm -f *.o main
14
 
15
+ # download a few audio samples into folder "./samples":
 
 
 
 
16
  .PHONY: samples
17
  samples:
18
  @echo "Downloading samples..."
 
25
  @ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
26
  @ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ # if not already downloaded, the following targets download the specified model and
30
+ # run it on all samples in the folder "./samples":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ .PHONY: tiny.en
33
+ .PHONY: base.en
34
+ .PHONY: medium.en
35
  .PHONY: small.en
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ tiny.en base.en medium.en small.en: main
38
+ bash ./download-ggml-model.sh $@
 
 
 
 
 
39
  @echo ""
40
  @echo "==============================================="
41
+ @echo "Running $@ on all samples in ./samples ..."
42
  @echo "==============================================="
43
  @echo ""
44
  @for f in samples/*.wav; do \
 
46
  echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
47
  echo "----------------------------------------------" ; \
48
  echo "" ; \
49
+ ./main -m models/ggml-$@.bin -f $$f ; \
50
  echo "" ; \
51
  done
README.md CHANGED
@@ -8,6 +8,20 @@ C/C++ port of [OpenAI's Whisper](https://github.com/openai/whisper) speech-to-te
8
 
9
  ## Usage
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  For a quick demo, simply run `make base.en`:
12
 
13
  ```bash
@@ -97,12 +111,12 @@ ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
97
 
98
  ## Memory usage
99
 
100
- | Model | Mem |
101
- | --- | --- |
102
- | tiny.en | ~600 MB |
103
- | base.en | ~800 MB |
104
- | small.en | ~1.6 GB |
105
- | medium.en | ~3.5 GB |
106
 
107
  ## ggml format
108
 
@@ -113,4 +127,6 @@ The original models are converted to a custom binary format. This allows to pack
113
  - vocabulary
114
  - weights
115
 
 
 
116
  For more details, see the conversion script [convert-pt-to-ggml.py](convert-pt-to-ggml.py)
 
8
 
9
  ## Usage
10
 
11
+ To build the main program, run `make`. You can then transcribe a `.wav` file like this:
12
+
13
+ ```bash
14
+ $ ./main -f input.wav
15
+ ```
16
+
17
+ Before running the program, make sure to download one of the ggml Whisper models. For example:
18
+
19
+ ```bash
20
+ bash ./download-ggml-model.sh base.en
21
+ ```
22
+
23
+ ---
24
+
25
  For a quick demo, simply run `make base.en`:
26
 
27
  ```bash
 
111
 
112
  ## Memory usage
113
 
114
+ | Model | Disk | Mem |
115
+ | --- | --- | --- |
116
+ | tiny.en | 75 MB | ~600 MB |
117
+ | base.en | 142 MB | ~800 MB |
118
+ | small.en | 466 MB | ~1.6 GB |
119
+ | medium.en | 1.5 GB | ~3.5 GB |
120
 
121
  ## ggml format
122
 
 
127
  - vocabulary
128
  - weights
129
 
130
+ You can download the converted models using the [download-ggml-model.sh](download-ggml-model.sh) script.
131
+
132
  For more details, see the conversion script [convert-pt-to-ggml.py](convert-pt-to-ggml.py)
download-ggml-model.sh ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # This script downloads Whisper model files that have already been converted to ggml format.
4
+ # This way you don't have to convert them yourself.
5
+
6
+ ggml_path=$(dirname $(realpath $0))
7
+
8
+ # Whisper models
9
+ models=( "tiny.en" "base.en" "small.en" "medium.en" )
10
+
11
+ # list available models
12
+ function list_models {
13
+ printf "\n"
14
+ printf " Available models:"
15
+ for model in "${models[@]}"; do
16
+ printf " $model"
17
+ done
18
+ printf "\n\n"
19
+ }
20
+
21
+ if [ "$#" -ne 1 ]; then
22
+ printf "Usage: $0 <model>\n"
23
+ list_models
24
+
25
+ exit 1
26
+ fi
27
+
28
+ model=$1
29
+
30
+ if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
31
+ printf "Invalid model: $model\n"
32
+ list_models
33
+
34
+ exit 1
35
+ fi
36
+
37
+ # download ggml model
38
+
39
+ printf "Downloading ggml model $model ...\n"
40
+
41
+ mkdir -p models
42
+
43
+ if [ -f "models/ggml-$model.bin" ]; then
44
+ printf "Model $model already exists. Skipping download.\n"
45
+ exit 0
46
+ fi
47
+
48
+ wget --quiet --show-progress -O models/ggml-$model.bin https://ggml.ggerganov.com/ggml-model-whisper-$model.bin
49
+
50
+ if [ $? -ne 0 ]; then
51
+ printf "Failed to download ggml model $model \n"
52
+ printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
53
+ exit 1
54
+ fi
55
+
56
+ printf "Done! Model '$model' saved in 'models/ggml-$model.bin'\n"
57
+ printf "You can now use it like this:\n\n"
58
+ printf " $ ./main -m models/ggml-$model.bin -f samples/jfk.wav\n"
59
+ printf "\n"
main.cpp CHANGED
@@ -117,9 +117,9 @@ struct whisper_params {
117
  bool verbose = false;
118
  bool print_special_tokens = false;
119
 
120
- std::string model = "models/whisper-tiny.en/ggml-model.bin"; // model path
121
 
122
- std::string fname_inp = "default.wav";
123
  };
124
 
125
  void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@@ -156,6 +156,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
156
  }
157
 
158
  void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
 
159
  fprintf(stderr, "usage: %s [options]\n", argv[0]);
160
  fprintf(stderr, "\n");
161
  fprintf(stderr, "options:\n");
@@ -1898,7 +1899,6 @@ int main(int argc, char ** argv) {
1898
  const int64_t t_main_start_us = ggml_time_us();
1899
 
1900
  whisper_params params;
1901
- params.model = "models/whisper-tiny.en/ggml-model.bin";
1902
 
1903
  if (whisper_params_parse(argc, argv, params) == false) {
1904
  return 1;
@@ -1927,6 +1927,7 @@ int main(int argc, char ** argv) {
1927
 
1928
  if (!whisper_model_load(params.model, model, vocab)) {
1929
  fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
 
1930
  return 1;
1931
  }
1932
 
@@ -1939,6 +1940,7 @@ int main(int argc, char ** argv) {
1939
  drwav wav;
1940
  if (!drwav_init_file(&wav, params.fname_inp.c_str(), NULL)) {
1941
  fprintf(stderr, "%s: failed to open WAV file '%s' - check your input\n", argv[0], params.fname_inp.c_str());
 
1942
  return 2;
1943
  }
1944
 
 
117
  bool verbose = false;
118
  bool print_special_tokens = false;
119
 
120
+ std::string model = "models/ggml-base.en.bin"; // model path
121
 
122
+ std::string fname_inp = "samples/jfk.wav";
123
  };
124
 
125
  void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
 
156
  }
157
 
158
  void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
159
+ fprintf(stderr, "\n");
160
  fprintf(stderr, "usage: %s [options]\n", argv[0]);
161
  fprintf(stderr, "\n");
162
  fprintf(stderr, "options:\n");
 
1899
  const int64_t t_main_start_us = ggml_time_us();
1900
 
1901
  whisper_params params;
 
1902
 
1903
  if (whisper_params_parse(argc, argv, params) == false) {
1904
  return 1;
 
1927
 
1928
  if (!whisper_model_load(params.model, model, vocab)) {
1929
  fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
1930
+ whisper_print_usage(argc, argv, {});
1931
  return 1;
1932
  }
1933
 
 
1940
  drwav wav;
1941
  if (!drwav_init_file(&wav, params.fname_inp.c_str(), NULL)) {
1942
  fprintf(stderr, "%s: failed to open WAV file '%s' - check your input\n", argv[0], params.fname_inp.c_str());
1943
+ whisper_print_usage(argc, argv, {});
1944
  return 2;
1945
  }
1946