ggerganov committed on
Commit
4c78bf0
·
1 Parent(s): 89f9481

wip : improve makefile

Browse files
Files changed (4) hide show
  1. Makefile +61 -12
  2. main.cpp +2 -2
  3. stream.cpp +1 -1
  4. whisper.cpp +2 -8
Makefile CHANGED
@@ -1,25 +1,71 @@
1
- CC_SDL=`sdl2-config --cflags --libs`
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- main: ggml.o whisper.o main.o
4
- g++ -pthread -o main ggml.o whisper.o main.o
5
  ./main -h
6
 
7
  ggml.o: ggml.c ggml.h
8
- gcc -pthread -O3 -mavx -mavx2 -mfma -mf16c -c ggml.c
9
 
10
  whisper.o: whisper.cpp whisper.h
11
- gcc -pthread -O3 -std=c++11 -c whisper.cpp
12
 
13
- main.o: main.cpp ggml.h
14
- g++ -pthread -O3 -std=c++11 -c main.cpp
15
-
16
- stream: stream.cpp
17
- g++ -pthread -O3 -std=c++11 -o stream stream.cpp ggml.o whisper.o $(CC_SDL)
18
-
19
- # clean up the directory
20
  clean:
21
  rm -f *.o main
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  # download a few audio samples into folder "./samples":
24
  .PHONY: samples
25
  samples:
@@ -36,6 +82,9 @@ samples:
36
  @ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
37
  @rm samples/mm1.wav
38
 
 
 
 
39
 
40
  # if not already downloaded, the following targets download the specified model and
41
  # runs it on all samples in the folder "./samples":
 
1
+ UNAME_S := $(shell uname -s)
2
+ UNAME_P := $(shell uname -p)
3
+ UNAME_M := $(shell uname -m)
4
+
5
+ #
6
+ # Compile flags
7
+ #
8
+
9
+ CFLAGS = -O3 -std=c11
10
+ CXXFLAGS = -O3 -std=c++11
11
+
12
+ CFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
13
+ CXXFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
14
+
15
+ # OS specific
16
+ # TODO: support Windows
17
+ # $(CXX) $(CXXFLAGS) compiles whisper.cpp and links main/stream, so -pthread
18
+ # has to be on both flag sets, not only CFLAGS
19
+ ifneq ($(filter Linux Darwin,$(UNAME_S)),)
20
+ CFLAGS += -pthread
21
+ CXXFLAGS += -pthread
22
+ endif
23
+
24
+ # Architecture specific ("uname -p" may print "unknown" or "i386", so x86_64 is also matched via "uname -m")
25
+ ifneq ($(filter x86_64,$(UNAME_P) $(UNAME_M)),)
26
+ CFLAGS += -mavx -mavx2 -mfma -mf16c
27
+ endif
28
+ ifneq ($(filter arm%,$(UNAME_P)),)
29
+ CFLAGS += -mfpu=neon
30
+ endif
31
+ ifneq ($(filter aarch64%,$(UNAME_M)),)
32
+ # intentionally no -mfpu here: it is a 32-bit ARM option that AArch64 GCC rejects
33
+ endif
34
+ ifneq ($(filter armv%,$(UNAME_M)),)
35
+ # Raspberry Pi 4
36
+ CFLAGS += -mcpu=cortex-a72 -mfloat-abi=hard -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
37
+ endif
38
+
39
+ #
40
+ # Build library + main
41
+ #
42
 
43
+ main: main.cpp ggml.o whisper.o
44
+ $(CXX) $(CXXFLAGS) main.cpp whisper.o ggml.o -o main
45
  ./main -h
46
 
47
  ggml.o: ggml.c ggml.h
48
+ $(CC) $(CFLAGS) -c ggml.c
49
 
50
  whisper.o: whisper.cpp whisper.h
51
+ $(CXX) $(CXXFLAGS) -c whisper.cpp
52
 
 
 
 
 
 
 
 
53
  clean:
54
  rm -f *.o main
55
 
56
+ #
57
+ # Examples
58
+ #
59
+
60
+ CC_SDL=`sdl2-config --cflags --libs`
61
+
62
+ stream: stream.cpp ggml.o whisper.o
63
+ $(CXX) $(CXXFLAGS) stream.cpp ggml.o whisper.o -o stream $(CC_SDL)
64
+
65
+ #
66
+ # Audio samples
67
+ #
68
+
69
  # download a few audio samples into folder "./samples":
70
  .PHONY: samples
71
  samples:
 
82
  @ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
83
  @rm samples/mm1.wav
84
 
85
+ #
86
+ # Models
87
+ #
88
 
89
  # if not already downloaded, the following targets download the specified model and
90
  # runs it on all samples in the folder "./samples":
main.cpp CHANGED
@@ -149,11 +149,11 @@ int main(int argc, char ** argv) {
149
  // convert to mono, float
150
  pcmf32.resize(n);
151
  if (wav.channels == 1) {
152
- for (size_t i = 0; i < n; i++) {
153
  pcmf32[i] = float(pcm16[i])/32768.0f;
154
  }
155
  } else {
156
- for (size_t i = 0; i < n; i++) {
157
  pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
158
  }
159
  }
 
149
  // convert to mono, float
150
  pcmf32.resize(n);
151
  if (wav.channels == 1) {
152
+ for (int i = 0; i < n; i++) {
153
  pcmf32[i] = float(pcm16[i])/32768.0f;
154
  }
155
  } else {
156
+ for (int i = 0; i < n; i++) {
157
  pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
158
  }
159
  }
stream.cpp CHANGED
@@ -238,7 +238,7 @@ int main(int argc, char ** argv) {
238
  }
239
 
240
  // process 3 seconds of new audio
241
- while ((int) SDL_GetQueuedAudioSize(g_dev_id_in) < 3*WHISPER_SAMPLE_RATE*sizeof(float)) {
242
  SDL_Delay(1);
243
  }
244
  const int n_samples_new = SDL_GetQueuedAudioSize(g_dev_id_in)/sizeof(float);
 
238
  }
239
 
240
  // process 3 seconds of new audio
241
+ while (SDL_GetQueuedAudioSize(g_dev_id_in) < 3*WHISPER_SAMPLE_RATE*sizeof(float)) {
242
  SDL_Delay(1);
243
  }
244
  const int n_samples_new = SDL_GetQueuedAudioSize(g_dev_id_in)/sizeof(float);
whisper.cpp CHANGED
@@ -1031,8 +1031,6 @@ bool whisper_encode(
1031
  const auto & mel_inp = wctx.mel;
1032
  const auto & hparams = model.hparams;
1033
 
1034
- const int n_vocab = hparams.n_vocab;
1035
-
1036
  const int n_ctx = hparams.n_audio_ctx;
1037
  const int n_state = hparams.n_audio_state;
1038
  const int n_head = hparams.n_audio_head;
@@ -2365,7 +2363,6 @@ int whisper_full(
2365
 
2366
  bool done = false;
2367
  int seek_delta = 100*WHISPER_CHUNK_SIZE;
2368
- whisper_token last_id = 0;
2369
 
2370
  // print the prompt
2371
  //printf("\n\n");
@@ -2395,8 +2392,6 @@ int whisper_full(
2395
  // feel free to experiment!
2396
  //
2397
  {
2398
- const int n_vocab = whisper_n_vocab(ctx);
2399
-
2400
  whisper_token id = 0;
2401
  whisper_token tid = whisper_token_beg(ctx);
2402
 
@@ -2410,7 +2405,6 @@ int whisper_full(
2410
  seek_delta = 2*(id - whisper_token_beg(ctx));
2411
  result_len = i + 1;
2412
  }
2413
- last_id = id;
2414
 
2415
  // add it to the context
2416
  prompt.push_back(id);
@@ -2444,7 +2438,7 @@ int whisper_full(
2444
 
2445
  std::string text = "";
2446
 
2447
- for (int i = 0; i < result_cur.size(); i++) {
2448
  if (params.print_special_tokens == false && result_cur[i].id >= whisper_token_eot(ctx)) {
2449
  } else {
2450
  text += whisper_token_to_str(ctx, result_cur[i].id);
@@ -2464,7 +2458,7 @@ int whisper_full(
2464
  result_all.push_back({ t0, t1, text });
2465
  }
2466
  text = "";
2467
- while (result_cur[i].id > whisper_token_beg(ctx) && i < result_cur.size()) {
2468
  i++;
2469
  }
2470
  i--;
 
1031
  const auto & mel_inp = wctx.mel;
1032
  const auto & hparams = model.hparams;
1033
 
 
 
1034
  const int n_ctx = hparams.n_audio_ctx;
1035
  const int n_state = hparams.n_audio_state;
1036
  const int n_head = hparams.n_audio_head;
 
2363
 
2364
  bool done = false;
2365
  int seek_delta = 100*WHISPER_CHUNK_SIZE;
 
2366
 
2367
  // print the prompt
2368
  //printf("\n\n");
 
2392
  // feel free to experiment!
2393
  //
2394
  {
 
 
2395
  whisper_token id = 0;
2396
  whisper_token tid = whisper_token_beg(ctx);
2397
 
 
2405
  seek_delta = 2*(id - whisper_token_beg(ctx));
2406
  result_len = i + 1;
2407
  }
 
2408
 
2409
  // add it to the context
2410
  prompt.push_back(id);
 
2438
 
2439
  std::string text = "";
2440
 
2441
+ for (int i = 0; i < (int) result_cur.size(); i++) {
2442
  if (params.print_special_tokens == false && result_cur[i].id >= whisper_token_eot(ctx)) {
2443
  } else {
2444
  text += whisper_token_to_str(ctx, result_cur[i].id);
 
2458
  result_all.push_back({ t0, t1, text });
2459
  }
2460
  text = "";
2461
+ while (i < (int) result_cur.size() && result_cur[i].id > whisper_token_beg(ctx)) { // test the bound before indexing result_cur
2462
  i++;
2463
  }
2464
  i--;