Ryan Metcalfe (committed by ggerganov)
Commit 49bcee6 · unverified · Parent: 3c9afe6

whisper : add OpenVINO support (#1037)


* openvino: use OpenVINO encoder inference

* openvino: add python script for OpenVINO model generation

* whisper: Fix 'unused' warnings when OpenVINO isn't enabled in build

* Apply suggestions from code review

Co-authored-by: Georgi Gerganov <[email protected]>

* whisper: Fix compilation error

* whisper: revert whisper_get_openvino_path_encoder & whisper_get_openvino_path_cache to non-const func signatures

* cmake: Add openvino-encoder as separate object target

* whisper : minor style fixes

* minor : indentation fixes

---------

Co-authored-by: Georgi Gerganov <[email protected]>

CMakeLists.txt CHANGED
@@ -54,6 +54,8 @@ option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
 option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
 option(WHISPER_NO_F16C "whisper: disable F16c" OFF)
 
+option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
+
 if (APPLE)
     option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
     option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
@@ -192,6 +194,10 @@ if (WHISPER_CLBLAST)
     endif()
 endif()
 
+if( WHISPER_OPENVINO )
+    find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+endif()
+
 # compiler flags
 
 if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
@@ -297,6 +303,24 @@ if (WHISPER_COREML)
         )
 endif()
 
+if (WHISPER_OPENVINO)
+    set(TARGET whisper.openvino)
+
+    add_library(${TARGET} OBJECT
+        openvino/whisper-openvino-encoder.h
+        openvino/whisper-openvino-encoder.cpp
+        )
+
+    target_include_directories(${TARGET} PUBLIC
+        .
+        )
+
+    set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON)
+    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_OPENVINO)
+
+    target_link_libraries(${TARGET} PRIVATE openvino::runtime)
+endif()
+
 #
 # whisper - this is the main library of the project
 #
@@ -322,6 +346,10 @@ if (WHISPER_COREML)
     target_link_libraries(${TARGET} PRIVATE whisper.coreml)
 endif()
 
+if (WHISPER_OPENVINO)
+    target_link_libraries(${TARGET} PRIVATE whisper.openvino)
+endif()
+
 if (MSVC)
     target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
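
With the new option in place, enabling the OpenVINO encoder is a configure-time switch. A minimal build sketch (assuming OpenVINO Runtime is discoverable by find_package, e.g. after sourcing setupvars.sh from an OpenVINO installation):

    cmake -B build -DWHISPER_OPENVINO=ON
    cmake --build build --config Release

When WHISPER_OPENVINO is OFF (the default), the whisper.openvino object target is not built and WHISPER_USE_OPENVINO is never defined, so none of the code paths below are compiled in.
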
examples/main/main.cpp CHANGED
@@ -95,6 +95,8 @@ struct whisper_params {
     // [TDRZ] speaker turn string
     std::string tdrz_speaker_turn = " [SPEAKER_TURN]"; // TODO: set from command line
 
+    std::string openvino_encode_device = "CPU";
+
     std::vector<std::string> fname_inp = {};
     std::vector<std::string> fname_out = {};
 };
@@ -155,6 +157,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         else if (                  arg == "--prompt")      { params.prompt = argv[++i]; }
         else if (arg == "-m"    || arg == "--model")       { params.model = argv[++i]; }
         else if (arg == "-f"    || arg == "--file")        { params.fname_inp.emplace_back(argv[++i]); }
+        else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
         else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             whisper_print_usage(argc, argv, params);
@@ -207,6 +210,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, "  --prompt PROMPT         [%-7s] initial prompt\n", params.prompt.c_str());
     fprintf(stderr, "  -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
     fprintf(stderr, "  -f FNAME, --file FNAME  [%-7s] input WAV file path\n", "");
+    fprintf(stderr, "  -oved D,  --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
     fprintf(stderr, "\n");
 }
 
@@ -809,6 +813,9 @@ int main(int argc, char ** argv) {
         return 3;
     }
 
+    // initialize openvino encoder. This has no effect on whisper.cpp builds that don't have OpenVINO configured.
+    whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
+
     for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
         const auto fname_inp = params.fname_inp[f];
         const auto fname_out = f < (int) params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
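
The new -oved flag simply forwards a device name to whisper_ctx_init_openvino_encoder. A usage sketch (assuming an OpenVINO-enabled build and a converted encoder IR sitting next to the ggml model):

    ./main -m models/ggml-base.en.bin -f samples/jfk.wav -oved GPU

If the flag is omitted the device defaults to "CPU", and on builds without OpenVINO the init call in main() is a no-op.
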
models/convert-whisper-to-openvino.py ADDED
@@ -0,0 +1,53 @@
+import argparse
+import torch
+from whisper import load_model
+import os
+from openvino.tools import mo
+from openvino.runtime import serialize
+import shutil
+
+def convert_encoder(hparams, encoder, mname):
+    encoder.eval()
+
+    mel = torch.zeros((1, 80, 3000))
+
+    onnx_folder=os.path.join(os.path.dirname(__file__),"onnx_encoder")
+
+    #create a directory to store the onnx model, and other collateral that is saved during onnx export procedure
+    if not os.path.isdir(onnx_folder):
+        os.makedirs(onnx_folder)
+
+    onnx_path = os.path.join(onnx_folder, "whisper_encoder.onnx")
+
+    torch.onnx.export(
+        encoder,
+        mel,
+        onnx_path,
+        input_names=["mel"],
+        output_names=["output_features"]
+    )
+
+    # use model optimizer to convert onnx to OpenVINO IR format
+    encoder_model = mo.convert_model(onnx_path, compress_to_fp16=True)
+    serialize(encoder_model, xml_path='ggml-' + mname + '-encoder-openvino.xml')
+
+    #cleanup
+    if os.path.isdir(onnx_folder):
+        shutil.rmtree(onnx_folder)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)", required=True)
+    args = parser.parse_args()
+
+    if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1"]:
+        raise ValueError("Invalid model name")
+
+    whisper = load_model(args.model).cpu()
+    hparams = whisper.dims
+
+    encoder = whisper.encoder
+
+    # Convert encoder to onnx
+    convert_encoder(hparams, encoder, args.model)
models/openvino-conversion-requirements.txt ADDED
@@ -0,0 +1,2 @@
+openvino-dev[pytorch,onnx]
+openai-whisper
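
Producing the encoder IR is an offline, two-step process: install the conversion dependencies, then run the script above, which exports the PyTorch encoder to ONNX and converts it to an OpenVINO .xml/.bin pair. A sketch, assuming a Python environment and running from the repository root:

    pip install -r models/openvino-conversion-requirements.txt
    python models/convert-whisper-to-openvino.py --model base.en

This writes ggml-base.en-encoder-openvino.xml (plus the accompanying .bin) to the current directory; whisper.cpp expects to find it next to ggml-base.en.bin at runtime unless an explicit path is passed to whisper_ctx_init_openvino_encoder.
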
openvino/whisper-openvino-encoder.cpp ADDED
@@ -0,0 +1,108 @@
+#include "openvino/whisper-openvino-encoder.h"
+#include "ggml.h"
+#include <openvino/openvino.hpp>
+#include <iostream>
+
+struct whisper_openvino_context {
+    ov::InferRequest inferRequest;
+};
+
+struct whisper_openvino_context * whisper_openvino_init(const char* path_model,
+    const char* device,
+    const char* cache_dir)
+{
+    if (!path_model || !device) {
+        fprintf(stderr, "%s: path_model and/or device is null\n", __func__);
+        return nullptr;
+    }
+
+    fprintf(stderr, "%s: path_model = %s, device = %s, cache_dir = %s\n",
+        __func__, path_model, device, cache_dir ? cache_dir : "(not set)");
+
+    whisper_openvino_context *context = new whisper_openvino_context;
+    try {
+        ov::Core core;
+
+        if (cache_dir) {
+            // enables caching of device-specific 'blobs' during core.compile_model
+            // routine. This speeds up calls to compile_model for successive runs.
+            core.set_property(ov::cache_dir(cache_dir));
+        }
+
+        // Read the OpenVINO encoder IR (.xml/.bin) from disk, producing an ov::Model object.
+        std::shared_ptr<ov::Model> model = core.read_model(path_model);
+
+        // Produce a compiled-model object, given the device ("CPU", "GPU", etc.)
+        auto compiledModel = core.compile_model(model, device);
+
+        // From the compiled model object, create an infer request. This is the thing
+        // that we will use later on to trigger inference execution.
+        context->inferRequest = compiledModel.create_infer_request();
+    }
+    catch (const std::exception& error) {
+        std::cout << "in openvino encoder compile routine: exception: " << error.what() << std::endl;
+        delete context;
+        context = nullptr;
+    }
+
+    return context;
+}
+
+void whisper_openvino_free(struct whisper_openvino_context * ctx) {
+    if( ctx ) {
+        delete ctx;
+    }
+}
+
+int whisper_openvino_encode(
+    whisper_openvino_context* ctx,
+    ggml_tensor* mel,
+    ggml_tensor* out) {
+
+    if (!ctx || !mel || !out) {
+        fprintf(stderr, "%s: Error! ctx / mel / out is null\n", __func__);
+        return 0;
+    }
+
+    if (mel->n_dims != 2) {
+        fprintf(stderr, "%s: Error! mel ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
+            __func__, mel->n_dims);
+        return 0;
+    }
+
+    if (out->n_dims != 2) {
+        fprintf(stderr, "%s: Error! out ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
+            __func__, out->n_dims);
+        return 0;
+    }
+
+    try {
+
+        // wrap the passed-in mel ggml_tensor as an OpenVINO Tensor object, and set as input tensor to infer request
+        {
+            // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays
+            ov::Shape input_shape = { 1, (unsigned long long)mel->ne[1], (unsigned long long)mel->ne[0] };
+            ov::Strides input_strides = { mel->nb[2], mel->nb[1], mel->nb[0] };
+            ov::Tensor input_tensor(ov::element::f32, input_shape, mel->data, input_strides);
+            ctx->inferRequest.set_input_tensor(input_tensor);
+        }
+
+        // wrap the passed-in out ggml_tensor as an OpenVINO Tensor object, and set as output tensor to infer request
+        {
+            // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays
+            ov::Shape output_shape = { 1, (unsigned long long)out->ne[1], (unsigned long long)out->ne[0] };
+            ov::Strides output_strides = { out->nb[2], out->nb[1], out->nb[0] };
+            ov::Tensor out_tensor(ov::element::f32, output_shape, out->data, output_strides);
+            ctx->inferRequest.set_output_tensor(out_tensor);
+        }
+
+        // run inference
+        ctx->inferRequest.infer();
+    }
+    catch (const std::exception& error) {
+        std::cout << "in openvino encode inference execution routine: exception: " << error.what() << std::endl;
+        return 0;
+    }
+
+    return 1;
+}
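
The shape/stride reversal in the two wrapping blocks follows from ggml storing the fastest-varying dimension first (ne[0], nb[0], with nb in bytes) while ov::Shape and ov::Strides list the slowest-varying dimension first. As a worked example (assuming a contiguous f32 mel tensor with the usual whisper dimensions; not output from the commit):

    ggml:     ne = {3000, 80},       nb = {4, 4*3000, 4*3000*80}
    OpenVINO: Shape = {1, 80, 3000}, Strides = {4*3000*80, 4*3000, 4}

Because the ov::Tensor objects are constructed over mel->data and out->data, the ggml buffers are shared directly and no copies are made on either side of the inference call.
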
openvino/whisper-openvino-encoder.h ADDED
@@ -0,0 +1,31 @@
+// Wrapper of the OpenVINO Whisper Encoder model
+//
+
+#if __cplusplus
+extern "C" {
+#endif
+
+struct whisper_openvino_context;
+
+// initialize openvino encoder, given path to model xml, device ("CPU", "GPU", etc.), and
+// path to cache_dir. Returns null upon failure.
+struct whisper_openvino_context * whisper_openvino_init(const char * path_model,
+    const char * device,
+    const char * cache_dir);
+
+// clean up a ctx previously returned from whisper_openvino_init()
+void whisper_openvino_free(struct whisper_openvino_context * ctx);
+
+struct ggml_tensor;
+
+// Perform encode using OpenVINO.
+// Returns 1 on success
+// Returns 0 on failure
+int whisper_openvino_encode(
+    whisper_openvino_context* ctx,
+    ggml_tensor* mel,
+    ggml_tensor* out);
+
+#if __cplusplus
+}
+#endif
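
The header above is the whole surface of the wrapper: an opaque context plus init/encode/free, with the mel input and encoder output passed as 2-D ggml tensors. A hypothetical standalone sketch of driving it directly (not part of the commit; whisper.cpp itself only reaches it through whisper_ctx_init_openvino_encoder), assuming "base" model dimensions (80 mel bins x 3000 frames in, 512 x 1500 out) and an IR produced by the conversion script:

    #include "openvino/whisper-openvino-encoder.h"
    #include "ggml.h"

    int main() {
        // scratch ggml context large enough to hold the two f32 tensors
        struct ggml_init_params gparams = { 64u*1024*1024, nullptr, false }; // mem_size, mem_buffer, no_alloc
        struct ggml_context * gctx = ggml_init(gparams);

        // mel: ne[0] = 3000 frames, ne[1] = 80 bins; out: ne[0] = n_state (512), ne[1] = n_ctx (1500)
        struct ggml_tensor * mel = ggml_new_tensor_2d(gctx, GGML_TYPE_F32, 3000, 80);
        struct ggml_tensor * out = ggml_new_tensor_2d(gctx, GGML_TYPE_F32, 512, 1500);
        // ... fill mel->data with the log-mel spectrogram here ...

        struct whisper_openvino_context * ov =
            whisper_openvino_init("ggml-base-encoder-openvino.xml", "CPU", nullptr);

        if (ov && whisper_openvino_encode(ov, mel, out)) {
            // out->data now holds the encoded audio features
        }

        whisper_openvino_free(ov);
        ggml_free(gctx);
        return 0;
    }
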
whisper.cpp CHANGED
@@ -3,6 +3,10 @@
 #include "coreml/whisper-encoder.h"
 #endif
 
+#if WHISPER_USE_OPENVINO
+#include "openvino/whisper-openvino-encoder.h"
+#endif
+
 #include "ggml.h"
 
 #include <algorithm>
@@ -660,6 +664,10 @@ struct whisper_state {
     whisper_coreml_context * ctx_coreml = nullptr;
 #endif
 
+#ifdef WHISPER_USE_OPENVINO
+    whisper_openvino_context * ctx_openvino = nullptr;
+#endif
+
     // [EXPERIMENTAL] token-level timestamps data
     int64_t t_beg = 0;
     int64_t t_last = 0;
@@ -1478,7 +1486,13 @@ static bool whisper_encode_internal(
     const bool use_coreml = wstate.ctx_coreml != nullptr;
 #endif
 
-    if (!use_coreml) {
+#ifndef WHISPER_USE_OPENVINO
+    const bool use_openvino = false;
+#else
+    const bool use_openvino = wstate.ctx_openvino != nullptr;
+#endif
+
+    if (!use_coreml && !use_openvino) {
     // convolution + gelu
     {
         wstate.use_buf(ctx0, 1);
@@ -1777,8 +1791,7 @@ static bool whisper_encode_internal(
         }
     }
 #ifdef WHISPER_USE_COREML
-    else
-    {
+    else if (use_coreml) {
         wstate.use_buf(ctx0, -1);
 
         cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
@@ -1786,6 +1799,17 @@ static bool whisper_encode_internal(
         whisper_coreml_encode(wstate.ctx_coreml, (float *) mel->data, (float *) cur->data);
     }
 #endif
+#ifdef WHISPER_USE_OPENVINO
+    else if (use_openvino) {
+        wstate.use_buf(ctx0, -1);
+
+        cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
+
+        if (!whisper_openvino_encode(wstate.ctx_openvino, mel, cur)) {
+            return false;
+        }
+    }
+#endif
 
     // cur
     //{
@@ -2628,6 +2652,31 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
 }
 #endif
 
+#ifdef WHISPER_USE_OPENVINO
+// replace .bin with -encoder-openvino.xml
+static std::string whisper_get_openvino_path_encoder(std::string path_bin) {
+    auto pos = path_bin.rfind('.');
+    if (pos != std::string::npos) {
+        path_bin = path_bin.substr(0, pos);
+    }
+
+    path_bin += "-encoder-openvino.xml";
+
+    return path_bin;
+}
+
+static std::string whisper_get_openvino_path_cache(std::string path_bin) {
+    auto pos = path_bin.rfind('.');
+    if (pos != std::string::npos) {
+        path_bin = path_bin.substr(0, pos);
+    }
+
+    path_bin += "-encoder-openvino-cache";
+
+    return path_bin;
+}
+#endif
+
 struct whisper_state * whisper_init_state(whisper_context * ctx) {
     whisper_state * state = new whisper_state;
 
@@ -2694,6 +2743,58 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
     return state;
 }
 
+int whisper_ctx_init_openvino_encoder(struct whisper_context* ctx,
+    const char* openvino_model_path,
+    const char* openvino_device,
+    const char* openvino_cache_dir)
+{
+#ifndef WHISPER_USE_OPENVINO
+    (void)(ctx);
+    (void)(openvino_model_path);
+    (void)(openvino_device);
+    (void)(openvino_cache_dir);
+    return 0;
+#else
+    if (!openvino_model_path && ctx->path_model.empty())
+    {
+        fprintf(stderr, "%s: openvino_model_path is nullptr, and ctx has no model_path set.\n", __func__);
+        return 0;
+    }
+
+    std::string path_openvino;
+    if (!openvino_model_path) {
+        //if openvino_model_path is not set, attempt to find it in the same directory as ggml-<model>.bin model
+        path_openvino = whisper_get_openvino_path_encoder(ctx->path_model);
+    }
+    else {
+        path_openvino = openvino_model_path;
+    }
+
+    std::string path_openvino_cache_dir;
+    if (!openvino_cache_dir) {
+        //if openvino_cache_dir is not set, set it as a dir residing next to ggml-<model>.bin
+        path_openvino_cache_dir = whisper_get_openvino_path_cache(ctx->path_model);
+    }
+    else {
+        path_openvino_cache_dir = openvino_cache_dir;
+    }
+
+    fprintf(stderr, "%s: loading OpenVINO model from '%s'\n", __func__, path_openvino.c_str());
+    fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
+
+    ctx->state->ctx_openvino = whisper_openvino_init(path_openvino.c_str(), openvino_device, path_openvino_cache_dir.c_str());
+    if (!ctx->state->ctx_openvino) {
+        fprintf(stderr, "%s: failed to init OpenVINO encoder from '%s'\n", __func__, path_openvino.c_str());
+        return 0;
+    }
+    else {
+        fprintf(stderr, "%s: OpenVINO model loaded\n", __func__);
+    }
+
+    return 1;
+#endif
+}
+
 struct whisper_context * whisper_init_from_file_no_state(const char * path_model) {
 
     fprintf(stderr, "%s: loading model from '%s'\n", __func__, path_model);
@@ -2848,6 +2949,13 @@ void whisper_free_state(struct whisper_state * state)
         }
 #endif
 
+#ifdef WHISPER_USE_OPENVINO
+        if (state->ctx_openvino != nullptr) {
+            whisper_openvino_free(state->ctx_openvino);
+            state->ctx_openvino = nullptr;
+        }
+#endif
+
         delete state;
     }
 }
@@ -3287,6 +3395,14 @@ static int whisper_has_coreml(void) {
 #endif
 }
 
+static int whisper_has_openvino(void) {
+#ifdef WHISPER_USE_OPENVINO
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 const char * whisper_print_system_info(void) {
     static std::string s;
 
@@ -3304,6 +3420,7 @@ const char * whisper_print_system_info(void) {
     s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | ";
     s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
     s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
+    s += "OPENVINO = " + std::to_string(whisper_has_openvino()) + " | ";
 
     return s.c_str();
 }
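
For reference, the two static path helpers above make the expected on-disk layout explicit. With ctx->path_model = "/path/to/ggml-base.en.bin" (a worked example, not output from the commit):

    whisper_get_openvino_path_encoder(...) -> "/path/to/ggml-base.en-encoder-openvino.xml"
    whisper_get_openvino_path_cache(...)   -> "/path/to/ggml-base.en-encoder-openvino-cache"

Only the final extension is stripped (rfind('.')), so the ".en" part of the model name is preserved in both derived paths.
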
whisper.h CHANGED
@@ -110,6 +110,24 @@ extern "C" {
 
     WHISPER_API struct whisper_state * whisper_init_state(struct whisper_context * ctx);
 
+    // Given a context, enable use of OpenVINO for encode inference.
+    // model_path: Optional path to OpenVINO encoder IR model. If set to nullptr,
+    //             the path will be generated from the ggml model path that was passed
+    //             in to whisper_init_from_file. For example, if 'path_model' was
+    //             "/path/to/ggml-base.en.bin", then OpenVINO IR model path will be
+    //             assumed to be "/path/to/ggml-base.en-encoder-openvino.xml".
+    // device: OpenVINO device to run inference on ("CPU", "GPU", etc.)
+    // cache_dir: Optional cache directory that can speed up init time, especially for
+    //            GPU, by caching compiled 'blobs' there.
+    //            Set to nullptr if not used.
+    // Returns 1 on success. If OpenVINO is not enabled in build, this
+    // simply returns 0.
+    WHISPER_API int whisper_ctx_init_openvino_encoder(
+        struct whisper_context * ctx,
+        const char * model_path,
+        const char * device,
+        const char * cache_dir);
+
     // Frees all allocated memory
     WHISPER_API void whisper_free      (struct whisper_context * ctx);
     WHISPER_API void whisper_free_state(struct whisper_state * state);
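
Taken together, the public API addition is a single opt-in call made between context creation and transcription. A minimal application-side sketch (assumptions: an OpenVINO-enabled build, models/ggml-base.en.bin on disk, and the matching -encoder-openvino.xml generated as described above):

    #include "whisper.h"

    int main() {
        struct whisper_context * ctx = whisper_init_from_file("models/ggml-base.en.bin");
        if (!ctx) {
            return 1;
        }

        // nullptr model_path/cache_dir -> derive "models/ggml-base.en-encoder-openvino.xml"
        // and "models/ggml-base.en-encoder-openvino-cache" from the ggml model path
        if (!whisper_ctx_init_openvino_encoder(ctx, nullptr, "CPU", nullptr)) {
            // OpenVINO disabled in this build or the IR model was not found;
            // encoding falls back to the regular ggml path
        }

        // ... run whisper_full() as usual ...

        whisper_free(ctx);
        return 0;
    }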