ggerganov committed on
Commit
49fb1c6
·
unverified ·
1 Parent(s): ebe63a9

build : add WHISPER_COREML_ALLOW_FALLBACK to make / CMake (#812)

Browse files
Files changed (3) hide show
  1. CMakeLists.txt +21 -16
  2. Makefile +13 -0
  3. whisper.cpp +59 -60
CMakeLists.txt CHANGED
@@ -39,32 +39,33 @@ endif()
39
 
40
  # options
41
 
42
- option(BUILD_SHARED_LIBS "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
43
 
44
- option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
45
- option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
46
 
47
- option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
48
- option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
49
- option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
50
 
51
- option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
52
- option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
53
 
54
- option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
55
 
56
  if (APPLE)
57
- option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
58
- option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
59
- option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
60
- option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
61
 
62
- option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
 
63
  else()
64
- option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
65
  endif()
66
 
67
- option(WHISPER_PERF "whisper: enable perf timings" OFF)
68
 
69
  # sanitizers
70
 
@@ -119,6 +120,10 @@ if (APPLE)
119
  else()
120
  message(WARNING "CoreML framework not found")
121
  endif()
 
 
 
 
122
  endif()
123
  endif()
124
 
 
39
 
40
  # options
41
 
42
+ option(BUILD_SHARED_LIBS "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
43
 
44
+ option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
45
+ option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
46
 
47
+ option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
48
+ option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
49
+ option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
50
 
51
+ option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
52
+ option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
53
 
54
+ option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
55
 
56
  if (APPLE)
57
+ option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
58
+ option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
59
+ option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
60
+ option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
61
 
62
+ option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
63
+ option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
64
  else()
65
+ option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
66
  endif()
67
 
68
+ option(WHISPER_PERF "whisper: enable perf timings" OFF)
69
 
70
  # sanitizers
71
 
 
120
  else()
121
  message(WARNING "CoreML framework not found")
122
  endif()
123
+
124
+ if (WHISPER_COREML_ALLOW_FALLBACK)
125
+ set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_COREML_ALLOW_FALLBACK)
126
+ endif()
127
  endif()
128
  endif()
129
 
Makefile CHANGED
@@ -123,6 +123,7 @@ endif
123
  ifeq ($(UNAME_M),amd64)
124
  CFLAGS += -mavx -mavx2 -mfma -mf16c
125
  endif
 
126
  ifneq ($(filter ppc64%,$(UNAME_M)),)
127
  POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
128
  ifneq (,$(findstring POWER9,$(POWER9_M)))
@@ -133,6 +134,7 @@ ifneq ($(filter ppc64%,$(UNAME_M)),)
133
  CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
134
  endif
135
  endif
 
136
  ifndef WHISPER_NO_ACCELERATE
137
  # Mac M1 - include Accelerate framework
138
  ifeq ($(UNAME_S),Darwin)
@@ -140,26 +142,36 @@ ifndef WHISPER_NO_ACCELERATE
140
  LDFLAGS += -framework Accelerate
141
  endif
142
  endif
 
143
  ifdef WHISPER_COREML
144
  CXXFLAGS += -DWHISPER_USE_COREML
145
  LDFLAGS += -framework Foundation -framework CoreML
 
 
 
 
146
  endif
 
147
  ifdef WHISPER_OPENBLAS
148
  CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
149
  LDFLAGS += -lopenblas
150
  endif
 
151
  ifdef WHISPER_GPROF
152
  CFLAGS += -pg
153
  CXXFLAGS += -pg
154
  endif
 
155
  ifneq ($(filter aarch64%,$(UNAME_M)),)
156
  CFLAGS += -mcpu=native
157
  CXXFLAGS += -mcpu=native
158
  endif
 
159
  ifneq ($(filter armv6%,$(UNAME_M)),)
160
  # 32-bit Raspberry Pi 1, 2, 3
161
  CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access
162
  endif
 
163
  ifneq ($(filter armv7%,$(UNAME_M)),)
164
  # 32-bit ARM, for example on Armbian or possibly raspbian
165
  CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
@@ -167,6 +179,7 @@ ifneq ($(filter armv7%,$(UNAME_M)),)
167
  # 64-bit ARM, use these (TODO: auto-detect 64-bit)
168
  # CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
169
  endif
 
170
  ifneq ($(filter armv8%,$(UNAME_M)),)
171
  # Raspberry Pi 4
172
  CFLAGS += -mfp16-format=ieee -mno-unaligned-access
 
123
  ifeq ($(UNAME_M),amd64)
124
  CFLAGS += -mavx -mavx2 -mfma -mf16c
125
  endif
126
+
127
  ifneq ($(filter ppc64%,$(UNAME_M)),)
128
  POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
129
  ifneq (,$(findstring POWER9,$(POWER9_M)))
 
134
  CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
135
  endif
136
  endif
137
+
138
  ifndef WHISPER_NO_ACCELERATE
139
  # Mac M1 - include Accelerate framework
140
  ifeq ($(UNAME_S),Darwin)
 
142
  LDFLAGS += -framework Accelerate
143
  endif
144
  endif
145
+
146
  ifdef WHISPER_COREML
147
  CXXFLAGS += -DWHISPER_USE_COREML
148
  LDFLAGS += -framework Foundation -framework CoreML
149
+
150
+ ifdef WHISPER_COREML_ALLOW_FALLBACK
151
+ CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
152
+ endif
153
  endif
154
+
155
  ifdef WHISPER_OPENBLAS
156
  CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
157
  LDFLAGS += -lopenblas
158
  endif
159
+
160
  ifdef WHISPER_GPROF
161
  CFLAGS += -pg
162
  CXXFLAGS += -pg
163
  endif
164
+
165
  ifneq ($(filter aarch64%,$(UNAME_M)),)
166
  CFLAGS += -mcpu=native
167
  CXXFLAGS += -mcpu=native
168
  endif
169
+
170
  ifneq ($(filter armv6%,$(UNAME_M)),)
171
  # 32-bit Raspberry Pi 1, 2, 3
172
  CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access
173
  endif
174
+
175
  ifneq ($(filter armv7%,$(UNAME_M)),)
176
  # 32-bit ARM, for example on Armbian or possibly raspbian
177
  CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
 
179
  # 64-bit ARM, use these (TODO: auto-detect 64-bit)
180
  # CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
181
  endif
182
+
183
  ifneq ($(filter armv8%,$(UNAME_M)),)
184
  # Raspberry Pi 4
185
  CFLAGS += -mfp16-format=ieee -mno-unaligned-access
whisper.cpp CHANGED
@@ -1393,18 +1393,17 @@ static bool whisper_encode_internal(
1393
  const bool use_coreml = wstate.ctx_coreml != nullptr;
1394
  #endif
1395
 
1396
- if (!use_coreml)
1397
- {
1398
  // convolution + gelu
1399
  {
1400
  wstate.use_buf(ctx0, 1);
1401
 
1402
  cur = ggml_conv_1d_1s(ctx0, model.e_conv_1_w, mel);
1403
  cur = ggml_add(ctx0,
1404
- ggml_repeat(ctx0,
1405
- model.e_conv_1_b,
1406
- cur),
1407
- cur);
1408
 
1409
  cur = ggml_gelu(ctx0, cur);
1410
 
@@ -1412,10 +1411,10 @@ static bool whisper_encode_internal(
1412
 
1413
  cur = ggml_conv_1d_2s(ctx0, model.e_conv_2_w, cur);
1414
  cur = ggml_add(ctx0,
1415
- ggml_repeat(ctx0,
1416
- model.e_conv_2_b,
1417
- cur),
1418
- cur);
1419
 
1420
  cur = ggml_gelu(ctx0, cur);
1421
  }
@@ -1461,10 +1460,10 @@ static bool whisper_encode_internal(
1461
 
1462
  // cur = ln_0_w*cur + ln_0_b
1463
  cur = ggml_add(ctx0,
1464
- ggml_mul(ctx0,
1465
- ggml_repeat(ctx0, layer.attn_ln_0_w, cur),
1466
- cur),
1467
- ggml_repeat(ctx0, layer.attn_ln_0_b, cur));
1468
  }
1469
 
1470
  // self-attention
@@ -1472,39 +1471,39 @@ static bool whisper_encode_internal(
1472
  wstate.use_buf(ctx0, 1);
1473
 
1474
  struct ggml_tensor * Qcur = ggml_mul_mat(ctx0,
1475
- layer.attn_q_w,
1476
- cur);
1477
 
1478
  Qcur = ggml_add(ctx0,
1479
- ggml_repeat(ctx0,
1480
- layer.attn_q_b,
1481
- Qcur),
1482
- Qcur);
1483
 
1484
  //Qcur = ggml_scale(ctx0, Qcur, ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
1485
 
1486
  // note: no bias for Key
1487
  struct ggml_tensor * Kcur = ggml_mul_mat(ctx0,
1488
- layer.attn_k_w,
1489
- cur);
1490
 
1491
  //Kcur = ggml_scale(ctx0, Kcur, ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
1492
 
1493
  struct ggml_tensor * Vcur = ggml_mul_mat(ctx0,
1494
- layer.attn_v_w,
1495
- cur);
1496
 
1497
  Vcur = ggml_add(ctx0,
1498
- ggml_repeat(ctx0,
1499
- layer.attn_v_b,
1500
- Vcur),
1501
- Vcur);
1502
 
1503
  // ------
1504
 
1505
  wstate.use_buf(ctx0, 0);
1506
 
1507
- #ifdef WHISPER_USE_FLASH_ATTN
1508
  struct ggml_tensor * Q =
1509
  ggml_permute(ctx0,
1510
  ggml_cpy(ctx0,
@@ -1529,7 +1528,7 @@ static bool whisper_encode_internal(
1529
  ggml_new_tensor_3d(ctx0, wctx.wtype, n_ctx, n_state/n_head, n_head));
1530
 
1531
  struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, false);
1532
- #else
1533
  struct ggml_tensor * Q =
1534
  ggml_permute(ctx0,
1535
  ggml_cpy(ctx0,
@@ -1575,14 +1574,14 @@ static bool whisper_encode_internal(
1575
  );
1576
 
1577
  struct ggml_tensor * KQV = ggml_mul_mat(ctx0, ggml_transpose(ctx0, V), KQ_soft_max);
1578
- #endif
1579
  struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
1580
 
1581
  wstate.use_buf(ctx0, 1);
1582
 
1583
  cur = ggml_cpy(ctx0,
1584
- KQV_merged,
1585
- ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx));
1586
  }
1587
 
1588
  // projection
@@ -1590,14 +1589,14 @@ static bool whisper_encode_internal(
1590
  wstate.use_buf(ctx0, 0);
1591
 
1592
  cur = ggml_mul_mat(ctx0,
1593
- layer.attn_ln_1_w,
1594
- cur);
1595
 
1596
  wstate.use_buf(ctx0, 1);
1597
 
1598
  cur = ggml_add(ctx0,
1599
- ggml_repeat(ctx0, layer.attn_ln_1_b, cur),
1600
- cur);
1601
  }
1602
 
1603
  wstate.use_buf(ctx0, 2);
@@ -1619,31 +1618,31 @@ static bool whisper_encode_internal(
1619
 
1620
  // cur = mlp_ln_w*cur + mlp_ln_b
1621
  cur = ggml_add(ctx0,
1622
- ggml_mul(ctx0,
1623
- ggml_repeat(ctx0, layer.mlp_ln_w, cur),
1624
- cur),
1625
- ggml_repeat(ctx0, layer.mlp_ln_b, cur));
1626
  }
1627
 
1628
- #ifdef WHISPER_USE_FLASH_FF
1629
  wstate.use_buf(ctx0, 0);
1630
 
1631
  cur = ggml_flash_ff(ctx0,
1632
- ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wstate.wtype, n_state, n_ctx)),
1633
- layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b);
1634
- #else
1635
  wstate.use_buf(ctx0, 0);
1636
 
1637
  // fully connected
1638
  cur = ggml_mul_mat(ctx0,
1639
- layer.mlp_0_w,
1640
- cur);
1641
 
1642
  wstate.use_buf(ctx0, 1);
1643
 
1644
  cur = ggml_add(ctx0,
1645
- ggml_repeat(ctx0, layer.mlp_0_b, cur),
1646
- cur);
1647
 
1648
  wstate.use_buf(ctx0, 0);
1649
 
@@ -1654,15 +1653,15 @@ static bool whisper_encode_internal(
1654
 
1655
  // projection
1656
  cur = ggml_mul_mat(ctx0,
1657
- layer.mlp_1_w,
1658
- cur);
1659
 
1660
  wstate.use_buf(ctx0, 0);
1661
 
1662
  cur = ggml_add(ctx0,
1663
- ggml_repeat(ctx0, layer.mlp_1_b, cur),
1664
- cur);
1665
- #endif
1666
  }
1667
 
1668
  wstate.use_buf(ctx0, 3);
@@ -1682,10 +1681,10 @@ static bool whisper_encode_internal(
1682
 
1683
  // cur = ln_f_g*cur + ln_f_b
1684
  cur = ggml_add(ctx0,
1685
- ggml_mul(ctx0,
1686
- ggml_repeat(ctx0, model.e_ln_w, cur),
1687
- cur),
1688
- ggml_repeat(ctx0, model.e_ln_b, cur));
1689
  }
1690
 
1691
  wstate.use_buf(ctx0, -1);
@@ -2580,11 +2579,11 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
2580
  state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
2581
  if (!state->ctx_coreml) {
2582
  fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
2583
- #ifndef WHISPER_COREML_ALLOW_FALLBACK
2584
  return nullptr;
2585
  #endif
2586
  } else {
2587
- fprintf(stderr, "%s: Core ML model loaded\n", __func__);
2588
  }
2589
  #endif
2590
 
 
1393
  const bool use_coreml = wstate.ctx_coreml != nullptr;
1394
  #endif
1395
 
1396
+ if (!use_coreml) {
 
1397
  // convolution + gelu
1398
  {
1399
  wstate.use_buf(ctx0, 1);
1400
 
1401
  cur = ggml_conv_1d_1s(ctx0, model.e_conv_1_w, mel);
1402
  cur = ggml_add(ctx0,
1403
+ ggml_repeat(ctx0,
1404
+ model.e_conv_1_b,
1405
+ cur),
1406
+ cur);
1407
 
1408
  cur = ggml_gelu(ctx0, cur);
1409
 
 
1411
 
1412
  cur = ggml_conv_1d_2s(ctx0, model.e_conv_2_w, cur);
1413
  cur = ggml_add(ctx0,
1414
+ ggml_repeat(ctx0,
1415
+ model.e_conv_2_b,
1416
+ cur),
1417
+ cur);
1418
 
1419
  cur = ggml_gelu(ctx0, cur);
1420
  }
 
1460
 
1461
  // cur = ln_0_w*cur + ln_0_b
1462
  cur = ggml_add(ctx0,
1463
+ ggml_mul(ctx0,
1464
+ ggml_repeat(ctx0, layer.attn_ln_0_w, cur),
1465
+ cur),
1466
+ ggml_repeat(ctx0, layer.attn_ln_0_b, cur));
1467
  }
1468
 
1469
  // self-attention
 
1471
  wstate.use_buf(ctx0, 1);
1472
 
1473
  struct ggml_tensor * Qcur = ggml_mul_mat(ctx0,
1474
+ layer.attn_q_w,
1475
+ cur);
1476
 
1477
  Qcur = ggml_add(ctx0,
1478
+ ggml_repeat(ctx0,
1479
+ layer.attn_q_b,
1480
+ Qcur),
1481
+ Qcur);
1482
 
1483
  //Qcur = ggml_scale(ctx0, Qcur, ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
1484
 
1485
  // note: no bias for Key
1486
  struct ggml_tensor * Kcur = ggml_mul_mat(ctx0,
1487
+ layer.attn_k_w,
1488
+ cur);
1489
 
1490
  //Kcur = ggml_scale(ctx0, Kcur, ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
1491
 
1492
  struct ggml_tensor * Vcur = ggml_mul_mat(ctx0,
1493
+ layer.attn_v_w,
1494
+ cur);
1495
 
1496
  Vcur = ggml_add(ctx0,
1497
+ ggml_repeat(ctx0,
1498
+ layer.attn_v_b,
1499
+ Vcur),
1500
+ Vcur);
1501
 
1502
  // ------
1503
 
1504
  wstate.use_buf(ctx0, 0);
1505
 
1506
+ #ifdef WHISPER_USE_FLASH_ATTN
1507
  struct ggml_tensor * Q =
1508
  ggml_permute(ctx0,
1509
  ggml_cpy(ctx0,
 
1528
  ggml_new_tensor_3d(ctx0, wctx.wtype, n_ctx, n_state/n_head, n_head));
1529
 
1530
  struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, false);
1531
+ #else
1532
  struct ggml_tensor * Q =
1533
  ggml_permute(ctx0,
1534
  ggml_cpy(ctx0,
 
1574
  );
1575
 
1576
  struct ggml_tensor * KQV = ggml_mul_mat(ctx0, ggml_transpose(ctx0, V), KQ_soft_max);
1577
+ #endif
1578
  struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
1579
 
1580
  wstate.use_buf(ctx0, 1);
1581
 
1582
  cur = ggml_cpy(ctx0,
1583
+ KQV_merged,
1584
+ ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx));
1585
  }
1586
 
1587
  // projection
 
1589
  wstate.use_buf(ctx0, 0);
1590
 
1591
  cur = ggml_mul_mat(ctx0,
1592
+ layer.attn_ln_1_w,
1593
+ cur);
1594
 
1595
  wstate.use_buf(ctx0, 1);
1596
 
1597
  cur = ggml_add(ctx0,
1598
+ ggml_repeat(ctx0, layer.attn_ln_1_b, cur),
1599
+ cur);
1600
  }
1601
 
1602
  wstate.use_buf(ctx0, 2);
 
1618
 
1619
  // cur = mlp_ln_w*cur + mlp_ln_b
1620
  cur = ggml_add(ctx0,
1621
+ ggml_mul(ctx0,
1622
+ ggml_repeat(ctx0, layer.mlp_ln_w, cur),
1623
+ cur),
1624
+ ggml_repeat(ctx0, layer.mlp_ln_b, cur));
1625
  }
1626
 
1627
+ #ifdef WHISPER_USE_FLASH_FF
1628
  wstate.use_buf(ctx0, 0);
1629
 
1630
  cur = ggml_flash_ff(ctx0,
1631
+ ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wstate.wtype, n_state, n_ctx)),
1632
+ layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b);
1633
+ #else
1634
  wstate.use_buf(ctx0, 0);
1635
 
1636
  // fully connected
1637
  cur = ggml_mul_mat(ctx0,
1638
+ layer.mlp_0_w,
1639
+ cur);
1640
 
1641
  wstate.use_buf(ctx0, 1);
1642
 
1643
  cur = ggml_add(ctx0,
1644
+ ggml_repeat(ctx0, layer.mlp_0_b, cur),
1645
+ cur);
1646
 
1647
  wstate.use_buf(ctx0, 0);
1648
 
 
1653
 
1654
  // projection
1655
  cur = ggml_mul_mat(ctx0,
1656
+ layer.mlp_1_w,
1657
+ cur);
1658
 
1659
  wstate.use_buf(ctx0, 0);
1660
 
1661
  cur = ggml_add(ctx0,
1662
+ ggml_repeat(ctx0, layer.mlp_1_b, cur),
1663
+ cur);
1664
+ #endif
1665
  }
1666
 
1667
  wstate.use_buf(ctx0, 3);
 
1681
 
1682
  // cur = ln_f_g*cur + ln_f_b
1683
  cur = ggml_add(ctx0,
1684
+ ggml_mul(ctx0,
1685
+ ggml_repeat(ctx0, model.e_ln_w, cur),
1686
+ cur),
1687
+ ggml_repeat(ctx0, model.e_ln_b, cur));
1688
  }
1689
 
1690
  wstate.use_buf(ctx0, -1);
 
2579
  state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
2580
  if (!state->ctx_coreml) {
2581
  fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
2582
+ #ifndef WHISPER_COREML_ALLOW_FALLBACK
2583
  return nullptr;
2584
  #endif
2585
  } else {
2586
+ fprintf(stderr, "%s: Core ML model loaded\n", __func__);
2587
  }
2588
  #endif
2589