build : add WHISPER_COREML_ALLOW_FALLBACK to make / CMake (#812)
Files changed:
- CMakeLists.txt +21 -16
- Makefile +13 -0
- whisper.cpp +59 -60
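In short: when whisper.cpp is built with Core ML support, whisper_init_state() previously returned nullptr whenever the Core ML encoder model could not be loaded. With the new WHISPER_COREML_ALLOW_FALLBACK option (CMake) / variable (Makefile), the load failure is still logged, but initialization continues with ctx_coreml left null, so whisper_encode_internal() takes its regular ggml encoder branch (the !use_coreml path) instead of aborting.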
CMakeLists.txt
CHANGED

@@ -39,32 +39,33 @@ endif()
 
 # options
 
-option(BUILD_SHARED_LIBS …
+option(BUILD_SHARED_LIBS              "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
 
-option(WHISPER_ALL_WARNINGS …
-option(WHISPER_ALL_WARNINGS_3RD_PARTY …
+option(WHISPER_ALL_WARNINGS           "whisper: enable all compiler warnings" ON)
+option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
 
-option(WHISPER_SANITIZE_THREAD …
-option(WHISPER_SANITIZE_ADDRESS …
-option(WHISPER_SANITIZE_UNDEFINED …
+option(WHISPER_SANITIZE_THREAD        "whisper: enable thread sanitizer" OFF)
+option(WHISPER_SANITIZE_ADDRESS       "whisper: enable address sanitizer" OFF)
+option(WHISPER_SANITIZE_UNDEFINED     "whisper: enable undefined sanitizer" OFF)
 
-option(WHISPER_BUILD_TESTS …
-option(WHISPER_BUILD_EXAMPLES …
+option(WHISPER_BUILD_TESTS            "whisper: build tests" ${WHISPER_STANDALONE})
+option(WHISPER_BUILD_EXAMPLES         "whisper: build examples" ${WHISPER_STANDALONE})
 
-option(WHISPER_SUPPORT_SDL2 …
+option(WHISPER_SUPPORT_SDL2           "whisper: support for libSDL2" OFF)
 
 if (APPLE)
-    option(WHISPER_NO_ACCELERATE …
-    option(WHISPER_NO_AVX …
-    option(WHISPER_NO_AVX2 …
-    option(WHISPER_NO_FMA …
+    option(WHISPER_NO_ACCELERATE         "whisper: disable Accelerate framework" OFF)
+    option(WHISPER_NO_AVX                "whisper: disable AVX" OFF)
+    option(WHISPER_NO_AVX2               "whisper: disable AVX2" OFF)
+    option(WHISPER_NO_FMA                "whisper: disable FMA" OFF)
 
-    option(WHISPER_COREML …
+    option(WHISPER_COREML                "whisper: enable Core ML framework" OFF)
+    option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
 else()
-    option(WHISPER_SUPPORT_OPENBLAS …
+    option(WHISPER_SUPPORT_OPENBLAS      "whisper: support for OpenBLAS" OFF)
 endif()
 
-option(WHISPER_PERF …
+option(WHISPER_PERF                   "whisper: enable perf timings" OFF)
 
 # sanitizers
 
@@ -119,6 +120,10 @@ if (APPLE)
         else()
             message(WARNING "CoreML framework not found")
         endif()
+
+        if (WHISPER_COREML_ALLOW_FALLBACK)
+            set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_COREML_ALLOW_FALLBACK)
+        endif()
     endif()
 endif()
 
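For reference, a minimal sketch of a CMake configure line that exercises the new option (the build-directory layout and any other flags are assumptions, not part of the commit):

    cmake -DWHISPER_COREML=1 -DWHISPER_COREML_ALLOW_FALLBACK=1 ..
    make -j

Note one quirk visible in the hunks as committed: the CMake path defines WHISPER_USE_COREML_ALLOW_FALLBACK, while whisper.cpp tests #ifndef WHISPER_COREML_ALLOW_FALLBACK; only the Makefile path defines the latter macro directly.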
Makefile
CHANGED

@@ -123,6 +123,7 @@ endif
 ifeq ($(UNAME_M),amd64)
 	CFLAGS += -mavx -mavx2 -mfma -mf16c
 endif
+
 ifneq ($(filter ppc64%,$(UNAME_M)),)
 	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
 	ifneq (,$(findstring POWER9,$(POWER9_M)))
@@ -133,6 +134,7 @@ ifneq ($(filter ppc64%,$(UNAME_M)),)
 		CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
 	endif
 endif
+
 ifndef WHISPER_NO_ACCELERATE
 	# Mac M1 - include Accelerate framework
 	ifeq ($(UNAME_S),Darwin)
@@ -140,26 +142,36 @@ ifndef WHISPER_NO_ACCELERATE
 		LDFLAGS += -framework Accelerate
 	endif
 endif
+
 ifdef WHISPER_COREML
 	CXXFLAGS += -DWHISPER_USE_COREML
 	LDFLAGS += -framework Foundation -framework CoreML
+
+ifdef WHISPER_COREML_ALLOW_FALLBACK
+	CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
+endif
 endif
+
 ifdef WHISPER_OPENBLAS
 	CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
 	LDFLAGS += -lopenblas
 endif
+
 ifdef WHISPER_GPROF
 	CFLAGS += -pg
 	CXXFLAGS += -pg
 endif
+
 ifneq ($(filter aarch64%,$(UNAME_M)),)
 	CFLAGS += -mcpu=native
 	CXXFLAGS += -mcpu=native
 endif
+
 ifneq ($(filter armv6%,$(UNAME_M)),)
 	# 32-bit Raspberry Pi 1, 2, 3
 	CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access
 endif
+
 ifneq ($(filter armv7%,$(UNAME_M)),)
 	# 32-bit ARM, for example on Armbian or possibly raspbian
 	CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
@@ -167,6 +179,7 @@ ifneq ($(filter armv7%,$(UNAME_M)),)
 	# 64-bit ARM, use these (TODO: auto-detect 64-bit)
 	# CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
 endif
+
 ifneq ($(filter armv8%,$(UNAME_M)),)
 	# Raspberry Pi 4
 	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
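Since the Makefile tests both switches with ifdef, they can be passed straight on the make command line; a minimal sketch (assuming the default build targets):

    WHISPER_COREML=1 WHISPER_COREML_ALLOW_FALLBACK=1 make -j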
whisper.cpp
CHANGED

(In the hunks below, removed lines whose text did not survive extraction are omitted; they were the same expressions at the old indentation.)

@@ -1393,18 +1393,17 @@ static bool whisper_encode_internal(
     const bool use_coreml = wstate.ctx_coreml != nullptr;
 #endif
 
-    if (!use_coreml)
-    {
+    if (!use_coreml) {
         // convolution + gelu
         {
             wstate.use_buf(ctx0, 1);
 
             cur = ggml_conv_1d_1s(ctx0, model.e_conv_1_w, mel);
             cur = ggml_add(ctx0,
+                    ggml_repeat(ctx0,
+                        model.e_conv_1_b,
+                        cur),
+                    cur);
 
             cur = ggml_gelu(ctx0, cur);
 
@@ -1412,10 +1411,10 @@ static bool whisper_encode_internal(
 
             cur = ggml_conv_1d_2s(ctx0, model.e_conv_2_w, cur);
             cur = ggml_add(ctx0,
+                    ggml_repeat(ctx0,
+                        model.e_conv_2_b,
+                        cur),
+                    cur);
 
             cur = ggml_gelu(ctx0, cur);
         }
@@ -1461,10 +1460,10 @@ static bool whisper_encode_internal(
 
             // cur = ln_0_w*cur + ln_0_b
             cur = ggml_add(ctx0,
+                    ggml_mul(ctx0,
+                        ggml_repeat(ctx0, layer.attn_ln_0_w, cur),
+                        cur),
+                    ggml_repeat(ctx0, layer.attn_ln_0_b, cur));
         }
 
         // self-attention
@@ -1472,39 +1471,39 @@ static bool whisper_encode_internal(
             wstate.use_buf(ctx0, 1);
 
             struct ggml_tensor * Qcur = ggml_mul_mat(ctx0,
+                    layer.attn_q_w,
+                    cur);
 
             Qcur = ggml_add(ctx0,
+                    ggml_repeat(ctx0,
+                        layer.attn_q_b,
+                        Qcur),
+                    Qcur);
 
             //Qcur = ggml_scale(ctx0, Qcur, ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
 
             // note: no bias for Key
             struct ggml_tensor * Kcur = ggml_mul_mat(ctx0,
+                    layer.attn_k_w,
+                    cur);
 
             //Kcur = ggml_scale(ctx0, Kcur, ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
 
             struct ggml_tensor * Vcur = ggml_mul_mat(ctx0,
+                    layer.attn_v_w,
+                    cur);
 
             Vcur = ggml_add(ctx0,
+                    ggml_repeat(ctx0,
+                        layer.attn_v_b,
+                        Vcur),
+                    Vcur);
 
             // ------
 
             wstate.use_buf(ctx0, 0);
 
+#ifdef WHISPER_USE_FLASH_ATTN
             struct ggml_tensor * Q =
                 ggml_permute(ctx0,
                     ggml_cpy(ctx0,
@@ -1529,7 +1528,7 @@ static bool whisper_encode_internal(
                         ggml_new_tensor_3d(ctx0, wctx.wtype, n_ctx, n_state/n_head, n_head));
 
             struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, false);
+#else
             struct ggml_tensor * Q =
                 ggml_permute(ctx0,
                     ggml_cpy(ctx0,
@@ -1575,14 +1574,14 @@ static bool whisper_encode_internal(
                 );
 
             struct ggml_tensor * KQV = ggml_mul_mat(ctx0, ggml_transpose(ctx0, V), KQ_soft_max);
+#endif
             struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
 
             wstate.use_buf(ctx0, 1);
 
             cur = ggml_cpy(ctx0,
+                    KQV_merged,
+                    ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx));
         }
 
         // projection
@@ -1590,14 +1589,14 @@ static bool whisper_encode_internal(
             wstate.use_buf(ctx0, 0);
 
             cur = ggml_mul_mat(ctx0,
+                    layer.attn_ln_1_w,
+                    cur);
 
             wstate.use_buf(ctx0, 1);
 
             cur = ggml_add(ctx0,
+                    ggml_repeat(ctx0, layer.attn_ln_1_b, cur),
+                    cur);
         }
 
         wstate.use_buf(ctx0, 2);
@@ -1619,31 +1618,31 @@ static bool whisper_encode_internal(
 
             // cur = mlp_ln_w*cur + mlp_ln_b
             cur = ggml_add(ctx0,
+                    ggml_mul(ctx0,
+                        ggml_repeat(ctx0, layer.mlp_ln_w, cur),
+                        cur),
+                    ggml_repeat(ctx0, layer.mlp_ln_b, cur));
         }
 
+#ifdef WHISPER_USE_FLASH_FF
         wstate.use_buf(ctx0, 0);
 
         cur = ggml_flash_ff(ctx0,
+                ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wstate.wtype, n_state, n_ctx)),
+                layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b);
+#else
         wstate.use_buf(ctx0, 0);
 
         // fully connected
        cur = ggml_mul_mat(ctx0,
+                layer.mlp_0_w,
+                cur);
 
         wstate.use_buf(ctx0, 1);
 
         cur = ggml_add(ctx0,
+                ggml_repeat(ctx0, layer.mlp_0_b, cur),
+                cur);
 
         wstate.use_buf(ctx0, 0);
 
@@ -1654,15 +1653,15 @@ static bool whisper_encode_internal(
 
         // projection
         cur = ggml_mul_mat(ctx0,
+                layer.mlp_1_w,
+                cur);
 
         wstate.use_buf(ctx0, 0);
 
         cur = ggml_add(ctx0,
+                ggml_repeat(ctx0, layer.mlp_1_b, cur),
+                cur);
+#endif
     }
 
     wstate.use_buf(ctx0, 3);
@@ -1682,10 +1681,10 @@ static bool whisper_encode_internal(
 
         // cur = ln_f_g*cur + ln_f_b
         cur = ggml_add(ctx0,
+                ggml_mul(ctx0,
+                    ggml_repeat(ctx0, model.e_ln_w, cur),
+                    cur),
+                ggml_repeat(ctx0, model.e_ln_b, cur));
     }
 
     wstate.use_buf(ctx0, -1);
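The re-indented encoder lines above all use the same ggml idiom: a small bias or layer-norm tensor is first materialized to the operand's shape with ggml_repeat, then combined with ggml_add / ggml_mul. Below is a self-contained sketch of the idiom, written against the ggml API roughly as bundled with whisper.cpp around this commit (signatures such as ggml_build_forward and the cgraph n_threads field have since changed; sizes and values here are arbitrary):

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024, // arena for tensors + graph
            /*.mem_buffer =*/ NULL,
        };
        struct ggml_context * ctx = ggml_init(params);

        // x: a 2-D activation [n_state, n_ctx]; b: a 1-D bias, like e_conv_1_b
        struct ggml_tensor * x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);
        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        ggml_set_f32(x, 1.0f);
        ggml_set_f32(b, 0.5f);

        // the idiom from the diff: broadcast b to x's shape, then add
        struct ggml_tensor * y = ggml_add(ctx, ggml_repeat(ctx, b, x), x);

        struct ggml_cgraph gf = ggml_build_forward(y);
        gf.n_threads = 1;
        ggml_graph_compute(ctx, &gf);

        printf("y[0] = %f\n", ggml_get_f32_1d(y, 0)); // expect 1.5
        ggml_free(ctx);
        return 0;
    }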
@@ -2580,11 +2579,11 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
     state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
     if (!state->ctx_coreml) {
         fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
+#ifndef WHISPER_COREML_ALLOW_FALLBACK
         return nullptr;
+#endif
     } else {
+        fprintf(stderr, "%s: Core ML model loaded\n", __func__);
     }
 #endif
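Putting the whisper_init_state() hunk in plain terms, here is a condensed restatement of the fallback control flow (the surrounding function is elided; the comments are editorial, not from the commit):

    state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
    if (!state->ctx_coreml) {
        fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
    #ifndef WHISPER_COREML_ALLOW_FALLBACK
        return nullptr; // Core ML was mandatory: abort state initialization
    #endif
        // with the fallback macro defined, execution falls through with
        // ctx_coreml == nullptr, so whisper_encode_internal() later takes
        // the !use_coreml (plain ggml) encoder path
    } else {
        fprintf(stderr, "%s: Core ML model loaded\n", __func__);
    }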
|