tests : add non-cont unary tests (llama/7857)
* tests : add non-cont unary tests
* ggml : update unary asserts and "supports_op"
ggml-ci
- ggml-cuda.cu +1 -1
- ggml-cuda/unary.cu +20 -0
- ggml-kompute.cpp +1 -1
- ggml-metal.m +1 -1
- ggml-sycl.cpp +1 -1
- ggml-vulkan.cpp +1 -1
- ggml.c +45 -52
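
The tests exercise the unary ops on non-contiguous sources. A rough sketch of the idea (assumed sizes and variable names, not the actual test-backend-ops code): a strided view keeps each row dense, so it still satisfies the new ggml_is_contiguous_1 assert on the CPU path, while ggml_is_contiguous is false, and backends that require fully packed data now report the op as unsupported.

#include "ggml.h"

// Illustrative sketch only (assumed sizes and names, not code from this
// commit): build a source whose rows are dense but not back-to-back.
int main(void) {
    struct ggml_init_params ip = { 16*1024*1024, NULL, false };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 128, 8, 4);

    // view the first 64 elements of every row: nb[0] stays sizeof(float),
    // so ggml_is_contiguous_1(v) holds, but there is now a gap between
    // rows and ggml_is_contiguous(v) is false
    struct ggml_tensor * v = ggml_view_3d(ctx, t, 64, 8, 4, t->nb[1], t->nb[2], 0);

    struct ggml_tensor * y = ggml_tanh(ctx, v); // passes the new assert in ggml_unary_impl

    (void) y;
    ggml_free(ctx);
    return 0;
}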
ggml-cuda.cu

@@ -2740,7 +2740,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
         case GGML_UNARY_OP_HARDSWISH:
         case GGML_UNARY_OP_GELU_QUICK:
         case GGML_UNARY_OP_TANH:
-            return true;
+            return ggml_is_contiguous(op->src[0]);
         default:
             return false;
     }
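
The CUDA kernels index their inputs as flat arrays, so supports_op now declines unary ops on non-contiguous sources and lets the scheduler fall back to a backend that handles them (the CPU one, after the ggml.c changes below). For an F32 tensor the check amounts to gap-free strides; a simplified illustration of the predicate follows (the real ggml_is_contiguous in ggml.c also accounts for block-quantized types):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Simplified, F32-only version of the contiguity test, for illustration.
struct strides { int64_t ne[4]; size_t nb[4]; };

static bool is_contiguous_f32(const struct strides * t) {
    return t->nb[0] == sizeof(float)       &&
           t->nb[1] == t->nb[0] * t->ne[0] &&
           t->nb[2] == t->nb[1] * t->ne[1] &&
           t->nb[3] == t->nb[2] * t->ne[2];
}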
ggml-cuda/unary.cu

@@ -148,6 +148,8 @@ void ggml_cuda_op_gelu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -160,6 +162,8 @@ void ggml_cuda_op_silu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -172,6 +176,8 @@ void ggml_cuda_op_gelu_quick(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -184,6 +190,8 @@ void ggml_cuda_op_tanh(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -196,6 +204,8 @@ void ggml_cuda_op_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -208,6 +218,8 @@ void ggml_cuda_op_sigmoid(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -220,6 +232,8 @@ void ggml_cuda_op_hardsigmoid(ggml_backend_cuda_context & ctx, ggml_tensor * dst
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -232,6 +246,8 @@ void ggml_cuda_op_hardswish(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -244,6 +260,8 @@ void ggml_cuda_op_leaky_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -259,6 +277,8 @@ void ggml_cuda_op_sqr(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
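
Each of these ops launches a 1-D kernel over ggml_nelements(src0) and walks src0->data linearly, which is only correct for packed memory; the new GGML_ASSERT makes that contract explicit. A CPU analogue of the indexing pattern (illustrative sketch, not code from this commit):

#include <math.h>
#include <stdint.h>

// One "thread" (here: loop iteration) per element, flat indexing: valid
// only when x and y are densely packed, which is what the new assert
// guarantees. 'n' corresponds to ggml_nelements(src0).
static void tanh_flat_f32(const float * x, float * y, int64_t n) {
    for (int64_t i = 0; i < n; i++) {
        y[i] = tanhf(x[i]);
    }
}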
ggml-kompute.cpp

@@ -1340,7 +1340,7 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
         case GGML_UNARY_OP_RELU:
         case GGML_UNARY_OP_GELU:
         case GGML_UNARY_OP_SILU:
-            return true;
+            return ggml_is_contiguous(op->src[0]);
         default:
             ;
     }
ggml-metal.m

@@ -744,7 +744,7 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const
         case GGML_UNARY_OP_GELU:
         case GGML_UNARY_OP_GELU_QUICK:
         case GGML_UNARY_OP_SILU:
-            return true;
+            return ggml_is_contiguous(op->src[0]);
         default:
             return false;
     }
ggml-sycl.cpp

@@ -17190,7 +17190,7 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
         case GGML_UNARY_OP_HARDSWISH:
         case GGML_UNARY_OP_GELU_QUICK:
         case GGML_UNARY_OP_TANH:
-            return true;
+            return ggml_is_contiguous(op->src[0]);
         default:
             return false;
     }
ggml-vulkan.cpp

@@ -6439,7 +6439,7 @@ GGML_CALL static bool ggml_backend_vk_supports_op(ggml_backend_t backend, const
         case GGML_UNARY_OP_GELU:
         case GGML_UNARY_OP_SILU:
         case GGML_UNARY_OP_RELU:
-            return true;
+            return ggml_is_contiguous(op->src[0]);
         default:
             return false;
     }
ggml.c

@@ -7345,6 +7345,8 @@ static struct ggml_tensor * ggml_unary_impl(
         struct ggml_tensor * a,
         enum ggml_unary_op op,
         bool inplace) {
+    GGML_ASSERT(ggml_is_contiguous_1(a));
+
     bool is_node = false;
 
     if (!inplace && (a->grad)) {
@@ -11009,6 +11011,8 @@ static void ggml_compute_forward_abs_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11018,9 +11022,6 @@ static void ggml_compute_forward_abs_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_abs_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11055,6 +11056,8 @@ static void ggml_compute_forward_sgn_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11064,9 +11067,6 @@ static void ggml_compute_forward_sgn_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_sgn_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11101,6 +11101,8 @@ static void ggml_compute_forward_neg_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11110,9 +11112,6 @@ static void ggml_compute_forward_neg_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_neg_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11147,6 +11146,8 @@ static void ggml_compute_forward_step_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11156,9 +11157,6 @@ static void ggml_compute_forward_step_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_step_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11193,6 +11191,8 @@ static void ggml_compute_forward_tanh_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11202,9 +11202,6 @@ static void ggml_compute_forward_tanh_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_tanh_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11239,6 +11236,8 @@ static void ggml_compute_forward_elu_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11248,9 +11247,6 @@ static void ggml_compute_forward_elu_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_elu_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11285,6 +11281,8 @@ static void ggml_compute_forward_relu_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11294,9 +11292,6 @@ static void ggml_compute_forward_relu_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_relu_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11331,6 +11326,8 @@ static void ggml_compute_forward_sigmoid_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11340,9 +11337,6 @@ static void ggml_compute_forward_sigmoid_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_sigmoid_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11376,9 +11370,9 @@ static void ggml_compute_forward_gelu_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(ggml_is_contiguous_1(src0));
-    GGML_ASSERT(ggml_is_contiguous_1(dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -11439,9 +11433,9 @@ static void ggml_compute_forward_gelu_quick_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(ggml_is_contiguous_1(src0));
-    GGML_ASSERT(ggml_is_contiguous_1(dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -11502,9 +11496,9 @@ static void ggml_compute_forward_silu_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(ggml_is_contiguous_1(src0));
-    GGML_ASSERT(ggml_is_contiguous_1(dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -11565,6 +11559,8 @@ static void ggml_compute_forward_leaky_relu_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11614,11 +11610,11 @@ static void ggml_compute_forward_silu_back_f32(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * grad = dst->src[1];
 
-    GGML_ASSERT(ggml_is_contiguous_1(grad));
-    GGML_ASSERT(ggml_is_contiguous_1(src0));
-    GGML_ASSERT(ggml_is_contiguous_1(dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, grad));
+    assert(ggml_is_contiguous_1(grad));
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+    assert(ggml_are_same_shape(src0, grad));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -11680,6 +11676,8 @@ static void ggml_compute_forward_hardswish_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11689,9 +11687,6 @@ static void ggml_compute_forward_hardswish_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_hardswish_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11723,6 +11718,8 @@ static void ggml_compute_forward_hardsigmoid_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11732,9 +11729,6 @@ static void ggml_compute_forward_hardsigmoid_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_hardsigmoid_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -16681,7 +16675,10 @@ static void ggml_compute_forward_map_unary_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+    assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -16690,9 +16687,6 @@ static void ggml_compute_forward_map_unary_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert( dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         fun(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -16730,6 +16724,9 @@ static void ggml_compute_forward_map_binary_f32(
     const struct ggml_tensor * src1 = dst->src[1];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(src1));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -16739,10 +16736,6 @@ static void ggml_compute_forward_map_binary_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert( dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-    assert(src1->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         fun(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),