tests : add non-cont unary tests (llama/7857)
* tests : add non-cont unary tests
* ggml : update unary asserts and "supports_op"
ggml-ci
- ggml-cuda.cu +1 -1
- ggml-cuda/unary.cu +20 -0
- ggml-kompute.cpp +1 -1
- ggml-metal.m +1 -1
- ggml-sycl.cpp +1 -1
- ggml-vulkan.cpp +1 -1
- ggml.c +45 -52
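
The tests exercise the unary ops on non-contiguous sources. A rough sketch of the idea (assumed sizes and variable names, not the actual test-backend-ops code): a strided view keeps each row dense, so it still satisfies the new ggml_is_contiguous_1 assert on the CPU path, while ggml_is_contiguous is false, and backends that require fully packed data now report the op as unsupported.

#include "ggml.h"

// Illustrative sketch only (assumed sizes and names, not code from this
// commit): build a source whose rows are dense but not back-to-back.
int main(void) {
    struct ggml_init_params ip = { 16*1024*1024, NULL, false };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 128, 8, 4);

    // view the first 64 elements of every row: nb[0] stays sizeof(float),
    // so ggml_is_contiguous_1(v) holds, but there is now a gap between
    // rows and ggml_is_contiguous(v) is false
    struct ggml_tensor * v = ggml_view_3d(ctx, t, 64, 8, 4, t->nb[1], t->nb[2], 0);

    struct ggml_tensor * y = ggml_tanh(ctx, v); // passes the new assert in ggml_unary_impl

    (void) y;
    ggml_free(ctx);
    return 0;
}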
ggml-cuda.cu

@@ -2740,7 +2740,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
         case GGML_UNARY_OP_HARDSWISH:
         case GGML_UNARY_OP_GELU_QUICK:
         case GGML_UNARY_OP_TANH:
-            return true;
+            return ggml_is_contiguous(op->src[0]);
         default:
             return false;
     }
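
The CUDA kernels index their inputs as flat arrays, so supports_op now declines unary ops on non-contiguous sources and lets the scheduler fall back to a backend that handles them (the CPU one, after the ggml.c changes below). For an F32 tensor the check amounts to gap-free strides; a simplified illustration of the predicate follows (the real ggml_is_contiguous in ggml.c also accounts for block-quantized types):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Simplified, F32-only version of the contiguity test, for illustration.
struct strides { int64_t ne[4]; size_t nb[4]; };

static bool is_contiguous_f32(const struct strides * t) {
    return t->nb[0] == sizeof(float)       &&
           t->nb[1] == t->nb[0] * t->ne[0] &&
           t->nb[2] == t->nb[1] * t->ne[1] &&
           t->nb[3] == t->nb[2] * t->ne[2];
}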
ggml-cuda/unary.cu

@@ -148,6 +148,8 @@ void ggml_cuda_op_gelu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -160,6 +162,8 @@ void ggml_cuda_op_silu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -172,6 +176,8 @@ void ggml_cuda_op_gelu_quick(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -184,6 +190,8 @@ void ggml_cuda_op_tanh(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -196,6 +204,8 @@ void ggml_cuda_op_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -208,6 +218,8 @@ void ggml_cuda_op_sigmoid(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -220,6 +232,8 @@ void ggml_cuda_op_hardsigmoid(ggml_backend_cuda_context & ctx, ggml_tensor * dst
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -232,6 +246,8 @@ void ggml_cuda_op_hardswish(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -244,6 +260,8 @@ void ggml_cuda_op_leaky_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
@@ -259,6 +277,8 @@ void ggml_cuda_op_sqr(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     float * dst_d = (float *)dst->data;
     cudaStream_t stream = ctx.stream();
 
+    GGML_ASSERT(ggml_is_contiguous(src0));
+
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
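
Each of these ops launches a 1-D kernel over ggml_nelements(src0) and walks src0->data linearly, which is only correct for packed memory; the new GGML_ASSERT makes that contract explicit. A CPU analogue of the indexing pattern (illustrative sketch, not code from this commit):

#include <math.h>
#include <stdint.h>

// One "thread" (here: loop iteration) per element, flat indexing: valid
// only when x and y are densely packed, which is what the new assert
// guarantees. 'n' corresponds to ggml_nelements(src0).
static void tanh_flat_f32(const float * x, float * y, int64_t n) {
    for (int64_t i = 0; i < n; i++) {
        y[i] = tanhf(x[i]);
    }
}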
ggml-kompute.cpp

@@ -1340,7 +1340,7 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
         case GGML_UNARY_OP_RELU:
         case GGML_UNARY_OP_GELU:
         case GGML_UNARY_OP_SILU:
-            return true;
+            return ggml_is_contiguous(op->src[0]);
         default:
             ;
     }
ggml-metal.m

@@ -744,7 +744,7 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const
         case GGML_UNARY_OP_GELU:
         case GGML_UNARY_OP_GELU_QUICK:
         case GGML_UNARY_OP_SILU:
-            return true;
+            return ggml_is_contiguous(op->src[0]);
         default:
             return false;
     }
ggml-sycl.cpp

@@ -17190,7 +17190,7 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
         case GGML_UNARY_OP_HARDSWISH:
         case GGML_UNARY_OP_GELU_QUICK:
         case GGML_UNARY_OP_TANH:
-            return true;
+            return ggml_is_contiguous(op->src[0]);
         default:
             return false;
     }
ggml-vulkan.cpp

@@ -6439,7 +6439,7 @@ GGML_CALL static bool ggml_backend_vk_supports_op(ggml_backend_t backend, const
         case GGML_UNARY_OP_GELU:
         case GGML_UNARY_OP_SILU:
         case GGML_UNARY_OP_RELU:
-            return true;
+            return ggml_is_contiguous(op->src[0]);
         default:
             return false;
     }
ggml.c

@@ -7345,6 +7345,8 @@ static struct ggml_tensor * ggml_unary_impl(
         struct ggml_tensor * a,
         enum ggml_unary_op op,
         bool inplace) {
+    GGML_ASSERT(ggml_is_contiguous_1(a));
+
     bool is_node = false;
 
     if (!inplace && (a->grad)) {
@@ -11009,6 +11011,8 @@ static void ggml_compute_forward_abs_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11018,9 +11022,6 @@ static void ggml_compute_forward_abs_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_abs_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11055,6 +11056,8 @@ static void ggml_compute_forward_sgn_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11064,9 +11067,6 @@ static void ggml_compute_forward_sgn_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_sgn_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11101,6 +11101,8 @@ static void ggml_compute_forward_neg_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11110,9 +11112,6 @@ static void ggml_compute_forward_neg_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_neg_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11147,6 +11146,8 @@ static void ggml_compute_forward_step_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11156,9 +11157,6 @@ static void ggml_compute_forward_step_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_step_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11193,6 +11191,8 @@ static void ggml_compute_forward_tanh_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11202,9 +11202,6 @@ static void ggml_compute_forward_tanh_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_tanh_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11239,6 +11236,8 @@ static void ggml_compute_forward_elu_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11248,9 +11247,6 @@ static void ggml_compute_forward_elu_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_elu_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11285,6 +11281,8 @@ static void ggml_compute_forward_relu_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11294,9 +11292,6 @@ static void ggml_compute_forward_relu_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_relu_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11331,6 +11326,8 @@ static void ggml_compute_forward_sigmoid_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11340,9 +11337,6 @@ static void ggml_compute_forward_sigmoid_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_sigmoid_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11376,9 +11370,9 @@ static void ggml_compute_forward_gelu_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(ggml_is_contiguous_1(src0));
-    GGML_ASSERT(ggml_is_contiguous_1(dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -11439,9 +11433,9 @@ static void ggml_compute_forward_gelu_quick_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(ggml_is_contiguous_1(src0));
-    GGML_ASSERT(ggml_is_contiguous_1(dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -11502,9 +11496,9 @@ static void ggml_compute_forward_silu_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(ggml_is_contiguous_1(src0));
-    GGML_ASSERT(ggml_is_contiguous_1(dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -11565,6 +11559,8 @@ static void ggml_compute_forward_leaky_relu_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11614,11 +11610,11 @@ static void ggml_compute_forward_silu_back_f32(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * grad = dst->src[1];
 
-    GGML_ASSERT(ggml_is_contiguous_1(grad));
-    GGML_ASSERT(ggml_is_contiguous_1(src0));
-    GGML_ASSERT(ggml_is_contiguous_1(dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, grad));
+    assert(ggml_is_contiguous_1(grad));
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+    assert(ggml_are_same_shape(src0, grad));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -11680,6 +11676,8 @@ static void ggml_compute_forward_hardswish_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11689,9 +11687,6 @@ static void ggml_compute_forward_hardswish_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_hardswish_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11723,6 +11718,8 @@ static void ggml_compute_forward_hardsigmoid_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11732,9 +11729,6 @@ static void ggml_compute_forward_hardsigmoid_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_hardsigmoid_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -16681,7 +16675,10 @@ static void ggml_compute_forward_map_unary_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+    assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -16690,9 +16687,6 @@ static void ggml_compute_forward_map_unary_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert( dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         fun(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -16730,6 +16724,9 @@ static void ggml_compute_forward_map_binary_f32(
     const struct ggml_tensor * src1 = dst->src[1];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(src1));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -16739,10 +16736,6 @@ static void ggml_compute_forward_map_binary_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert( dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-    assert(src1->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         fun(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),