am17an committed
Commit 68eb27a · Parent: 9d1185a

Add Conv2d for CPU (llama/14388)


* Conv2D: Add CPU version

* Half decent

* Tiled approach for F32

* remove file

* Fix tests

* Support F16 operations

* add assert about size

* Review: further formatting fixes, add assert and use CPU version of fp32->fp16

ggml/include/ggml.h CHANGED
@@ -482,6 +482,7 @@ extern "C" {
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
         GGML_OP_IM2COL_BACK,
+        GGML_OP_CONV_2D,
         GGML_OP_CONV_2D_DW,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
@@ -1813,6 +1814,17 @@ extern "C" {
             struct ggml_tensor  * b,
             int                   stride);
 
+    GGML_API struct ggml_tensor * ggml_conv_2d_direct(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
+            struct ggml_tensor  * b,   // input data [W, H, C, N]
+            int                   s0,  // stride dimension 0
+            int                   s1,  // stride dimension 1
+            int                   p0,  // padding dimension 0
+            int                   p1,  // padding dimension 1
+            int                   d0,  // dilation dimension 0
+            int                   d1); // dilation dimension 1
+
     enum ggml_op_pool {
         GGML_OP_POOL_MAX,
         GGML_OP_POOL_AVG,
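
For context, a minimal usage sketch of the new API (not part of the commit; the shapes, thread count, and use of ggml_graph_compute_with_ctx are illustrative assumptions):

// Hedged sketch: an 8-filter 3x3 F32 convolution over a 64x64 single-channel
// image, stride 1, no padding, no dilation -> output [62, 62, 8, 1].
struct ggml_init_params ip = { /*.mem_size =*/ 64*1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false };
struct ggml_context * ctx = ggml_init(ip);

struct ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F32,  3,  3, 1, 8); // [KW, KH, IC, OC]
struct ggml_tensor * input  = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 1, 1); // [W, H, C, N]

struct ggml_tensor * out = ggml_conv_2d_direct(ctx, kernel, input, 1, 1, 0, 0, 1, 1);

struct ggml_cgraph * gf = ggml_new_graph(ctx);
ggml_build_forward_expand(gf, out);
ggml_graph_compute_with_ctx(ctx, gf, 4); // assumed CPU-backend helper, 4 threads
ggml_free(ctx);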
ggml/src/ggml-cpu/ggml-cpu.c CHANGED
@@ -1193,7 +1193,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
         }
     }
 
-static void ggml_compute_forward_mul_mat(
+void ggml_compute_forward_mul_mat(
         const struct ggml_compute_params * params,
               struct ggml_tensor * dst) {
 
@@ -1866,6 +1866,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             {
                 ggml_compute_forward_im2col_back_f32(params, tensor);
             } break;
+        case GGML_OP_CONV_2D:
+            {
+                ggml_compute_forward_conv_2d(params, tensor);
+            } break;
         case GGML_OP_CONV_2D_DW:
             {
                 ggml_compute_forward_conv_2d_dw(params, tensor);
@@ -2228,6 +2232,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_IM2COL:
         case GGML_OP_IM2COL_BACK:
+        case GGML_OP_CONV_2D:
         case GGML_OP_CONV_2D_DW:
         case GGML_OP_CONV_TRANSPOSE_1D:
         case GGML_OP_CONV_TRANSPOSE_2D:
@@ -2746,6 +2751,10 @@ struct ggml_cplan ggml_graph_plan(
                     GGML_ABORT("fatal error");
                 }
             } break;
+        case GGML_OP_CONV_2D:
+            {
+                cur = GGML_IM2COL_WORK_SIZE;
+            } break;
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
                 const int64_t ne00 = node->src[0]->ne[0]; // W
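
Note that the planner reserves a fixed GGML_IM2COL_WORK_SIZE scratch buffer (16 MiB, defined in ops.h below) rather than sizing it per tensor shape; the kernel then tiles patches through it. A worked example with hypothetical shapes:

// 3x3 F32 kernel, IC = 64, OC = 128:
// knl_n             = 3 * 3 * 64            = 576 elements per im2col row
// space_per_patch   = 576 * 4 B + 128 * 4 B = 2816 B (im2col row + GEMM output row)
// batch_size        = 16 MiB / 2816 B       = 5957 patches fit in the work buffer
// patches_per_batch = 5957 rounded down to a multiple of 8 = 5952 patches per tile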
ggml/src/ggml-cpu/ops.cpp CHANGED
@@ -3,6 +3,7 @@
 #include "ggml-cpu.h"
 #include "ggml-impl.h"
 #include "binary-ops.h"
+#include "ggml.h"
 #include "unary-ops.h"
 #include "vec.h"
 
@@ -6545,6 +6546,186 @@ void ggml_compute_forward_im2col_back_f32(
     }
 }
 
+static void ggml_call_mul_mat(ggml_type type, const ggml_compute_params * params, int64_t m, int64_t n, int64_t k,
+                              void * a, void * b, float * c) {
+    const ggml_type_traits * traits = ggml_get_type_traits(type);
+    struct ggml_tensor src1 = {};
+    src1.type  = type;
+    src1.ne[0] = k;
+    src1.ne[1] = m;
+    src1.ne[2] = 1;
+    src1.ne[3] = 1;
+    src1.nb[0] = traits->type_size;
+    src1.nb[1] = k * traits->type_size;
+    src1.nb[2] = src1.nb[1];
+    src1.nb[3] = src1.nb[2];
+    src1.data  = a;
+
+    struct ggml_tensor src0 = {};
+    src0.type  = type;
+    src0.ne[0] = k;
+    src0.ne[1] = n;
+    src0.ne[2] = 1;
+    src0.ne[3] = 1;
+    src0.nb[0] = traits->type_size;
+    src0.nb[1] = k * traits->type_size;
+    src0.nb[2] = src0.nb[1];
+    src0.nb[3] = src0.nb[2];
+    src0.data  = b;
+
+    struct ggml_tensor dst = {};
+    dst.ne[0] = n;
+    dst.ne[1] = m;
+    dst.ne[2] = 1;
+    dst.ne[3] = 1;
+    dst.nb[0] = sizeof(float);
+    dst.nb[1] = n * sizeof(float);
+    dst.nb[2] = dst.nb[1];
+    dst.nb[3] = dst.nb[2];
+    dst.data   = c;
+    dst.src[0] = &src0;
+    dst.src[1] = &src1;
+
+    ggml_compute_forward_mul_mat(params, &dst);
+}
+
+// ggml_compute_forward_conv_2d
+
+static void ggml_compute_forward_conv_2d_impl(const ggml_compute_params * params,
+                                              const ggml_tensor *         kernel,  // [KW, KH, IC, OC]
+                                              const ggml_tensor *         src,     // [W, H, C, N]
+                                              ggml_tensor *               dst,     // [OW, OH, OC, N]
+                                              ggml_type                   kernel_type) {
+
+    GGML_ASSERT(ggml_is_contiguous(kernel));
+    GGML_ASSERT(kernel_type == GGML_TYPE_F16 || kernel_type == GGML_TYPE_F32);
+    GGML_ASSERT(kernel->type == kernel_type);
+
+    const ggml_type_traits * traits = ggml_get_type_traits(kernel_type);
+
+    const int32_t stride_x   = dst->op_params[0];
+    const int32_t stride_y   = dst->op_params[1];
+    const int32_t pad_x      = dst->op_params[2];
+    const int32_t pad_y      = dst->op_params[3];
+    const int32_t dilation_x = dst->op_params[4];
+    const int32_t dilation_y = dst->op_params[5];
+
+    const int64_t c_in  = src->ne[2];
+    const int64_t c_out = kernel->ne[3];
+    GGML_ASSERT(c_in == kernel->ne[2]);
+
+    const int64_t src_w = src->ne[0];
+    const int64_t src_h = src->ne[1];
+    const int64_t knl_w = kernel->ne[0];
+    const int64_t knl_h = kernel->ne[1];
+    const int64_t dst_w = dst->ne[0];
+    const int64_t dst_h = dst->ne[1];
+
+    const float * src_data = (float *) src->data;
+    void *        knl_data = kernel->data;
+    float *       dst_data = (float *) dst->data;
+
+    const int64_t knl_n       = knl_w * knl_h * c_in;
+    const int64_t patch_total = dst->ne[3] * dst_w * dst_h;
+
+    const int64_t space_per_patch   = knl_n * traits->type_size + c_out * sizeof(float);
+    const int64_t batch_size        = params->wsize / space_per_patch;
+    const int64_t patches_per_batch = batch_size > 8 ? (batch_size / 8) * 8 : batch_size;
+    const int64_t batch_n           = (patch_total + patches_per_batch - 1) / patches_per_batch;
+
+    GGML_ASSERT(patches_per_batch > 0 && batch_size >= 1);
+
+    void * tmp = params->wdata;
+
+    for (int64_t batch_i = 0; batch_i < batch_n; ++batch_i) {
+
+        const int64_t patch_start_batch = batch_i * patches_per_batch;
+        const int64_t patch_end_batch   = std::min(patch_start_batch + patches_per_batch,
+                                                   patch_total);
+        const int64_t patch_n           = patch_end_batch - patch_start_batch;
+
+        const int64_t patch_per_thread = (patch_n + params->nth - 1) / params->nth;
+        const int64_t patch_start      = patch_start_batch + params->ith * patch_per_thread;
+        const int64_t patch_end        = std::min(patch_start + patch_per_thread, patch_end_batch);
+
+        //im2col for a patch
+        for (int64_t p = patch_start; p < patch_end; ++p) {
+            const int64_t batch_n = p / (dst_w * dst_h);
+            const int64_t src_x   = (p / dst_w) % dst_h;
+            const int64_t src_y   = p % dst_w;
+
+            const float * src_base = (const float *) ((const char *) src_data + batch_n * src->nb[3]);
+            char *        dst_row  = (char *) tmp + (p % patches_per_batch) * knl_n * traits->type_size;
+
+            for (int64_t ic = 0; ic < c_in; ++ic) {
+                for (int64_t ky = 0; ky < knl_h; ++ky) {
+                    for (int64_t kx = 0; kx < knl_w; ++kx) {
+                        const int64_t sy = src_x * stride_y + ky * dilation_y - pad_y;
+                        const int64_t sx = src_y * stride_x + kx * dilation_x - pad_x;
+
+                        int64_t dst_idx = ic * (knl_h * knl_w) + ky * knl_w + kx;
+
+                        float src_val;
+                        if (sy < 0 || sy >= src_h || sx < 0 || sx >= src_w) {
+                            src_val = 0.0f;
+                        } else {
+                            const float * src_ptr = (const float *) ((const char *) src_base + sx * src->nb[0] + sy * src->nb[1] + ic * src->nb[2]);
+                            src_val = *src_ptr;
+                        }
+
+                        char * element_ptr = dst_row + dst_idx * traits->type_size;
+                        if (kernel_type == GGML_TYPE_F32) {
+                            *(float *) element_ptr = src_val;
+                        } else if (kernel_type == GGML_TYPE_F16) {
+                            *(ggml_fp16_t *) element_ptr = GGML_CPU_FP32_TO_FP16(src_val);
+                        }
+                    }
+                }
+            }
+        } // patches handled by this thread
+
+        ggml_barrier(params->threadpool);
+
+        float * gemm_output = (float *) ((char *) tmp + patches_per_batch * knl_n * traits->type_size);
+
+        GGML_ASSERT(gemm_output + patch_n * c_out <= (float *) tmp + params->wsize);
+
+        // GEMM: patches[patch_n, knl_n] × kernel[knl_n, c_out] = output[patch_n, c_out]
+        ggml_call_mul_mat(kernel_type, params, patch_n, c_out, knl_n, tmp, knl_data, gemm_output);
+
+        ggml_barrier(params->threadpool);
+
+        //permute back [OC, N, OH, OW] to [N, OC, OH, OW]
+        const int64_t permute_per_thread = (patch_n + params->nth - 1) / params->nth;
+        const int64_t permute_start      = params->ith * permute_per_thread;
+        const int64_t permute_end        = std::min(permute_start + permute_per_thread, patch_n);
+
+        for (int64_t i = permute_start; i < permute_end; ++i) {
+            const int64_t p       = patch_start_batch + i;
+            const int64_t batch_n = p / (dst_w * dst_h);
+            const int64_t dst_y   = (p / dst_w) % dst_h;
+            const int64_t dst_x   = p % dst_w;
+
+            for (int64_t oc = 0; oc < c_out; ++oc) {
+                const float value = gemm_output[i * c_out + oc];
+                float * dst_ptr = (float *) ((char *) dst_data + dst_x * dst->nb[0] + dst_y * dst->nb[1] + oc * dst->nb[2] + batch_n * dst->nb[3]);
+                *dst_ptr = value;
+            }
+        }
+    }
+}
+
+void ggml_compute_forward_conv_2d(
+        const ggml_compute_params * params,
+        ggml_tensor *               dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+
+    ggml_compute_forward_conv_2d_impl(params, src0, src1, dst, src0->type);
+}
+
 // ggml_compute_forward_conv_transpose_2d
 
 void ggml_compute_forward_conv_transpose_2d(
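
For readers cross-checking the tiled im2col + GEMM path above: the computation it implements is plain direct convolution. A hedged reference sketch, not part of the commit (conv2d_ref_f32 is a hypothetical name), over contiguous F32 tensors in the same ne[0]-fastest layout:

#include <stdint.h>

// Naive direct convolution: kernel [KW, KH, IC, OC], src [W, H, IC, N], dst [OW, OH, OC, N].
static void conv2d_ref_f32(const float * kernel, int64_t kw, int64_t kh, int64_t ic, int64_t oc,
                           const float * src, int64_t w, int64_t h, int64_t n,
                           float * dst, int64_t ow, int64_t oh,
                           int s0, int s1, int p0, int p1, int d0, int d1) {
    for (int64_t b = 0; b < n; ++b)
    for (int64_t o = 0; o < oc; ++o)
    for (int64_t oy = 0; oy < oh; ++oy)
    for (int64_t ox = 0; ox < ow; ++ox) {
        float acc = 0.0f;
        for (int64_t c = 0; c < ic; ++c)
        for (int64_t ky = 0; ky < kh; ++ky)
        for (int64_t kx = 0; kx < kw; ++kx) {
            const int64_t sy = oy*s1 + ky*d1 - p1;
            const int64_t sx = ox*s0 + kx*d0 - p0;
            if (sy < 0 || sy >= h || sx < 0 || sx >= w) continue; // implicit zero padding
            acc += src   [((b*ic + c)*h + sy)*w + sx]
                 * kernel[((o*ic + c)*kh + ky)*kw + kx];
        }
        dst[((b*oc + o)*oh + oy)*ow + ox] = acc;
    }
}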
ggml/src/ggml-cpu/ops.h CHANGED
@@ -20,6 +20,9 @@
 
 static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
 
+// Work buffer size for im2col operations in CONV2D
+#define GGML_IM2COL_WORK_SIZE (16 * 1024 * 1024)
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -65,6 +68,7 @@ void ggml_compute_forward_clamp(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_conv_transpose_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_im2col(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_im2col_back_f32(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_conv_2d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_conv_transpose_2d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_conv_2d_dw(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_pool_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
@@ -107,6 +111,7 @@ void ggml_compute_forward_custom(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_cross_entropy_loss(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_cross_entropy_loss_back(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_opt_step_adamw(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_mul_mat(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 
 #ifdef __cplusplus
 }
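
The last declaration exposes the previously static CPU GEMM so ops.cpp can drive it with hand-built tensor descriptors. For reference, the dimension convention ggml_call_mul_mat relies on, as read off the diff above (stated here as an aid, not an API guarantee):

// src0: [k, n]  -> kernel rows,  n = c_out,   each of length k = knl_n
// src1: [k, m]  -> patch rows,   m = patch_n, each of length k
// dst:  [n, m]  -> dst[j * n + i] = dot(src0 row i, src1 row j), accumulated in F32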
ggml/src/ggml.c CHANGED
@@ -945,6 +945,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CONV_TRANSPOSE_1D",
     "IM2COL",
     "IM2COL_BACK",
+    "CONV_2D",
     "CONV_2D_DW",
     "CONV_TRANSPOSE_2D",
     "POOL_1D",
@@ -986,7 +987,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "GLU",
 };
 
-static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85");
+static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -1044,6 +1045,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "conv_transpose_1d(x)",
     "im2col(x)",
     "im2col_back(x)",
+    "conv_2d(x)",
     "conv_2d_dw(x)",
     "conv_transpose_2d(x)",
     "pool_1d(x)",
@@ -1085,7 +1087,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "glu(x)",
 };
 
-static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85");
+static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -4291,6 +4293,44 @@ struct ggml_tensor * ggml_conv_2d_dw_direct(
     return result;
 }
 
+// ggml_conv_2d_direct
+
+struct ggml_tensor * ggml_conv_2d_direct(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
+        struct ggml_tensor  * b,   // input data [W, H, C, N]
+        int                   s0,  // stride dimension 0
+        int                   s1,  // stride dimension 1
+        int                   p0,  // padding dimension 0
+        int                   p1,  // padding dimension 1
+        int                   d0,  // dilation dimension 0
+        int                   d1) {// dilation dimension 1
+
+    GGML_ASSERT(a->ne[2] == b->ne[2]);
+    //GGML_ASSERT(a->type == b->type);
+
+    int64_t ne[4];
+    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
+    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
+    ne[2] = a->ne[3];
+    ne[3] = b->ne[3];
+
+    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
+
+    ggml_set_op_params_i32(result, 0, s0);
+    ggml_set_op_params_i32(result, 1, s1);
+    ggml_set_op_params_i32(result, 2, p0);
+    ggml_set_op_params_i32(result, 3, p1);
+    ggml_set_op_params_i32(result, 4, d0);
+    ggml_set_op_params_i32(result, 5, d1);
+
+    result->op     = GGML_OP_CONV_2D;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
 // ggml_conv_transpose_2d_p0
 
 static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
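
ggml_calc_conv_output_size is not part of this diff; it is the existing helper that yields the usual convolution output size per spatial dimension, which fixes ne[0] and ne[1] above:

// out = (in + 2*p - d*(k - 1) - 1) / s + 1
// e.g. in = 64, k = 3, s = 1, p = 0, d = 1:  (64 + 0 - 2 - 1)/1 + 1 = 62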