hipudding committed
Commit fa22f70 · 1 parent: 2237878

CANN: Add ggml_set_rows (llama/14943)

ggml/src/ggml-cann/aclnn_ops.cpp CHANGED
```diff
@@ -68,6 +68,8 @@
 #include <aclnnop/aclnn_grouped_matmul_v3.h>
 #include <aclnnop/aclnn_fused_infer_attention_score_v2.h>
 #include <aclnnop/aclnn_zero.h>
+#include <aclnnop/aclnn_index_copy.h>
+#include <aclnnop/aclnn_index_select.h>
 #include <float.h>
 
 #include <cmath>
@@ -1614,50 +1616,97 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 }
 
 /**
- * @brief Performs embedding operation on a 4D tensor using the CANN backend.
+ * @brief Performs index select operation on a 4D tensor using the CANN backend.
  *
- * This function extracts slices from the source tensor (`src_buffer`),
- * index tensor (`index`), and destination tensor (`dst`), and performs an
- * embedding operation on them. The embedding operation is applied by iterating
- * over the last two dimensions of the source tensor, creating the necessary
- * tensors for the source, index, and output, and executing the embedding operation.
+ * This function applies the `IndexSelect` operation along a specific dimension
+ * of the source tensor (`src_buffer`) using the indices from the index tensor (`index`).
+ * It iterates over the last two dimensions of the source tensor, creates the corresponding
+ * CANN tensors for the source, index, and output slices, and executes the `IndexSelect`
+ * operation for each slice.
  *
  * @param ctx The context for CANN backend operations.
- * @param src_buffer The source buffer holding the data for the source tensor.
+ * @param src_buffer The source buffer containing the 4D input tensor data.
  * @param src_ne The dimensions of the source tensor.
  * @param src_nb The strides (byte offsets) of the source tensor.
- * @param index The index tensor used in the embedding operation.
- * @param dst The destination tensor where the result will be stored.
+ * @param dst_buffer The destination buffer where the output tensor data will be written.
+ * @param dst_ne The dimensions of the destination tensor.
+ * @param dst_nb The strides (byte offsets) of the destination tensor.
+ * @param index The index tensor specifying the indices to select from the source tensor.
+ * @param type The data type of the source and destination tensors.
  */
-static void aclnn_embedding_4d(ggml_backend_cann_context& ctx, void* src_buffer,
-                               int64_t* src_ne, size_t* src_nb, ggml_tensor* index,
-                               ggml_tensor* dst) {
+static void aclnn_index_select_4d(ggml_backend_cann_context& ctx,
+                                  void* src_buffer, int64_t* src_ne, size_t* src_nb,
+                                  void* dst_buffer, int64_t* dst_ne, size_t* dst_nb,
+                                  ggml_tensor* index, ggml_type type) {
     for (int64_t i = 0; i < src_ne[3]; i++) {
         for (int64_t j = 0; j < src_ne[2]; j++) {
             // src
-            int64_t acl_src_ne[2] = {src_ne[0], src_ne[1]};
-            size_t acl_src_nb[2] = {src_nb[0], src_nb[1]};
             aclTensor* acl_src_tensor = ggml_cann_create_tensor(
                 (char*)src_buffer + i * src_nb[3] + j * src_nb[2],
-                ggml_cann_type_mapping(dst->type), ggml_element_size(dst),
-                acl_src_ne, acl_src_nb, 2);
+                ggml_cann_type_mapping(type), ggml_type_size(type),
+                src_ne, src_nb, 2);
 
             // index
-            int64_t acl_index_ne[1] = {index->ne[0]};
-            size_t acl_index_nb[1] = {index->nb[0]};
             aclTensor* acl_index = ggml_cann_create_tensor(
-                (char*)index->data + i * index->nb[2] + j * index->nb[1],
+                (char*)index->data + (i % index->ne[2]) * index->nb[2] + (j % index->ne[1]) * index->nb[1],
                 ggml_cann_type_mapping(index->type), ggml_element_size(index),
-                acl_index_ne, acl_index_nb, 1);
+                index->ne, index->nb, 1);
 
             // out
-            int64_t acl_out_ne[2] = {dst->ne[0], dst->ne[1]};
-            size_t acl_out_nb[2] = {dst->nb[0], dst->nb[1]};
             aclTensor* acl_out = ggml_cann_create_tensor(
-                (char*)dst->data + i * dst->nb[3] + j * dst->nb[2],
-                ggml_cann_type_mapping(dst->type), ggml_element_size(dst),
-                acl_out_ne, acl_out_nb, 2);
-            GGML_CANN_CALL_ACLNN_OP(ctx, Embedding, acl_src_tensor, acl_index, acl_out);
+                (char*)dst_buffer + i * dst_nb[3] + j * dst_nb[2],
+                ggml_cann_type_mapping(type), ggml_type_size(type),
+                dst_ne, dst_nb, 2);
+            GGML_CANN_CALL_ACLNN_OP(ctx, IndexSelect, acl_src_tensor, 0, acl_index, acl_out);
+            ggml_cann_release_resources(ctx, acl_src_tensor, acl_index, acl_out);
+        }
+    }
+}
+
+/**
+ * @brief Performs inplace index copy operation on a 4D tensor using the CANN backend.
+ *
+ * This function applies the `IndexCopy` operation along a specific dimension of the
+ * destination tensor (`dst_buffer`) by copying elements from the source tensor (`src_buffer`)
+ * to positions specified by the index tensor (`index`).
+ * It iterates over the last two dimensions of the tensors, creates the corresponding
+ * CANN tensors for source, index, and destination slices, and performs the index copy
+ * operation for each slice.
+ *
+ * @param ctx The context for CANN backend operations.
+ * @param src_buffer The source buffer containing the 4D input tensor data to be copied.
+ * @param src_ne The dimensions of the source tensor.
+ * @param src_nb The strides (byte offsets) of the source tensor.
+ * @param dst_buffer The destination buffer where values will be copied to.
+ * @param dst_ne The dimensions of the destination tensor.
+ * @param dst_nb The strides (byte offsets) of the destination tensor.
+ * @param index The index tensor specifying target positions in the destination tensor.
+ * @param type The data type of the source and destination tensors.
+ */
+static void aclnn_index_copy_4d(ggml_backend_cann_context& ctx,
+                                void* src_buffer, int64_t* src_ne, size_t* src_nb,
+                                void* dst_buffer, int64_t* dst_ne, size_t* dst_nb,
+                                ggml_tensor* index, ggml_type type) {
+    for (int64_t i = 0; i < src_ne[3]; i++) {
+        for (int64_t j = 0; j < src_ne[2]; j++) {
+            // src
+            aclTensor* acl_src_tensor = ggml_cann_create_tensor(
+                (char*)src_buffer + i * src_nb[3] + j * src_nb[2],
+                ggml_cann_type_mapping(type), ggml_type_size(type),
+                src_ne, src_nb, 2);
+
+            // index
+            aclTensor* acl_index = ggml_cann_create_tensor(
+                (char*)index->data + (i % index->ne[2]) * index->nb[2] + (j % index->ne[1]) * index->nb[1],
+                ggml_cann_type_mapping(index->type), ggml_element_size(index),
+                index->ne, index->nb, 1);
+
+            // out
+            aclTensor* acl_out = ggml_cann_create_tensor(
+                (char*)dst_buffer + i * dst_nb[3] + j * dst_nb[2],
+                ggml_cann_type_mapping(type), ggml_type_size(type),
+                dst_ne, dst_nb, 2);
+            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceIndexCopy, acl_out, 0, acl_index, acl_src_tensor);
             ggml_cann_release_resources(ctx, acl_src_tensor, acl_index, acl_out);
         }
     }
@@ -1669,8 +1718,9 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 
     switch (src0->type) {
         case GGML_TYPE_F32: {
-            aclnn_embedding_4d(ctx, src0->data, src0->ne, src0->nb, src1,
-                               dst);
+            aclnn_index_select_4d(ctx, src0->data, src0->ne, src0->nb,
+                                  dst->data, dst->ne, dst->nb,
+                                  src1, dst->type);
             break;
         }
         case GGML_TYPE_F16: {
@@ -1687,8 +1737,9 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 src_trans_buffer, ACL_FLOAT, ggml_type_size(dst->type),
                 src0->ne, src_trans_nb, GGML_MAX_DIMS);
             aclnn_cast(ctx, acl_src0, src_trans_tensor, ggml_cann_type_mapping(dst->type));
-            aclnn_embedding_4d(ctx, src_trans_buffer, src0->ne,
-                               src_trans_nb, src1, dst);
+            aclnn_index_select_4d(ctx, src_trans_buffer, src0->ne, src_trans_nb,
+                                  dst->data, dst->ne, dst->nb,
+                                  src1, dst->type);
             ggml_cann_release_resources(ctx, acl_src0, src_trans_tensor);
             break;
         }
@@ -1748,8 +1799,10 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 dequant_nb[i] = dequant_nb[i - 1] * src0->ne[i - 1];
             }
 
-            aclnn_embedding_4d(ctx, dequant_buffer_allocator.get(),
-                               dequant_ne, dequant_nb, src1, dst);
+            aclnn_index_select_4d(ctx, dequant_buffer_allocator.get(),
+                                  dequant_ne, dequant_nb,
+                                  dst->data, dst->ne, dst->nb,
+                                  src1, dst->type);
 
             ggml_cann_release_resources(ctx, dequant_tensor);
             break;
@@ -1760,6 +1813,43 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     }
 }
 
+void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
+    ggml_tensor* src0 = dst->src[0];  // src
+    ggml_tensor* src1 = dst->src[1];  // index
+
+    switch (dst->type) {
+        case GGML_TYPE_F32: {
+            aclnn_index_copy_4d(ctx, src0->data, src0->ne, src0->nb,
+                                dst->data, dst->ne, dst->nb,
+                                src1, dst->type);
+            break;
+        }
+        case GGML_TYPE_F16: {
+            aclTensor* acl_src0 = ggml_cann_create_tensor(src0);
+            ggml_cann_pool_alloc src_buffer_allocator(
+                ctx.pool(), ggml_nelements(src0) * sizeof(uint16_t));
+            void* src_trans_buffer = src_buffer_allocator.get();
+            size_t src_trans_nb[GGML_MAX_DIMS];
+            src_trans_nb[0] = sizeof(uint16_t);
+            for (int i = 1; i < GGML_MAX_DIMS; i++) {
+                src_trans_nb[i] = src_trans_nb[i - 1] * src0->ne[i - 1];
+            }
+            aclTensor* src_trans_tensor = ggml_cann_create_tensor(
+                src_trans_buffer, ACL_FLOAT16, ggml_type_size(dst->type),
+                src0->ne, src_trans_nb, GGML_MAX_DIMS);
+            aclnn_cast(ctx, acl_src0, src_trans_tensor, ggml_cann_type_mapping(dst->type));
+            aclnn_index_copy_4d(ctx, src_trans_buffer, src0->ne, src_trans_nb,
+                                dst->data, dst->ne, dst->nb,
+                                src1, dst->type);
+            ggml_cann_release_resources(ctx, acl_src0, src_trans_tensor);
+            break;
+        }
+        default:
+            GGML_ABORT("Unsupported tensor type for GGML_OP_SET_ROWS");
+            break;
+    }
+}
+
 /**
  * @brief Repeats elements of a tensor along a specified dimension.
  *
```
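To make the semantics of the two helpers concrete, here is a minimal CPU sketch (an illustration, not backend code) of what a single (i, j) slice computes. The function names, the contiguous row-major float layout, and the `int64_t` index type are assumptions of the sketch; the real helpers operate on strided `aclTensor` views and dispatch `IndexSelect`/`InplaceIndexCopy` to the device. In the diff above, the outer loops walk dims 3 and 2 and pick the index plane with `(i % index->ne[2])` and `(j % index->ne[1])`, so an index tensor with fewer batch planes is broadcast across the source's batch dimensions.

```cpp
#include <cassert>
#include <cstdint>

// Gather, as IndexSelect along dim 0 of a [n_rows, row_len] slice:
// out[r] = src[idx[r]] -- the row lookup behind GGML_OP_GET_ROWS.
static void slice_index_select(const float* src, int64_t src_rows, int64_t row_len,
                               const int64_t* idx, int64_t n_idx, float* out) {
    for (int64_t r = 0; r < n_idx; r++) {
        assert(idx[r] >= 0 && idx[r] < src_rows);
        const float* src_row = src + idx[r] * row_len;
        for (int64_t c = 0; c < row_len; c++) {
            out[r * row_len + c] = src_row[c];
        }
    }
}

// Scatter, as InplaceIndexCopy along dim 0 of the destination:
// dst[idx[r]] = src[r] -- the row write behind GGML_OP_SET_ROWS.
static void slice_index_copy(const float* src, int64_t n_src_rows, int64_t row_len,
                             const int64_t* idx, float* dst, int64_t dst_rows) {
    for (int64_t r = 0; r < n_src_rows; r++) {
        assert(idx[r] >= 0 && idx[r] < dst_rows);
        float* dst_row = dst + idx[r] * row_len;
        for (int64_t c = 0; c < row_len; c++) {
            dst_row[c] = src[r * row_len + c];
        }
    }
}
```

The two loops are exact mirrors: GET_ROWS reads through the indices while SET_ROWS writes through them, which is why the commit can reuse the same 4D slicing scaffold for both helpers.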
ggml/src/ggml-cann/aclnn_ops.h CHANGED
```diff
@@ -424,15 +424,25 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  *
  * @details This function retrieves rows from a source tensor src0 according to
  *          the indices provided in another tensor src1 and stores the result in
- *          a destination tensor (\p dst). It supports different data types
- *          including F32, F16, Q4_0, and Q8_0.
+ *          a destination tensor (\p dst).
  *
  * @param ctx The backend CANN context for executing operations.
  * @param dst The destination tensor where the extracted rows will be stored.
- *            dst->op is `GGML_OP_GET_ROWS`.
  */
 void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
+/**
+ * @brief Writes specific rows into a tensor at positions specified by indices.
+ *
+ * @details This function copies rows from a source tensor into a destination
+ *          tensor (\p dst) at the positions indicated by the indices in another
+ *          tensor.
+ *
+ * @param ctx The backend CANN context for executing operations.
+ * @param dst The destination tensor where the specified rows will be updated.
+ */
+void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
 /**
  * @brief Executes matrix multiplication for the given tensor.
  *
```
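For context on how a graph reaches `ggml_cann_set_rows`, here is a hedged sketch of building a `GGML_OP_SET_ROWS` node with the public ggml API. The `ggml_set_rows(ctx, a, b, c)` builder and its argument order (destination, new rows, I64 indices) are taken from the PR referenced in the removed TODO (https://github.com/ggml-org/llama.cpp/pull/14274) and should be checked against the current ggml.h; the shapes below are made up for illustration.

```cpp
#include "ggml.h"

// Sketch: overwrite 4 rows of a 64x128 F16 tensor at positions given by idx.
static ggml_tensor* build_set_rows(ggml_context* ctx) {
    ggml_tensor* dst  = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 128, 64); // 64 rows of 128
    ggml_tensor* rows = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 128, 4);  // 4 replacement rows
    ggml_tensor* idx  = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 4);       // target row positions
    // On the CANN backend this node dispatches to ggml_cann_set_rows().
    return ggml_set_rows(ctx, dst, rows, idx);
}
```

Note that the rows to write may arrive as F32 even when the destination is F16; the F16 branch of `ggml_cann_set_rows` handles this by casting into a pool-allocated buffer before performing the index copy.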
ggml/src/ggml-cann/ggml-cann.cpp CHANGED
```diff
@@ -1659,6 +1659,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
         case GGML_OP_GET_ROWS:
             ggml_cann_get_rows(ctx, dst);
             break;
+        case GGML_OP_SET_ROWS:
+            ggml_cann_set_rows(ctx, dst);
+            break;
         case GGML_OP_DUP:
             ggml_cann_dup(ctx, dst);
             break;
@@ -2191,13 +2194,15 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
                     return false;
                 }
             } break;
-        case GGML_OP_SET_ROWS:
-            {
-                // TODO: add support
-                // ref: https://github.com/ggml-org/llama.cpp/pull/14274
-#pragma message("TODO: implement F32, F16, BF16, Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, IQ4_NL support (https://github.com/ggml-org/llama.cpp/pull/14661)")
-                return false;
-            } break;
+        case GGML_OP_SET_ROWS: {
+            switch (op->type) {
+                case GGML_TYPE_F32:
+                case GGML_TYPE_F16:
+                    return true;
+                default:
+                    return false;
+            }
+        } break;
         case GGML_OP_CPY: {
             ggml_tensor *src = op->src[0];
             if ((op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_F16) ||
```
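The new `supports_op` gate is deliberately narrower than the old TODO, which listed BF16 and several quantized types as future work: the backend now advertises exactly what `ggml_cann_set_rows` implements. A standalone restatement of the predicate, with a hypothetical helper name:

```cpp
// Hypothetical standalone form of the new GGML_OP_SET_ROWS gate in
// ggml_backend_cann_supports_op: only F32 and F16 destination types are
// accepted; BF16 and the quantized types from the old TODO stay unsupported.
static bool cann_set_rows_supported(ggml_type dst_type) {
    return dst_type == GGML_TYPE_F32 || dst_type == GGML_TYPE_F16;
}
```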