uvos committed
Commit 8e133f7 · 1 Parent(s): fa22f70

HIP: Ignore unsupported unroll transformation in fattn-vec (llama/14931)


llvm with the amdgcn target does not support unrolling loops with conditional break statements when those statements cannot be resolved at compile time. Similar to other places in GGML, let's simply ignore this warning.
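
The pattern being silenced looks roughly like the sketch below. The function name and loop are hypothetical, not the actual fattn-vec kernel code: a #pragma unroll is requested on a loop with a compile-time trip count, but the break inside it depends on runtime data, so llvm's amdgcn backend declines to unroll it and clang reports this as a -Wpass-failed warning. Wrapping the code in the diagnostic push/ignored/pop shown in the diff suppresses the warning without changing code generation.

// Illustrative sketch only; not the actual fattn-vec kernel code.
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed" // silence "loop not unrolled" on amdgcn
#endif // __clang__

template <int n> // compile-time trip count, as in the real kernels
static __device__ float sum_until_negative(const float * vals) {
    float sum = 0.0f;
#pragma unroll
    for (int i = 0; i < n; ++i) {
        if (vals[i] < 0.0f) {
            break; // data-dependent break: amdgcn cannot fully unroll this loop
        }
        sum += vals[i];
    }
    return sum;
}

#ifdef __clang__
#pragma clang diagnostic pop
#endif // __clang__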

ggml/src/ggml-cuda/fattn-vec-f16.cuh CHANGED
@@ -1,6 +1,12 @@
 #include "common.cuh"
 #include "fattn-common.cuh"
 
+// Currently llvm with the amdgcn target does not support unrolling loops
+// that contain a break that cannot be resolved at compile time.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
 template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -341,6 +347,9 @@ static __global__ void flash_attn_vec_ext_f16(
     NO_DEVICE_CODE;
 #endif // defined(FLASH_ATTN_AVAILABLE) && defined(FP16_AVAILABLE)
 }
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__
 
 template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
 void ggml_cuda_flash_attn_ext_vec_f16_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
ggml/src/ggml-cuda/fattn-vec-f32.cuh CHANGED
@@ -1,6 +1,12 @@
 #include "common.cuh"
 #include "fattn-common.cuh"
 
+// Currently llvm with the amdgcn target does not support unrolling loops
+// that contain a break that cannot be resolved at compile time.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
 template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -336,6 +342,9 @@ static __global__ void flash_attn_vec_ext_f32(
     NO_DEVICE_CODE;
 #endif // FLASH_ATTN_AVAILABLE
 }
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__
 
 template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
 void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {