Spaces:
Running
Running
HIP: Ignore unsupported unroll transformation in fattn-vec (llama/14931)
Browse files
LLVM with the amdgcn target does not support unrolling loops with conditional break statements when those statements cannot be resolved at compile time. As in other places in GGML, let's simply ignore this warning.
ggml/src/ggml-cuda/fattn-vec-f16.cuh
CHANGED
|
@@ -1,6 +1,12 @@
|
|
| 1 |
#include "common.cuh"
|
| 2 |
#include "fattn-common.cuh"
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
|
| 5 |
#ifndef GGML_USE_HIP
|
| 6 |
__launch_bounds__(D, 1)
|
|
@@ -341,6 +347,9 @@ static __global__ void flash_attn_vec_ext_f16(
|
|
| 341 |
NO_DEVICE_CODE;
|
| 342 |
#endif // defined(FLASH_ATTN_AVAILABLE) && defined(FP16_AVAILABLE)
|
| 343 |
}
|
|
|
|
|
|
|
|
|
|
| 344 |
|
| 345 |
template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
|
| 346 |
void ggml_cuda_flash_attn_ext_vec_f16_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|
|
|
| 1 |
#include "common.cuh"
|
| 2 |
#include "fattn-common.cuh"
|
| 3 |
|
| 4 |
+
// Currently LLVM with the amdgcn target does not support unrolling loops
|
| 5 |
+
// that contain a break that can not be resolved at compile time.
|
| 6 |
+
#ifdef __clang__
|
| 7 |
+
#pragma clang diagnostic push
|
| 8 |
+
#pragma clang diagnostic ignored "-Wpass-failed"
|
| 9 |
+
#endif // __clang__
|
| 10 |
template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
|
| 11 |
#ifndef GGML_USE_HIP
|
| 12 |
__launch_bounds__(D, 1)
|
|
|
|
| 347 |
NO_DEVICE_CODE;
|
| 348 |
#endif // defined(FLASH_ATTN_AVAILABLE) && defined(FP16_AVAILABLE)
|
| 349 |
}
|
| 350 |
+
#ifdef __clang__
|
| 351 |
+
#pragma clang diagnostic pop
|
| 352 |
+
#endif // __clang__
|
| 353 |
|
| 354 |
template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
|
| 355 |
void ggml_cuda_flash_attn_ext_vec_f16_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
ggml/src/ggml-cuda/fattn-vec-f32.cuh
CHANGED
|
@@ -1,6 +1,12 @@
|
|
| 1 |
#include "common.cuh"
|
| 2 |
#include "fattn-common.cuh"
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
|
| 5 |
#ifndef GGML_USE_HIP
|
| 6 |
__launch_bounds__(D, 1)
|
|
@@ -336,6 +342,9 @@ static __global__ void flash_attn_vec_ext_f32(
|
|
| 336 |
NO_DEVICE_CODE;
|
| 337 |
#endif // FLASH_ATTN_AVAILABLE
|
| 338 |
}
|
|
|
|
|
|
|
|
|
|
| 339 |
|
| 340 |
template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
|
| 341 |
void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|
|
|
| 1 |
#include "common.cuh"
|
| 2 |
#include "fattn-common.cuh"
|
| 3 |
|
| 4 |
+
// Currently LLVM with the amdgcn target does not support unrolling loops
|
| 5 |
+
// that contain a break that can not be resolved at compile time.
|
| 6 |
+
#ifdef __clang__
|
| 7 |
+
#pragma clang diagnostic push
|
| 8 |
+
#pragma clang diagnostic ignored "-Wpass-failed"
|
| 9 |
+
#endif // __clang__
|
| 10 |
template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
|
| 11 |
#ifndef GGML_USE_HIP
|
| 12 |
__launch_bounds__(D, 1)
|
|
|
|
| 342 |
NO_DEVICE_CODE;
|
| 343 |
#endif // FLASH_ATTN_AVAILABLE
|
| 344 |
}
|
| 345 |
+
#ifdef __clang__
|
| 346 |
+
#pragma clang diagnostic pop
|
| 347 |
+
#endif // __clang__
|
| 348 |
|
| 349 |
template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
|
| 350 |
void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|