Spaces:
Sleeping
Sleeping
ggml-quants : fix make_qp_quants NANs and IQ1 assertion errors (llama/15379)
Browse files
* ggml-quants : fix make_qp_quants NANs and IQ1 assertion errors
* ggml-quants : avoid division by zero in make_q3_quants
- ggml/src/ggml-quants.c +5 -5
ggml/src/ggml-quants.c
CHANGED
|
@@ -566,7 +566,7 @@ static float make_q3_quants(int n, int nmax, const float * GGML_RESTRICT x, int8
|
|
| 566 |
for (int i = 0; i < n; ++i) {
|
| 567 |
L[i] += nmax;
|
| 568 |
}
|
| 569 |
-
return sumlx / suml2;
|
| 570 |
}
|
| 571 |
for (int i = 0; i < n; ++i) {
|
| 572 |
int l = nearest_int(iscale * x[i]);
|
|
@@ -901,7 +901,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
|
|
| 901 |
for (int i = 0; i < n; ++i) {
|
| 902 |
max = MAX(max, x[i]);
|
| 903 |
}
|
| 904 |
-
if (!max) { // all zero
|
| 905 |
for (int i = 0; i < n; ++i) { L[i] = 0; }
|
| 906 |
return 0.f;
|
| 907 |
}
|
|
@@ -966,7 +966,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
|
|
| 966 |
break;
|
| 967 |
}
|
| 968 |
}
|
| 969 |
-
return sumlx/suml2;
|
| 970 |
}
|
| 971 |
|
| 972 |
static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k, const float * GGML_RESTRICT quant_weights) {
|
|
@@ -4266,7 +4266,7 @@ static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_R
|
|
| 4266 |
sumw[j+1] = sumw[j] + weight[i];
|
| 4267 |
}
|
| 4268 |
}
|
| 4269 |
-
float best_score = -FLT_MIN, scale = max;
|
| 4270 |
int besti1 = -1, besti2 = -1, best_shift = 0;
|
| 4271 |
for (int i1 = 0; i1 <= block_size; ++i1) {
|
| 4272 |
for (int i2 = i1; i2 <= block_size; ++i2) {
|
|
@@ -4442,7 +4442,7 @@ static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_R
|
|
| 4442 |
idx[2*j] = j;
|
| 4443 |
}
|
| 4444 |
qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
|
| 4445 |
-
float best_score = -FLT_MIN, scale = max;
|
| 4446 |
int besti1 = -1, besti2 = -1, best_k = -1;
|
| 4447 |
// 0: +, +
|
| 4448 |
// 1: +, -
|
|
|
|
| 566 |
for (int i = 0; i < n; ++i) {
|
| 567 |
L[i] += nmax;
|
| 568 |
}
|
| 569 |
+
return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
|
| 570 |
}
|
| 571 |
for (int i = 0; i < n; ++i) {
|
| 572 |
int l = nearest_int(iscale * x[i]);
|
|
|
|
| 901 |
for (int i = 0; i < n; ++i) {
|
| 902 |
max = MAX(max, x[i]);
|
| 903 |
}
|
| 904 |
+
if (max < GROUP_MAX_EPS) { // all zero
|
| 905 |
for (int i = 0; i < n; ++i) { L[i] = 0; }
|
| 906 |
return 0.f;
|
| 907 |
}
|
|
|
|
| 966 |
break;
|
| 967 |
}
|
| 968 |
}
|
| 969 |
+
return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
|
| 970 |
}
|
| 971 |
|
| 972 |
static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k, const float * GGML_RESTRICT quant_weights) {
|
|
|
|
| 4266 |
sumw[j+1] = sumw[j] + weight[i];
|
| 4267 |
}
|
| 4268 |
}
|
| 4269 |
+
float best_score = -FLT_MAX, scale = max;
|
| 4270 |
int besti1 = -1, besti2 = -1, best_shift = 0;
|
| 4271 |
for (int i1 = 0; i1 <= block_size; ++i1) {
|
| 4272 |
for (int i2 = i1; i2 <= block_size; ++i2) {
|
|
|
|
| 4442 |
idx[2*j] = j;
|
| 4443 |
}
|
| 4444 |
qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
|
| 4445 |
+
float best_score = -FLT_MAX, scale = max;
|
| 4446 |
int besti1 = -1, besti2 = -1, best_k = -1;
|
| 4447 |
// 0: +, +
|
| 4448 |
// 1: +, -
|