compilade committed on
Commit
a575f57
·
1 Parent(s): cf24af7

ggml-quants : fix make_qp_quants NANs and IQ1 assertion errors (llama/15379)

Browse files

* ggml-quants : fix make_qp_quants NANs and IQ1 assertion errors

* ggml-quants : avoid division by zero in make_q3_quants

Files changed (1) hide show
  1. ggml/src/ggml-quants.c +5 -5
ggml/src/ggml-quants.c CHANGED
@@ -566,7 +566,7 @@ static float make_q3_quants(int n, int nmax, const float * GGML_RESTRICT x, int8
566
  for (int i = 0; i < n; ++i) {
567
  L[i] += nmax;
568
  }
569
- return sumlx / suml2;
570
  }
571
  for (int i = 0; i < n; ++i) {
572
  int l = nearest_int(iscale * x[i]);
@@ -901,7 +901,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
901
  for (int i = 0; i < n; ++i) {
902
  max = MAX(max, x[i]);
903
  }
904
- if (!max) { // all zero
905
  for (int i = 0; i < n; ++i) { L[i] = 0; }
906
  return 0.f;
907
  }
@@ -966,7 +966,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
966
  break;
967
  }
968
  }
969
- return sumlx/suml2;
970
  }
971
 
972
  static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k, const float * GGML_RESTRICT quant_weights) {
@@ -4266,7 +4266,7 @@ static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_R
4266
  sumw[j+1] = sumw[j] + weight[i];
4267
  }
4268
  }
4269
- float best_score = -FLT_MIN, scale = max;
4270
  int besti1 = -1, besti2 = -1, best_shift = 0;
4271
  for (int i1 = 0; i1 <= block_size; ++i1) {
4272
  for (int i2 = i1; i2 <= block_size; ++i2) {
@@ -4442,7 +4442,7 @@ static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_R
4442
  idx[2*j] = j;
4443
  }
4444
  qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
4445
- float best_score = -FLT_MIN, scale = max;
4446
  int besti1 = -1, besti2 = -1, best_k = -1;
4447
  // 0: +, +
4448
  // 1: +, -
 
566
  for (int i = 0; i < n; ++i) {
567
  L[i] += nmax;
568
  }
569
+ return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
570
  }
571
  for (int i = 0; i < n; ++i) {
572
  int l = nearest_int(iscale * x[i]);
 
901
  for (int i = 0; i < n; ++i) {
902
  max = MAX(max, x[i]);
903
  }
904
+ if (max < GROUP_MAX_EPS) { // all zero
905
  for (int i = 0; i < n; ++i) { L[i] = 0; }
906
  return 0.f;
907
  }
 
966
  break;
967
  }
968
  }
969
+ return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
970
  }
971
 
972
  static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k, const float * GGML_RESTRICT quant_weights) {
 
4266
  sumw[j+1] = sumw[j] + weight[i];
4267
  }
4268
  }
4269
+ float best_score = -FLT_MAX, scale = max;
4270
  int besti1 = -1, besti2 = -1, best_shift = 0;
4271
  for (int i1 = 0; i1 <= block_size; ++i1) {
4272
  for (int i2 = i1; i2 <= block_size; ++i2) {
 
4442
  idx[2*j] = j;
4443
  }
4444
  qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
4445
+ float best_score = -FLT_MAX, scale = max;
4446
  int besti1 = -1, besti2 = -1, best_k = -1;
4447
  // 0: +, +
4448
  // 1: +, -