ggerganov committed
Commit 06c222c · unverified · 1 Parent(s): b85f30e

talk-llama : sync llama.cpp
examples/talk-llama/llama.cpp CHANGED
@@ -68,10 +68,12 @@
   #include <cstdio>
   #include <cstring>
   #include <ctime>
+ #include <cwctype>
   #include <forward_list>
   #include <fstream>
   #include <functional>
   #include <initializer_list>
+ #include <locale>
   #include <map>
   #include <memory>
   #include <mutex>
@@ -1550,8 +1552,9 @@ static const size_t MiB = 1024*kiB;
   static const size_t GiB = 1024*MiB;
  
   struct llama_hparams {
- bool vocab_only;
- bool rope_finetuned;
+ bool vocab_only;
+ bool rope_finetuned;
+
   uint32_t n_vocab;
   uint32_t n_ctx_train; // context size the model was trained on
   uint32_t n_embd;
@@ -1580,7 +1583,8 @@ struct llama_hparams {
   bool causal_attn = true;
   bool need_kq_pos = false;
  
- uint32_t pooling_type = LLAMA_POOLING_TYPE_NONE;
+ enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE;
+ enum llama_rope_type    rope_type    = LLAMA_ROPE_TYPE_NONE;
  
   bool operator!=(const llama_hparams & other) const {
   if (this->vocab_only != other.vocab_only) return true;
@@ -1639,6 +1643,7 @@ struct llama_cparams {
   float yarn_attn_factor;
   float yarn_beta_fast;
   float yarn_beta_slow;
+ float defrag_thold;
  
   bool mul_mat_q;
   bool offload_kqv;
@@ -1707,11 +1712,20 @@ struct llama_kv_cell {
   bool has_seq_id(const llama_seq_id & id) const {
   return seq_id.find(id) != seq_id.end();
   }
+
+ bool is_empty() const {
+ return seq_id.empty();
+ }
+
+ bool is_same_seq(const llama_kv_cell & other) const {
+ return seq_id == other.seq_id;
+ }
   };
  
   // ring-buffer of cached KV data
   struct llama_kv_cache {
   bool has_shift = false;
+ bool do_defrag = false;
  
   // Note: The value of head isn't only used to optimize searching
   // for a free KV slot. llama_decode_internal also uses it, so it
@@ -1723,6 +1737,9 @@ struct llama_kv_cache {
   // computed before each graph build
   uint32_t n = 0;
  
+ ggml_type type_k = GGML_TYPE_F16;
+ ggml_type type_v = GGML_TYPE_F16;
+
   std::vector<llama_kv_cell> cells;
  
   std::vector<struct ggml_tensor *> k_l; // per layer
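The two cell helpers and the pair of flags (has_shift for pending RoPE shifts, do_defrag for pending defragmentation) are the groundwork for the cache maintenance added later in this sync. A minimal sketch of the kind of bookkeeping they enable (illustrative only; `cache` stands for a populated llama_kv_cache):

    // count cells that no sequence references any more
    uint32_t n_free = 0;
    for (uint32_t i = 0; i < cache.size; ++i) {
        if (cache.cells[i].is_empty()) {
            n_free++;
        }
    }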
@@ -1958,8 +1975,8 @@ struct llama_context {
   static bool llama_kv_cache_init(
   struct llama_kv_cache & cache,
   const llama_model & model,
- ggml_type ktype,
- ggml_type vtype,
+ ggml_type type_k,
+ ggml_type type_v,
   uint32_t n_ctx,
   bool offload) {
   const struct llama_hparams & hparams = model.hparams;
@@ -1974,6 +1991,9 @@ static bool llama_kv_cache_init(
   cache.size = n_ctx;
   cache.used = 0;
  
+ cache.type_k = type_k;
+ cache.type_v = type_v;
+
   cache.cells.clear();
   cache.cells.resize(n_ctx);
  
@@ -2014,8 +2034,8 @@ static bool llama_kv_cache_init(
  
   for (int i = 0; i < (int) n_layer; i++) {
   struct ggml_context * ctx = offload ? ctx_map.at(model.buft_layer[i].buft) : cache.ctxs.front();
- ggml_tensor * k = ggml_new_tensor_1d(ctx, ktype, n_embd_k_gqa*n_ctx);
- ggml_tensor * v = ggml_new_tensor_1d(ctx, vtype, n_embd_v_gqa*n_ctx);
+ ggml_tensor * k = ggml_new_tensor_1d(ctx, type_k, n_embd_k_gqa*n_ctx);
+ ggml_tensor * v = ggml_new_tensor_1d(ctx, type_v, n_embd_v_gqa*n_ctx);
   ggml_format_name(k, "cache_k_l%d", i);
   ggml_format_name(v, "cache_v_l%d", i);
   cache.k_l.push_back(k);
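The render dropped the added lines in these three hunks; the type_k/type_v names restored above follow the names later llama.cpp uses, and they line up with the llama_context_params fields visible further down in this diff (`/*.type_k =*/ GGML_TYPE_F16`). A usage sketch for a quantized K cache (whether a given K/V type combination is accepted depends on the build):

    llama_context_params cparams = llama_context_default_params();
    cparams.type_k = GGML_TYPE_Q8_0;  // 8-bit K cache roughly halves K memory vs. f16
    cparams.type_v = GGML_TYPE_F16;   // V cache left at f16
    llama_context * ctx = llama_new_context_with_model(model, cparams);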
@@ -2099,7 +2119,7 @@ static bool llama_kv_cache_find_slot(
   // find how many cells are currently in use
   static int32_t llama_kv_cache_cell_max(const struct llama_kv_cache & cache) {
   for (uint32_t i = cache.size - 1; i > 0; --i) {
- if (cache.cells[i].pos >= 0 && !cache.cells[i].seq_id.empty()) {
+ if (cache.cells[i].pos >= 0 && !cache.cells[i].is_empty()) {
   return i + 1;
   }
   }
@@ -2135,7 +2155,7 @@ static void llama_kv_cache_seq_rm(
   } else {
   continue;
   }
- if (cache.cells[i].seq_id.empty()) {
+ if (cache.cells[i].is_empty()) {
   // keep count of the number of used cells
   if (cache.cells[i].pos >= 0) cache.used--;
  
@@ -2186,7 +2206,7 @@ static void llama_kv_cache_seq_keep(struct llama_kv_cache & cache, llama_seq_id
   if (new_head != cache.size && new_head < cache.head) cache.head = new_head;
   }
  
- static void llama_kv_cache_seq_shift(
+ static void llama_kv_cache_seq_add(
   struct llama_kv_cache & cache,
   llama_seq_id seq_id,
   llama_pos p0,
@@ -2204,10 +2224,14 @@ static void llama_kv_cache_seq_add(
   cache.cells[i].delta += delta;
  
   if (cache.cells[i].pos < 0) {
- if (!cache.cells[i].seq_id.empty()) cache.used--;
+ if (!cache.cells[i].is_empty()) {
+ cache.used--;
+ }
   cache.cells[i].pos = -1;
   cache.cells[i].seq_id.clear();
- if (new_head == cache.size) new_head = i;
+ if (new_head == cache.size) {
+ new_head = i;
+ }
   }
   }
   }
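The rename from llama_kv_cache_seq_shift to llama_kv_cache_seq_add, the is_empty() call sites, and the brace-expanded bodies restored in the hunks above are reconstructions following later llama.cpp (the render dropped the `+` lines). The semantics match the new name: delta is added to every position of seq_id in [p0, p1), and only the bookkeeping happens here — the cached K data is re-rotated later:

    // per affected cell (paraphrase of the loop above):
    cache.has_shift = true;        // a K-shift pass must run before next use
    cache.cells[i].pos   += delta; // logical position changes immediately
    cache.cells[i].delta += delta; // accumulated displacement, consumed by the
                                   // deferred RoPE re-rotation of the K cache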
@@ -2239,6 +2263,22 @@ static void llama_kv_cache_seq_div(
   }
   }
  
+ …
   //
   // model loading and saving
   //
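The sixteen lines added just before this model-loading banner are not preserved in this render. Given the do_defrag flag above and the llama_kv_cache_defrag API that appears near the end of the diff, a plausible reading is a deferred-defrag helper of roughly this shape (a sketch, not the verbatim addition):

    static void llama_kv_cache_defrag(struct llama_kv_cache & cache) {
        // only request the work; the compaction itself runs on the next
        // cache update, where a graph can be built for it
        cache.do_defrag = true;
    }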
@@ -2310,7 +2350,7 @@ namespace GGUFMeta {
   }
   };
  
- struct ArrayInfo{
+ struct ArrayInfo {
   const gguf_type gt;
   const size_t length;
   const void * data;
2329
  };
2330
 
2331
  template<typename T>
2332
- class GKV: public GKV_Base<T> {
2333
  GKV() = delete;
2334
 
2335
  public:
@@ -2352,39 +2392,39 @@
   return "unknown";
   }
  
- static bool validate_override(const llama_model_kv_override_type expected_type, const struct llama_model_kv_override *override) {
- if (!override) { return false; }
- if (override->tag == expected_type) {
+ static bool validate_override(const llama_model_kv_override_type expected_type, const struct llama_model_kv_override * ovrd) {
+ if (!ovrd) { return false; }
+ if (ovrd->tag == expected_type) {
   LLAMA_LOG_INFO("%s: Using metadata override (%5s) '%s' = ",
- __func__, override_type_to_str(override->tag), override->key);
- switch (override->tag) {
+ __func__, override_type_to_str(ovrd->tag), ovrd->key);
+ switch (ovrd->tag) {
   case LLAMA_KV_OVERRIDE_TYPE_BOOL: {
- LLAMA_LOG_INFO("%s\n", override->bool_value ? "true" : "false");
+ LLAMA_LOG_INFO("%s\n", ovrd->bool_value ? "true" : "false");
   } break;
   case LLAMA_KV_OVERRIDE_TYPE_INT: {
- LLAMA_LOG_INFO("%" PRId64 "\n", override->int_value);
+ LLAMA_LOG_INFO("%" PRId64 "\n", ovrd->int_value);
   } break;
   case LLAMA_KV_OVERRIDE_TYPE_FLOAT: {
- LLAMA_LOG_INFO("%.6f\n", override->float_value);
+ LLAMA_LOG_INFO("%.6f\n", ovrd->float_value);
   } break;
   default:
   // Shouldn't be possible to end up here, but just in case...
   throw std::runtime_error(
   format("Unsupported attempt to override %s type for metadata key %s\n",
- override_type_to_str(override->tag), override->key));
+ override_type_to_str(ovrd->tag), ovrd->key));
   }
   return true;
   }
   LLAMA_LOG_WARN("%s: Warning: Bad metadata override type for key '%s', expected %s but got %s\n",
- __func__, override->key, override_type_to_str(expected_type), override_type_to_str(override->tag));
+ __func__, ovrd->key, override_type_to_str(expected_type), override_type_to_str(ovrd->tag));
   return false;
   }
  
   template<typename OT>
   static typename std::enable_if<std::is_same<OT, bool>::value, bool>::type
- try_override(OT & target, const struct llama_model_kv_override *override) {
- if (validate_override(LLAMA_KV_OVERRIDE_TYPE_BOOL, override)) {
- target = override->bool_value;
+ try_override(OT & target, const struct llama_model_kv_override * ovrd) {
+ if (validate_override(LLAMA_KV_OVERRIDE_TYPE_BOOL, ovrd)) {
+ target = ovrd->bool_value;
   return true;
   }
   return false;
@@ -2392,9 +2432,9 @@
  
   template<typename OT>
   static typename std::enable_if<!std::is_same<OT, bool>::value && std::is_integral<OT>::value, bool>::type
- try_override(OT & target, const struct llama_model_kv_override *override) {
- if (validate_override(LLAMA_KV_OVERRIDE_TYPE_INT, override)) {
- target = override->int_value;
+ try_override(OT & target, const struct llama_model_kv_override * ovrd) {
+ if (validate_override(LLAMA_KV_OVERRIDE_TYPE_INT, ovrd)) {
+ target = ovrd->int_value;
   return true;
   }
   return false;
@@ -2402,9 +2442,9 @@
  
   template<typename OT>
   static typename std::enable_if<std::is_floating_point<OT>::value, bool>::type
- try_override(T & target, const struct llama_model_kv_override *override) {
- if (validate_override(LLAMA_KV_OVERRIDE_TYPE_FLOAT, override)) {
- target = override->float_value;
+ try_override(T & target, const struct llama_model_kv_override * ovrd) {
+ if (validate_override(LLAMA_KV_OVERRIDE_TYPE_FLOAT, ovrd)) {
+ target = ovrd->float_value;
   return true;
   }
   return false;
@@ -2412,17 +2452,17 @@
  
   template<typename OT>
   static typename std::enable_if<std::is_same<OT, std::string>::value, bool>::type
- try_override(T & target, const struct llama_model_kv_override *override) {
+ try_override(T & target, const struct llama_model_kv_override * ovrd) {
   (void)target;
- (void)override;
- if (!override) { return false; }
+ (void)ovrd;
+ if (!ovrd) { return false; }
   // Currently, we should never end up here so it would be a bug if we do.
   throw std::runtime_error(format("Unsupported attempt to override string type for metadata key %s\n",
- override ? override->key : "NULL"));
+ ovrd ? ovrd->key : "NULL"));
   }
  
- static bool set(const gguf_context * ctx, const int k, T & target, const struct llama_model_kv_override *override = nullptr) {
- if (try_override<T>(target, override)) {
+ static bool set(const gguf_context * ctx, const int k, T & target, const struct llama_model_kv_override * ovrd = nullptr) {
+ if (try_override<T>(target, ovrd)) {
   return true;
   }
   if (k < 0) { return false; }
2430
  return true;
2431
  }
2432
 
2433
- static bool set(const gguf_context * ctx, const char * key, T & target, const struct llama_model_kv_override *override = nullptr) {
2434
- return set(ctx, gguf_find_key(ctx, key), target, override);
2435
  }
2436
 
2437
- static bool set(const gguf_context * ctx, const std::string & key, T & target, const struct llama_model_kv_override *override = nullptr) {
2438
- return set(ctx, key.c_str(), target, override);
2439
  }
2440
  };
2441
  }
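In these GGUFMeta hunks the dropped `+` lines evidently rename the parameter `override`, which collides with the C++ override specifier in some toolchains; `ovrd`, used in the restorations above, is the name later llama.cpp settled on. These helpers back the kv_overrides array of llama_model_params. Usage sketch (field names as in this vintage of llama.h; the array is terminated by an entry with an empty key):

    llama_model_kv_override kvo[2] = {};
    std::snprintf(kvo[0].key, sizeof(kvo[0].key), "llama.expert_used_count");
    kvo[0].tag       = LLAMA_KV_OVERRIDE_TYPE_INT;
    kvo[0].int_value = 2;                  // force 2 experts per token
    // kvo[1] stays zeroed -> empty key terminates the list

    llama_model_params mparams = llama_model_default_params();
    mparams.kv_overrides = kvo;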
@@ -2542,9 +2582,11 @@ struct llama_model_loader {
   case GGML_TYPE_Q6_K: ftype = LLAMA_FTYPE_MOSTLY_Q6_K; break;
   case GGML_TYPE_IQ2_XXS: ftype = LLAMA_FTYPE_MOSTLY_IQ2_XXS; break;
   case GGML_TYPE_IQ2_XS: ftype = LLAMA_FTYPE_MOSTLY_IQ2_XS; break;
+ case GGML_TYPE_IQ2_S: ftype = LLAMA_FTYPE_MOSTLY_IQ2_S; break;
   case GGML_TYPE_IQ3_XXS: ftype = LLAMA_FTYPE_MOSTLY_IQ3_XXS; break;
   case GGML_TYPE_IQ1_S: ftype = LLAMA_FTYPE_MOSTLY_IQ1_S; break;
   case GGML_TYPE_IQ4_NL: ftype = LLAMA_FTYPE_MOSTLY_IQ4_NL; break;
+ case GGML_TYPE_IQ4_XS: ftype = LLAMA_FTYPE_MOSTLY_IQ4_XS; break;
   case GGML_TYPE_IQ3_S: ftype = LLAMA_FTYPE_MOSTLY_IQ3_S; break;
   default:
   {
@@ -2846,6 +2888,15 @@ struct llama_model_loader {
   }
   };
  
+ …
   //
   // load LLaMA models
   //
@@ -2887,10 +2938,13 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
   case LLAMA_FTYPE_MOSTLY_Q6_K: return "Q6_K";
   case LLAMA_FTYPE_MOSTLY_IQ2_XXS:return "IQ2_XXS - 2.0625 bpw";
   case LLAMA_FTYPE_MOSTLY_IQ2_XS: return "IQ2_XS - 2.3125 bpw";
- case LLAMA_FTYPE_MOSTLY_Q3_K_XS:return "Q3_K - Extra small";
+ case LLAMA_FTYPE_MOSTLY_IQ2_S: return "IQ2_S - 2.5 bpw";
+ case LLAMA_FTYPE_MOSTLY_IQ2_M: return "IQ2_M - 2.7 bpw";
+ case LLAMA_FTYPE_MOSTLY_IQ3_XS: return "IQ3_XS - 3.3 bpw";
   case LLAMA_FTYPE_MOSTLY_IQ3_XXS:return "IQ3_XXS - 3.0625 bpw";
   case LLAMA_FTYPE_MOSTLY_IQ1_S :return "IQ1_S - 1.5625 bpw";
   case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw";
+ case LLAMA_FTYPE_MOSTLY_IQ4_XS: return "IQ4_XS - 4.25 bpw";
   case LLAMA_FTYPE_MOSTLY_IQ3_S: return "IQ3_S - 3.4375 bpw";
   case LLAMA_FTYPE_MOSTLY_IQ3_M: return "IQ3_S mix - 3.66 bpw";
  
@@ -2926,16 +2980,16 @@ static const char * llama_model_type_name(e_model type) {
   default: return "?B";
   }
   }
+
   static const char * llama_model_vocab_type_name(enum llama_vocab_type type){
   switch (type) {
- case LLAMA_VOCAB_TYPE_SPM: return "SPM";
- case LLAMA_VOCAB_TYPE_BPE: return "BPE";
- case LLAMA_VOCAB_TYPE_WPM: return "WPM";
- default: return "unknown";
+ …
   }
   }
  
-
   static void llm_load_arch(llama_model_loader & ml, llama_model & model) {
   model.arch = ml.get_arch();
   if (model.arch == LLM_ARCH_UNKNOWN) {
@@ -3112,10 +3166,10 @@ static void llm_load_hparams(
   } break;
   case LLM_ARCH_BERT:
   {
- ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
- ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
+ …
   ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
- ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type);
+ …
  
   switch (hparams.n_layer) {
   case 3:
@@ -3133,10 +3187,10 @@ static void llm_load_hparams(
   } break;
   case LLM_ARCH_NOMIC_BERT:
   {
- ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
- ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
+ …
   ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
- ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type);
+ …
  
   if (hparams.n_layer == 12 && hparams.n_embd == 768) {
   model.type = e_model::MODEL_137M;
@@ -3275,6 +3329,8 @@ static void llm_load_hparams(
   if (hparams.f_max_alibi_bias > 0.0f) {
   hparams.need_kq_pos = true;
   }
+
+ hparams.rope_type = llama_rope_type(&model);
   }
  
   // TODO: This should probably be in llama.h
@@ -3577,6 +3633,8 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
   LLAMA_LOG_INFO("%s: n_ff = %u\n", __func__, hparams.n_ff);
   LLAMA_LOG_INFO("%s: n_expert = %u\n", __func__, hparams.n_expert);
   LLAMA_LOG_INFO("%s: n_expert_used = %u\n", __func__, hparams.n_expert_used);
+ …
   LLAMA_LOG_INFO("%s: rope scaling = %s\n", __func__, rope_scaling_type);
   LLAMA_LOG_INFO("%s: freq_base_train = %.1f\n", __func__, hparams.rope_freq_base_train);
   LLAMA_LOG_INFO("%s: freq_scale_train = %g\n", __func__, hparams.rope_freq_scale_train);
@@ -4598,12 +4656,6 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
  
   using llm_build_cb = std::function<void(struct ggml_tensor * cur, const char * name, int nl)>;
  
- enum llm_rope_type {
- LLM_ROPE,
- LLM_ROPE_NEOX,
- LLM_ROPE_GLM,
- };
-
   enum llm_ffn_op_type {
   LLM_FFN_SILU,
   LLM_FFN_GELU,
@@ -4649,55 +4701,6 @@ static struct ggml_tensor * llm_build_inp_embd(
   return inpL;
   }
  
- // Persimmon: n_rot = n_embd_head_k/2
- // Other: n_rot = n_embd_head_k
- static void llm_build_k_shift(
- struct ggml_context * ctx,
- const llama_hparams & hparams,
- const llama_cparams & cparams,
- const llama_kv_cache & kv,
- struct ggml_cgraph * graph,
- struct ggml_tensor * K_shift,
- llm_rope_type type,
- int64_t n_ctx,
- float freq_base,
- float freq_scale,
- const llm_build_cb & cb) {
- const int64_t n_layer = hparams.n_layer;
- const int64_t n_head_kv = hparams.n_head_kv;
- const int64_t n_embd_head_k = hparams.n_embd_head_k;
- const int64_t n_embd_k_gqa = hparams.n_embd_k_gqa();
- const int32_t n_rot = hparams.n_rot;
- const int32_t n_orig_ctx = cparams.n_yarn_orig_ctx;
- const float ext_factor = cparams.yarn_ext_factor;
- const float attn_factor = cparams.yarn_attn_factor;
- const float beta_fast = cparams.yarn_beta_fast;
- const float beta_slow = cparams.yarn_beta_slow;
-
- int rope_type = 0;
-
- switch (type) {
- case LLM_ROPE: rope_type = 0; break;
- case LLM_ROPE_NEOX: rope_type = 2; break;
- case LLM_ROPE_GLM: rope_type = 4; break;
- }
-
- for (int il = 0; il < n_layer; ++il) {
- struct ggml_tensor * tmp =
- // we rotate only the first n_rot dimensions
- ggml_rope_custom_inplace(ctx,
- ggml_view_3d(ctx, kv.k_l[il],
- n_embd_head_k, n_head_kv, n_ctx,
- ggml_row_size(kv.k_l[il]->type, n_embd_head_k),
- ggml_row_size(kv.k_l[il]->type, n_embd_k_gqa),
- 0),
- K_shift, n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
- ext_factor, attn_factor, beta_fast, beta_slow);
- cb(tmp, "K_shifted", il);
- ggml_build_forward_expand(graph, tmp);
- }
- }
-
   static void llm_build_kv_store(
   struct ggml_context * ctx,
   const llama_hparams & hparams,
@@ -4899,8 +4902,8 @@ static struct ggml_tensor * llm_build_kqv(
   ggml_mul_mat_set_prec(kq, GGML_PREC_F32);
   }
  
- #if defined(GGML_USE_VULKAN) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_SYCL)
- #pragma message("TODO: ALiBi support in ggml_soft_max_ext is not implemented for Vulkan, Kompute, and SYCL")
+ …
   #pragma message(" Falling back to ggml_alibi(). Will become an error in Mar 2024")
   #pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5488")
   if (hparams.f_max_alibi_bias > 0.0f) {
@@ -5001,6 +5004,7 @@ struct llm_build_context {
  
   const int64_t n_embd;
   const int64_t n_layer;
+ const int64_t n_rot;
   const int64_t n_ctx; // user-specified context size (can be different from n_ctx_train)
   const int64_t n_head;
   const int64_t n_head_kv;
@@ -5025,8 +5029,8 @@ struct llm_build_context {
   const int32_t kv_head; // index of where we store new KV data in the cache
   const int32_t n_orig_ctx;
  
- const bool do_rope_shift;
- const uint32_t pooling_type;
+ const enum llama_pooling_type pooling_type;
+ const enum llama_rope_type    rope_type;
  
   const llm_build_cb & cb;
  
@@ -5048,6 +5052,7 @@ struct llm_build_context {
   kv_self (lctx.kv_self),
   n_embd (hparams.n_embd),
   n_layer (hparams.n_layer),
+ n_rot (hparams.n_rot),
   n_ctx (cparams.n_ctx),
   n_head (hparams.n_head),
   n_head_kv (hparams.n_head_kv),
@@ -5069,8 +5074,8 @@ struct llm_build_context {
   n_kv (worst_case ? n_ctx : kv_self.n),
   kv_head (worst_case ? n_ctx - n_tokens : kv_self.head),
   n_orig_ctx (cparams.n_yarn_orig_ctx),
- do_rope_shift (worst_case || kv_self.has_shift),
- pooling_type (cparams.do_pooling ? hparams.pooling_type : (uint32_t)LLAMA_POOLING_TYPE_NONE),
+ pooling_type (cparams.do_pooling ? hparams.pooling_type : LLAMA_POOLING_TYPE_NONE),
+ rope_type (hparams.rope_type),
   cb (cb),
   buf_compute_meta (lctx.buf_compute_meta) {
   // all initializations should be done in init()
@@ -5093,6 +5098,76 @@ struct llm_build_context {
   }
   }
  
+ …
   struct ggml_cgraph * build_llama() {
   struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
  
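The seventy added lines in the hunk above are not rendered. Since every per-model `do_rope_shift` block below is deleted along with the free function llm_build_k_shift, the addition is almost certainly a member graph builder that performs the K-cache shift once, for all models. A sketch reconstructed from the deleted llm_build_k_shift (member names per this struct; not the verbatim code):

    struct ggml_cgraph * build_k_shift() {
        struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);

        for (int il = 0; il < n_layer; ++il) {
            struct ggml_tensor * tmp =
                // we rotate only the first n_rot dimensions
                ggml_rope_custom_inplace(ctx0,
                        ggml_view_3d(ctx0, kv_self.k_l[il],
                            n_embd_head_k, n_head_kv, n_ctx,
                            ggml_row_size(kv_self.k_l[il]->type, n_embd_head_k),
                            ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa),
                            0),
                        lctx.inp_K_shift, n_rot, rope_type, 0, n_orig_ctx,
                        freq_base, freq_scale, ext_factor, attn_factor,
                        beta_fast, beta_slow);
            cb(tmp, "K_shifted", il);
            ggml_build_forward_expand(gf, tmp);
        }

        return gf;
    }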
@@ -5114,11 +5189,6 @@ struct llm_build_context {
   struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
   cb(KQ_mask, "KQ_mask", -1);
  
- // shift the entire K-cache if needed
- if (do_rope_shift) {
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
- }
-
   for (int il = 0; il < n_layer; ++il) {
   struct ggml_tensor * inpSA = inpL;
  
@@ -5154,14 +5224,14 @@ struct llm_build_context {
  
   Qcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
- hparams.n_rot, 0, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Qcur, "Qcur", il);
  
   Kcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
- hparams.n_rot, 0, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Kcur, "Kcur", il);
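Here and in every analogous hunk below, the dropped `+` lines replace `hparams.n_rot` plus a hard-coded mode literal with context members; the restorations use `n_rot, rope_type, 0, …`, which is how these calls read in later llama.cpp, and the new llm_build_context members restored above follow the same source. The old literals map onto the public enum (values as in llama.h of this vintage):

    enum llama_rope_type {
        LLAMA_ROPE_TYPE_NONE = -1,
        LLAMA_ROPE_TYPE_NORM =  0,  // the literal 0 in the old calls
        LLAMA_ROPE_TYPE_NEOX =  2,  // the literal 2 ("neox mode")
        LLAMA_ROPE_TYPE_GLM  =  4,  // matches the removed LLM_ROPE_GLM
    };

The rope style thereby becomes a per-model hparam resolved once at load time (see the `hparams.rope_type = llama_rope_type(&model);` line restored in llm_load_hparams) instead of a constant baked into each graph builder.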
@@ -5302,11 +5372,6 @@ struct llm_build_context {
   struct ggml_tensor * KQ_pos = ggml_view_1d(ctx0, lctx.inp_KQ_pos, n_kv, 0);
   cb(KQ_pos, "KQ_pos", -1);
  
- // shift the entire K-cache if needed
- if (do_rope_shift) {
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
- }
-
   for (int il = 0; il < n_layer; ++il) {
   struct ggml_tensor * inpSA = inpL;
  
5330
  case MODEL_7B:
5331
  Qcur = ggml_rope_custom(
5332
  ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
5333
- hparams.n_rot, 0, 0, n_orig_ctx, freq_base, freq_scale,
5334
  ext_factor, attn_factor, beta_fast, beta_slow
5335
  );
5336
  Kcur = ggml_rope_custom(
5337
  ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
5338
- hparams.n_rot, 0, 0, n_orig_ctx, freq_base, freq_scale,
5339
  ext_factor, attn_factor, beta_fast, beta_slow
5340
  );
5341
  break;
@@ -5420,11 +5485,6 @@ struct llm_build_context {
5420
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
5421
  cb(KQ_mask, "KQ_mask", -1);
5422
 
5423
- // shift the entire K-cache if needed
5424
- if (do_rope_shift) {
5425
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE_NEOX, n_ctx, freq_base, freq_scale, cb);
5426
- }
5427
-
5428
  for (int il = 0; il < n_layer; ++il) {
5429
  struct ggml_tensor * attn_norm;
5430
 
@@ -5463,13 +5523,13 @@ struct llm_build_context {
5463
 
5464
  // using mode = 2 for neox mode
5465
  Qcur = ggml_rope_custom(
5466
- ctx0, Qcur, inp_pos, hparams.n_rot, 2, 0, n_orig_ctx,
5467
  freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
5468
  );
5469
  cb(Qcur, "Qcur", il);
5470
 
5471
  Kcur = ggml_rope_custom(
5472
- ctx0, Kcur, inp_pos, hparams.n_rot, 2, 0, n_orig_ctx,
5473
  freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
5474
  );
5475
  cb(Kcur, "Kcur", il);
@@ -5639,10 +5699,6 @@ struct llm_build_context {
5639
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
5640
  cb(KQ_mask, "KQ_mask", -1);
5641
 
5642
- if (do_rope_shift) {
5643
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE_NEOX, n_ctx, freq_base, freq_scale, cb);
5644
- }
5645
-
5646
  for (int il = 0; il < n_layer; ++il) {
5647
  struct ggml_tensor * residual = inpL;
5648
 
@@ -5700,7 +5756,7 @@ struct llm_build_context {
5700
 
5701
  // RoPE the first n_rot of q/k, pass the other half, and concat.
5702
  struct ggml_tensor * qrot = ggml_view_3d(
5703
- ctx0, tmpq, hparams.n_rot, n_head, n_tokens,
5704
  ggml_element_size(tmpq) * n_embd_head,
5705
  ggml_element_size(tmpq) * n_embd_head * n_head,
5706
  0
@@ -5708,7 +5764,7 @@ struct llm_build_context {
   cb(qrot, "qrot", il);
  
   struct ggml_tensor * krot = ggml_view_3d(
- ctx0, tmpk, hparams.n_rot, n_head, n_tokens,
+ ctx0, tmpk, n_rot, n_head, n_tokens,
   ggml_element_size(tmpk) * n_embd_head,
   ggml_element_size(tmpk) * n_embd_head * n_head,
   0
@@ -5717,29 +5773,29 @@ struct llm_build_context {
  
   // get the second half of tmpq, e.g tmpq[n_rot:, :, :]
   struct ggml_tensor * qpass = ggml_view_3d(
- ctx0, tmpq, hparams.n_rot, n_head, n_tokens,
+ ctx0, tmpq, n_rot, n_head, n_tokens,
   ggml_element_size(tmpq) * n_embd_head,
   ggml_element_size(tmpq) * n_embd_head * n_head,
- ggml_element_size(tmpq) * hparams.n_rot
+ ggml_element_size(tmpq) * n_rot
   );
   cb(qpass, "qpass", il);
  
   struct ggml_tensor * kpass = ggml_view_3d(
- ctx0, tmpk, hparams.n_rot, n_head, n_tokens,
+ ctx0, tmpk, n_rot, n_head, n_tokens,
   ggml_element_size(tmpk) * n_embd_head,
   ggml_element_size(tmpk) * n_embd_head * n_head,
- ggml_element_size(tmpk) * hparams.n_rot
+ ggml_element_size(tmpk) * n_rot
   );
   cb(kpass, "kpass", il);
  
   struct ggml_tensor * qrotated = ggml_rope_custom(
- ctx0, qrot, inp_pos, hparams.n_rot, 2, 0, n_orig_ctx,
+ ctx0, qrot, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
   freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(qrotated, "qrotated", il);
  
   struct ggml_tensor * krotated = ggml_rope_custom(
- ctx0, krot, inp_pos, hparams.n_rot, 2, 0, n_orig_ctx,
+ ctx0, krot, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
   freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(krotated, "krotated", il);
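Persimmon applies partial RoPE: per the deleted comment above, n_rot is n_embd_head_k/2, so q/k are split into a rotated half and a pass-through half of equal width, then concatenated. The two views alias the same tensor and differ only in their byte offset — a condensed sketch of the pattern in this hunk:

    // first n_rot dims: rotated; remaining dims: passed through unchanged
    struct ggml_tensor * qrot  = ggml_view_3d(ctx0, tmpq, n_rot, n_head, n_tokens,
            ggml_element_size(tmpq) * n_embd_head,
            ggml_element_size(tmpq) * n_embd_head * n_head,
            0);                                  // offset 0 -> dims [0, n_rot)
    struct ggml_tensor * qpass = ggml_view_3d(ctx0, tmpq, n_rot, n_head, n_tokens,
            ggml_element_size(tmpq) * n_embd_head,
            ggml_element_size(tmpq) * n_embd_head * n_head,
            ggml_element_size(tmpq) * n_rot);    // dims [n_rot, 2*n_rot)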
@@ -5991,14 +6047,14 @@ struct llm_build_context {
  
   Qcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
- hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Qcur, "Qcur", il);
  
   Kcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
- hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Kcur, "Kcur", il);
6287
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
6288
  cb(KQ_mask, "KQ_mask", -1);
6289
 
6290
- // shift the entire K-cache if needed
6291
- if (do_rope_shift) {
6292
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE_NEOX, n_ctx, freq_base, freq_scale, cb);
6293
- }
6294
-
6295
  for (int il = 0; il < n_layer; ++il) {
6296
  struct ggml_tensor * inpSA = inpL;
6297
 
@@ -6328,14 +6379,14 @@ struct llm_build_context {
  
   Qcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
- hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Qcur, "Qcur", il);
  
   Kcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
- hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Kcur, "Kcur", il);
@@ -6410,11 +6461,6 @@ struct llm_build_context {
   struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
   cb(KQ_mask, "KQ_mask", -1);
  
- // shift the entire K-cache if needed
- if (do_rope_shift) {
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE_NEOX, n_ctx, freq_base, freq_scale, cb);
- }
-
   for (int il = 0; il < n_layer; ++il) {
   struct ggml_tensor * inpSA = inpL;
  
@@ -6444,13 +6490,13 @@ struct llm_build_context {
  
   // using mode = 2 for neox mode
   Qcur = ggml_rope_custom(
- ctx0, Qcur, inp_pos, hparams.n_rot, 2, 0, n_orig_ctx,
+ ctx0, Qcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
   freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Qcur, "Qcur", il);
  
   Kcur = ggml_rope_custom(
- ctx0, Kcur, inp_pos, hparams.n_rot, 2, 0, n_orig_ctx,
+ ctx0, Kcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
   freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Kcur, "Kcur", il);
@@ -6524,11 +6570,6 @@ struct llm_build_context {
   struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
   cb(KQ_mask, "KQ_mask", -1);
  
- // shift the entire K-cache if needed
- if (do_rope_shift) {
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE_NEOX, n_ctx, freq_base, freq_scale, cb);
- }
-
   for (int il = 0; il < n_layer; ++il) {
   struct ggml_tensor * inpSA = inpL;
  
@@ -6564,14 +6605,14 @@ struct llm_build_context {
  
   Qcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
- hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Qcur, "Qcur", il);
  
   Kcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
- hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Kcur, "Kcur", il);
@@ -6645,11 +6686,6 @@ struct llm_build_context {
   struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
   cb(KQ_mask, "KQ_mask", -1);
  
- // shift the entire K-cache if needed
- if (do_rope_shift) {
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE_NEOX, n_ctx, freq_base, freq_scale, cb);
- }
-
   for (int il = 0; il < n_layer; ++il) {
   attn_norm_output = llm_build_norm(ctx0, inpL, hparams,
   model.layers[il].attn_norm,
@@ -6687,7 +6723,7 @@ struct llm_build_context {
   Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
  
   Qcur = ggml_rope_custom(
- ctx0, Qcur, inp_pos, hparams.n_rot, 2, 0, n_orig_ctx,
+ ctx0, Qcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
   freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Qcur, "Qcur", il);
@@ -6698,7 +6734,7 @@ struct llm_build_context {
   cb(Qcur, "Qcur", il);
  
   Kcur = ggml_rope_custom(
- ctx0, Kcur, inp_pos, hparams.n_rot, 2, 0, n_orig_ctx,
+ ctx0, Kcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
   freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Kcur, "Kcur", il);
@@ -6767,11 +6803,6 @@ struct llm_build_context {
   struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
   cb(KQ_mask, "KQ_mask", -1);
  
- // shift the entire K-cache if needed
- if (do_rope_shift) {
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
- }
-
   for (int il = 0; il < n_layer; ++il) {
  
   // norm
@@ -6795,14 +6826,14 @@ struct llm_build_context {
   cb(Vcur, "Vcur", il);
  
   Qcur = ggml_rope_custom(
- ctx0, ggml_reshape_3d(ctx0, Qcur, hparams.n_rot, n_head, n_tokens), inp_pos,
- n_embd_head, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ ctx0, ggml_reshape_3d(ctx0, Qcur, n_rot, n_head, n_tokens), inp_pos,
+ n_embd_head, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow);
   cb(Qcur, "Qcur", il);
  
   Kcur = ggml_rope_custom(
- ctx0, ggml_reshape_3d(ctx0, Kcur, hparams.n_rot, n_head_kv, n_tokens), inp_pos,
- n_embd_head, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ ctx0, ggml_reshape_3d(ctx0, Kcur, n_rot, n_head_kv, n_tokens), inp_pos,
+ n_embd_head, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow);
   cb(Kcur, "Kcur", il);
  
@@ -6972,11 +7003,6 @@ struct llm_build_context {
   struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
   cb(KQ_mask, "KQ_mask", -1);
  
- // shift the entire K-cache if needed
- if (do_rope_shift) {
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
- }
-
   for (int il = 0; il < n_layer; ++il) {
   cur = llm_build_norm(ctx0, inpL, hparams,
   model.layers[il].attn_norm,
@@ -7002,14 +7028,14 @@ struct llm_build_context {
  
   struct ggml_tensor * Qcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, n_tokens), inp_pos,
- hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Qcur, "Qcur", il);
  
   struct ggml_tensor * Kcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, n_tokens), inp_pos,
- hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Kcur, "Kcur", il);
@@ -7080,11 +7106,6 @@ struct llm_build_context {
   struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
   cb(KQ_mask, "KQ_mask", -1);
  
- // shift the entire K-cache if needed
- if (do_rope_shift) {
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
- }
-
   for (int il = 0; il < n_layer; ++il) {
   struct ggml_tensor * inpSA = inpL;
  
@@ -7120,14 +7141,14 @@ struct llm_build_context {
  
   Qcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
- hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Qcur, "Qcur", il);
  
   Kcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
- hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Kcur, "Kcur", il);
@@ -7199,11 +7220,6 @@ struct llm_build_context {
   struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
   cb(KQ_mask, "KQ_mask", -1);
  
- // shift the entire K-cache if needed
- if (do_rope_shift) {
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
- }
-
   for (int il = 0; il < n_layer; ++il) {
   struct ggml_tensor * inpSA = inpL;
  
@@ -7239,14 +7255,14 @@ struct llm_build_context {
  
   Qcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
- hparams.n_rot, 0, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Qcur, "Qcur", il);
  
   Kcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
- hparams.n_rot, 0, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Kcur, "Kcur", il);
@@ -7331,11 +7347,6 @@ struct llm_build_context {
   struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
   cb(KQ_mask, "KQ_mask", -1);
  
- // shift the entire K-cache if needed
- if (do_rope_shift) {
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
- }
-
   for (int il = 0; il < n_layer; ++il) {
   struct ggml_tensor * inpSA = inpL;
  
@@ -7371,14 +7382,14 @@ struct llm_build_context {
  
   Qcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
- hparams.n_rot, 0, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Qcur, "Qcur", il);
  
   Kcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
- hparams.n_rot, 0, 0, n_orig_ctx, freq_base, freq_scale,
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow
   );
   cb(Kcur, "Kcur", il);
@@ -7467,11 +7478,6 @@ struct llm_build_context {
   struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
   cb(KQ_mask, "KQ_mask", -1);
  
- // shift the entire K-cache if needed
- if (do_rope_shift) {
- llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
- }
-
   for (int il = 0; il < n_layer; ++il) {
  
   // norm
@@ -7494,7 +7500,7 @@ struct llm_build_context {
  
   Qcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head_k, n_head, n_tokens), inp_pos,
- n_embd_head_k, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ n_embd_head_k, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow);
   cb(Qcur, "Qcur", il);
  
@@ -7503,7 +7509,7 @@ struct llm_build_context {
  
   Kcur = ggml_rope_custom(
   ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head_k, n_head_kv, n_tokens), inp_pos,
- n_embd_head_k, 2, 0, n_orig_ctx, freq_base, freq_scale,
+ n_embd_head_k, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
   ext_factor, attn_factor, beta_fast, beta_slow);
   cb(Kcur, "Kcur", il);
  
@@ -7556,6 +7562,40 @@ struct llm_build_context {
   }
   };
  
+ …
   static struct ggml_cgraph * llama_build_graph(
   llama_context & lctx,
   const llama_batch & batch,
@@ -7675,6 +7715,20 @@ static struct ggml_cgraph * llama_build_graph(
   return result;
   }
  
+ …
   static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
   //
   // set input data
@@ -7742,18 +7796,6 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
   }
   }
  
- if (kv_self.has_shift) {
- const int64_t n_ctx = cparams.n_ctx;
-
- assert(ggml_backend_buffer_is_host(lctx.inp_K_shift->buffer));
-
- int32_t * data = (int32_t *) lctx.inp_K_shift->data;
-
- for (int i = 0; i < n_ctx; ++i) {
- data[i] = lctx.kv_self.cells[i].delta;
- }
- }
-
   if (cparams.do_pooling && hparams.pooling_type == LLAMA_POOLING_TYPE_MEAN) {
   const int64_t n_tokens = batch.n_tokens;
  
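This block is deleted from llama_set_inputs, and a fourteen-line helper is added near llama_build_graph (the `+ …` two hunks up). Judging by later llama.cpp, the deleted body was rehomed roughly as follows (a sketch; the helper name is assumed):

    static void llama_set_k_shift(llama_context & lctx) {
        const int64_t n_ctx = lctx.cparams.n_ctx;

        assert(ggml_backend_buffer_is_host(lctx.inp_K_shift->buffer));

        int32_t * data = (int32_t *) lctx.inp_K_shift->data;

        for (int i = 0; i < n_ctx; ++i) {
            data[i] = lctx.kv_self.cells[i].delta;  // per-cell shift amounts
        }
    }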
@@ -7798,6 +7840,34 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
   }
   }
  
+ …
   // decode a batch of tokens by evaluating the transformer
   //
   // - lctx: llama context
@@ -7875,6 +7945,8 @@ static int llama_decode_internal(
   batch.seq_id = seq_id_arr.data();
   }
  
+ …
   // if we have enough unused cells before the current head ->
   // better to start searching from the beginning of the cache, hoping to fill it
   if (kv_self.head > kv_self.used + 2*n_tokens) {
7899
  ggml_cgraph * gf = llama_build_graph(lctx, batch, false);
7900
 
7901
  // the output is always the last tensor in the graph
7902
- struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1];
7903
  struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 2];
 
7904
  if (strcmp(res->name, "result_output") == 0) {
7905
  // the embeddings could be the second to last tensor, or the third to last tensor
7906
  if (strcmp(embeddings->name, "result_norm") != 0) {
@@ -7927,40 +8000,12 @@ static int llama_decode_internal(
   n_threads = std::min(4, n_threads);
   }
  
- #ifdef GGML_USE_MPI
- const int64_t n_layer = hparams.n_layer;
- ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
- #endif
-
- #ifdef GGML_USE_METAL
- if (ggml_backend_is_metal(lctx.backend_metal)) {
- ggml_backend_metal_set_n_cb(lctx.backend_metal, n_threads);
- }
- #endif
-
- if (lctx.backend_cpu != nullptr) {
- ggml_backend_cpu_set_n_threads(lctx.backend_cpu, n_threads);
- }
-
   llama_set_inputs(lctx, batch);
  
- ggml_backend_sched_graph_compute(lctx.sched, gf);
-
- // fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched));
-
- #ifdef GGML_USE_MPI
- ggml_mpi_graph_compute_post(lctx.ctx_mpi, gf, n_layer);
- #endif
+ llama_graph_compute(lctx, gf, n_threads);
  
   // update the kv ring buffer
   {
- if (kv_self.has_shift) {
- kv_self.has_shift = false;
- for (uint32_t i = 0; i < kv_self.size; ++i) {
- kv_self.cells[i].delta = 0;
- }
- }
-
   kv_self.head += n_tokens;
  
   // Ensure kv cache head points to a valid index.
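The backend boilerplate removed here — MPI pre/post hooks, the Metal callback count, the CPU thread count, and the scheduler call — is consolidated behind the single llama_graph_compute call restored above (its twenty-eight-line definition, added after llama_set_inputs, is not rendered; the name follows later llama.cpp). Reassembled from the deleted lines, it plausibly looks like:

    static void llama_graph_compute(llama_context & lctx, ggml_cgraph * gf, int n_threads) {
    #ifdef GGML_USE_METAL
        if (ggml_backend_is_metal(lctx.backend_metal)) {
            ggml_backend_metal_set_n_cb(lctx.backend_metal, n_threads);
        }
    #endif
        if (lctx.backend_cpu != nullptr) {
            ggml_backend_cpu_set_n_threads(lctx.backend_cpu, n_threads);
        }
        ggml_backend_sched_graph_compute(lctx.sched, gf);
        // (the MPI pre/post hooks from the deleted block would bracket this)
    }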
@@ -7969,6 +8014,18 @@ static int llama_decode_internal(
   }
   }
  
+ …
   #ifdef GGML_PERF
   // print timing information per ggml operation (for debugging purposes)
   // requires GGML_PERF to be defined
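Twelve lines are added here, and another large block (239 lines, also not rendered) follows llama_decode_internal; together with the new cparams.defrag_thold they presumably implement fragmentation measurement plus the actual cell-compaction pass. The trigger logic is likely close to the following (the small-cache guard and constants are assumptions):

    // fraction of the active KV window holding no live data
    const float fragmentation = kv_self.n >= 128
        ? 1.0f - float(kv_self.used + n_tokens)/float(kv_self.n)
        : 0.0f;

    // defrag_thold < 0 (the default) disables automatic defrag
    if (cparams.defrag_thold >= 0.0f && fragmentation > cparams.defrag_thold) {
        llama_kv_cache_defrag(kv_self);  // queued; runs on the next update
    }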
@@ -8056,6 +8113,245 @@ static int llama_decode_internal(
   return 0;
   }
  
+ …
   //
   // tokenizer
   //
@@ -8647,37 +8943,46 @@ struct llm_tokenizer_wpm {
   }
  
   std::vector<std::string> preprocess(const std::string & text) {
- std::string ori_str = normalize(text);
- uint64_t ori_size = ori_str.size();
+ …
  
- // single punct / single symbol / single digit
- // baseline: add whitespace on the left and right of punct and chinese characters
- std::vector<std::string> words;
   std::string new_str = "";
- uint64_t i = 0;
- while (i < ori_size) {
- int utf_char_len = utf8_len(ori_str[i]);
- if ((utf_char_len == 1) && ispunct(ori_str[i])) {
- new_str += " ";
- new_str += ori_str[i];
- new_str += " ";
- i += 1;
   }
- else if ((utf_char_len == 3) && is_chinese_char(ori_str.substr(i, 3))) {
+ …
   new_str += " ";
- new_str += ori_str.substr(i, 3);
   new_str += " ";
- i += 3;
- }
- else {
- new_str += ori_str[i];
- i += 1;
   }
   }
  
   // split by whitespace
   uint64_t l = 0;
   uint64_t r = 0;
+ …
   while (r < new_str.size()) {
   // if is whitespace
   if (isspace(new_str[r])) {
@@ -8695,47 +9000,20 @@ struct llm_tokenizer_wpm {
   return words;
   }
  
- std::string normalize(const std::string & text) {
- // TODO: handle chinese characters? https://github.com/huggingface/tokenizers/blob/ef5f50605ddf9f8caef1598c0e4853862b9707a7/tokenizers/src/normalizers/bert.rs#L98
- std::string text2 = strip_accents(text);
- for (size_t i = 0; i < text2.size(); i += utf8_len(text2[i])) {
- char c = text2[i];
- if (c >= 'A' && c <= 'Z') {
- text2[i] = c - 'A' + 'a';
- }
   }
- return text2;
+ …
   }
  
- bool is_chinese_char(const std::string & str) {
- int len = str.length();
- unsigned int codepoint = 0;
- int num_bytes = 0;
- int i = 0;
- unsigned char ch = static_cast<unsigned char>(str[i]);
- if (ch <= 0x7f) {
- codepoint = ch;
- num_bytes = 1;
- } else if ((ch >> 5) == 0x06) {
- codepoint = ch & 0x1f;
- num_bytes = 2;
- } else if ((ch >> 4) == 0x0e) {
- codepoint = ch & 0x0f;
- num_bytes = 3;
- } else if ((ch >> 3) == 0x1e) {
- codepoint = ch & 0x07;
- num_bytes = 4;
- }
- for (int j = 1; j < num_bytes; ++j) {
- if (i + j >= len) {
- return false; // incomplete UTF-8 character
- }
- unsigned char next_ch = static_cast<unsigned char>(str[i + j]);
- if ((next_ch >> 6) != 0x02) {
- return false; // invalid trailing byte
- }
- codepoint = (codepoint << 6) | (next_ch & 0x3f);
- }
+ …
   if ((codepoint >= 0x4E00 && codepoint <= 0x9FFF) ||
   (codepoint >= 0x3400 && codepoint <= 0x4DBF) ||
   (codepoint >= 0x20000 && codepoint <= 0x2A6DF) ||
@@ -8751,41 +9029,6 @@ struct llm_tokenizer_wpm {
   return false;
   }
  
- std::string strip_accents(const std::string & input_string) {
- std::string resultString;
- std::map<std::string, char> accent_map = {
- {"À", 'A'}, {"Á", 'A'}, {"Â", 'A'}, {"Ã", 'A'}, {"Ä", 'A'}, {"Å", 'A'},
- {"à", 'a'}, {"á", 'a'}, {"â", 'a'}, {"ã", 'a'}, {"ä", 'a'}, {"å", 'a'},
- {"È", 'E'}, {"É", 'E'}, {"Ê", 'E'}, {"Ë", 'E'}, {"è", 'e'}, {"é", 'e'},
- {"ê", 'e'}, {"ë", 'e'}, {"Ì", 'I'}, {"Í", 'I'}, {"Î", 'I'}, {"Ï", 'I'},
- {"ì", 'i'}, {"í", 'i'}, {"î", 'i'}, {"ï", 'i'}, {"Ò", 'O'}, {"Ó", 'O'},
- {"Ô", 'O'}, {"Õ", 'O'}, {"Ö", 'O'}, {"ò", 'o'}, {"ó", 'o'}, {"ô", 'o'},
- {"õ", 'o'}, {"ö", 'o'}, {"Ù", 'U'}, {"Ú", 'U'}, {"Û", 'U'}, {"Ü", 'U'},
- {"ù", 'u'}, {"ú", 'u'}, {"û", 'u'}, {"ü", 'u'}, {"Ý", 'Y'}, {"ý", 'y'},
- {"Ç", 'C'}, {"ç", 'c'}, {"Ñ", 'N'}, {"ñ", 'n'},
- };
-
- for (size_t i = 0; i < input_string.length();) {
- int len = utf8_len(input_string[i]);
- std::string curChar = input_string.substr(i, len);
- auto iter = accent_map.find(curChar);
- if (iter != accent_map.end()) {
- resultString += iter->second;
- } else {
- resultString += curChar;
- }
- i += len;
- }
-
- return resultString;
- }
-
- static size_t utf8_len(char src) {
- const size_t lookup[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4};
- uint8_t highbits = static_cast<uint8_t>(src) >> 4;
- return lookup[highbits];
- }
-
   const llama_vocab & vocab;
   };
  
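The WPM tokenizer loses its hand-rolled normalize/strip_accents/utf8_len in favor of codepoint-based processing (hence the new <cwctype> and <locale> includes at the top of the file; the replacement lines are not rendered). The deleted utf8_len is still worth keeping in mind, since the same first-byte trick recurs across llama.cpp tokenizers:

    // UTF-8 sequence length from the first byte: the high nibble indexes a
    // 16-entry table (0xxx -> 1 byte, 110x -> 2, 1110 -> 3, 11110 -> 4)
    static size_t utf8_len(char src) {
        const size_t lookup[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4};
        uint8_t highbits = static_cast<uint8_t>(src) >> 4;
        return lookup[highbits];
    }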
@@ -10511,31 +10754,47 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
   if (arch == LLM_ARCH_FALCON || nx % QK_K != 0) {
   new_type = GGML_TYPE_Q8_0;
   }
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) {
+ …
   new_type = GGML_TYPE_Q5_K;
   }
   else if (new_type != GGML_TYPE_Q8_0) {
   new_type = GGML_TYPE_Q6_K;
   }
   } else if (name == "token_embd.weight") {
- if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) {
+ …
   new_type = GGML_TYPE_Q2_K;
   }
+ …
   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
- new_type = GGML_TYPE_Q4_K;
+ …
   }
- } else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) {
+ …
   if (name.find("attn_v.weight") != std::string::npos) {
   if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q4_K;
- else new_type = GGML_TYPE_Q2_K;
+ …
   ++qs.i_attention_wv;
   }
+ …
   else if (name.find("ffn_down") != std::string::npos) {
- if (qs.i_ffn_down < qs.n_ffn_down/8) new_type = GGML_TYPE_Q2_K;
+ …
   ++qs.i_ffn_down;
   }
   else if (name.find("attn_output.weight") != std::string::npos) {
- if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) new_type = GGML_TYPE_IQ2_XXS;
+ …
   }
   } else if (name.find("attn_v.weight") != std::string::npos) {
   if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) {
@@ -10545,7 +10804,13 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
   new_type = GGML_TYPE_Q4_K;
   }
   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
- new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_Q3_K : GGML_TYPE_IQ3_XXS;
+ …
   }
   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) {
   new_type = GGML_TYPE_Q4_K;
10557
  new_type = qs.i_attention_wv < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
10558
  }
10559
  else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
10560
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL && qs.model.hparams.n_gqa() >= 4) {
10561
  new_type = GGML_TYPE_Q5_K;
10562
  }
10563
  else if ((ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) &&
@@ -10583,13 +10848,19 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
   // TODO: explore better strategies
   new_type = GGML_TYPE_Q8_0;
   }
- else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XS) {
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
   new_type = GGML_TYPE_IQ3_XXS;
   }
+ …
   } else if (name.find("attn_q.weight") != std::string::npos) {
- if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XS) {
+ if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
   new_type = GGML_TYPE_IQ3_XXS;
   }
+ …
   } else if (name.find("ffn_down") != std::string::npos) {
   auto info = layer_info(qs.i_ffn_down, qs.n_ffn_down, name.c_str());
   int i_layer = info.first, n_layer = info.second;
  } else if (name.find("ffn_down") != std::string::npos) {
10594
  auto info = layer_info(qs.i_ffn_down, qs.n_ffn_down, name.c_str());
10595
  int i_layer = info.first, n_layer = info.second;
@@ -10620,8 +10891,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
10620
  if (use_more_bits(i_layer, n_layer)) new_type = GGML_TYPE_Q6_K;
10621
  }
10622
  }
10623
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL && !qs.has_imatrix) {
10624
- if (i_layer < n_layer/8) new_type = GGML_TYPE_Q5_K;
10625
  }
10626
  else if (ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M && use_more_bits(i_layer, n_layer)) new_type = GGML_TYPE_Q6_K;
10627
  else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S && arch != LLM_ARCH_FALCON && i_layer < n_layer/8) {
@@ -10638,15 +10909,15 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
   } else if (name.find("attn_output.weight") != std::string::npos) {
   if (arch != LLM_ARCH_FALCON) {
   if (qs.model.hparams.n_expert == 8) {
- if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
+ if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
   ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL ||
   ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S ||
- ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
+ …
   new_type = GGML_TYPE_Q5_K;
   }
   } else {
   if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K ) new_type = GGML_TYPE_Q3_K;
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) new_type = GGML_TYPE_Q3_K;
+ …
   else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M ) new_type = GGML_TYPE_Q4_K;
   else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L ) new_type = GGML_TYPE_Q5_K;
   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M ) new_type = GGML_TYPE_Q4_K;
@@ -10665,7 +10936,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
   else if (name.find("ffn_gate") != std::string::npos) {
   auto info = layer_info(qs.i_ffn_gate, qs.n_ffn_gate, name.c_str());
   int i_layer = info.first, n_layer = info.second;
- if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XS && (i_layer >= n_layer/8 && i_layer < 7*n_layer/8)) {
+ if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS && (i_layer >= n_layer/8 && i_layer < 7*n_layer/8)) {
   new_type = GGML_TYPE_IQ3_XXS;
   }
   ++qs.i_ffn_gate;
@@ -10673,7 +10944,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
   else if (name.find("ffn_up") != std::string::npos) {
   auto info = layer_info(qs.i_ffn_up, qs.n_ffn_up, name.c_str());
   int i_layer = info.first, n_layer = info.second;
- if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XS && (i_layer >= n_layer/8 && i_layer < 7*n_layer/8)) {
+ if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS && (i_layer >= n_layer/8 && i_layer < 7*n_layer/8)) {
   new_type = GGML_TYPE_IQ3_XXS;
   }
   ++qs.i_ffn_up;
@@ -10692,8 +10963,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
   //}
   bool convert_incompatible_tensor = false;
   if (new_type == GGML_TYPE_Q2_K || new_type == GGML_TYPE_Q3_K || new_type == GGML_TYPE_Q4_K ||
- new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K ||
- new_type == GGML_TYPE_IQ2_XS || new_type == GGML_TYPE_IQ2_XXS ||
+ new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K || new_type == GGML_TYPE_IQ4_XS ||
+ new_type == GGML_TYPE_IQ2_XS || new_type == GGML_TYPE_IQ2_XXS || new_type == GGML_TYPE_IQ2_S ||
   new_type == GGML_TYPE_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || new_type == GGML_TYPE_IQ3_S) {
   int nx = tensor->ne[0];
   int ny = tensor->ne[1];
@@ -10708,14 +10979,16 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
   switch (new_type) {
   case GGML_TYPE_IQ2_XXS:
   case GGML_TYPE_IQ2_XS:
+ case GGML_TYPE_IQ2_S:
   case GGML_TYPE_IQ3_XXS:
   case GGML_TYPE_IQ3_S:
   case GGML_TYPE_IQ1_S:
   case GGML_TYPE_Q2_K:
- case GGML_TYPE_Q3_K: new_type = GGML_TYPE_IQ4_NL; break;
- case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
- case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break;
- case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;
+ case GGML_TYPE_Q3_K:
+ case GGML_TYPE_IQ4_XS: new_type = GGML_TYPE_IQ4_NL; break;
+ case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
+ case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break;
+ case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;
   default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
   }
   LLAMA_LOG_WARN(" - using fallback quantization %s\n", ggml_type_name(new_type));
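The IQ2_S/IQ2_M/IQ4_XS/IQ3_XS entries restored across these quantization hunks follow the enum values and strings in later llama.cpp; the render dropped the added lines. The fallback itself exists because K-quants and the IQ formats pack weights in super-blocks of QK_K (256) values: a row whose length is not a multiple of QK_K cannot be laid out in such blocks, so quantization falls back to a same-ballpark format with a smaller block size. In sketch form:

    // mirrors the convert_incompatible_tensor check above
    const int64_t nx = tensor->ne[0];  // row length
    if (nx % QK_K != 0) {
        // e.g. Q3_K -> IQ4_NL, Q4_K -> Q5_0, Q5_K -> Q5_1, Q6_K -> Q8_0,
        // all of which use 32-wide blocks instead of 256-wide super-blocks
    }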
@@ -10741,7 +11014,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
   // K-quants
   case LLAMA_FTYPE_MOSTLY_Q2_K_S:
   case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break;
- case LLAMA_FTYPE_MOSTLY_Q3_K_XS: quantized_type = GGML_TYPE_IQ3_S; break;
+ case LLAMA_FTYPE_MOSTLY_IQ3_XS: quantized_type = GGML_TYPE_IQ3_S; break;
   case LLAMA_FTYPE_MOSTLY_Q3_K_S:
   case LLAMA_FTYPE_MOSTLY_Q3_K_M:
   case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break;
@@ -10752,9 +11025,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
   case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break;
   case LLAMA_FTYPE_MOSTLY_IQ2_XXS: quantized_type = GGML_TYPE_IQ2_XXS; break;
   case LLAMA_FTYPE_MOSTLY_IQ2_XS: quantized_type = GGML_TYPE_IQ2_XS; break;
+ case LLAMA_FTYPE_MOSTLY_IQ2_S: quantized_type = GGML_TYPE_IQ2_XS; break;
+ case LLAMA_FTYPE_MOSTLY_IQ2_M: quantized_type = GGML_TYPE_IQ2_S; break;
   case LLAMA_FTYPE_MOSTLY_IQ3_XXS: quantized_type = GGML_TYPE_IQ3_XXS; break;
   case LLAMA_FTYPE_MOSTLY_IQ1_S: quantized_type = GGML_TYPE_IQ1_S; break;
   case LLAMA_FTYPE_MOSTLY_IQ4_NL: quantized_type = GGML_TYPE_IQ4_NL; break;
+ case LLAMA_FTYPE_MOSTLY_IQ4_XS: quantized_type = GGML_TYPE_IQ4_XS; break;
   case LLAMA_FTYPE_MOSTLY_IQ3_S: quantized_type = GGML_TYPE_IQ3_S; break;
   case LLAMA_FTYPE_MOSTLY_IQ3_M: quantized_type = GGML_TYPE_IQ3_S; break;
  
@@ -10886,7 +11162,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
   quantize &= !params->only_copy;
  
   // do not quantize expert gating tensors
- quantize &= name != LLM_TN(model.arch)(LLM_TENSOR_FFN_GATE_INP, "weight");
+ // NOTE: can't use LLM_TN here because the layer number is not known
+ quantize &= name.find("ffn_gate_inp.weight") == std::string::npos;
  
   // do not quantize positional embeddings and token types (BERT)
   quantize &= name != LLM_TN(model.arch)(LLM_TENSOR_POS_EMBD, "weight");
@@ -10930,6 +11207,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
   }
   if ((new_type == GGML_TYPE_IQ2_XXS ||
   new_type == GGML_TYPE_IQ2_XS ||
+ new_type == GGML_TYPE_IQ2_S ||
   new_type == GGML_TYPE_IQ1_S ||
   (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
   LLAMA_LOG_ERROR("\n\n============================================================\n");
@@ -11385,6 +11663,7 @@ struct llama_context_params llama_context_default_params() {
   /*.yarn_beta_fast =*/ 32.0f,
   /*.yarn_beta_slow =*/ 1.0f,
   /*.yarn_orig_ctx =*/ 0,
+ /*.defrag_thold =*/ -1.0f,
   /*.cb_eval =*/ nullptr,
   /*.cb_eval_user_data =*/ nullptr,
   /*.type_k =*/ GGML_TYPE_F16,
@@ -11549,6 +11828,7 @@ struct llama_context * llama_new_context_with_model(
   cparams.yarn_attn_factor = params.yarn_attn_factor;
   cparams.yarn_beta_fast = params.yarn_beta_fast;
   cparams.yarn_beta_slow = params.yarn_beta_slow;
+ cparams.defrag_thold = params.defrag_thold;
   cparams.mul_mat_q = params.mul_mat_q;
   cparams.offload_kqv = params.offload_kqv;
   cparams.do_pooling = params.do_pooling;
@@ -11671,8 +11951,7 @@ struct llama_context * llama_new_context_with_model(
   }
   ctx->backends.push_back(ctx->backend_cpu);
  
- if (!llama_kv_cache_init(ctx->kv_self, ctx->model, type_k, type_v,
- cparams.n_ctx, cparams.offload_kqv)) {
+ if (!llama_kv_cache_init(ctx->kv_self, ctx->model, type_k, type_v, cparams.n_ctx, cparams.offload_kqv)) {
   LLAMA_LOG_ERROR("%s: llama_kv_cache_init() failed for self-attention cache\n", __func__);
   llama_free(ctx);
   return nullptr;
@@ -11751,7 +12030,7 @@ struct llama_context * llama_new_context_with_model(
   }
  
   // buffer used to store the computation graph and the tensor meta data
- ctx->buf_compute_meta.resize(ggml_tensor_overhead()*LLAMA_MAX_NODES + ggml_graph_overhead());
+ ctx->buf_compute_meta.resize(ggml_tensor_overhead()*LLAMA_MAX_NODES + ggml_graph_overhead_custom(LLAMA_MAX_NODES, false));
  
   ctx->sched = ggml_backend_sched_new(ctx->backends.data(), backend_buft.data(), ctx->backends.size(), LLAMA_MAX_NODES);
  
@@ -11820,6 +12099,49 @@ enum llama_vocab_type llama_vocab_type(const struct llama_model * model) {
   return model->vocab.type;
   }
  
+ …
   int32_t llama_n_vocab(const struct llama_model * model) {
   return model->vocab.id_to_token.size();
   }
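The forty-three lines added after llama_vocab_type are not rendered; given the new hparams.rope_type set at the end of llm_load_hparams, this is almost certainly the public llama_rope_type accessor with its per-architecture switch. Its shape, with an illustrative (not exhaustive, possibly inexact) architecture assignment:

    enum llama_rope_type llama_rope_type(const struct llama_model * model) {
        switch (model->arch) {
            // models without RoPE (e.g. ALiBi-based) report NONE
            case LLM_ARCH_GPT2:     return LLAMA_ROPE_TYPE_NONE;
            // original interleaved rotation
            case LLM_ARCH_LLAMA:
            case LLM_ARCH_BAICHUAN: return LLAMA_ROPE_TYPE_NORM;
            // GPT-NeoX-style rotation
            case LLM_ARCH_FALCON:
            case LLM_ARCH_STABLELM: return LLAMA_ROPE_TYPE_NEOX;
            default:                return LLAMA_ROPE_TYPE_NONE;
        }
    }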
@@ -12062,12 +12384,12 @@ void llama_kv_cache_seq_keep(struct llama_context * ctx, llama_seq_id seq_id) {
   llama_kv_cache_seq_keep(ctx->kv_self, seq_id);
   }
  
- void llama_kv_cache_seq_shift(struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos delta) {
+ void llama_kv_cache_seq_add(struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos delta) {
   if (delta == 0) {
   return;
   }
  
- llama_kv_cache_seq_shift(ctx->kv_self, seq_id, p0, p1, delta);
+ llama_kv_cache_seq_add(ctx->kv_self, seq_id, p0, p1, delta);
   }
  
   void llama_kv_cache_seq_div(struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, int d) {
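Typical call site for the renamed API — sliding the context window so generation can continue past n_ctx (n_keep and n_discard are caller-chosen; this mirrors the pattern used in the llama.cpp examples):

    // drop n_discard tokens after the n_keep prefix, then slide the rest back
    llama_kv_cache_seq_rm (ctx, 0, n_keep,             n_keep + n_discard);
    llama_kv_cache_seq_add(ctx, 0, n_keep + n_discard, -1,       -n_discard);
    // positions update immediately; the K cache is re-rotated lazily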
@@ -12078,6 +12400,19 @@ void llama_kv_cache_seq_div(struct llama_context * ctx, llama_seq_id seq_id, lla
   llama_kv_cache_seq_div(ctx->kv_self, seq_id, p0, p1, d);
   }
  
+ …
   // Returns the *maximum* size of the state
   size_t llama_get_state_size(const struct llama_context * ctx) {
   // we don't know size of rng until we actually serialize it. so reserve more than enough memory for its serialized state.
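The thirteen added lines are not rendered; per later llama.h they plausibly expose the new cache maintenance publicly, e.g.:

    // request an explicit defrag, then apply all pending cache operations
    // (K-shift and/or defrag) without decoding any tokens
    llama_kv_cache_defrag(ctx);
    llama_kv_cache_update(ctx);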
@@ -12204,10 +12539,10 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
   const auto & hparams = ctx->model.hparams;
   const auto & cparams = ctx->cparams;
  
- const auto n_layer = hparams.n_layer;
- const auto n_embd_k_gqa = hparams.n_embd_k_gqa();
- const auto n_embd_v_gqa = hparams.n_embd_v_gqa();
- const auto n_ctx = cparams.n_ctx;
+ …
  
   const size_t kv_buf_size = kv_self.total_size();
   const uint32_t kv_head = kv_self.head;
12222
  if (kv_buf_size) {
12223
  std::vector<uint8_t> tmp_buf;
12224
  for (int il = 0; il < (int) n_layer; ++il) {
12225
- size_t k_size = ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*kv_head);
 
12226
  tmp_buf.resize(k_size);
12227
  ggml_backend_tensor_get(kv_self.k_l[il], tmp_buf.data(), 0, tmp_buf.size());
12228
  data_ctx->write(tmp_buf.data(), tmp_buf.size());
12229
 
12230
  // v is not contiguous, copy row by row
12231
- size_t v_row_size = ggml_row_size(kv_self.v_l[il]->type, kv_head);
12232
- size_t v_row_stride = ggml_row_size(kv_self.v_l[il]->type, n_ctx);
 
12233
  tmp_buf.resize(v_row_size);
12234
  for (int ir = 0; ir < (int) n_embd_v_gqa; ++ir) {
12235
  ggml_backend_tensor_get(kv_self.v_l[il], tmp_buf.data(), ir*v_row_stride, tmp_buf.size());
@@ -12316,10 +12653,10 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
12316
  const auto & hparams = ctx->model.hparams;
12317
  const auto & cparams = ctx->cparams;
12318
 
12319
- const int n_layer = hparams.n_layer;
12320
- const int n_embd_k_gqa = hparams.n_embd_k_gqa();
12321
- const int n_embd_v_gqa = hparams.n_embd_v_gqa();
12322
- const int n_ctx = cparams.n_ctx;
12323
 
12324
  size_t kv_buf_size;
12325
  uint32_t kv_head;
@@ -12335,13 +12672,15 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
12335
  GGML_ASSERT(kv_self.total_size() == kv_buf_size);
12336
 
12337
  for (int il = 0; il < (int) n_layer; ++il) {
12338
- size_t k_size = ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*kv_head);
 
12339
  ggml_backend_tensor_set(kv_self.k_l[il], inp, 0, k_size);
12340
  inp += k_size;
12341
 
12342
  // v is not contiguous, copy row by row
12343
- size_t v_row_size = ggml_row_size(kv_self.v_l[il]->type, kv_head);
12344
- size_t v_row_stride = ggml_row_size(kv_self.v_l[il]->type, n_ctx);
 
12345
  for (int ir = 0; ir < (int) n_embd_v_gqa; ++ir) {
12346
  ggml_backend_tensor_set(kv_self.v_l[il], inp, ir*v_row_stride, v_row_size);
12347
  inp += v_row_size;
 
68
  #include <cstdio>
69
  #include <cstring>
70
  #include <ctime>
71
+ #include <cwctype>
72
  #include <forward_list>
73
  #include <fstream>
74
  #include <functional>
75
  #include <initializer_list>
76
+ #include <locale>
77
  #include <map>
78
  #include <memory>
79
  #include <mutex>
 
1552
  static const size_t GiB = 1024*MiB;
1553
 
1554
  struct llama_hparams {
1555
+ bool vocab_only;
1556
+ bool rope_finetuned;
1557
+
1558
  uint32_t n_vocab;
1559
  uint32_t n_ctx_train; // context size the model was trained on
1560
  uint32_t n_embd;
 
1583
  bool causal_attn = true;
1584
  bool need_kq_pos = false;
1585
 
1586
+ enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE;
1587
+ enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE;
1588
 
1589
  bool operator!=(const llama_hparams & other) const {
1590
  if (this->vocab_only != other.vocab_only) return true;
 
1643
  float yarn_attn_factor;
1644
  float yarn_beta_fast;
1645
  float yarn_beta_slow;
1646
+ float defrag_thold;
1647
 
1648
  bool mul_mat_q;
1649
  bool offload_kqv;
 
1712
  bool has_seq_id(const llama_seq_id & id) const {
1713
  return seq_id.find(id) != seq_id.end();
1714
  }
1715
+
1716
+ bool is_empty() const {
1717
+ return seq_id.empty();
1718
+ }
1719
+
1720
+ bool is_same_seq(const llama_kv_cell & other) const {
1721
+ return seq_id == other.seq_id;
1722
+ }
1723
  };
1724
 
1725
  // ring-buffer of cached KV data
1726
  struct llama_kv_cache {
1727
  bool has_shift = false;
1728
+ bool do_defrag = false;
1729
 
1730
  // Note: The value of head isn't only used to optimize searching
1731
  // for a free KV slot. llama_decode_internal also uses it, so it
 
1737
  // computed before each graph build
1738
  uint32_t n = 0;
1739
 
1740
+ ggml_type type_k = GGML_TYPE_F16;
1741
+ ggml_type type_v = GGML_TYPE_F16;
1742
+
1743
  std::vector<llama_kv_cell> cells;
1744
 
1745
  std::vector<struct ggml_tensor *> k_l; // per layer
 
1975
  static bool llama_kv_cache_init(
1976
  struct llama_kv_cache & cache,
1977
  const llama_model & model,
1978
+ ggml_type type_k,
1979
+ ggml_type type_v,
1980
  uint32_t n_ctx,
1981
  bool offload) {
1982
  const struct llama_hparams & hparams = model.hparams;
 
1991
  cache.size = n_ctx;
1992
  cache.used = 0;
1993
 
1994
+ cache.type_k = type_k;
1995
+ cache.type_v = type_v;
1996
+
1997
  cache.cells.clear();
1998
  cache.cells.resize(n_ctx);
1999
 
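// --- editor's sketch: KV cache sizing (not part of the upstream diff) --------
// A minimal illustration of what recording type_k/type_v implies for memory
// use; per layer, the K and V tensors allocated below take roughly:
static size_t sketch_kv_bytes_per_layer(ggml_type type_k, ggml_type type_v,
                                        int64_t n_embd_k_gqa, int64_t n_embd_v_gqa, int64_t n_ctx) {
    return ggml_row_size(type_k, n_embd_k_gqa*n_ctx) +
           ggml_row_size(type_v, n_embd_v_gqa*n_ctx);
}
// e.g. F16 K/V with n_embd_k_gqa == n_embd_v_gqa == 1024 and n_ctx == 4096:
// 2 tensors * 2 bytes * 1024 * 4096 = 16 MiB per layer
// ------------------------------------------------------------------------------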
2034
 
2035
  for (int i = 0; i < (int) n_layer; i++) {
2036
  struct ggml_context * ctx = offload ? ctx_map.at(model.buft_layer[i].buft) : cache.ctxs.front();
2037
+ ggml_tensor * k = ggml_new_tensor_1d(ctx, type_k, n_embd_k_gqa*n_ctx);
2038
+ ggml_tensor * v = ggml_new_tensor_1d(ctx, type_v, n_embd_v_gqa*n_ctx);
2039
  ggml_format_name(k, "cache_k_l%d", i);
2040
  ggml_format_name(v, "cache_v_l%d", i);
2041
  cache.k_l.push_back(k);
 
2119
  // find how many cells are currently in use
2120
  static int32_t llama_kv_cache_cell_max(const struct llama_kv_cache & cache) {
2121
  for (uint32_t i = cache.size - 1; i > 0; --i) {
2122
+ if (cache.cells[i].pos >= 0 && !cache.cells[i].is_empty()) {
2123
  return i + 1;
2124
  }
2125
  }
 
2155
  } else {
2156
  continue;
2157
  }
2158
+ if (cache.cells[i].is_empty()) {
2159
  // keep count of the number of used cells
2160
  if (cache.cells[i].pos >= 0) cache.used--;
2161
 
2206
  if (new_head != cache.size && new_head < cache.head) cache.head = new_head;
2207
  }
2208
 
2209
+ static void llama_kv_cache_seq_add(
2210
  struct llama_kv_cache & cache,
2211
  llama_seq_id seq_id,
2212
  llama_pos p0,
 
2224
  cache.cells[i].delta += delta;
2225
 
2226
  if (cache.cells[i].pos < 0) {
2227
+ if (!cache.cells[i].is_empty()) {
2228
+ cache.used--;
2229
+ }
2230
  cache.cells[i].pos = -1;
2231
  cache.cells[i].seq_id.clear();
2232
+ if (new_head == cache.size) {
2233
+ new_head = i;
2234
+ }
2235
  }
2236
  }
2237
  }
 
2263
  }
2264
  }
2265
 
2266
+ static llama_pos llama_kv_cache_seq_pos_max(struct llama_kv_cache & cache, llama_seq_id seq_id) {
2267
+ llama_pos result = 0;
2268
+
2269
+ for (uint32_t i = 0; i < cache.size; ++i) {
2270
+ if (cache.cells[i].has_seq_id(seq_id)) {
2271
+ result = std::max(result, cache.cells[i].pos);
2272
+ }
2273
+ }
2274
+
2275
+ return result;
2276
+ }
2277
+
2278
+ static void llama_kv_cache_defrag(struct llama_kv_cache & cache) {
2279
+ cache.do_defrag = true;
2280
+ }
2281
+
2282
  //
2283
  // model loading and saving
2284
  //
 
2350
  }
2351
  };
2352
 
2353
+ struct ArrayInfo {
2354
  const gguf_type gt;
2355
  const size_t length;
2356
  const void * data;
 
2369
  };
2370
 
2371
  template<typename T>
2372
+ class GKV : public GKV_Base<T> {
2373
  GKV() = delete;
2374
 
2375
  public:
 
2392
  return "unknown";
2393
  }
2394
 
2395
+ static bool validate_override(const llama_model_kv_override_type expected_type, const struct llama_model_kv_override * ovrd) {
2396
+ if (!ovrd) { return false; }
2397
+ if (ovrd->tag == expected_type) {
2398
  LLAMA_LOG_INFO("%s: Using metadata override (%5s) '%s' = ",
2399
+ __func__, override_type_to_str(ovrd->tag), ovrd->key);
2400
+ switch (ovrd->tag) {
2401
  case LLAMA_KV_OVERRIDE_TYPE_BOOL: {
2402
+ LLAMA_LOG_INFO("%s\n", ovrd->bool_value ? "true" : "false");
2403
  } break;
2404
  case LLAMA_KV_OVERRIDE_TYPE_INT: {
2405
+ LLAMA_LOG_INFO("%" PRId64 "\n", ovrd->int_value);
2406
  } break;
2407
  case LLAMA_KV_OVERRIDE_TYPE_FLOAT: {
2408
+ LLAMA_LOG_INFO("%.6f\n", ovrd->float_value);
2409
  } break;
2410
  default:
2411
  // Shouldn't be possible to end up here, but just in case...
2412
  throw std::runtime_error(
2413
  format("Unsupported attempt to override %s type for metadata key %s\n",
2414
+ override_type_to_str(ovrd->tag), ovrd->key));
2415
  }
2416
  return true;
2417
  }
2418
  LLAMA_LOG_WARN("%s: Warning: Bad metadata override type for key '%s', expected %s but got %s\n",
2419
+ __func__, ovrd->key, override_type_to_str(expected_type), override_type_to_str(ovrd->tag));
2420
  return false;
2421
  }
2422
 
2423
  template<typename OT>
2424
  static typename std::enable_if<std::is_same<OT, bool>::value, bool>::type
2425
+ try_override(OT & target, const struct llama_model_kv_override * ovrd) {
2426
+ if (validate_override(LLAMA_KV_OVERRIDE_TYPE_BOOL, ovrd)) {
2427
+ target = ovrd->bool_value;
2428
  return true;
2429
  }
2430
  return false;
 
2432
 
2433
  template<typename OT>
2434
  static typename std::enable_if<!std::is_same<OT, bool>::value && std::is_integral<OT>::value, bool>::type
2435
+ try_override(OT & target, const struct llama_model_kv_override * ovrd) {
2436
+ if (validate_override(LLAMA_KV_OVERRIDE_TYPE_INT, ovrd)) {
2437
+ target = ovrd->int_value;
2438
  return true;
2439
  }
2440
  return false;
 
2442
 
2443
  template<typename OT>
2444
  static typename std::enable_if<std::is_floating_point<OT>::value, bool>::type
2445
+ try_override(OT & target, const struct llama_model_kv_override * ovrd) {
2446
+ if (validate_override(LLAMA_KV_OVERRIDE_TYPE_FLOAT, ovrd)) {
2447
+ target = ovrd->float_value;
2448
  return true;
2449
  }
2450
  return false;
 
2452
 
2453
  template<typename OT>
2454
  static typename std::enable_if<std::is_same<OT, std::string>::value, bool>::type
2455
+ try_override(OT & target, const struct llama_model_kv_override * ovrd) {
2456
  (void)target;
2457
+ (void)ovrd;
2458
+ if (!ovrd) { return false; }
2459
  // Currently, we should never end up here so it would be a bug if we do.
2460
  throw std::runtime_error(format("Unsupported attempt to override string type for metadata key %s\n",
2461
+ ovrd ? ovrd->key : "NULL"));
2462
  }
2463
 
2464
+ static bool set(const gguf_context * ctx, const int k, T & target, const struct llama_model_kv_override * ovrd = nullptr) {
2465
+ if (try_override<T>(target, ovrd)) {
2466
  return true;
2467
  }
2468
  if (k < 0) { return false; }
 
2470
  return true;
2471
  }
2472
 
2473
+ static bool set(const gguf_context * ctx, const char * key, T & target, const struct llama_model_kv_override * ovrd = nullptr) {
2474
+ return set(ctx, gguf_find_key(ctx, key), target, ovrd);
2475
  }
2476
 
2477
+ static bool set(const gguf_context * ctx, const std::string & key, T & target, const struct llama_model_kv_override * ovrd = nullptr) {
2478
+ return set(ctx, key.c_str(), target, ovrd);
2479
  }
2480
  };
2481
  }
 
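// --- editor's sketch: driving the override machinery (not from the diff) -----
// A hedged usage example. The tag/key/bool_value fields are the ones read by
// validate_override() above; wiring the array through
// llama_model_params::kv_overrides and the empty-key terminator are assumptions
// about the public header.
static void sketch_kv_override(struct llama_model_params & mparams) {
    static struct llama_model_kv_override kvo[2] = {};

    kvo[0].tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
    snprintf(kvo[0].key, sizeof(kvo[0].key), "%s", "tokenizer.ggml.add_bos_token");
    kvo[0].bool_value = false;

    kvo[1].key[0] = '\0'; // list terminator (assumption)

    mparams.kv_overrides = kvo;
}
// ------------------------------------------------------------------------------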
2582
  case GGML_TYPE_Q6_K: ftype = LLAMA_FTYPE_MOSTLY_Q6_K; break;
2583
  case GGML_TYPE_IQ2_XXS: ftype = LLAMA_FTYPE_MOSTLY_IQ2_XXS; break;
2584
  case GGML_TYPE_IQ2_XS: ftype = LLAMA_FTYPE_MOSTLY_IQ2_XS; break;
2585
+ case GGML_TYPE_IQ2_S: ftype = LLAMA_FTYPE_MOSTLY_IQ2_S; break;
2586
  case GGML_TYPE_IQ3_XXS: ftype = LLAMA_FTYPE_MOSTLY_IQ3_XXS; break;
2587
  case GGML_TYPE_IQ1_S: ftype = LLAMA_FTYPE_MOSTLY_IQ1_S; break;
2588
  case GGML_TYPE_IQ4_NL: ftype = LLAMA_FTYPE_MOSTLY_IQ4_NL; break;
2589
+ case GGML_TYPE_IQ4_XS: ftype = LLAMA_FTYPE_MOSTLY_IQ4_XS; break;
2590
  case GGML_TYPE_IQ3_S: ftype = LLAMA_FTYPE_MOSTLY_IQ3_S; break;
2591
  default:
2592
  {
 
2888
  }
2889
  };
2890
 
2891
+ template<>
2892
+ bool llama_model_loader::get_key(const enum llm_kv kid, enum llama_pooling_type & result, const bool required) {
2893
+ uint32_t tmp;
2894
+ const bool found = get_key(kid, tmp, required);
2895
+ result = (enum llama_pooling_type) tmp;
2896
+ return found;
2897
+ }
2898
+
2899
+
2900
  //
2901
  // load LLaMA models
2902
  //
 
2938
  case LLAMA_FTYPE_MOSTLY_Q6_K: return "Q6_K";
2939
  case LLAMA_FTYPE_MOSTLY_IQ2_XXS:return "IQ2_XXS - 2.0625 bpw";
2940
  case LLAMA_FTYPE_MOSTLY_IQ2_XS: return "IQ2_XS - 2.3125 bpw";
2941
+ case LLAMA_FTYPE_MOSTLY_IQ2_S: return "IQ2_S - 2.5 bpw";
2942
+ case LLAMA_FTYPE_MOSTLY_IQ2_M: return "IQ2_M - 2.7 bpw";
2943
+ case LLAMA_FTYPE_MOSTLY_IQ3_XS: return "IQ3_XS - 3.3 bpw";
2944
  case LLAMA_FTYPE_MOSTLY_IQ3_XXS:return "IQ3_XXS - 3.0625 bpw";
2945
  case LLAMA_FTYPE_MOSTLY_IQ1_S :return "IQ1_S - 1.5625 bpw";
2946
  case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw";
2947
+ case LLAMA_FTYPE_MOSTLY_IQ4_XS: return "IQ4_XS - 4.25 bpw";
2948
  case LLAMA_FTYPE_MOSTLY_IQ3_S: return "IQ3_S - 3.4375 bpw";
2949
  case LLAMA_FTYPE_MOSTLY_IQ3_M: return "IQ3_S mix - 3.66 bpw";
2950
 
2980
  default: return "?B";
2981
  }
2982
  }
2983
+
2984
  static const char * llama_model_vocab_type_name(enum llama_vocab_type type){
2985
  switch (type) {
2986
+ case LLAMA_VOCAB_TYPE_SPM: return "SPM";
2987
+ case LLAMA_VOCAB_TYPE_BPE: return "BPE";
2988
+ case LLAMA_VOCAB_TYPE_WPM: return "WPM";
2989
+ default: return "unknown";
2990
  }
2991
  }
2992
 
2993
  static void llm_load_arch(llama_model_loader & ml, llama_model & model) {
2994
  model.arch = ml.get_arch();
2995
  if (model.arch == LLM_ARCH_UNKNOWN) {
 
3166
  } break;
3167
  case LLM_ARCH_BERT:
3168
  {
3169
+ ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
3170
+ ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
3171
  ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
3172
+ ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type);
3173
 
3174
  switch (hparams.n_layer) {
3175
  case 3:
 
3187
  } break;
3188
  case LLM_ARCH_NOMIC_BERT:
3189
  {
3190
+ ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
3191
+ ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
3192
  ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
3193
+ ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type);
3194
 
3195
  if (hparams.n_layer == 12 && hparams.n_embd == 768) {
3196
  model.type = e_model::MODEL_137M;
 
3329
  if (hparams.f_max_alibi_bias > 0.0f) {
3330
  hparams.need_kq_pos = true;
3331
  }
3332
+
3333
+ hparams.rope_type = llama_rope_type(&model);
3334
  }
3335
 
3336
  // TODO: This should probably be in llama.h
 
3633
  LLAMA_LOG_INFO("%s: n_ff = %u\n", __func__, hparams.n_ff);
3634
  LLAMA_LOG_INFO("%s: n_expert = %u\n", __func__, hparams.n_expert);
3635
  LLAMA_LOG_INFO("%s: n_expert_used = %u\n", __func__, hparams.n_expert_used);
3636
+ LLAMA_LOG_INFO("%s: pooling type = %d\n", __func__, hparams.pooling_type);
3637
+ LLAMA_LOG_INFO("%s: rope type = %d\n", __func__, hparams.rope_type);
3638
  LLAMA_LOG_INFO("%s: rope scaling = %s\n", __func__, rope_scaling_type);
3639
  LLAMA_LOG_INFO("%s: freq_base_train = %.1f\n", __func__, hparams.rope_freq_base_train);
3640
  LLAMA_LOG_INFO("%s: freq_scale_train = %g\n", __func__, hparams.rope_freq_scale_train);
 
4656
 
4657
  using llm_build_cb = std::function<void(struct ggml_tensor * cur, const char * name, int nl)>;
4658
 
4659
  enum llm_ffn_op_type {
4660
  LLM_FFN_SILU,
4661
  LLM_FFN_GELU,
 
4701
  return inpL;
4702
  }
4703
 
4704
  static void llm_build_kv_store(
4705
  struct ggml_context * ctx,
4706
  const llama_hparams & hparams,
 
4902
  ggml_mul_mat_set_prec(kq, GGML_PREC_F32);
4903
  }
4904
 
4905
+ #if defined(GGML_USE_VULKAN) || defined(GGML_USE_KOMPUTE)
4906
+ #pragma message("TODO: ALiBi support in ggml_soft_max_ext is not implemented for Vulkan, and Kompute")
4907
  #pragma message(" Falling back to ggml_alibi(). Will become an error in Mar 2024")
4908
  #pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5488")
4909
  if (hparams.f_max_alibi_bias > 0.0f) {
 
5004
 
5005
  const int64_t n_embd;
5006
  const int64_t n_layer;
5007
+ const int64_t n_rot;
5008
  const int64_t n_ctx; // user-specified context size (can be different from n_ctx_train)
5009
  const int64_t n_head;
5010
  const int64_t n_head_kv;
 
5029
  const int32_t kv_head; // index of where we store new KV data in the cache
5030
  const int32_t n_orig_ctx;
5031
 
5032
+ const enum llama_pooling_type pooling_type;
5033
+ const enum llama_rope_type rope_type;
5034
 
5035
  const llm_build_cb & cb;
5036
 
5052
  kv_self (lctx.kv_self),
5053
  n_embd (hparams.n_embd),
5054
  n_layer (hparams.n_layer),
5055
+ n_rot (hparams.n_rot),
5056
  n_ctx (cparams.n_ctx),
5057
  n_head (hparams.n_head),
5058
  n_head_kv (hparams.n_head_kv),
 
5074
  n_kv (worst_case ? n_ctx : kv_self.n),
5075
  kv_head (worst_case ? n_ctx - n_tokens : kv_self.head),
5076
  n_orig_ctx (cparams.n_yarn_orig_ctx),
5077
+ pooling_type (cparams.do_pooling ? hparams.pooling_type : LLAMA_POOLING_TYPE_NONE),
5078
+ rope_type (hparams.rope_type),
5079
  cb (cb),
5080
  buf_compute_meta (lctx.buf_compute_meta) {
5081
  // all initializations should be done in init()
 
5098
  }
5099
  }
5100
 
5101
+ struct ggml_cgraph * build_k_shift() {
5102
+ struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
5103
+
5104
+ for (int il = 0; il < n_layer; ++il) {
5105
+ struct ggml_tensor * tmp =
5106
+ // we rotate only the first n_rot dimensions
5107
+ ggml_rope_custom_inplace(ctx0,
5108
+ ggml_view_3d(ctx0, kv_self.k_l[il],
5109
+ n_embd_head_k, n_head_kv, n_ctx,
5110
+ ggml_row_size(kv_self.k_l[il]->type, n_embd_head_k),
5111
+ ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa),
5112
+ 0),
5113
+ lctx.inp_K_shift, n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
5114
+ ext_factor, attn_factor, beta_fast, beta_slow);
5115
+ cb(tmp, "K_shifted", il);
5116
+ ggml_build_forward_expand(gf, tmp);
5117
+ }
5118
+
5119
+ return gf;
5120
+ }
5121
+
5122
+ struct ggml_cgraph * build_defrag(const std::vector<uint32_t> & ids) {
5123
+ struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
5124
+
5125
+ for (uint32_t i = 0; i < ids.size(); ++i) {
5126
+ const uint32_t id = ids[i];
5127
+
5128
+ if (i == id || id == ids.size()) {
5129
+ continue;
5130
+ }
5131
+
5132
+ uint32_t nm = 1;
5133
+
5134
+ while (i + nm < ids.size() && ids[i + nm] == id + nm) {
5135
+ nm++;
5136
+ }
5137
+
5138
+ for (int il = 0; il < n_layer; ++il) {
5139
+ ggml_tensor * view_k_src = ggml_view_2d(ctx0, kv_self.k_l[il],
5140
+ n_embd_k_gqa, nm,
5141
+ ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa),
5142
+ ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*i));
5143
+
5144
+ ggml_tensor * view_k_dst = ggml_view_2d(ctx0, kv_self.k_l[il],
5145
+ n_embd_k_gqa, nm,
5146
+ ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa),
5147
+ ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*id));
5148
+
5149
+ ggml_tensor * view_v_src = ggml_view_2d(ctx0, kv_self.v_l[il],
5150
+ nm, n_embd_v_gqa,
5151
+ ggml_row_size(kv_self.v_l[il]->type, kv_self.size),
5152
+ ggml_row_size(kv_self.v_l[il]->type, i));
5153
+
5154
+ ggml_tensor * view_v_dst = ggml_view_2d(ctx0, kv_self.v_l[il],
5155
+ nm, n_embd_v_gqa,
5156
+ ggml_row_size(kv_self.v_l[il]->type, kv_self.size),
5157
+ ggml_row_size(kv_self.v_l[il]->type, id));
5158
+
5159
+ ggml_build_forward_expand(gf, ggml_cpy(ctx0, view_k_src, view_k_dst));
5160
+ ggml_build_forward_expand(gf, ggml_cpy(ctx0, view_v_src, view_v_dst));
5161
+ }
5162
+
5163
+ i += nm - 1;
5164
+ }
5165
+
5166
+ //LLAMA_LOG_INFO("gf->n_nodes = %d\n", gf->n_nodes);
5167
+
5168
+ return gf;
5169
+ }
5170
+
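// --- editor's sketch: how build_defrag() batches moves (not from the diff) ----
// Standalone restatement of the coalescing loop above: a run of cells
// [i, i+nm) whose destinations ids[i..i+nm) are consecutive is moved with a
// single pair of K/V views per layer, keeping the graph small.
static size_t sketch_count_move_runs(const std::vector<uint32_t> & ids) {
    size_t n_runs = 0;
    for (uint32_t i = 0; i < (uint32_t) ids.size(); ++i) {
        const uint32_t id = ids[i];
        if (i == id || id == ids.size()) {
            continue; // cell stays in place or is not moved
        }
        uint32_t nm = 1;
        while (i + nm < ids.size() && ids[i + nm] == id + nm) {
            nm++; // extend the contiguous run
        }
        n_runs++;
        i += nm - 1; // skip the rest of the run
    }
    return n_runs;
}
// e.g. ids = {0, 5, 5, 1, 2} (5 cells; cells 3-4 move into the hole at 1-2)
// yields a single run, i.e. one ggml_cpy pair per layer instead of two.
// -------------------------------------------------------------------------------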
5171
  struct ggml_cgraph * build_llama() {
5172
  struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
5173
 
5189
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
5190
  cb(KQ_mask, "KQ_mask", -1);
5191
 
5192
  for (int il = 0; il < n_layer; ++il) {
5193
  struct ggml_tensor * inpSA = inpL;
5194
 
5224
 
5225
  Qcur = ggml_rope_custom(
5226
  ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
5227
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
5228
  ext_factor, attn_factor, beta_fast, beta_slow
5229
  );
5230
  cb(Qcur, "Qcur", il);
5231
 
5232
  Kcur = ggml_rope_custom(
5233
  ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
5234
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
5235
  ext_factor, attn_factor, beta_fast, beta_slow
5236
  );
5237
  cb(Kcur, "Kcur", il);
 
5372
  struct ggml_tensor * KQ_pos = ggml_view_1d(ctx0, lctx.inp_KQ_pos, n_kv, 0);
5373
  cb(KQ_pos, "KQ_pos", -1);
5374
 
5375
  for (int il = 0; il < n_layer; ++il) {
5376
  struct ggml_tensor * inpSA = inpL;
5377
 
5395
  case MODEL_7B:
5396
  Qcur = ggml_rope_custom(
5397
  ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
5398
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
5399
  ext_factor, attn_factor, beta_fast, beta_slow
5400
  );
5401
  Kcur = ggml_rope_custom(
5402
  ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
5403
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
5404
  ext_factor, attn_factor, beta_fast, beta_slow
5405
  );
5406
  break;
 
5485
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
5486
  cb(KQ_mask, "KQ_mask", -1);
5487
 
5488
  for (int il = 0; il < n_layer; ++il) {
5489
  struct ggml_tensor * attn_norm;
5490
 
5523
 
5524
  // using mode = 2 for neox mode
5525
  Qcur = ggml_rope_custom(
5526
+ ctx0, Qcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
5527
  freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
5528
  );
5529
  cb(Qcur, "Qcur", il);
5530
 
5531
  Kcur = ggml_rope_custom(
5532
+ ctx0, Kcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
5533
  freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
5534
  );
5535
  cb(Kcur, "Kcur", il);
 
5699
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
5700
  cb(KQ_mask, "KQ_mask", -1);
5701
 
5702
  for (int il = 0; il < n_layer; ++il) {
5703
  struct ggml_tensor * residual = inpL;
5704
 
5756
 
5757
  // RoPE the first n_rot of q/k, pass the other half, and concat.
5758
  struct ggml_tensor * qrot = ggml_view_3d(
5759
+ ctx0, tmpq, n_rot, n_head, n_tokens,
5760
  ggml_element_size(tmpq) * n_embd_head,
5761
  ggml_element_size(tmpq) * n_embd_head * n_head,
5762
  0
 
5764
  cb(qrot, "qrot", il);
5765
 
5766
  struct ggml_tensor * krot = ggml_view_3d(
5767
+ ctx0, tmpk, n_rot, n_head, n_tokens,
5768
  ggml_element_size(tmpk) * n_embd_head,
5769
  ggml_element_size(tmpk) * n_embd_head * n_head,
5770
  0
 
5773
 
5774
  // get the second half of tmpq, e.g. tmpq[n_rot:, :, :]
5775
  struct ggml_tensor * qpass = ggml_view_3d(
5776
+ ctx0, tmpq, n_rot, n_head, n_tokens,
5777
  ggml_element_size(tmpq) * n_embd_head,
5778
  ggml_element_size(tmpq) * n_embd_head * n_head,
5779
+ ggml_element_size(tmpq) * n_rot
5780
  );
5781
  cb(qpass, "qpass", il);
5782
 
5783
  struct ggml_tensor * kpass = ggml_view_3d(
5784
+ ctx0, tmpk, n_rot, n_head, n_tokens,
5785
  ggml_element_size(tmpk) * n_embd_head,
5786
  ggml_element_size(tmpk) * n_embd_head * n_head,
5787
+ ggml_element_size(tmpk) * n_rot
5788
  );
5789
  cb(kpass, "kpass", il);
5790
 
5791
  struct ggml_tensor * qrotated = ggml_rope_custom(
5792
+ ctx0, qrot, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
5793
  freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
5794
  );
5795
  cb(qrotated, "qrotated", il);
5796
 
5797
  struct ggml_tensor * krotated = ggml_rope_custom(
5798
+ ctx0, krot, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
5799
  freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
5800
  );
5801
  cb(krotated, "krotated", il);
 
6047
 
6048
  Qcur = ggml_rope_custom(
6049
  ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
6050
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
6051
  ext_factor, attn_factor, beta_fast, beta_slow
6052
  );
6053
  cb(Qcur, "Qcur", il);
6054
 
6055
  Kcur = ggml_rope_custom(
6056
  ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
6057
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
6058
  ext_factor, attn_factor, beta_fast, beta_slow
6059
  );
6060
  cb(Kcur, "Kcur", il);
 
6343
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
6344
  cb(KQ_mask, "KQ_mask", -1);
6345
 
6346
  for (int il = 0; il < n_layer; ++il) {
6347
  struct ggml_tensor * inpSA = inpL;
6348
 
6379
 
6380
  Qcur = ggml_rope_custom(
6381
  ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
6382
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
6383
  ext_factor, attn_factor, beta_fast, beta_slow
6384
  );
6385
  cb(Qcur, "Qcur", il);
6386
 
6387
  Kcur = ggml_rope_custom(
6388
  ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
6389
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
6390
  ext_factor, attn_factor, beta_fast, beta_slow
6391
  );
6392
  cb(Kcur, "Kcur", il);
 
6461
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
6462
  cb(KQ_mask, "KQ_mask", -1);
6463
 
6464
  for (int il = 0; il < n_layer; ++il) {
6465
  struct ggml_tensor * inpSA = inpL;
6466
 
6490
 
6491
  // using mode = 2 for neox mode
6492
  Qcur = ggml_rope_custom(
6493
+ ctx0, Qcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
6494
  freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
6495
  );
6496
  cb(Qcur, "Qcur", il);
6497
 
6498
  Kcur = ggml_rope_custom(
6499
+ ctx0, Kcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
6500
  freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
6501
  );
6502
  cb(Kcur, "Kcur", il);
 
6570
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
6571
  cb(KQ_mask, "KQ_mask", -1);
6572
 
6573
  for (int il = 0; il < n_layer; ++il) {
6574
  struct ggml_tensor * inpSA = inpL;
6575
 
6605
 
6606
  Qcur = ggml_rope_custom(
6607
  ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
6608
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
6609
  ext_factor, attn_factor, beta_fast, beta_slow
6610
  );
6611
  cb(Qcur, "Qcur", il);
6612
 
6613
  Kcur = ggml_rope_custom(
6614
  ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
6615
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
6616
  ext_factor, attn_factor, beta_fast, beta_slow
6617
  );
6618
  cb(Kcur, "Kcur", il);
 
6686
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
6687
  cb(KQ_mask, "KQ_mask", -1);
6688
 
6689
  for (int il = 0; il < n_layer; ++il) {
6690
  attn_norm_output = llm_build_norm(ctx0, inpL, hparams,
6691
  model.layers[il].attn_norm,
 
6723
  Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
6724
 
6725
  Qcur = ggml_rope_custom(
6726
+ ctx0, Qcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
6727
  freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
6728
  );
6729
  cb(Qcur, "Qcur", il);
 
6734
  cb(Qcur, "Qcur", il);
6735
 
6736
  Kcur = ggml_rope_custom(
6737
+ ctx0, Kcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
6738
  freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
6739
  );
6740
  cb(Kcur, "Kcur", il);
 
6803
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
6804
  cb(KQ_mask, "KQ_mask", -1);
6805
 
6806
  for (int il = 0; il < n_layer; ++il) {
6807
 
6808
  // norm
 
6826
  cb(Vcur, "Vcur", il);
6827
 
6828
  Qcur = ggml_rope_custom(
6829
+ ctx0, ggml_reshape_3d(ctx0, Qcur, n_rot, n_head, n_tokens), inp_pos,
6830
+ n_embd_head, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
6831
  ext_factor, attn_factor, beta_fast, beta_slow);
6832
  cb(Qcur, "Qcur", il);
6833
 
6834
  Kcur = ggml_rope_custom(
6835
+ ctx0, ggml_reshape_3d(ctx0, Kcur, n_rot, n_head_kv, n_tokens), inp_pos,
6836
+ n_embd_head, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
6837
  ext_factor, attn_factor, beta_fast, beta_slow);
6838
  cb(Kcur, "Kcur", il);
6839
 
7003
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
7004
  cb(KQ_mask, "KQ_mask", -1);
7005
 
7006
  for (int il = 0; il < n_layer; ++il) {
7007
  cur = llm_build_norm(ctx0, inpL, hparams,
7008
  model.layers[il].attn_norm,
 
7028
 
7029
  struct ggml_tensor * Qcur = ggml_rope_custom(
7030
  ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, n_tokens), inp_pos,
7031
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
7032
  ext_factor, attn_factor, beta_fast, beta_slow
7033
  );
7034
  cb(Qcur, "Qcur", il);
7035
 
7036
  struct ggml_tensor * Kcur = ggml_rope_custom(
7037
  ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, n_tokens), inp_pos,
7038
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
7039
  ext_factor, attn_factor, beta_fast, beta_slow
7040
  );
7041
  cb(Kcur, "Kcur", il);
 
7106
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
7107
  cb(KQ_mask, "KQ_mask", -1);
7108
 
7109
  for (int il = 0; il < n_layer; ++il) {
7110
  struct ggml_tensor * inpSA = inpL;
7111
 
7141
 
7142
  Qcur = ggml_rope_custom(
7143
  ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
7144
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
7145
  ext_factor, attn_factor, beta_fast, beta_slow
7146
  );
7147
  cb(Qcur, "Qcur", il);
7148
 
7149
  Kcur = ggml_rope_custom(
7150
  ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
7151
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
7152
  ext_factor, attn_factor, beta_fast, beta_slow
7153
  );
7154
  cb(Kcur, "Kcur", il);
 
7220
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
7221
  cb(KQ_mask, "KQ_mask", -1);
7222
 
7223
  for (int il = 0; il < n_layer; ++il) {
7224
  struct ggml_tensor * inpSA = inpL;
7225
 
7255
 
7256
  Qcur = ggml_rope_custom(
7257
  ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
7258
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
7259
  ext_factor, attn_factor, beta_fast, beta_slow
7260
  );
7261
  cb(Qcur, "Qcur", il);
7262
 
7263
  Kcur = ggml_rope_custom(
7264
  ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
7265
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
7266
  ext_factor, attn_factor, beta_fast, beta_slow
7267
  );
7268
  cb(Kcur, "Kcur", il);
 
7347
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
7348
  cb(KQ_mask, "KQ_mask", -1);
7349
 
7350
  for (int il = 0; il < n_layer; ++il) {
7351
  struct ggml_tensor * inpSA = inpL;
7352
 
7382
 
7383
  Qcur = ggml_rope_custom(
7384
  ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
7385
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
7386
  ext_factor, attn_factor, beta_fast, beta_slow
7387
  );
7388
  cb(Qcur, "Qcur", il);
7389
 
7390
  Kcur = ggml_rope_custom(
7391
  ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
7392
+ n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
7393
  ext_factor, attn_factor, beta_fast, beta_slow
7394
  );
7395
  cb(Kcur, "Kcur", il);
 
7478
  struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
7479
  cb(KQ_mask, "KQ_mask", -1);
7480
 
7481
  for (int il = 0; il < n_layer; ++il) {
7482
 
7483
  // norm
 
7500
 
7501
  Qcur = ggml_rope_custom(
7502
  ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head_k, n_head, n_tokens), inp_pos,
7503
+ n_embd_head_k, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
7504
  ext_factor, attn_factor, beta_fast, beta_slow);
7505
  cb(Qcur, "Qcur", il);
7506
 
7509
 
7510
  Kcur = ggml_rope_custom(
7511
  ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head_k, n_head_kv, n_tokens), inp_pos,
7512
+ n_embd_head_k, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
7513
  ext_factor, attn_factor, beta_fast, beta_slow);
7514
  cb(Kcur, "Kcur", il);
7515
 
7562
  }
7563
  };
7564
 
7565
+ static struct ggml_cgraph * llama_build_graph_defrag(llama_context & lctx, const std::vector<uint32_t> & ids) {
7566
+ llama_batch dummy;
7567
+ dummy.n_tokens = 0;
7568
+
7569
+ llm_build_cb cb = [&](struct ggml_tensor * , const char * , int ) { };
7570
+
7571
+ struct llm_build_context llm(lctx, dummy, cb, false);
7572
+
7573
+ llm.init();
7574
+
7575
+ struct ggml_cgraph * result = llm.build_defrag(ids);
7576
+
7577
+ llm.free();
7578
+
7579
+ return result;
7580
+ }
7581
+
7582
+ static struct ggml_cgraph * llama_build_graph_k_shift(llama_context & lctx) {
7583
+ llama_batch dummy;
7584
+ dummy.n_tokens = 0;
7585
+
7586
+ llm_build_cb cb = [&](struct ggml_tensor * , const char * , int ) { };
7587
+
7588
+ struct llm_build_context llm(lctx, dummy, cb, false);
7589
+
7590
+ llm.init();
7591
+
7592
+ struct ggml_cgraph * result = llm.build_k_shift();
7593
+
7594
+ llm.free();
7595
+
7596
+ return result;
7597
+ }
7598
+
7599
  static struct ggml_cgraph * llama_build_graph(
7600
  llama_context & lctx,
7601
  const llama_batch & batch,
 
7715
  return result;
7716
  }
7717
 
7718
+ static void llama_set_k_shift(llama_context & lctx) {
7719
+ const auto & cparams = lctx.cparams;
7720
+
7721
+ const int64_t n_ctx = cparams.n_ctx;
7722
+
7723
+ assert(ggml_backend_buffer_is_host(lctx.inp_K_shift->buffer));
7724
+
7725
+ int32_t * data = (int32_t *) lctx.inp_K_shift->data;
7726
+
7727
+ for (int i = 0; i < n_ctx; ++i) {
7728
+ data[i] = lctx.kv_self.cells[i].delta;
7729
+ }
7730
+ }
7731
+
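// --- editor's sketch: how cell deltas accumulate (not from the diff) ----------
// Toy illustration of the bookkeeping behind the loop above:
// llama_kv_cache_seq_add() adjusts both pos and delta, the K-shift graph later
// consumes delta once, and update_internal then resets it to 0.
struct sketch_cell { llama_pos pos = 0; llama_pos delta = 0; };

static void sketch_shift(sketch_cell & c, llama_pos d) {
    c.pos   += d; // new absolute position of the cached token
    c.delta += d; // pending RoPE correction for the cached K data
}
// sketch_shift(c, -2); sketch_shift(c, -3);  ->  c.delta == -5, applied in a
// single K-shift pass and then cleared.
// -------------------------------------------------------------------------------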
7732
  static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
7733
  //
7734
  // set input data
 
7796
  }
7797
  }
7798
 
7799
  if (cparams.do_pooling && hparams.pooling_type == LLAMA_POOLING_TYPE_MEAN) {
7800
  const int64_t n_tokens = batch.n_tokens;
7801
 
7840
  }
7841
  }
7842
 
7843
+ static void llama_graph_compute(
7844
+ llama_context & lctx,
7845
+ ggml_cgraph * gf,
7846
+ int n_threads) {
7847
+ #ifdef GGML_USE_MPI
7848
+ const int64_t n_layer = lctx.model.hparams.n_layer;
7849
+ ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
7850
+ #endif
7851
+
7852
+ #ifdef GGML_USE_METAL
7853
+ if (ggml_backend_is_metal(lctx.backend_metal)) {
7854
+ ggml_backend_metal_set_n_cb(lctx.backend_metal, n_threads);
7855
+ }
7856
+ #endif
7857
+
7858
+ if (lctx.backend_cpu != nullptr) {
7859
+ ggml_backend_cpu_set_n_threads(lctx.backend_cpu, n_threads);
7860
+ }
7861
+
7862
+ ggml_backend_sched_graph_compute(lctx.sched, gf);
7863
+
7864
+ // fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched));
7865
+
7866
+ #ifdef GGML_USE_MPI
7867
+ ggml_mpi_graph_compute_post(lctx.ctx_mpi, gf, n_layer);
7868
+ #endif
7869
+ }
7870
+
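// --- editor's note (not part of the upstream diff) -----------------------------
// Factoring execution into llama_graph_compute() lets the K-shift and defrag
// graphs further below run through the same backend, thread, and MPI setup as
// regular decoding, e.g.:
//
//     ggml_cgraph * gf = llama_build_graph_k_shift(lctx);
//     llama_graph_compute(lctx, gf, lctx.cparams.n_threads);
// --------------------------------------------------------------------------------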
7871
  // decode a batch of tokens by evaluating the transformer
7872
  //
7873
  // - lctx: llama context
 
7945
  batch.seq_id = seq_id_arr.data();
7946
  }
7947
 
7948
+ llama_kv_cache_update(&lctx);
7949
+
7950
  // if we have enough unused cells before the current head ->
7951
  // better to start searching from the beginning of the cache, hoping to fill it
7952
  if (kv_self.head > kv_self.used + 2*n_tokens) {
 
7971
  ggml_cgraph * gf = llama_build_graph(lctx, batch, false);
7972
 
7973
  // the output is always the last tensor in the graph
7974
+ struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1];
7975
  struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 2];
7976
+
7977
  if (strcmp(res->name, "result_output") == 0) {
7978
  // the embeddings could be the second to last tensor, or the third to last tensor
7979
  if (strcmp(embeddings->name, "result_norm") != 0) {
 
8000
  n_threads = std::min(4, n_threads);
8001
  }
8002
 
8003
  llama_set_inputs(lctx, batch);
8004
 
8005
+ llama_graph_compute(lctx, gf, n_threads);
 
8006
 
8007
  // update the kv ring buffer
8008
  {
 
8009
  kv_self.head += n_tokens;
8010
 
8011
  // Ensure kv cache head points to a valid index.
 
8014
  }
8015
  }
8016
 
8017
+ // decide if we need to defrag the kv cache
8018
+ if (cparams.defrag_thold >= 0.0f) {
8019
+ const float fragmentation = kv_self.n >= 128 ? 1.0f - float(kv_self.used + n_tokens)/float(kv_self.n) : 0.0f;
8020
+
8021
+ // queue defragmentation for next llama_kv_cache_update
8022
+ if (fragmentation > cparams.defrag_thold) {
8023
+ //LLAMA_LOG_INFO("fragmentation: %.2f\n", fragmentation);
8024
+
8025
+ llama_kv_cache_defrag(kv_self);
8026
+ }
8027
+ }
8028
+
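// --- editor's sketch: the fragmentation measure, extracted (not from the diff) -
// The fraction of the active cache window (kv_self.n) that holds no live data;
// small windows are ignored because the ratio is noisy there.
static float sketch_kv_fragmentation(uint32_t n_active, uint32_t n_used, uint32_t n_tokens) {
    return n_active >= 128 ? 1.0f - float(n_used + n_tokens)/float(n_active) : 0.0f;
}
// e.g. sketch_kv_fragmentation(512, 300, 32) == 1.0f - 332.0f/512.0f ~= 0.35,
// which with defrag_thold = 0.1f would queue a defrag.
// --------------------------------------------------------------------------------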
8029
  #ifdef GGML_PERF
8030
  // print timing information per ggml operation (for debugging purposes)
8031
  // requires GGML_PERF to be defined
 
8113
  return 0;
8114
  }
8115
 
8116
+ // find holes from the beginning of the KV cache and fill them by moving data from the end of the cache
8117
+ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
8118
+ auto & kv_self = lctx.kv_self;
8119
+
8120
+ const auto & hparams = lctx.model.hparams;
8121
+
8122
+ const uint32_t n_layer = hparams.n_layer;
8123
+
8124
+ const uint32_t n_kv = llama_kv_cache_cell_max(kv_self);
8125
+ const uint32_t n_used = kv_self.used;
8126
+
8127
+ assert(n_used <= n_kv);
8128
+
8129
+ //const int64_t t_start = ggml_time_us();
8130
+
8131
+ // number of cells moved
8132
+ uint32_t n_moves = 0;
8133
+
8134
+ // determine which KV cells to move where
8135
+ //
8136
+ // cell i moves to ids[i]
8137
+ //
8138
+ // if ids[i] == i || ids[i] == n_kv, then cell i is not moved
8139
+ //
8140
+ std::vector<uint32_t> ids(n_kv, n_kv);
8141
+
8142
+ for (uint32_t i0 = 0; i0 < n_used; ++i0) {
8143
+ const auto & cell0 = kv_self.cells[i0];
8144
+
8145
+ if (!cell0.is_empty()) {
8146
+ ids[i0] = i0;
8147
+
8148
+ continue;
8149
+ }
8150
+
8151
+ // found a hole - fill it with data from the end of the cache
8152
+
8153
+ uint32_t nh = 1;
8154
+
8155
+ // determine the size of the hole
8156
+ while (i0 + nh < n_used && kv_self.cells[i0 + nh].is_empty()) {
8157
+ nh++;
8158
+ }
8159
+
8160
+ // each move requires 6*n_layer tensors (see build_defrag)
8161
+ // - source view, destination view, copy operation
8162
+ // - x2 for keys and values
8163
+ //
8164
+ if (6*(n_moves + nh)*n_layer >= LLAMA_MAX_NODES) {
8165
+ // the graph is too big, we cannot move more cells
8166
+ break;
8167
+ }
8168
+
8169
+ uint32_t nf = 0;
8170
+ uint32_t is = n_kv - 1;
8171
+
8172
+ // starting from the end, find nh non-empty cells
8173
+ for (; is > i0; --is) {
8174
+ const auto & cell1 = kv_self.cells[is];
8175
+
8176
+ if (cell1.is_empty() || ids[is] != n_kv) {
8177
+ continue;
8178
+ }
8179
+
8180
+ // non-empty cell which is not yet moved
8181
+ nf++;
8182
+
8183
+ if (nf == nh) {
8184
+ break;
8185
+ }
8186
+ }
8187
+
8188
+ // this can only happen if `n_used` is not accurate, which would be a bug
8189
+ GGML_ASSERT(nf == nh && "KV defrag bug: nf != nh");
8190
+
8191
+ nf = 0;
8192
+
8193
+ uint32_t i1 = is;
8194
+
8195
+ // are we moving a contiguous block of memory?
8196
+ bool cont = false;
8197
+
8198
+ // go back and move the nf cells to the hole
8199
+ for (; i1 < n_kv; ++i1) {
8200
+ auto & cell1 = kv_self.cells[i1];
8201
+
8202
+ if (cell1.is_empty() || ids[i1] != n_kv) {
8203
+ cont = false;
8204
+ continue;
8205
+ }
8206
+
8207
+ // this cell goes to (i0 + nf)
8208
+ ids[i1] = i0 + nf;
8209
+
8210
+ // move the cell meta data
8211
+ kv_self.cells[i0 + nf] = cell1;
8212
+
8213
+ // clear the old cell and move the head there
8214
+ cell1 = llama_kv_cell();
8215
+ kv_self.head = n_used;
8216
+
8217
+ if (!cont) {
8218
+ n_moves++;
8219
+ cont = true;
8220
+ }
8221
+
8222
+ nf++;
8223
+
8224
+ if (nf == nh) {
8225
+ break;
8226
+ }
8227
+ }
8228
+
8229
+ //LLAMA_LOG_INFO("(tmp log) KV defrag: move [%u, %u) to [%u, %u)\n", is, i1 + 1, i0, i0 + nh);
8230
+
8231
+ i0 += nh - 1;
8232
+ }
8233
+
8234
+ if (n_moves == 0) {
8235
+ return;
8236
+ }
8237
+
8238
+ //LLAMA_LOG_INFO("(tmp log) KV defrag cell moves: %u\n", n_moves);
8239
+
8240
+ //LLAMA_LOG_INFO("expected gf nodes: %u\n", 6*n_moves*n_layer);
8241
+
8242
+ #if 0
8243
+ // CPU defrag
8244
+ //
8245
+ // TODO: optimizations are possible:
8246
+ // - multiple threads
8247
+ // - avoid copying to the host memory when already there
8248
+ //
8249
+ // likely not worth the effort, as we have ggml_graph based defrag
8250
+ //
8251
+
8252
+ const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa();
8253
+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa();
8254
+
8255
+ const uint32_t kv_size = kv_self.size;
8256
+
8257
+ std::vector<uint8_t> buf_k;
8258
+ std::vector<uint8_t> buf_v;
8259
+
8260
+ for (uint32_t il = 0; il < n_layer; ++il) {
8261
+ const size_t k_size_row = ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa);
8262
+ const size_t k_size = ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*kv_size);
8263
+
8264
+ const size_t v_size_el = ggml_type_size(kv_self.v_l[il]->type);
8265
+ const size_t v_size = ggml_row_size (kv_self.v_l[il]->type, n_embd_v_gqa*kv_size);
8266
+
8267
+ buf_k.resize(k_size);
8268
+ buf_v.resize(v_size);
8269
+
8270
+ ggml_backend_tensor_get(kv_self.k_l[il], buf_k.data(), 0, buf_k.size());
8271
+ ggml_backend_tensor_get(kv_self.v_l[il], buf_v.data(), 0, buf_v.size());
8272
+
8273
+ // batch move [i, i+nm) to [id, id+nm)
8274
+ // note: cells can move only to a lower index
8275
+ for (uint32_t i = 0; i < n_kv; ++i) {
8276
+ const uint32_t id = ids[i];
8277
+
8278
+ if (i == id || id == n_kv) {
8279
+ continue;
8280
+ }
8281
+
8282
+ uint32_t nm = 1;
8283
+
8284
+ while (i + nm < n_kv && ids[i + nm] == id + nm) {
8285
+ nm++;
8286
+ }
8287
+
8288
+ // move keys
8289
+ {
8290
+ const int64_t os = i*k_size_row;
8291
+ const int64_t od = id*k_size_row;
8292
+
8293
+ memcpy(buf_k.data() + od, buf_k.data() + os, nm*k_size_row);
8294
+ }
8295
+
8296
+ // move values (note: they are transposed)
8297
+ {
8298
+ const int64_t os = i;
8299
+ const int64_t od = id;
8300
+
8301
+ for (uint32_t j = 0; j < n_embd_v_gqa; ++j) {
8302
+ memcpy(buf_v.data() + (od + j*kv_size)*v_size_el, buf_v.data() + (os + j*kv_size)*v_size_el, nm*v_size_el);
8303
+ }
8304
+ }
8305
+
8306
+ i += nm - 1;
8307
+ }
8308
+
8309
+ ggml_backend_tensor_set(kv_self.k_l[il], buf_k.data(), 0, buf_k.size());
8310
+ ggml_backend_tensor_set(kv_self.v_l[il], buf_v.data(), 0, buf_v.size());
8311
+ }
8312
+ #else
8313
+ // ggml_graph defrag
8314
+
8315
+ ggml_cgraph * gf = llama_build_graph_defrag(lctx, ids);
8316
+
8317
+ llama_graph_compute(lctx, gf, lctx.cparams.n_threads);
8318
+ #endif
8319
+
8320
+ //const int64_t t_end = ggml_time_us();
8321
+
8322
+ //LLAMA_LOG_INFO("(tmp log) KV defrag time: %.3f ms\n", (t_end - t_start)/1000.0);
8323
+ }
8324
+
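// --- editor's sketch: the defrag plan on plain data (not from the diff) --------
// The planning phase above, restated over a bare occupancy vector. Contract:
// ids[i] == i means "stays", ids[i] == n_kv means "hole or untouched",
// otherwise cell i moves down to ids[i].
static std::vector<uint32_t> sketch_defrag_plan(const std::vector<bool> & occ, uint32_t n_used) {
    const uint32_t n_kv = (uint32_t) occ.size();
    std::vector<uint32_t> ids(n_kv, n_kv);
    for (uint32_t i0 = 0; i0 < n_used; ++i0) {
        if (occ[i0]) { ids[i0] = i0; continue; }   // live cell stays put
        uint32_t nh = 1;                           // measure the hole
        while (i0 + nh < n_used && !occ[i0 + nh]) nh++;
        // take the nh highest live, not-yet-moved cells ...
        std::vector<uint32_t> src;
        for (uint32_t is = n_kv - 1; is > i0 && (uint32_t) src.size() < nh; --is) {
            if (occ[is] && ids[is] == n_kv) src.push_back(is);
        }
        // ... and map them into the hole, preserving their relative order
        std::sort(src.begin(), src.end());
        for (uint32_t nf = 0; nf < (uint32_t) src.size(); ++nf) {
            ids[src[nf]] = i0 + nf;
        }
        i0 += nh - 1;
    }
    return ids;
}
// e.g. occ = {1,0,0,1,1}, n_used = 3  ->  ids = {0, 5, 5, 1, 2}
// --------------------------------------------------------------------------------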
8325
+ static void llama_kv_cache_update_internal(struct llama_context & lctx) {
8326
+ // apply K-shift if needed
8327
+ if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
8328
+ llama_set_k_shift(lctx);
8329
+
8330
+ {
8331
+ ggml_cgraph * gf = llama_build_graph_k_shift(lctx);
8332
+
8333
+ llama_graph_compute(lctx, gf, lctx.cparams.n_threads);
8334
+ }
8335
+
8336
+ {
8337
+ auto & kv_self = lctx.kv_self;
8338
+
8339
+ kv_self.has_shift = false;
8340
+
8341
+ for (uint32_t i = 0; i < kv_self.size; ++i) {
8342
+ kv_self.cells[i].delta = 0;
8343
+ }
8344
+ }
8345
+ }
8346
+
8347
+ // defragment the KV cache if needed
8348
+ if (lctx.kv_self.do_defrag) {
8349
+ llama_kv_cache_defrag_internal(lctx);
8350
+
8351
+ lctx.kv_self.do_defrag = false;
8352
+ }
8353
+ }
8354
+
8355
  //
8356
  // tokenizer
8357
  //
 
8943
  }
8944
 
8945
  std::vector<std::string> preprocess(const std::string & text) {
8946
+ // normalization form D (NFD)
8947
+ std::vector<uint32_t> codepoints = codepoints_from_utf8(text);
8948
+ std::vector<uint32_t> nfd_codepoints;
8949
+ for (uint32_t code : codepoints) {
8950
+ auto it = nfd_map.find(code);
8951
+ if (it != nfd_map.end()) {
8952
+ for (uint32_t c : it->second) {
8953
+ nfd_codepoints.push_back(c);
8954
+ }
8955
+ } else {
8956
+ nfd_codepoints.push_back(code);
8957
+ }
8958
+ }
8959
 
8960
+ // strip accents, strip control characters, normalize whitespace,
8961
+ // lowercase, pad Chinese characters, pad punctuation
 
8962
  std::string new_str = "";
8963
+ for (uint32_t code : nfd_codepoints) {
8964
+ int type = codepoint_type(code);
8965
+ if (type == CODEPOINT_TYPE_ACCENT_MARK || type == CODEPOINT_TYPE_CONTROL) {
8966
+ continue;
8967
+ }
8968
+ code = to_lower(code);
8969
+ if (type == CODEPOINT_TYPE_WHITESPACE) {
8970
+ code = ' ';
8971
  }
8972
+ std::string s = codepoint_to_utf8(code);
8973
+ if (type == CODEPOINT_TYPE_PUNCTUATION || is_ascii_punct(code) || is_chinese_char(code)) {
8974
  new_str += " ";
8975
+ new_str += s;
8976
  new_str += " ";
8977
+ } else {
8978
+ new_str += s;
 
8979
  }
8980
  }
8981
 
8982
  // split by whitespace
8983
  uint64_t l = 0;
8984
  uint64_t r = 0;
8985
+ std::vector<std::string> words;
8986
  while (r < new_str.size()) {
8987
  // if is whitespace
8988
  if (isspace(new_str[r])) {
 
9000
  return words;
9001
  }
9002
 
9003
+ uint32_t to_lower(uint32_t code) {
9004
+ #if defined(_WIN32)
9005
+ if (code > 0xFFFF) {
9006
+ return code;
 
9007
  }
9008
+ #endif
9009
+ return std::tolower(wchar_t(code), std::locale("en_US.UTF-8"));
9010
  }
9011
 
9012
+ bool is_ascii_punct(uint32_t code) {
9013
+ return code < 256 && ispunct(code);
9014
+ }
9015
+
9016
+ bool is_chinese_char(uint32_t codepoint) {
 
9017
  if ((codepoint >= 0x4E00 && codepoint <= 0x9FFF) ||
9018
  (codepoint >= 0x3400 && codepoint <= 0x4DBF) ||
9019
  (codepoint >= 0x20000 && codepoint <= 0x2A6DF) ||
 
9029
  return false;
9030
  }
9031
 
9032
  const llama_vocab & vocab;
9033
  };
9034
 
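// --- editor's sketch: the WPM preprocessing, ASCII-only (not from the diff) ----
// Simplified restatement of preprocess() above, without the NFD table and
// codepoint classes, assuming <sstream>/<cctype> are available: lowercase,
// fold whitespace, pad punctuation, then split on spaces.
static std::vector<std::string> sketch_wpm_preprocess(const std::string & text) {
    std::string padded;
    for (unsigned char c : text) {
        if (iscntrl(c)) { continue; }                                   // drop control chars
        if (isspace(c)) { padded += ' '; continue; }                    // fold whitespace
        c = (unsigned char) tolower(c);
        if (ispunct(c)) { padded += ' '; padded += (char) c; padded += ' '; } // pad punctuation
        else            { padded += (char) c; }
    }
    std::vector<std::string> words;
    std::istringstream ss(padded); // split on whitespace
    for (std::string w; ss >> w; ) { words.push_back(w); }
    return words;
}
// sketch_wpm_preprocess("Hello, world!") -> {"hello", ",", "world", "!"}
// --------------------------------------------------------------------------------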
10754
  if (arch == LLM_ARCH_FALCON || nx % QK_K != 0) {
10755
  new_type = GGML_TYPE_Q8_0;
10756
  }
10757
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
10758
+ ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
10759
  new_type = GGML_TYPE_Q5_K;
10760
  }
10761
  else if (new_type != GGML_TYPE_Q8_0) {
10762
  new_type = GGML_TYPE_Q6_K;
10763
  }
10764
  } else if (name == "token_embd.weight") {
10765
+ if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS ||
10766
+ ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) {
10767
  new_type = GGML_TYPE_Q2_K;
10768
  }
10769
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
10770
+ new_type = GGML_TYPE_IQ3_S;
10771
+ }
10772
  else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
10773
+ new_type = GGML_TYPE_IQ3_S;
10774
  }
10775
+ } else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
10776
+ ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
10777
  if (name.find("attn_v.weight") != std::string::npos) {
10778
  if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q4_K;
10779
+ else new_type = ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M ? GGML_TYPE_IQ3_S : GGML_TYPE_Q2_K;
10780
  ++qs.i_attention_wv;
10781
  }
10782
+ else if (qs.model.hparams.n_expert == 8 && name.find("attn_k.weight") != std::string::npos) {
10783
+ new_type = GGML_TYPE_Q4_K;
10784
+ }
10785
  else if (name.find("ffn_down") != std::string::npos) {
10786
+ if (qs.i_ffn_down < qs.n_ffn_down/8) {
10787
+ new_type = ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M ? GGML_TYPE_IQ3_S : GGML_TYPE_Q2_K;
10788
+ }
10789
  ++qs.i_ffn_down;
10790
  }
10791
  else if (name.find("attn_output.weight") != std::string::npos) {
10792
+ if (qs.model.hparams.n_expert == 8) {
10793
+ new_type = GGML_TYPE_Q5_K;
10794
+ } else {
10795
+ if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) new_type = GGML_TYPE_IQ2_XXS;
10796
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) new_type = GGML_TYPE_IQ3_S;
10797
+ }
10798
  }
10799
  } else if (name.find("attn_v.weight") != std::string::npos) {
10800
  if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) {
 
10804
  new_type = GGML_TYPE_Q4_K;
10805
  }
10806
  else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
10807
+ new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
10808
+ }
10809
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) {
10810
+ new_type = GGML_TYPE_Q4_K;
10811
+ }
10812
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
10813
+ new_type = GGML_TYPE_Q4_K;
10814
  }
10815
  else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) {
10816
  new_type = GGML_TYPE_Q4_K;
 
10822
  new_type = qs.i_attention_wv < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
10823
  }
10824
  else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
10825
+ else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) && qs.model.hparams.n_gqa() >= 4) {
10826
  new_type = GGML_TYPE_Q5_K;
10827
  }
10828
  else if ((ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) &&
 
10848
  // TODO: explore better strategies
10849
  new_type = GGML_TYPE_Q8_0;
10850
  }
10851
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
10852
  new_type = GGML_TYPE_IQ3_XXS;
10853
  }
10854
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
10855
+ new_type = GGML_TYPE_IQ2_S;
10856
+ }
10857
  } else if (name.find("attn_q.weight") != std::string::npos) {
10858
+ if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
10859
  new_type = GGML_TYPE_IQ3_XXS;
10860
  }
10861
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
10862
+ new_type = GGML_TYPE_IQ2_S;
10863
+ }
10864
  } else if (name.find("ffn_down") != std::string::npos) {
10865
  auto info = layer_info(qs.i_ffn_down, qs.n_ffn_down, name.c_str());
10866
  int i_layer = info.first, n_layer = info.second;
 
10891
  if (use_more_bits(i_layer, n_layer)) new_type = GGML_TYPE_Q6_K;
10892
  }
10893
  }
10894
+ else if (i_layer < n_layer/8 && (ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) && !qs.has_imatrix) {
10895
+ new_type = GGML_TYPE_Q5_K;
10896
  }
10897
  else if (ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M && use_more_bits(i_layer, n_layer)) new_type = GGML_TYPE_Q6_K;
10898
  else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S && arch != LLM_ARCH_FALCON && i_layer < n_layer/8) {
 
10909
  } else if (name.find("attn_output.weight") != std::string::npos) {
10910
  if (arch != LLM_ARCH_FALCON) {
10911
  if (qs.model.hparams.n_expert == 8) {
10912
+ if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
10913
  ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL ||
10914
  ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S ||
10915
+ ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) {
10916
  new_type = GGML_TYPE_Q5_K;
10917
  }
10918
  } else {
10919
  if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K ) new_type = GGML_TYPE_Q3_K;
10920
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) new_type = GGML_TYPE_IQ3_S;
10921
  else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M ) new_type = GGML_TYPE_Q4_K;
10922
  else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L ) new_type = GGML_TYPE_Q5_K;
10923
  else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M ) new_type = GGML_TYPE_Q4_K;
 
10936
  else if (name.find("ffn_gate") != std::string::npos) {
10937
  auto info = layer_info(qs.i_ffn_gate, qs.n_ffn_gate, name.c_str());
10938
  int i_layer = info.first, n_layer = info.second;
10939
+ if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS && (i_layer >= n_layer/8 && i_layer < 7*n_layer/8)) {
10940
  new_type = GGML_TYPE_IQ3_XXS;
10941
  }
10942
  ++qs.i_ffn_gate;
 
10944
  else if (name.find("ffn_up") != std::string::npos) {
10945
  auto info = layer_info(qs.i_ffn_up, qs.n_ffn_up, name.c_str());
10946
  int i_layer = info.first, n_layer = info.second;
10947
+ if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS && (i_layer >= n_layer/8 && i_layer < 7*n_layer/8)) {
10948
  new_type = GGML_TYPE_IQ3_XXS;
10949
  }
10950
  ++qs.i_ffn_up;
 
10963
  //}
10964
  bool convert_incompatible_tensor = false;
10965
  if (new_type == GGML_TYPE_Q2_K || new_type == GGML_TYPE_Q3_K || new_type == GGML_TYPE_Q4_K ||
10966
+ new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K || new_type == GGML_TYPE_IQ4_XS ||
10967
+ new_type == GGML_TYPE_IQ2_XS || new_type == GGML_TYPE_IQ2_XXS || new_type == GGML_TYPE_IQ2_S ||
10968
  new_type == GGML_TYPE_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || new_type == GGML_TYPE_IQ3_S) {
10969
  int nx = tensor->ne[0];
10970
  int ny = tensor->ne[1];
 
10979
  switch (new_type) {
10980
  case GGML_TYPE_IQ2_XXS:
10981
  case GGML_TYPE_IQ2_XS:
10982
+ case GGML_TYPE_IQ2_S:
10983
  case GGML_TYPE_IQ3_XXS:
10984
  case GGML_TYPE_IQ3_S:
10985
  case GGML_TYPE_IQ1_S:
10986
  case GGML_TYPE_Q2_K:
10987
+ case GGML_TYPE_Q3_K:
10988
+ case GGML_TYPE_IQ4_XS: new_type = GGML_TYPE_IQ4_NL; break;
10989
+ case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
10990
+ case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break;
10991
+ case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;
10992
  default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
10993
  }
10994
  LLAMA_LOG_WARN(" - using fallback quantization %s\n", ggml_type_name(new_type));
 
11014
  // K-quants
11015
  case LLAMA_FTYPE_MOSTLY_Q2_K_S:
11016
  case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break;
11017
+ case LLAMA_FTYPE_MOSTLY_IQ3_XS: quantized_type = GGML_TYPE_IQ3_S; break;
11018
  case LLAMA_FTYPE_MOSTLY_Q3_K_S:
11019
  case LLAMA_FTYPE_MOSTLY_Q3_K_M:
11020
  case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break;
 
11025
  case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break;
11026
  case LLAMA_FTYPE_MOSTLY_IQ2_XXS: quantized_type = GGML_TYPE_IQ2_XXS; break;
11027
  case LLAMA_FTYPE_MOSTLY_IQ2_XS: quantized_type = GGML_TYPE_IQ2_XS; break;
11028
+ case LLAMA_FTYPE_MOSTLY_IQ2_S: quantized_type = GGML_TYPE_IQ2_XS; break;
11029
+ case LLAMA_FTYPE_MOSTLY_IQ2_M: quantized_type = GGML_TYPE_IQ2_S; break;
11030
  case LLAMA_FTYPE_MOSTLY_IQ3_XXS: quantized_type = GGML_TYPE_IQ3_XXS; break;
11031
  case LLAMA_FTYPE_MOSTLY_IQ1_S: quantized_type = GGML_TYPE_IQ1_S; break;
11032
  case LLAMA_FTYPE_MOSTLY_IQ4_NL: quantized_type = GGML_TYPE_IQ4_NL; break;
11033
+ case LLAMA_FTYPE_MOSTLY_IQ4_XS: quantized_type = GGML_TYPE_IQ4_XS; break;
11034
  case LLAMA_FTYPE_MOSTLY_IQ3_S: quantized_type = GGML_TYPE_IQ3_S; break;
11035
  case LLAMA_FTYPE_MOSTLY_IQ3_M: quantized_type = GGML_TYPE_IQ3_S; break;
11036
 
11162
  quantize &= !params->only_copy;
11163
 
11164
  // do not quantize expert gating tensors
11165
+ // NOTE: can't use LLM_TN here because the layer number is not known
11166
+ quantize &= name.find("ffn_gate_inp.weight") == std::string::npos;
11167
 
11168
  // do not quantize positional embeddings and token types (BERT)
11169
  quantize &= name != LLM_TN(model.arch)(LLM_TENSOR_POS_EMBD, "weight");
 
11207
  }
11208
  if ((new_type == GGML_TYPE_IQ2_XXS ||
11209
  new_type == GGML_TYPE_IQ2_XS ||
11210
+ new_type == GGML_TYPE_IQ2_S ||
11211
  new_type == GGML_TYPE_IQ1_S ||
11212
  (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
11213
  LLAMA_LOG_ERROR("\n\n============================================================\n");
 
11663
  /*.yarn_beta_fast =*/ 32.0f,
11664
  /*.yarn_beta_slow =*/ 1.0f,
11665
  /*.yarn_orig_ctx =*/ 0,
11666
+ /*.defrag_thold =*/ -1.0f,
11667
  /*.cb_eval =*/ nullptr,
11668
  /*.cb_eval_user_data =*/ nullptr,
11669
  /*.type_k =*/ GGML_TYPE_F16,
 
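// --- editor's sketch: opting in to auto-defrag (not from the diff) -------------
// With the -1.0f default above, automatic defragmentation stays disabled; a
// caller enables it per context:
static struct llama_context * sketch_ctx_with_defrag(struct llama_model * model) {
    struct llama_context_params cparams = llama_context_default_params();
    cparams.defrag_thold = 0.1f; // defrag once >10% of the active cache is holes
    return llama_new_context_with_model(model, cparams);
}
// --------------------------------------------------------------------------------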
11828
  cparams.yarn_attn_factor = params.yarn_attn_factor;
11829
  cparams.yarn_beta_fast = params.yarn_beta_fast;
11830
  cparams.yarn_beta_slow = params.yarn_beta_slow;
11831
+ cparams.defrag_thold = params.defrag_thold;
11832
  cparams.mul_mat_q = params.mul_mat_q;
11833
  cparams.offload_kqv = params.offload_kqv;
11834
  cparams.do_pooling = params.do_pooling;
 
11951
  }
11952
  ctx->backends.push_back(ctx->backend_cpu);
11953
 
11954
+ if (!llama_kv_cache_init(ctx->kv_self, ctx->model, type_k, type_v, cparams.n_ctx, cparams.offload_kqv)) {
 
11955
  LLAMA_LOG_ERROR("%s: llama_kv_cache_init() failed for self-attention cache\n", __func__);
11956
  llama_free(ctx);
11957
  return nullptr;
 
12030
  }
12031
 
12032
  // buffer used to store the computation graph and the tensor meta data
12033
+ ctx->buf_compute_meta.resize(ggml_tensor_overhead()*LLAMA_MAX_NODES + ggml_graph_overhead_custom(LLAMA_MAX_NODES, false));
12034
 
12035
  ctx->sched = ggml_backend_sched_new(ctx->backends.data(), backend_buft.data(), ctx->backends.size(), LLAMA_MAX_NODES);
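
The resize reserves the worst case: one tensor struct per graph node plus the bookkeeping of a graph of LLAMA_MAX_NODES nodes (the false drops gradient storage). Broken out as a sketch:

    // sketch: the two components of the graph metadata buffer
    const size_t per_node  = ggml_tensor_overhead();                             // bytes per tensor struct
    const size_t graph_hdr = ggml_graph_overhead_custom(LLAMA_MAX_NODES, false); // node/leaf arrays, no grads
    const size_t total     = per_node*LLAMA_MAX_NODES + graph_hdr;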
12036
 
 
12099
  return model->vocab.type;
12100
  }
12101
 
12102
+ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
12103
+ switch (model->arch) {
12104
+ // these models do not use RoPE
12105
+ case LLM_ARCH_GPT2:
12106
+ case LLM_ARCH_GPTJ:
12107
+ case LLM_ARCH_GPTNEOX:
12108
+ case LLM_ARCH_MPT:
12109
+ case LLM_ARCH_REFACT:
12110
+ case LLM_ARCH_BLOOM:
12111
+ return LLAMA_ROPE_TYPE_NONE;
12112
+
12113
+ // use what we call a normal RoPE, operating on pairs of consecutive head values
12114
+ case LLM_ARCH_LLAMA:
12115
+ case LLM_ARCH_BAICHUAN:
12116
+ case LLM_ARCH_STARCODER:
12117
+ case LLM_ARCH_PLAMO:
12118
+ case LLM_ARCH_CODESHELL:
12119
+ case LLM_ARCH_ORION:
12120
+ case LLM_ARCH_INTERNLM2:
12121
+ case LLM_ARCH_MINICPM:
12122
+ return LLAMA_ROPE_TYPE_NORM;
12123
+
12124
+ // the pairs of head values are offset by n_rot/2
12125
+ case LLM_ARCH_FALCON:
12126
+ case LLM_ARCH_PERSIMMON:
12127
+ case LLM_ARCH_BERT:
12128
+ case LLM_ARCH_NOMIC_BERT:
12129
+ case LLM_ARCH_STABLELM:
12130
+ case LLM_ARCH_QWEN:
12131
+ case LLM_ARCH_QWEN2:
12132
+ case LLM_ARCH_PHI2:
12133
+ case LLM_ARCH_GEMMA:
12134
+ return LLAMA_ROPE_TYPE_NEOX;
12135
+
12136
+ // all model arches should be listed explicitly here
12137
+ case LLM_ARCH_UNKNOWN:
12138
+ GGML_ASSERT(false && "unknown architecture");
12139
+ break;
12140
+ }
12141
+
12142
+ return LLAMA_ROPE_TYPE_NONE;
12143
+ }
12144
+
12145
  int32_t llama_n_vocab(const struct llama_model * model) {
12146
  return model->vocab.id_to_token.size();
12147
  }
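
Callers can now branch on the RoPE family instead of hard-coding per-architecture knowledge; a usage sketch:

    // sketch: query the RoPE variant of a loaded model
    switch (llama_rope_type(model)) {
        case LLAMA_ROPE_TYPE_NONE: /* no rotary embedding, e.g. GPT-2         */ break;
        case LLAMA_ROPE_TYPE_NORM: /* rotate consecutive pairs of head values */ break;
        case LLAMA_ROPE_TYPE_NEOX: /* pairs offset by n_rot/2, e.g. Falcon    */ break;
        default: break;
    }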
 
12384
  llama_kv_cache_seq_keep(ctx->kv_self, seq_id);
12385
  }
12386
 
12387
+ void llama_kv_cache_seq_add(struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos delta) {
12388
  if (delta == 0) {
12389
  return;
12390
  }
12391
 
12392
+ llama_kv_cache_seq_add(ctx->kv_self, seq_id, p0, p1, delta);
12393
  }
12394
 
12395
  void llama_kv_cache_seq_div(struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, int d) {
 
12400
  llama_kv_cache_seq_div(ctx->kv_self, seq_id, p0, p1, d);
12401
  }
12402
 
12403
+ llama_pos llama_kv_cache_seq_pos_max(struct llama_context * ctx, llama_seq_id seq_id) {
12404
+ return llama_kv_cache_seq_pos_max(ctx->kv_self, seq_id);
12405
+ }
12406
+
12407
+ void llama_kv_cache_defrag(struct llama_context * ctx) {
12408
+ llama_kv_cache_defrag(ctx->kv_self);
12409
+ }
12410
+
12411
+ void llama_kv_cache_update(struct llama_context * ctx) {
12412
+ llama_kv_cache_update_internal(*ctx);
12413
+ }
12414
+
12415
+
12416
  // Returns the *maximum* size of the state
12417
  size_t llama_get_state_size(const struct llama_context * ctx) {
12418
  // we don't know size of rng until we actually serialize it. so reserve more than enough memory for its serialized state.
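
Together with llama_kv_cache_seq_rm, the renamed seq_add wrapper supports the usual sliding-window shift; a sketch (lctx, n_past and n_discard are placeholders):

    // sketch: drop the oldest n_discard tokens of sequence 0, shift the rest down
    llama_kv_cache_seq_rm (lctx, 0, 0,         n_discard);
    llama_kv_cache_seq_add(lctx, 0, n_discard, n_past, -n_discard);
    // applied lazily on the next llama_decode(), or immediately via:
    llama_kv_cache_update(lctx);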
 
12539
  const auto & hparams = ctx->model.hparams;
12540
  const auto & cparams = ctx->cparams;
12541
 
12542
+ const uint32_t n_layer = hparams.n_layer;
12543
+ const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa();
12544
+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa();
12545
+ const uint32_t n_ctx = cparams.n_ctx;
12546
 
12547
  const size_t kv_buf_size = kv_self.total_size();
12548
  const uint32_t kv_head = kv_self.head;
 
12557
  if (kv_buf_size) {
12558
  std::vector<uint8_t> tmp_buf;
12559
  for (int il = 0; il < (int) n_layer; ++il) {
12560
+ const size_t k_size = ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*kv_head);
12561
+
12562
  tmp_buf.resize(k_size);
12563
  ggml_backend_tensor_get(kv_self.k_l[il], tmp_buf.data(), 0, tmp_buf.size());
12564
  data_ctx->write(tmp_buf.data(), tmp_buf.size());
12565
 
12566
  // v is not contiguous, copy row by row
12567
+ const size_t v_row_size = ggml_row_size(kv_self.v_l[il]->type, kv_head);
12568
+ const size_t v_row_stride = ggml_row_size(kv_self.v_l[il]->type, n_ctx);
12569
+
12570
  tmp_buf.resize(v_row_size);
12571
  for (int ir = 0; ir < (int) n_embd_v_gqa; ++ir) {
12572
  ggml_backend_tensor_get(kv_self.v_l[il], tmp_buf.data(), ir*v_row_stride, tmp_buf.size());
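
ggml_row_size is used instead of multiplying element count by element size so the byte counts stay correct if the K/V cache holds a quantized type, where rows are stored as blocks. For instance, under ggml's standard layouts:

    // sketch: bytes occupied by one row of 1024 elements
    const size_t f16_row  = ggml_row_size(GGML_TYPE_F16,  1024); // 1024 * 2       = 2048 bytes
    const size_t q8_0_row = ggml_row_size(GGML_TYPE_Q8_0, 1024); // (1024/32) * 34 = 1088 bytes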
 
12653
  const auto & hparams = ctx->model.hparams;
12654
  const auto & cparams = ctx->cparams;
12655
 
12656
+ const uint32_t n_layer = hparams.n_layer;
12657
+ const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa();
12658
+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa();
12659
+ const uint32_t n_ctx = cparams.n_ctx;
12660
 
12661
  size_t kv_buf_size;
12662
  uint32_t kv_head;
 
12672
  GGML_ASSERT(kv_self.total_size() == kv_buf_size);
12673
 
12674
  for (int il = 0; il < (int) n_layer; ++il) {
12675
+ const size_t k_size = ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*kv_head);
12676
+
12677
  ggml_backend_tensor_set(kv_self.k_l[il], inp, 0, k_size);
12678
  inp += k_size;
12679
 
12680
  // v is not contiguous, copy row by row
12681
+ const size_t v_row_size = ggml_row_size(kv_self.v_l[il]->type, kv_head);
12682
+ const size_t v_row_stride = ggml_row_size(kv_self.v_l[il]->type, n_ctx);
12683
+
12684
  for (int ir = 0; ir < (int) n_embd_v_gqa; ++ir) {
12685
  ggml_backend_tensor_set(kv_self.v_l[il], inp, ir*v_row_stride, v_row_size);
12686
  inp += v_row_size;
examples/talk-llama/llama.h CHANGED
@@ -64,6 +64,15 @@ extern "C" {
64
  LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece
65
  };
66
 
67
+ // note: these values should be synchronized with ggml_rope
68
+ // TODO: maybe move this enum to ggml.h (ggml_rope_type)
69
+ enum llama_rope_type {
70
+ LLAMA_ROPE_TYPE_NONE = -1,
71
+ LLAMA_ROPE_TYPE_NORM = 0,
72
+ LLAMA_ROPE_TYPE_NEOX = 2,
73
+ LLAMA_ROPE_TYPE_GLM = 4,
74
+ };
75
+
76
  enum llama_token_type {
77
  LLAMA_TOKEN_TYPE_UNDEFINED = 0,
78
  LLAMA_TOKEN_TYPE_NORMAL = 1,

@@ -98,12 +107,15 @@ extern "C" {
107
  LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors
108
  LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
109
  LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
- LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22, // except 1d tensors
110
+ LLAMA_FTYPE_MOSTLY_IQ3_XS = 22, // except 1d tensors
111
  LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
112
  LLAMA_FTYPE_MOSTLY_IQ1_S = 24, // except 1d tensors
113
  LLAMA_FTYPE_MOSTLY_IQ4_NL = 25, // except 1d tensors
114
  LLAMA_FTYPE_MOSTLY_IQ3_S = 26, // except 1d tensors
115
  LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
116
+ LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
117
+ LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
118
+ LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors
119
 
120
  LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
121
  };

@@ -234,6 +246,7 @@ extern "C" {
246
  float yarn_beta_fast; // YaRN low correction dim
247
  float yarn_beta_slow; // YaRN high correction dim
248
  uint32_t yarn_orig_ctx; // YaRN original context size
249
+ float defrag_thold; // defragment the KV cache if holes/size > thold, < 0 disabled (default)
250
 
251
  ggml_backend_sched_eval_callback cb_eval;
252
  void * cb_eval_user_data;

@@ -360,6 +373,7 @@ extern "C" {
373
  LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
374
 
375
  LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
376
+ LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model);
377
 
378
  LLAMA_API int32_t llama_n_vocab (const struct llama_model * model);
379
  LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);

@@ -514,10 +528,12 @@ extern "C" {
528
  llama_seq_id seq_id);
529
 
530
  // Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
- // If the KV cache is RoPEd, the KV data is updated accordingly
531
+ // If the KV cache is RoPEd, the KV data is updated accordingly:
532
+ // - lazily on next llama_decode()
533
+ // - explicitly with llama_kv_cache_update()
534
  // p0 < 0 : [0, p1]
535
  // p1 < 0 : [p0, inf)
- LLAMA_API void llama_kv_cache_seq_shift(
536
+ LLAMA_API void llama_kv_cache_seq_add(
537
  struct llama_context * ctx,
538
  llama_seq_id seq_id,
539
  llama_pos p0,

@@ -525,7 +541,9 @@ extern "C" {
541
  llama_pos delta);
542
 
543
  // Integer division of the positions by factor of `d > 1`
- // If the KV cache is RoPEd, the KV data is updated accordingly
544
+ // If the KV cache is RoPEd, the KV data is updated accordingly:
545
+ // - lazily on next llama_decode()
546
+ // - explicitly with llama_kv_cache_update()
547
  // p0 < 0 : [0, p1]
548
  // p1 < 0 : [p0, inf)
549
  LLAMA_API void llama_kv_cache_seq_div(

@@ -535,6 +553,20 @@ extern "C" {
553
  llama_pos p1,
554
  int d);
555
 
556
+ // Returns the largest position present in the KV cache for the specified sequence
557
+ LLAMA_API llama_pos llama_kv_cache_seq_pos_max(
558
+ struct llama_context * ctx,
559
+ llama_seq_id seq_id);
560
+
561
+ // Defragment the KV cache
562
+ // This will be applied:
563
+ // - lazily on next llama_decode()
564
+ // - explicitly with llama_kv_cache_update()
565
+ LLAMA_API void llama_kv_cache_defrag(struct llama_context * ctx);
566
+
567
+ // Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
568
+ LLAMA_API void llama_kv_cache_update(struct llama_context * ctx);
569
+
570
  //
571
  // State / sessions
572
  //
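
A short sketch of driving the new maintenance entry points from user code (ctx is an existing llama_context):

    // sketch: schedule a defragmentation and apply it right away,
    // rather than waiting for the next llama_decode()
    llama_kv_cache_defrag(ctx);
    llama_kv_cache_update(ctx);

    // sketch: how far does sequence 0 currently extend in the cache?
    const llama_pos pos_max = llama_kv_cache_seq_pos_max(ctx, 0);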
examples/talk-llama/unicode.h CHANGED
@@ -404,7 +666,8 @@ static std::unordered_map<uint32_t, int> codepoint_type_map() {
404
 
405
  static int codepoint_type(uint32_t cp) {
406
  static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
407
- return codepoint_types.find(cp) == codepoint_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : codepoint_types.at(cp);
 
408
  }
409
 
410
  static int codepoint_type(const std::string & utf8) {
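
The removed line performed two hash lookups (find, then at); a single-lookup equivalent is sketched below as an assumption about the shape of the replacement, not its verbatim text:

    static int codepoint_type(uint32_t cp) {
        static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
        const auto it = codepoint_types.find(cp); // one lookup instead of find + at
        return it == codepoint_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : it->second;
    }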
 
@@ -223,6 +223,268 @@ static const std::vector<std::pair<uint32_t, uint32_t>> control_ranges = {
223
  {0x2B81E, 0x2B81F}, {0x2CEA2, 0x2CEAF}, {0x2EBE1, 0x2F7FF}, {0x2FA1E, 0x2FFFF}, {0x3134B, 0xE00FF}, {0xE01F0, 0x10FFFF},
224
  };
225
 
226
+ static const std::unordered_map<uint32_t, std::vector<uint32_t>> nfd_map = {
227
+ {0xC0, {0x41, 0x300}}, {0xC1, {0x41, 0x301}}, {0xC2, {0x41, 0x302}}, {0xC3, {0x41, 0x303}}, {0xC4, {0x41, 0x308}}, {0xC5, {0x41, 0x30A}}, {0xC7, {0x43, 0x327}}, {0xC8, {0x45, 0x300}},
228
+ {0xC9, {0x45, 0x301}}, {0xCA, {0x45, 0x302}}, {0xCB, {0x45, 0x308}}, {0xCC, {0x49, 0x300}}, {0xCD, {0x49, 0x301}}, {0xCE, {0x49, 0x302}}, {0xCF, {0x49, 0x308}}, {0xD1, {0x4E, 0x303}},
229
+ {0xD2, {0x4F, 0x300}}, {0xD3, {0x4F, 0x301}}, {0xD4, {0x4F, 0x302}}, {0xD5, {0x4F, 0x303}}, {0xD6, {0x4F, 0x308}}, {0xD9, {0x55, 0x300}}, {0xDA, {0x55, 0x301}}, {0xDB, {0x55, 0x302}},
230
+ {0xDC, {0x55, 0x308}}, {0xDD, {0x59, 0x301}}, {0xE0, {0x61, 0x300}}, {0xE1, {0x61, 0x301}}, {0xE2, {0x61, 0x302}}, {0xE3, {0x61, 0x303}}, {0xE4, {0x61, 0x308}}, {0xE5, {0x61, 0x30A}},
231
+ {0xE7, {0x63, 0x327}}, {0xE8, {0x65, 0x300}}, {0xE9, {0x65, 0x301}}, {0xEA, {0x65, 0x302}}, {0xEB, {0x65, 0x308}}, {0xEC, {0x69, 0x300}}, {0xED, {0x69, 0x301}}, {0xEE, {0x69, 0x302}},
232
+ {0xEF, {0x69, 0x308}}, {0xF1, {0x6E, 0x303}}, {0xF2, {0x6F, 0x300}}, {0xF3, {0x6F, 0x301}}, {0xF4, {0x6F, 0x302}}, {0xF5, {0x6F, 0x303}}, {0xF6, {0x6F, 0x308}}, {0xF9, {0x75, 0x300}},
233
+ {0xFA, {0x75, 0x301}}, {0xFB, {0x75, 0x302}}, {0xFC, {0x75, 0x308}}, {0xFD, {0x79, 0x301}}, {0xFF, {0x79, 0x308}}, {0x100, {0x41, 0x304}}, {0x101, {0x61, 0x304}}, {0x102, {0x41, 0x306}},
234
+ {0x103, {0x61, 0x306}}, {0x104, {0x41, 0x328}}, {0x105, {0x61, 0x328}}, {0x106, {0x43, 0x301}}, {0x107, {0x63, 0x301}}, {0x108, {0x43, 0x302}}, {0x109, {0x63, 0x302}}, {0x10A, {0x43, 0x307}},
235
+ {0x10B, {0x63, 0x307}}, {0x10C, {0x43, 0x30C}}, {0x10D, {0x63, 0x30C}}, {0x10E, {0x44, 0x30C}}, {0x10F, {0x64, 0x30C}}, {0x112, {0x45, 0x304}}, {0x113, {0x65, 0x304}}, {0x114, {0x45, 0x306}},
236
+ {0x115, {0x65, 0x306}}, {0x116, {0x45, 0x307}}, {0x117, {0x65, 0x307}}, {0x118, {0x45, 0x328}}, {0x119, {0x65, 0x328}}, {0x11A, {0x45, 0x30C}}, {0x11B, {0x65, 0x30C}}, {0x11C, {0x47, 0x302}},
237
+ {0x11D, {0x67, 0x302}}, {0x11E, {0x47, 0x306}}, {0x11F, {0x67, 0x306}}, {0x120, {0x47, 0x307}}, {0x121, {0x67, 0x307}}, {0x122, {0x47, 0x327}}, {0x123, {0x67, 0x327}}, {0x124, {0x48, 0x302}},
238
+ {0x125, {0x68, 0x302}}, {0x128, {0x49, 0x303}}, {0x129, {0x69, 0x303}}, {0x12A, {0x49, 0x304}}, {0x12B, {0x69, 0x304}}, {0x12C, {0x49, 0x306}}, {0x12D, {0x69, 0x306}}, {0x12E, {0x49, 0x328}},
239
+ {0x12F, {0x69, 0x328}}, {0x130, {0x49, 0x307}}, {0x134, {0x4A, 0x302}}, {0x135, {0x6A, 0x302}}, {0x136, {0x4B, 0x327}}, {0x137, {0x6B, 0x327}}, {0x139, {0x4C, 0x301}}, {0x13A, {0x6C, 0x301}},
240
+ {0x13B, {0x4C, 0x327}}, {0x13C, {0x6C, 0x327}}, {0x13D, {0x4C, 0x30C}}, {0x13E, {0x6C, 0x30C}}, {0x143, {0x4E, 0x301}}, {0x144, {0x6E, 0x301}}, {0x145, {0x4E, 0x327}}, {0x146, {0x6E, 0x327}},
241
+ {0x147, {0x4E, 0x30C}}, {0x148, {0x6E, 0x30C}}, {0x14C, {0x4F, 0x304}}, {0x14D, {0x6F, 0x304}}, {0x14E, {0x4F, 0x306}}, {0x14F, {0x6F, 0x306}}, {0x150, {0x4F, 0x30B}}, {0x151, {0x6F, 0x30B}},
242
+ {0x154, {0x52, 0x301}}, {0x155, {0x72, 0x301}}, {0x156, {0x52, 0x327}}, {0x157, {0x72, 0x327}}, {0x158, {0x52, 0x30C}}, {0x159, {0x72, 0x30C}}, {0x15A, {0x53, 0x301}}, {0x15B, {0x73, 0x301}},
243
+ {0x15C, {0x53, 0x302}}, {0x15D, {0x73, 0x302}}, {0x15E, {0x53, 0x327}}, {0x15F, {0x73, 0x327}}, {0x160, {0x53, 0x30C}}, {0x161, {0x73, 0x30C}}, {0x162, {0x54, 0x327}}, {0x163, {0x74, 0x327}},
244
+ {0x164, {0x54, 0x30C}}, {0x165, {0x74, 0x30C}}, {0x168, {0x55, 0x303}}, {0x169, {0x75, 0x303}}, {0x16A, {0x55, 0x304}}, {0x16B, {0x75, 0x304}}, {0x16C, {0x55, 0x306}}, {0x16D, {0x75, 0x306}},
245
+ {0x16E, {0x55, 0x30A}}, {0x16F, {0x75, 0x30A}}, {0x170, {0x55, 0x30B}}, {0x171, {0x75, 0x30B}}, {0x172, {0x55, 0x328}}, {0x173, {0x75, 0x328}}, {0x174, {0x57, 0x302}}, {0x175, {0x77, 0x302}},
246
+ {0x176, {0x59, 0x302}}, {0x177, {0x79, 0x302}}, {0x178, {0x59, 0x308}}, {0x179, {0x5A, 0x301}}, {0x17A, {0x7A, 0x301}}, {0x17B, {0x5A, 0x307}}, {0x17C, {0x7A, 0x307}}, {0x17D, {0x5A, 0x30C}},
247
+ {0x17E, {0x7A, 0x30C}}, {0x1A0, {0x4F, 0x31B}}, {0x1A1, {0x6F, 0x31B}}, {0x1AF, {0x55, 0x31B}}, {0x1B0, {0x75, 0x31B}}, {0x1CD, {0x41, 0x30C}}, {0x1CE, {0x61, 0x30C}}, {0x1CF, {0x49, 0x30C}},
248
+ {0x1D0, {0x69, 0x30C}}, {0x1D1, {0x4F, 0x30C}}, {0x1D2, {0x6F, 0x30C}}, {0x1D3, {0x55, 0x30C}}, {0x1D4, {0x75, 0x30C}}, {0x1D5, {0x55, 0x308, 0x304}}, {0x1D6, {0x75, 0x308, 0x304}},
249
+ {0x1D7, {0x55, 0x308, 0x301}}, {0x1D8, {0x75, 0x308, 0x301}}, {0x1D9, {0x55, 0x308, 0x30C}}, {0x1DA, {0x75, 0x308, 0x30C}}, {0x1DB, {0x55, 0x308, 0x300}}, {0x1DC, {0x75, 0x308, 0x300}},
250
+ {0x1DE, {0x41, 0x308, 0x304}}, {0x1DF, {0x61, 0x308, 0x304}}, {0x1E0, {0x41, 0x307, 0x304}}, {0x1E1, {0x61, 0x307, 0x304}}, {0x1E2, {0xC6, 0x304}}, {0x1E3, {0xE6, 0x304}}, {0x1E6, {0x47, 0x30C}},
251
+ {0x1E7, {0x67, 0x30C}}, {0x1E8, {0x4B, 0x30C}}, {0x1E9, {0x6B, 0x30C}}, {0x1EA, {0x4F, 0x328}}, {0x1EB, {0x6F, 0x328}}, {0x1EC, {0x4F, 0x328, 0x304}}, {0x1ED, {0x6F, 0x328, 0x304}},
252
+ {0x1EE, {0x1B7, 0x30C}}, {0x1EF, {0x292, 0x30C}}, {0x1F0, {0x6A, 0x30C}}, {0x1F4, {0x47, 0x301}}, {0x1F5, {0x67, 0x301}}, {0x1F8, {0x4E, 0x300}}, {0x1F9, {0x6E, 0x300}}, {0x1FA, {0x41, 0x30A, 0x301}},
253
+ {0x1FB, {0x61, 0x30A, 0x301}}, {0x1FC, {0xC6, 0x301}}, {0x1FD, {0xE6, 0x301}}, {0x1FE, {0xD8, 0x301}}, {0x1FF, {0xF8, 0x301}}, {0x200, {0x41, 0x30F}}, {0x201, {0x61, 0x30F}}, {0x202, {0x41, 0x311}},
254
+ {0x203, {0x61, 0x311}}, {0x204, {0x45, 0x30F}}, {0x205, {0x65, 0x30F}}, {0x206, {0x45, 0x311}}, {0x207, {0x65, 0x311}}, {0x208, {0x49, 0x30F}}, {0x209, {0x69, 0x30F}}, {0x20A, {0x49, 0x311}},
255
+ {0x20B, {0x69, 0x311}}, {0x20C, {0x4F, 0x30F}}, {0x20D, {0x6F, 0x30F}}, {0x20E, {0x4F, 0x311}}, {0x20F, {0x6F, 0x311}}, {0x210, {0x52, 0x30F}}, {0x211, {0x72, 0x30F}}, {0x212, {0x52, 0x311}},
256
+ {0x213, {0x72, 0x311}}, {0x214, {0x55, 0x30F}}, {0x215, {0x75, 0x30F}}, {0x216, {0x55, 0x311}}, {0x217, {0x75, 0x311}}, {0x218, {0x53, 0x326}}, {0x219, {0x73, 0x326}}, {0x21A, {0x54, 0x326}},
257
+ {0x21B, {0x74, 0x326}}, {0x21E, {0x48, 0x30C}}, {0x21F, {0x68, 0x30C}}, {0x226, {0x41, 0x307}}, {0x227, {0x61, 0x307}}, {0x228, {0x45, 0x327}}, {0x229, {0x65, 0x327}}, {0x22A, {0x4F, 0x308, 0x304}},
258
+ {0x22B, {0x6F, 0x308, 0x304}}, {0x22C, {0x4F, 0x303, 0x304}}, {0x22D, {0x6F, 0x303, 0x304}}, {0x22E, {0x4F, 0x307}}, {0x22F, {0x6F, 0x307}}, {0x230, {0x4F, 0x307, 0x304}},
259
+ {0x231, {0x6F, 0x307, 0x304}}, {0x232, {0x59, 0x304}}, {0x233, {0x79, 0x304}}, {0x340, {0x300}}, {0x341, {0x301}}, {0x343, {0x313}}, {0x344, {0x308, 0x301}}, {0x374, {0x2B9}}, {0x37E, {0x3B}},
260
+ {0x385, {0xA8, 0x301}}, {0x386, {0x391, 0x301}}, {0x387, {0xB7}}, {0x388, {0x395, 0x301}}, {0x389, {0x397, 0x301}}, {0x38A, {0x399, 0x301}}, {0x38C, {0x39F, 0x301}}, {0x38E, {0x3A5, 0x301}},
261
+ {0x38F, {0x3A9, 0x301}}, {0x390, {0x3B9, 0x308, 0x301}}, {0x3AA, {0x399, 0x308}}, {0x3AB, {0x3A5, 0x308}}, {0x3AC, {0x3B1, 0x301}}, {0x3AD, {0x3B5, 0x301}}, {0x3AE, {0x3B7, 0x301}},
262
+ {0x3AF, {0x3B9, 0x301}}, {0x3B0, {0x3C5, 0x308, 0x301}}, {0x3CA, {0x3B9, 0x308}}, {0x3CB, {0x3C5, 0x308}}, {0x3CC, {0x3BF, 0x301}}, {0x3CD, {0x3C5, 0x301}}, {0x3CE, {0x3C9, 0x301}},
263
+ {0x3D3, {0x3D2, 0x301}}, {0x3D4, {0x3D2, 0x308}}, {0x400, {0x415, 0x300}}, {0x401, {0x415, 0x308}}, {0x403, {0x413, 0x301}}, {0x407, {0x406, 0x308}}, {0x40C, {0x41A, 0x301}}, {0x40D, {0x418, 0x300}},
264
+ {0x40E, {0x423, 0x306}}, {0x419, {0x418, 0x306}}, {0x439, {0x438, 0x306}}, {0x450, {0x435, 0x300}}, {0x451, {0x435, 0x308}}, {0x453, {0x433, 0x301}}, {0x457, {0x456, 0x308}}, {0x45C, {0x43A, 0x301}},
265
+ {0x45D, {0x438, 0x300}}, {0x45E, {0x443, 0x306}}, {0x476, {0x474, 0x30F}}, {0x477, {0x475, 0x30F}}, {0x4C1, {0x416, 0x306}}, {0x4C2, {0x436, 0x306}}, {0x4D0, {0x410, 0x306}}, {0x4D1, {0x430, 0x306}},
266
+ {0x4D2, {0x410, 0x308}}, {0x4D3, {0x430, 0x308}}, {0x4D6, {0x415, 0x306}}, {0x4D7, {0x435, 0x306}}, {0x4DA, {0x4D8, 0x308}}, {0x4DB, {0x4D9, 0x308}}, {0x4DC, {0x416, 0x308}}, {0x4DD, {0x436, 0x308}},
267
+ {0x4DE, {0x417, 0x308}}, {0x4DF, {0x437, 0x308}}, {0x4E2, {0x418, 0x304}}, {0x4E3, {0x438, 0x304}}, {0x4E4, {0x418, 0x308}}, {0x4E5, {0x438, 0x308}}, {0x4E6, {0x41E, 0x308}}, {0x4E7, {0x43E, 0x308}},
268
+ {0x4EA, {0x4E8, 0x308}}, {0x4EB, {0x4E9, 0x308}}, {0x4EC, {0x42D, 0x308}}, {0x4ED, {0x44D, 0x308}}, {0x4EE, {0x423, 0x304}}, {0x4EF, {0x443, 0x304}}, {0x4F0, {0x423, 0x308}}, {0x4F1, {0x443, 0x308}},
269
+ {0x4F2, {0x423, 0x30B}}, {0x4F3, {0x443, 0x30B}}, {0x4F4, {0x427, 0x308}}, {0x4F5, {0x447, 0x308}}, {0x4F8, {0x42B, 0x308}}, {0x4F9, {0x44B, 0x308}}, {0x622, {0x627, 0x653}}, {0x623, {0x627, 0x654}},
270
+ {0x624, {0x648, 0x654}}, {0x625, {0x627, 0x655}}, {0x626, {0x64A, 0x654}}, {0x6C0, {0x6D5, 0x654}}, {0x6C2, {0x6C1, 0x654}}, {0x6D3, {0x6D2, 0x654}}, {0x929, {0x928, 0x93C}}, {0x931, {0x930, 0x93C}},
271
+ {0x934, {0x933, 0x93C}}, {0x958, {0x915, 0x93C}}, {0x959, {0x916, 0x93C}}, {0x95A, {0x917, 0x93C}}, {0x95B, {0x91C, 0x93C}}, {0x95C, {0x921, 0x93C}}, {0x95D, {0x922, 0x93C}}, {0x95E, {0x92B, 0x93C}},
272
+ {0x95F, {0x92F, 0x93C}}, {0x9CB, {0x9C7, 0x9BE}}, {0x9CC, {0x9C7, 0x9D7}}, {0x9DC, {0x9A1, 0x9BC}}, {0x9DD, {0x9A2, 0x9BC}}, {0x9DF, {0x9AF, 0x9BC}}, {0xA33, {0xA32, 0xA3C}}, {0xA36, {0xA38, 0xA3C}},
273
+ {0xA59, {0xA16, 0xA3C}}, {0xA5A, {0xA17, 0xA3C}}, {0xA5B, {0xA1C, 0xA3C}}, {0xA5E, {0xA2B, 0xA3C}}, {0xB48, {0xB47, 0xB56}}, {0xB4B, {0xB47, 0xB3E}}, {0xB4C, {0xB47, 0xB57}}, {0xB5C, {0xB21, 0xB3C}},
274
+ {0xB5D, {0xB22, 0xB3C}}, {0xB94, {0xB92, 0xBD7}}, {0xBCA, {0xBC6, 0xBBE}}, {0xBCB, {0xBC7, 0xBBE}}, {0xBCC, {0xBC6, 0xBD7}}, {0xC48, {0xC46, 0xC56}}, {0xCC0, {0xCBF, 0xCD5}}, {0xCC7, {0xCC6, 0xCD5}},
275
+ {0xCC8, {0xCC6, 0xCD6}}, {0xCCA, {0xCC6, 0xCC2}}, {0xCCB, {0xCC6, 0xCC2, 0xCD5}}, {0xD4A, {0xD46, 0xD3E}}, {0xD4B, {0xD47, 0xD3E}}, {0xD4C, {0xD46, 0xD57}}, {0xDDA, {0xDD9, 0xDCA}},
276
+ {0xDDC, {0xDD9, 0xDCF}}, {0xDDD, {0xDD9, 0xDCF, 0xDCA}}, {0xDDE, {0xDD9, 0xDDF}}, {0xF43, {0xF42, 0xFB7}}, {0xF4D, {0xF4C, 0xFB7}}, {0xF52, {0xF51, 0xFB7}}, {0xF57, {0xF56, 0xFB7}},
277
+ {0xF5C, {0xF5B, 0xFB7}}, {0xF69, {0xF40, 0xFB5}}, {0xF73, {0xF71, 0xF72}}, {0xF75, {0xF71, 0xF74}}, {0xF76, {0xFB2, 0xF80}}, {0xF78, {0xFB3, 0xF80}}, {0xF81, {0xF71, 0xF80}}, {0xF93, {0xF92, 0xFB7}},
278
+ {0xF9D, {0xF9C, 0xFB7}}, {0xFA2, {0xFA1, 0xFB7}}, {0xFA7, {0xFA6, 0xFB7}}, {0xFAC, {0xFAB, 0xFB7}}, {0xFB9, {0xF90, 0xFB5}}, {0x1026, {0x1025, 0x102E}}, {0x1B06, {0x1B05, 0x1B35}},
279
+ {0x1B08, {0x1B07, 0x1B35}}, {0x1B0A, {0x1B09, 0x1B35}}, {0x1B0C, {0x1B0B, 0x1B35}}, {0x1B0E, {0x1B0D, 0x1B35}}, {0x1B12, {0x1B11, 0x1B35}}, {0x1B3B, {0x1B3A, 0x1B35}}, {0x1B3D, {0x1B3C, 0x1B35}},
280
+ {0x1B40, {0x1B3E, 0x1B35}}, {0x1B41, {0x1B3F, 0x1B35}}, {0x1B43, {0x1B42, 0x1B35}}, {0x1E00, {0x41, 0x325}}, {0x1E01, {0x61, 0x325}}, {0x1E02, {0x42, 0x307}}, {0x1E03, {0x62, 0x307}},
281
+ {0x1E04, {0x42, 0x323}}, {0x1E05, {0x62, 0x323}}, {0x1E06, {0x42, 0x331}}, {0x1E07, {0x62, 0x331}}, {0x1E08, {0x43, 0x327, 0x301}}, {0x1E09, {0x63, 0x327, 0x301}}, {0x1E0A, {0x44, 0x307}},
282
+ {0x1E0B, {0x64, 0x307}}, {0x1E0C, {0x44, 0x323}}, {0x1E0D, {0x64, 0x323}}, {0x1E0E, {0x44, 0x331}}, {0x1E0F, {0x64, 0x331}}, {0x1E10, {0x44, 0x327}}, {0x1E11, {0x64, 0x327}}, {0x1E12, {0x44, 0x32D}},
283
+ {0x1E13, {0x64, 0x32D}}, {0x1E14, {0x45, 0x304, 0x300}}, {0x1E15, {0x65, 0x304, 0x300}}, {0x1E16, {0x45, 0x304, 0x301}}, {0x1E17, {0x65, 0x304, 0x301}}, {0x1E18, {0x45, 0x32D}},
284
+ {0x1E19, {0x65, 0x32D}}, {0x1E1A, {0x45, 0x330}}, {0x1E1B, {0x65, 0x330}}, {0x1E1C, {0x45, 0x327, 0x306}}, {0x1E1D, {0x65, 0x327, 0x306}}, {0x1E1E, {0x46, 0x307}}, {0x1E1F, {0x66, 0x307}},
285
+ {0x1E20, {0x47, 0x304}}, {0x1E21, {0x67, 0x304}}, {0x1E22, {0x48, 0x307}}, {0x1E23, {0x68, 0x307}}, {0x1E24, {0x48, 0x323}}, {0x1E25, {0x68, 0x323}}, {0x1E26, {0x48, 0x308}}, {0x1E27, {0x68, 0x308}},
286
+ {0x1E28, {0x48, 0x327}}, {0x1E29, {0x68, 0x327}}, {0x1E2A, {0x48, 0x32E}}, {0x1E2B, {0x68, 0x32E}}, {0x1E2C, {0x49, 0x330}}, {0x1E2D, {0x69, 0x330}}, {0x1E2E, {0x49, 0x308, 0x301}},
287
+ {0x1E2F, {0x69, 0x308, 0x301}}, {0x1E30, {0x4B, 0x301}}, {0x1E31, {0x6B, 0x301}}, {0x1E32, {0x4B, 0x323}}, {0x1E33, {0x6B, 0x323}}, {0x1E34, {0x4B, 0x331}}, {0x1E35, {0x6B, 0x331}},
288
+ {0x1E36, {0x4C, 0x323}}, {0x1E37, {0x6C, 0x323}}, {0x1E38, {0x4C, 0x323, 0x304}}, {0x1E39, {0x6C, 0x323, 0x304}}, {0x1E3A, {0x4C, 0x331}}, {0x1E3B, {0x6C, 0x331}}, {0x1E3C, {0x4C, 0x32D}},
289
+ {0x1E3D, {0x6C, 0x32D}}, {0x1E3E, {0x4D, 0x301}}, {0x1E3F, {0x6D, 0x301}}, {0x1E40, {0x4D, 0x307}}, {0x1E41, {0x6D, 0x307}}, {0x1E42, {0x4D, 0x323}}, {0x1E43, {0x6D, 0x323}}, {0x1E44, {0x4E, 0x307}},
290
+ {0x1E45, {0x6E, 0x307}}, {0x1E46, {0x4E, 0x323}}, {0x1E47, {0x6E, 0x323}}, {0x1E48, {0x4E, 0x331}}, {0x1E49, {0x6E, 0x331}}, {0x1E4A, {0x4E, 0x32D}}, {0x1E4B, {0x6E, 0x32D}},
291
+ {0x1E4C, {0x4F, 0x303, 0x301}}, {0x1E4D, {0x6F, 0x303, 0x301}}, {0x1E4E, {0x4F, 0x303, 0x308}}, {0x1E4F, {0x6F, 0x303, 0x308}}, {0x1E50, {0x4F, 0x304, 0x300}}, {0x1E51, {0x6F, 0x304, 0x300}},
292
+ {0x1E52, {0x4F, 0x304, 0x301}}, {0x1E53, {0x6F, 0x304, 0x301}}, {0x1E54, {0x50, 0x301}}, {0x1E55, {0x70, 0x301}}, {0x1E56, {0x50, 0x307}}, {0x1E57, {0x70, 0x307}}, {0x1E58, {0x52, 0x307}},
293
+ {0x1E59, {0x72, 0x307}}, {0x1E5A, {0x52, 0x323}}, {0x1E5B, {0x72, 0x323}}, {0x1E5C, {0x52, 0x323, 0x304}}, {0x1E5D, {0x72, 0x323, 0x304}}, {0x1E5E, {0x52, 0x331}}, {0x1E5F, {0x72, 0x331}},
294
+ {0x1E60, {0x53, 0x307}}, {0x1E61, {0x73, 0x307}}, {0x1E62, {0x53, 0x323}}, {0x1E63, {0x73, 0x323}}, {0x1E64, {0x53, 0x301, 0x307}}, {0x1E65, {0x73, 0x301, 0x307}}, {0x1E66, {0x53, 0x30C, 0x307}},
295
+ {0x1E67, {0x73, 0x30C, 0x307}}, {0x1E68, {0x53, 0x323, 0x307}}, {0x1E69, {0x73, 0x323, 0x307}}, {0x1E6A, {0x54, 0x307}}, {0x1E6B, {0x74, 0x307}}, {0x1E6C, {0x54, 0x323}}, {0x1E6D, {0x74, 0x323}},
296
+ {0x1E6E, {0x54, 0x331}}, {0x1E6F, {0x74, 0x331}}, {0x1E70, {0x54, 0x32D}}, {0x1E71, {0x74, 0x32D}}, {0x1E72, {0x55, 0x324}}, {0x1E73, {0x75, 0x324}}, {0x1E74, {0x55, 0x330}}, {0x1E75, {0x75, 0x330}},
297
+ {0x1E76, {0x55, 0x32D}}, {0x1E77, {0x75, 0x32D}}, {0x1E78, {0x55, 0x303, 0x301}}, {0x1E79, {0x75, 0x303, 0x301}}, {0x1E7A, {0x55, 0x304, 0x308}}, {0x1E7B, {0x75, 0x304, 0x308}},
298
+ {0x1E7C, {0x56, 0x303}}, {0x1E7D, {0x76, 0x303}}, {0x1E7E, {0x56, 0x323}}, {0x1E7F, {0x76, 0x323}}, {0x1E80, {0x57, 0x300}}, {0x1E81, {0x77, 0x300}}, {0x1E82, {0x57, 0x301}}, {0x1E83, {0x77, 0x301}},
299
+ {0x1E84, {0x57, 0x308}}, {0x1E85, {0x77, 0x308}}, {0x1E86, {0x57, 0x307}}, {0x1E87, {0x77, 0x307}}, {0x1E88, {0x57, 0x323}}, {0x1E89, {0x77, 0x323}}, {0x1E8A, {0x58, 0x307}}, {0x1E8B, {0x78, 0x307}},
300
+ {0x1E8C, {0x58, 0x308}}, {0x1E8D, {0x78, 0x308}}, {0x1E8E, {0x59, 0x307}}, {0x1E8F, {0x79, 0x307}}, {0x1E90, {0x5A, 0x302}}, {0x1E91, {0x7A, 0x302}}, {0x1E92, {0x5A, 0x323}}, {0x1E93, {0x7A, 0x323}},
301
+ {0x1E94, {0x5A, 0x331}}, {0x1E95, {0x7A, 0x331}}, {0x1E96, {0x68, 0x331}}, {0x1E97, {0x74, 0x308}}, {0x1E98, {0x77, 0x30A}}, {0x1E99, {0x79, 0x30A}}, {0x1E9B, {0x17F, 0x307}}, {0x1EA0, {0x41, 0x323}},
302
+ {0x1EA1, {0x61, 0x323}}, {0x1EA2, {0x41, 0x309}}, {0x1EA3, {0x61, 0x309}}, {0x1EA4, {0x41, 0x302, 0x301}}, {0x1EA5, {0x61, 0x302, 0x301}}, {0x1EA6, {0x41, 0x302, 0x300}},
303
+ {0x1EA7, {0x61, 0x302, 0x300}}, {0x1EA8, {0x41, 0x302, 0x309}}, {0x1EA9, {0x61, 0x302, 0x309}}, {0x1EAA, {0x41, 0x302, 0x303}}, {0x1EAB, {0x61, 0x302, 0x303}}, {0x1EAC, {0x41, 0x323, 0x302}},
304
+ {0x1EAD, {0x61, 0x323, 0x302}}, {0x1EAE, {0x41, 0x306, 0x301}}, {0x1EAF, {0x61, 0x306, 0x301}}, {0x1EB0, {0x41, 0x306, 0x300}}, {0x1EB1, {0x61, 0x306, 0x300}}, {0x1EB2, {0x41, 0x306, 0x309}},
305
+ {0x1EB3, {0x61, 0x306, 0x309}}, {0x1EB4, {0x41, 0x306, 0x303}}, {0x1EB5, {0x61, 0x306, 0x303}}, {0x1EB6, {0x41, 0x323, 0x306}}, {0x1EB7, {0x61, 0x323, 0x306}}, {0x1EB8, {0x45, 0x323}},
306
+ {0x1EB9, {0x65, 0x323}}, {0x1EBA, {0x45, 0x309}}, {0x1EBB, {0x65, 0x309}}, {0x1EBC, {0x45, 0x303}}, {0x1EBD, {0x65, 0x303}}, {0x1EBE, {0x45, 0x302, 0x301}}, {0x1EBF, {0x65, 0x302, 0x301}},
307
+ {0x1EC0, {0x45, 0x302, 0x300}}, {0x1EC1, {0x65, 0x302, 0x300}}, {0x1EC2, {0x45, 0x302, 0x309}}, {0x1EC3, {0x65, 0x302, 0x309}}, {0x1EC4, {0x45, 0x302, 0x303}}, {0x1EC5, {0x65, 0x302, 0x303}},
308
+ {0x1EC6, {0x45, 0x323, 0x302}}, {0x1EC7, {0x65, 0x323, 0x302}}, {0x1EC8, {0x49, 0x309}}, {0x1EC9, {0x69, 0x309}}, {0x1ECA, {0x49, 0x323}}, {0x1ECB, {0x69, 0x323}}, {0x1ECC, {0x4F, 0x323}},
309
+ {0x1ECD, {0x6F, 0x323}}, {0x1ECE, {0x4F, 0x309}}, {0x1ECF, {0x6F, 0x309}}, {0x1ED0, {0x4F, 0x302, 0x301}}, {0x1ED1, {0x6F, 0x302, 0x301}}, {0x1ED2, {0x4F, 0x302, 0x300}},
310
+ {0x1ED3, {0x6F, 0x302, 0x300}}, {0x1ED4, {0x4F, 0x302, 0x309}}, {0x1ED5, {0x6F, 0x302, 0x309}}, {0x1ED6, {0x4F, 0x302, 0x303}}, {0x1ED7, {0x6F, 0x302, 0x303}}, {0x1ED8, {0x4F, 0x323, 0x302}},
311
+ {0x1ED9, {0x6F, 0x323, 0x302}}, {0x1EDA, {0x4F, 0x31B, 0x301}}, {0x1EDB, {0x6F, 0x31B, 0x301}}, {0x1EDC, {0x4F, 0x31B, 0x300}}, {0x1EDD, {0x6F, 0x31B, 0x300}}, {0x1EDE, {0x4F, 0x31B, 0x309}},
312
+ {0x1EDF, {0x6F, 0x31B, 0x309}}, {0x1EE0, {0x4F, 0x31B, 0x303}}, {0x1EE1, {0x6F, 0x31B, 0x303}}, {0x1EE2, {0x4F, 0x31B, 0x323}}, {0x1EE3, {0x6F, 0x31B, 0x323}}, {0x1EE4, {0x55, 0x323}},
313
+ {0x1EE5, {0x75, 0x323}}, {0x1EE6, {0x55, 0x309}}, {0x1EE7, {0x75, 0x309}}, {0x1EE8, {0x55, 0x31B, 0x301}}, {0x1EE9, {0x75, 0x31B, 0x301}}, {0x1EEA, {0x55, 0x31B, 0x300}},
314
+ {0x1EEB, {0x75, 0x31B, 0x300}}, {0x1EEC, {0x55, 0x31B, 0x309}}, {0x1EED, {0x75, 0x31B, 0x309}}, {0x1EEE, {0x55, 0x31B, 0x303}}, {0x1EEF, {0x75, 0x31B, 0x303}}, {0x1EF0, {0x55, 0x31B, 0x323}},
315
+ {0x1EF1, {0x75, 0x31B, 0x323}}, {0x1EF2, {0x59, 0x300}}, {0x1EF3, {0x79, 0x300}}, {0x1EF4, {0x59, 0x323}}, {0x1EF5, {0x79, 0x323}}, {0x1EF6, {0x59, 0x309}}, {0x1EF7, {0x79, 0x309}},
316
+ {0x1EF8, {0x59, 0x303}}, {0x1EF9, {0x79, 0x303}}, {0x1F00, {0x3B1, 0x313}}, {0x1F01, {0x3B1, 0x314}}, {0x1F02, {0x3B1, 0x313, 0x300}}, {0x1F03, {0x3B1, 0x314, 0x300}}, {0x1F04, {0x3B1, 0x313, 0x301}},
317
+ {0x1F05, {0x3B1, 0x314, 0x301}}, {0x1F06, {0x3B1, 0x313, 0x342}}, {0x1F07, {0x3B1, 0x314, 0x342}}, {0x1F08, {0x391, 0x313}}, {0x1F09, {0x391, 0x314}}, {0x1F0A, {0x391, 0x313, 0x300}},
318
+ {0x1F0B, {0x391, 0x314, 0x300}}, {0x1F0C, {0x391, 0x313, 0x301}}, {0x1F0D, {0x391, 0x314, 0x301}}, {0x1F0E, {0x391, 0x313, 0x342}}, {0x1F0F, {0x391, 0x314, 0x342}}, {0x1F10, {0x3B5, 0x313}},
319
+ {0x1F11, {0x3B5, 0x314}}, {0x1F12, {0x3B5, 0x313, 0x300}}, {0x1F13, {0x3B5, 0x314, 0x300}}, {0x1F14, {0x3B5, 0x313, 0x301}}, {0x1F15, {0x3B5, 0x314, 0x301}}, {0x1F18, {0x395, 0x313}},
320
+ {0x1F19, {0x395, 0x314}}, {0x1F1A, {0x395, 0x313, 0x300}}, {0x1F1B, {0x395, 0x314, 0x300}}, {0x1F1C, {0x395, 0x313, 0x301}}, {0x1F1D, {0x395, 0x314, 0x301}}, {0x1F20, {0x3B7, 0x313}},
321
+ {0x1F21, {0x3B7, 0x314}}, {0x1F22, {0x3B7, 0x313, 0x300}}, {0x1F23, {0x3B7, 0x314, 0x300}}, {0x1F24, {0x3B7, 0x313, 0x301}}, {0x1F25, {0x3B7, 0x314, 0x301}}, {0x1F26, {0x3B7, 0x313, 0x342}},
322
+ {0x1F27, {0x3B7, 0x314, 0x342}}, {0x1F28, {0x397, 0x313}}, {0x1F29, {0x397, 0x314}}, {0x1F2A, {0x397, 0x313, 0x300}}, {0x1F2B, {0x397, 0x314, 0x300}}, {0x1F2C, {0x397, 0x313, 0x301}},
323
+ {0x1F2D, {0x397, 0x314, 0x301}}, {0x1F2E, {0x397, 0x313, 0x342}}, {0x1F2F, {0x397, 0x314, 0x342}}, {0x1F30, {0x3B9, 0x313}}, {0x1F31, {0x3B9, 0x314}}, {0x1F32, {0x3B9, 0x313, 0x300}},
324
+ {0x1F33, {0x3B9, 0x314, 0x300}}, {0x1F34, {0x3B9, 0x313, 0x301}}, {0x1F35, {0x3B9, 0x314, 0x301}}, {0x1F36, {0x3B9, 0x313, 0x342}}, {0x1F37, {0x3B9, 0x314, 0x342}}, {0x1F38, {0x399, 0x313}},
325
+ {0x1F39, {0x399, 0x314}}, {0x1F3A, {0x399, 0x313, 0x300}}, {0x1F3B, {0x399, 0x314, 0x300}}, {0x1F3C, {0x399, 0x313, 0x301}}, {0x1F3D, {0x399, 0x314, 0x301}}, {0x1F3E, {0x399, 0x313, 0x342}},
326
+ {0x1F3F, {0x399, 0x314, 0x342}}, {0x1F40, {0x3BF, 0x313}}, {0x1F41, {0x3BF, 0x314}}, {0x1F42, {0x3BF, 0x313, 0x300}}, {0x1F43, {0x3BF, 0x314, 0x300}}, {0x1F44, {0x3BF, 0x313, 0x301}},
327
+ {0x1F45, {0x3BF, 0x314, 0x301}}, {0x1F48, {0x39F, 0x313}}, {0x1F49, {0x39F, 0x314}}, {0x1F4A, {0x39F, 0x313, 0x300}}, {0x1F4B, {0x39F, 0x314, 0x300}}, {0x1F4C, {0x39F, 0x313, 0x301}},
328
+ {0x1F4D, {0x39F, 0x314, 0x301}}, {0x1F50, {0x3C5, 0x313}}, {0x1F51, {0x3C5, 0x314}}, {0x1F52, {0x3C5, 0x313, 0x300}}, {0x1F53, {0x3C5, 0x314, 0x300}}, {0x1F54, {0x3C5, 0x313, 0x301}},
329
+ {0x1F55, {0x3C5, 0x314, 0x301}}, {0x1F56, {0x3C5, 0x313, 0x342}}, {0x1F57, {0x3C5, 0x314, 0x342}}, {0x1F59, {0x3A5, 0x314}}, {0x1F5B, {0x3A5, 0x314, 0x300}}, {0x1F5D, {0x3A5, 0x314, 0x301}},
330
+ {0x1F5F, {0x3A5, 0x314, 0x342}}, {0x1F60, {0x3C9, 0x313}}, {0x1F61, {0x3C9, 0x314}}, {0x1F62, {0x3C9, 0x313, 0x300}}, {0x1F63, {0x3C9, 0x314, 0x300}}, {0x1F64, {0x3C9, 0x313, 0x301}},
331
+ {0x1F65, {0x3C9, 0x314, 0x301}}, {0x1F66, {0x3C9, 0x313, 0x342}}, {0x1F67, {0x3C9, 0x314, 0x342}}, {0x1F68, {0x3A9, 0x313}}, {0x1F69, {0x3A9, 0x314}}, {0x1F6A, {0x3A9, 0x313, 0x300}},
332
+ {0x1F6B, {0x3A9, 0x314, 0x300}}, {0x1F6C, {0x3A9, 0x313, 0x301}}, {0x1F6D, {0x3A9, 0x314, 0x301}}, {0x1F6E, {0x3A9, 0x313, 0x342}}, {0x1F6F, {0x3A9, 0x314, 0x342}}, {0x1F70, {0x3B1, 0x300}},
333
+ {0x1F71, {0x3B1, 0x301}}, {0x1F72, {0x3B5, 0x300}}, {0x1F73, {0x3B5, 0x301}}, {0x1F74, {0x3B7, 0x300}}, {0x1F75, {0x3B7, 0x301}}, {0x1F76, {0x3B9, 0x300}}, {0x1F77, {0x3B9, 0x301}},
334
+ {0x1F78, {0x3BF, 0x300}}, {0x1F79, {0x3BF, 0x301}}, {0x1F7A, {0x3C5, 0x300}}, {0x1F7B, {0x3C5, 0x301}}, {0x1F7C, {0x3C9, 0x300}}, {0x1F7D, {0x3C9, 0x301}}, {0x1F80, {0x3B1, 0x313, 0x345}},
335
+ {0x1F81, {0x3B1, 0x314, 0x345}}, {0x1F82, {0x3B1, 0x313, 0x300, 0x345}}, {0x1F83, {0x3B1, 0x314, 0x300, 0x345}}, {0x1F84, {0x3B1, 0x313, 0x301, 0x345}}, {0x1F85, {0x3B1, 0x314, 0x301, 0x345}},
336
+ {0x1F86, {0x3B1, 0x313, 0x342, 0x345}}, {0x1F87, {0x3B1, 0x314, 0x342, 0x345}}, {0x1F88, {0x391, 0x313, 0x345}}, {0x1F89, {0x391, 0x314, 0x345}}, {0x1F8A, {0x391, 0x313, 0x300, 0x345}},
337
+ {0x1F8B, {0x391, 0x314, 0x300, 0x345}}, {0x1F8C, {0x391, 0x313, 0x301, 0x345}}, {0x1F8D, {0x391, 0x314, 0x301, 0x345}}, {0x1F8E, {0x391, 0x313, 0x342, 0x345}}, {0x1F8F, {0x391, 0x314, 0x342, 0x345}},
338
+ {0x1F90, {0x3B7, 0x313, 0x345}}, {0x1F91, {0x3B7, 0x314, 0x345}}, {0x1F92, {0x3B7, 0x313, 0x300, 0x345}}, {0x1F93, {0x3B7, 0x314, 0x300, 0x345}}, {0x1F94, {0x3B7, 0x313, 0x301, 0x345}},
339
+ {0x1F95, {0x3B7, 0x314, 0x301, 0x345}}, {0x1F96, {0x3B7, 0x313, 0x342, 0x345}}, {0x1F97, {0x3B7, 0x314, 0x342, 0x345}}, {0x1F98, {0x397, 0x313, 0x345}}, {0x1F99, {0x397, 0x314, 0x345}},
340
+ {0x1F9A, {0x397, 0x313, 0x300, 0x345}}, {0x1F9B, {0x397, 0x314, 0x300, 0x345}}, {0x1F9C, {0x397, 0x313, 0x301, 0x345}}, {0x1F9D, {0x397, 0x314, 0x301, 0x345}}, {0x1F9E, {0x397, 0x313, 0x342, 0x345}},
341
+ {0x1F9F, {0x397, 0x314, 0x342, 0x345}}, {0x1FA0, {0x3C9, 0x313, 0x345}}, {0x1FA1, {0x3C9, 0x314, 0x345}}, {0x1FA2, {0x3C9, 0x313, 0x300, 0x345}}, {0x1FA3, {0x3C9, 0x314, 0x300, 0x345}},
342
+ {0x1FA4, {0x3C9, 0x313, 0x301, 0x345}}, {0x1FA5, {0x3C9, 0x314, 0x301, 0x345}}, {0x1FA6, {0x3C9, 0x313, 0x342, 0x345}}, {0x1FA7, {0x3C9, 0x314, 0x342, 0x345}}, {0x1FA8, {0x3A9, 0x313, 0x345}},
343
+ {0x1FA9, {0x3A9, 0x314, 0x345}}, {0x1FAA, {0x3A9, 0x313, 0x300, 0x345}}, {0x1FAB, {0x3A9, 0x314, 0x300, 0x345}}, {0x1FAC, {0x3A9, 0x313, 0x301, 0x345}}, {0x1FAD, {0x3A9, 0x314, 0x301, 0x345}},
344
+ {0x1FAE, {0x3A9, 0x313, 0x342, 0x345}}, {0x1FAF, {0x3A9, 0x314, 0x342, 0x345}}, {0x1FB0, {0x3B1, 0x306}}, {0x1FB1, {0x3B1, 0x304}}, {0x1FB2, {0x3B1, 0x300, 0x345}}, {0x1FB3, {0x3B1, 0x345}},
345
+ {0x1FB4, {0x3B1, 0x301, 0x345}}, {0x1FB6, {0x3B1, 0x342}}, {0x1FB7, {0x3B1, 0x342, 0x345}}, {0x1FB8, {0x391, 0x306}}, {0x1FB9, {0x391, 0x304}}, {0x1FBA, {0x391, 0x300}}, {0x1FBB, {0x391, 0x301}},
346
+ {0x1FBC, {0x391, 0x345}}, {0x1FBE, {0x3B9}}, {0x1FC1, {0xA8, 0x342}}, {0x1FC2, {0x3B7, 0x300, 0x345}}, {0x1FC3, {0x3B7, 0x345}}, {0x1FC4, {0x3B7, 0x301, 0x345}}, {0x1FC6, {0x3B7, 0x342}},
347
+ {0x1FC7, {0x3B7, 0x342, 0x345}}, {0x1FC8, {0x395, 0x300}}, {0x1FC9, {0x395, 0x301}}, {0x1FCA, {0x397, 0x300}}, {0x1FCB, {0x397, 0x301}}, {0x1FCC, {0x397, 0x345}}, {0x1FCD, {0x1FBF, 0x300}},
348
+ {0x1FCE, {0x1FBF, 0x301}}, {0x1FCF, {0x1FBF, 0x342}}, {0x1FD0, {0x3B9, 0x306}}, {0x1FD1, {0x3B9, 0x304}}, {0x1FD2, {0x3B9, 0x308, 0x300}}, {0x1FD3, {0x3B9, 0x308, 0x301}}, {0x1FD6, {0x3B9, 0x342}},
349
+ {0x1FD7, {0x3B9, 0x308, 0x342}}, {0x1FD8, {0x399, 0x306}}, {0x1FD9, {0x399, 0x304}}, {0x1FDA, {0x399, 0x300}}, {0x1FDB, {0x399, 0x301}}, {0x1FDD, {0x1FFE, 0x300}}, {0x1FDE, {0x1FFE, 0x301}},
350
+ {0x1FDF, {0x1FFE, 0x342}}, {0x1FE0, {0x3C5, 0x306}}, {0x1FE1, {0x3C5, 0x304}}, {0x1FE2, {0x3C5, 0x308, 0x300}}, {0x1FE3, {0x3C5, 0x308, 0x301}}, {0x1FE4, {0x3C1, 0x313}}, {0x1FE5, {0x3C1, 0x314}},
351
+ {0x1FE6, {0x3C5, 0x342}}, {0x1FE7, {0x3C5, 0x308, 0x342}}, {0x1FE8, {0x3A5, 0x306}}, {0x1FE9, {0x3A5, 0x304}}, {0x1FEA, {0x3A5, 0x300}}, {0x1FEB, {0x3A5, 0x301}}, {0x1FEC, {0x3A1, 0x314}},
352
+ {0x1FED, {0xA8, 0x300}}, {0x1FEE, {0xA8, 0x301}}, {0x1FEF, {0x60}}, {0x1FF2, {0x3C9, 0x300, 0x345}}, {0x1FF3, {0x3C9, 0x345}}, {0x1FF4, {0x3C9, 0x301, 0x345}}, {0x1FF6, {0x3C9, 0x342}},
353
+ {0x1FF7, {0x3C9, 0x342, 0x345}}, {0x1FF8, {0x39F, 0x300}}, {0x1FF9, {0x39F, 0x301}}, {0x1FFA, {0x3A9, 0x300}}, {0x1FFB, {0x3A9, 0x301}}, {0x1FFC, {0x3A9, 0x345}}, {0x1FFD, {0xB4}}, {0x2000, {0x2002}},
354
+ {0x2001, {0x2003}}, {0x2126, {0x3A9}}, {0x212A, {0x4B}}, {0x212B, {0x41, 0x30A}}, {0x219A, {0x2190, 0x338}}, {0x219B, {0x2192, 0x338}}, {0x21AE, {0x2194, 0x338}}, {0x21CD, {0x21D0, 0x338}},
355
+ {0x21CE, {0x21D4, 0x338}}, {0x21CF, {0x21D2, 0x338}}, {0x2204, {0x2203, 0x338}}, {0x2209, {0x2208, 0x338}}, {0x220C, {0x220B, 0x338}}, {0x2224, {0x2223, 0x338}}, {0x2226, {0x2225, 0x338}},
356
+ {0x2241, {0x223C, 0x338}}, {0x2244, {0x2243, 0x338}}, {0x2247, {0x2245, 0x338}}, {0x2249, {0x2248, 0x338}}, {0x2260, {0x3D, 0x338}}, {0x2262, {0x2261, 0x338}}, {0x226D, {0x224D, 0x338}},
357
+ {0x226E, {0x3C, 0x338}}, {0x226F, {0x3E, 0x338}}, {0x2270, {0x2264, 0x338}}, {0x2271, {0x2265, 0x338}}, {0x2274, {0x2272, 0x338}}, {0x2275, {0x2273, 0x338}}, {0x2278, {0x2276, 0x338}},
358
+ {0x2279, {0x2277, 0x338}}, {0x2280, {0x227A, 0x338}}, {0x2281, {0x227B, 0x338}}, {0x2284, {0x2282, 0x338}}, {0x2285, {0x2283, 0x338}}, {0x2288, {0x2286, 0x338}}, {0x2289, {0x2287, 0x338}},
359
+ {0x22AC, {0x22A2, 0x338}}, {0x22AD, {0x22A8, 0x338}}, {0x22AE, {0x22A9, 0x338}}, {0x22AF, {0x22AB, 0x338}}, {0x22E0, {0x227C, 0x338}}, {0x22E1, {0x227D, 0x338}}, {0x22E2, {0x2291, 0x338}},
360
+ {0x22E3, {0x2292, 0x338}}, {0x22EA, {0x22B2, 0x338}}, {0x22EB, {0x22B3, 0x338}}, {0x22EC, {0x22B4, 0x338}}, {0x22ED, {0x22B5, 0x338}}, {0x2329, {0x3008}}, {0x232A, {0x3009}},
361
+ {0x2ADC, {0x2ADD, 0x338}}, {0x304C, {0x304B, 0x3099}}, {0x304E, {0x304D, 0x3099}}, {0x3050, {0x304F, 0x3099}}, {0x3052, {0x3051, 0x3099}}, {0x3054, {0x3053, 0x3099}}, {0x3056, {0x3055, 0x3099}},
362
+ {0x3058, {0x3057, 0x3099}}, {0x305A, {0x3059, 0x3099}}, {0x305C, {0x305B, 0x3099}}, {0x305E, {0x305D, 0x3099}}, {0x3060, {0x305F, 0x3099}}, {0x3062, {0x3061, 0x3099}}, {0x3065, {0x3064, 0x3099}},
363
+ {0x3067, {0x3066, 0x3099}}, {0x3069, {0x3068, 0x3099}}, {0x3070, {0x306F, 0x3099}}, {0x3071, {0x306F, 0x309A}}, {0x3073, {0x3072, 0x3099}}, {0x3074, {0x3072, 0x309A}}, {0x3076, {0x3075, 0x3099}},
364
+ {0x3077, {0x3075, 0x309A}}, {0x3079, {0x3078, 0x3099}}, {0x307A, {0x3078, 0x309A}}, {0x307C, {0x307B, 0x3099}}, {0x307D, {0x307B, 0x309A}}, {0x3094, {0x3046, 0x3099}}, {0x309E, {0x309D, 0x3099}},
365
+ {0x30AC, {0x30AB, 0x3099}}, {0x30AE, {0x30AD, 0x3099}}, {0x30B0, {0x30AF, 0x3099}}, {0x30B2, {0x30B1, 0x3099}}, {0x30B4, {0x30B3, 0x3099}}, {0x30B6, {0x30B5, 0x3099}}, {0x30B8, {0x30B7, 0x3099}},
366
+ {0x30BA, {0x30B9, 0x3099}}, {0x30BC, {0x30BB, 0x3099}}, {0x30BE, {0x30BD, 0x3099}}, {0x30C0, {0x30BF, 0x3099}}, {0x30C2, {0x30C1, 0x3099}}, {0x30C5, {0x30C4, 0x3099}}, {0x30C7, {0x30C6, 0x3099}},
367
+ {0x30C9, {0x30C8, 0x3099}}, {0x30D0, {0x30CF, 0x3099}}, {0x30D1, {0x30CF, 0x309A}}, {0x30D3, {0x30D2, 0x3099}}, {0x30D4, {0x30D2, 0x309A}}, {0x30D6, {0x30D5, 0x3099}}, {0x30D7, {0x30D5, 0x309A}},
368
+ {0x30D9, {0x30D8, 0x3099}}, {0x30DA, {0x30D8, 0x309A}}, {0x30DC, {0x30DB, 0x3099}}, {0x30DD, {0x30DB, 0x309A}}, {0x30F4, {0x30A6, 0x3099}}, {0x30F7, {0x30EF, 0x3099}}, {0x30F8, {0x30F0, 0x3099}},
369
+ {0x30F9, {0x30F1, 0x3099}}, {0x30FA, {0x30F2, 0x3099}}, {0x30FE, {0x30FD, 0x3099}}, {0xF900, {0x8C48}}, {0xF901, {0x66F4}}, {0xF902, {0x8ECA}}, {0xF903, {0x8CC8}}, {0xF904, {0x6ED1}},
370
+ {0xF905, {0x4E32}}, {0xF906, {0x53E5}}, {0xF907, {0x9F9C}}, {0xF908, {0x9F9C}}, {0xF909, {0x5951}}, {0xF90A, {0x91D1}}, {0xF90B, {0x5587}}, {0xF90C, {0x5948}}, {0xF90D, {0x61F6}}, {0xF90E, {0x7669}},
371
+ {0xF90F, {0x7F85}}, {0xF910, {0x863F}}, {0xF911, {0x87BA}}, {0xF912, {0x88F8}}, {0xF913, {0x908F}}, {0xF914, {0x6A02}}, {0xF915, {0x6D1B}}, {0xF916, {0x70D9}}, {0xF917, {0x73DE}}, {0xF918, {0x843D}},
372
+ {0xF919, {0x916A}}, {0xF91A, {0x99F1}}, {0xF91B, {0x4E82}}, {0xF91C, {0x5375}}, {0xF91D, {0x6B04}}, {0xF91E, {0x721B}}, {0xF91F, {0x862D}}, {0xF920, {0x9E1E}}, {0xF921, {0x5D50}}, {0xF922, {0x6FEB}},
373
+ {0xF923, {0x85CD}}, {0xF924, {0x8964}}, {0xF925, {0x62C9}}, {0xF926, {0x81D8}}, {0xF927, {0x881F}}, {0xF928, {0x5ECA}}, {0xF929, {0x6717}}, {0xF92A, {0x6D6A}}, {0xF92B, {0x72FC}}, {0xF92C, {0x90CE}},
374
+ {0xF92D, {0x4F86}}, {0xF92E, {0x51B7}}, {0xF92F, {0x52DE}}, {0xF930, {0x64C4}}, {0xF931, {0x6AD3}}, {0xF932, {0x7210}}, {0xF933, {0x76E7}}, {0xF934, {0x8001}}, {0xF935, {0x8606}}, {0xF936, {0x865C}},
375
+ {0xF937, {0x8DEF}}, {0xF938, {0x9732}}, {0xF939, {0x9B6F}}, {0xF93A, {0x9DFA}}, {0xF93B, {0x788C}}, {0xF93C, {0x797F}}, {0xF93D, {0x7DA0}}, {0xF93E, {0x83C9}}, {0xF93F, {0x9304}}, {0xF940, {0x9E7F}},
376
+ {0xF941, {0x8AD6}}, {0xF942, {0x58DF}}, {0xF943, {0x5F04}}, {0xF944, {0x7C60}}, {0xF945, {0x807E}}, {0xF946, {0x7262}}, {0xF947, {0x78CA}}, {0xF948, {0x8CC2}}, {0xF949, {0x96F7}}, {0xF94A, {0x58D8}},
377
+ {0xF94B, {0x5C62}}, {0xF94C, {0x6A13}}, {0xF94D, {0x6DDA}}, {0xF94E, {0x6F0F}}, {0xF94F, {0x7D2F}}, {0xF950, {0x7E37}}, {0xF951, {0x964B}}, {0xF952, {0x52D2}}, {0xF953, {0x808B}}, {0xF954, {0x51DC}},
378
+ {0xF955, {0x51CC}}, {0xF956, {0x7A1C}}, {0xF957, {0x7DBE}}, {0xF958, {0x83F1}}, {0xF959, {0x9675}}, {0xF95A, {0x8B80}}, {0xF95B, {0x62CF}}, {0xF95C, {0x6A02}}, {0xF95D, {0x8AFE}}, {0xF95E, {0x4E39}},
379
+ {0xF95F, {0x5BE7}}, {0xF960, {0x6012}}, {0xF961, {0x7387}}, {0xF962, {0x7570}}, {0xF963, {0x5317}}, {0xF964, {0x78FB}}, {0xF965, {0x4FBF}}, {0xF966, {0x5FA9}}, {0xF967, {0x4E0D}}, {0xF968, {0x6CCC}},
380
+ {0xF969, {0x6578}}, {0xF96A, {0x7D22}}, {0xF96B, {0x53C3}}, {0xF96C, {0x585E}}, {0xF96D, {0x7701}}, {0xF96E, {0x8449}}, {0xF96F, {0x8AAA}}, {0xF970, {0x6BBA}}, {0xF971, {0x8FB0}}, {0xF972, {0x6C88}},
381
+ {0xF973, {0x62FE}}, {0xF974, {0x82E5}}, {0xF975, {0x63A0}}, {0xF976, {0x7565}}, {0xF977, {0x4EAE}}, {0xF978, {0x5169}}, {0xF979, {0x51C9}}, {0xF97A, {0x6881}}, {0xF97B, {0x7CE7}}, {0xF97C, {0x826F}},
382
+ {0xF97D, {0x8AD2}}, {0xF97E, {0x91CF}}, {0xF97F, {0x52F5}}, {0xF980, {0x5442}}, {0xF981, {0x5973}}, {0xF982, {0x5EEC}}, {0xF983, {0x65C5}}, {0xF984, {0x6FFE}}, {0xF985, {0x792A}}, {0xF986, {0x95AD}},
383
+ {0xF987, {0x9A6A}}, {0xF988, {0x9E97}}, {0xF989, {0x9ECE}}, {0xF98A, {0x529B}}, {0xF98B, {0x66C6}}, {0xF98C, {0x6B77}}, {0xF98D, {0x8F62}}, {0xF98E, {0x5E74}}, {0xF98F, {0x6190}}, {0xF990, {0x6200}},
384
+ {0xF991, {0x649A}}, {0xF992, {0x6F23}}, {0xF993, {0x7149}}, {0xF994, {0x7489}}, {0xF995, {0x79CA}}, {0xF996, {0x7DF4}}, {0xF997, {0x806F}}, {0xF998, {0x8F26}}, {0xF999, {0x84EE}}, {0xF99A, {0x9023}},
385
+ {0xF99B, {0x934A}}, {0xF99C, {0x5217}}, {0xF99D, {0x52A3}}, {0xF99E, {0x54BD}}, {0xF99F, {0x70C8}}, {0xF9A0, {0x88C2}}, {0xF9A1, {0x8AAA}}, {0xF9A2, {0x5EC9}}, {0xF9A3, {0x5FF5}}, {0xF9A4, {0x637B}},
386
+ {0xF9A5, {0x6BAE}}, {0xF9A6, {0x7C3E}}, {0xF9A7, {0x7375}}, {0xF9A8, {0x4EE4}}, {0xF9A9, {0x56F9}}, {0xF9AA, {0x5BE7}}, {0xF9AB, {0x5DBA}}, {0xF9AC, {0x601C}}, {0xF9AD, {0x73B2}}, {0xF9AE, {0x7469}},
387
+ {0xF9AF, {0x7F9A}}, {0xF9B0, {0x8046}}, {0xF9B1, {0x9234}}, {0xF9B2, {0x96F6}}, {0xF9B3, {0x9748}}, {0xF9B4, {0x9818}}, {0xF9B5, {0x4F8B}}, {0xF9B6, {0x79AE}}, {0xF9B7, {0x91B4}}, {0xF9B8, {0x96B8}},
388
+ {0xF9B9, {0x60E1}}, {0xF9BA, {0x4E86}}, {0xF9BB, {0x50DA}}, {0xF9BC, {0x5BEE}}, {0xF9BD, {0x5C3F}}, {0xF9BE, {0x6599}}, {0xF9BF, {0x6A02}}, {0xF9C0, {0x71CE}}, {0xF9C1, {0x7642}}, {0xF9C2, {0x84FC}},
389
+ {0xF9C3, {0x907C}}, {0xF9C4, {0x9F8D}}, {0xF9C5, {0x6688}}, {0xF9C6, {0x962E}}, {0xF9C7, {0x5289}}, {0xF9C8, {0x677B}}, {0xF9C9, {0x67F3}}, {0xF9CA, {0x6D41}}, {0xF9CB, {0x6E9C}}, {0xF9CC, {0x7409}},
390
+ {0xF9CD, {0x7559}}, {0xF9CE, {0x786B}}, {0xF9CF, {0x7D10}}, {0xF9D0, {0x985E}}, {0xF9D1, {0x516D}}, {0xF9D2, {0x622E}}, {0xF9D3, {0x9678}}, {0xF9D4, {0x502B}}, {0xF9D5, {0x5D19}}, {0xF9D6, {0x6DEA}},
391
+ {0xF9D7, {0x8F2A}}, {0xF9D8, {0x5F8B}}, {0xF9D9, {0x6144}}, {0xF9DA, {0x6817}}, {0xF9DB, {0x7387}}, {0xF9DC, {0x9686}}, {0xF9DD, {0x5229}}, {0xF9DE, {0x540F}}, {0xF9DF, {0x5C65}}, {0xF9E0, {0x6613}},
392
+ {0xF9E1, {0x674E}}, {0xF9E2, {0x68A8}}, {0xF9E3, {0x6CE5}}, {0xF9E4, {0x7406}}, {0xF9E5, {0x75E2}}, {0xF9E6, {0x7F79}}, {0xF9E7, {0x88CF}}, {0xF9E8, {0x88E1}}, {0xF9E9, {0x91CC}}, {0xF9EA, {0x96E2}},
393
+ {0xF9EB, {0x533F}}, {0xF9EC, {0x6EBA}}, {0xF9ED, {0x541D}}, {0xF9EE, {0x71D0}}, {0xF9EF, {0x7498}}, {0xF9F0, {0x85FA}}, {0xF9F1, {0x96A3}}, {0xF9F2, {0x9C57}}, {0xF9F3, {0x9E9F}}, {0xF9F4, {0x6797}},
394
+ {0xF9F5, {0x6DCB}}, {0xF9F6, {0x81E8}}, {0xF9F7, {0x7ACB}}, {0xF9F8, {0x7B20}}, {0xF9F9, {0x7C92}}, {0xF9FA, {0x72C0}}, {0xF9FB, {0x7099}}, {0xF9FC, {0x8B58}}, {0xF9FD, {0x4EC0}}, {0xF9FE, {0x8336}},
395
+ {0xF9FF, {0x523A}}, {0xFA00, {0x5207}}, {0xFA01, {0x5EA6}}, {0xFA02, {0x62D3}}, {0xFA03, {0x7CD6}}, {0xFA04, {0x5B85}}, {0xFA05, {0x6D1E}}, {0xFA06, {0x66B4}}, {0xFA07, {0x8F3B}}, {0xFA08, {0x884C}},
396
+ {0xFA09, {0x964D}}, {0xFA0A, {0x898B}}, {0xFA0B, {0x5ED3}}, {0xFA0C, {0x5140}}, {0xFA0D, {0x55C0}}, {0xFA10, {0x585A}}, {0xFA12, {0x6674}}, {0xFA15, {0x51DE}}, {0xFA16, {0x732A}}, {0xFA17, {0x76CA}},
397
+ {0xFA18, {0x793C}}, {0xFA19, {0x795E}}, {0xFA1A, {0x7965}}, {0xFA1B, {0x798F}}, {0xFA1C, {0x9756}}, {0xFA1D, {0x7CBE}}, {0xFA1E, {0x7FBD}}, {0xFA20, {0x8612}}, {0xFA22, {0x8AF8}}, {0xFA25, {0x9038}},
398
+ {0xFA26, {0x90FD}}, {0xFA2A, {0x98EF}}, {0xFA2B, {0x98FC}}, {0xFA2C, {0x9928}}, {0xFA2D, {0x9DB4}}, {0xFA2E, {0x90DE}}, {0xFA2F, {0x96B7}}, {0xFA30, {0x4FAE}}, {0xFA31, {0x50E7}}, {0xFA32, {0x514D}},
399
+ {0xFA33, {0x52C9}}, {0xFA34, {0x52E4}}, {0xFA35, {0x5351}}, {0xFA36, {0x559D}}, {0xFA37, {0x5606}}, {0xFA38, {0x5668}}, {0xFA39, {0x5840}}, {0xFA3A, {0x58A8}}, {0xFA3B, {0x5C64}}, {0xFA3C, {0x5C6E}},
400
+ {0xFA3D, {0x6094}}, {0xFA3E, {0x6168}}, {0xFA3F, {0x618E}}, {0xFA40, {0x61F2}}, {0xFA41, {0x654F}}, {0xFA42, {0x65E2}}, {0xFA43, {0x6691}}, {0xFA44, {0x6885}}, {0xFA45, {0x6D77}}, {0xFA46, {0x6E1A}},
401
+ {0xFA47, {0x6F22}}, {0xFA48, {0x716E}}, {0xFA49, {0x722B}}, {0xFA4A, {0x7422}}, {0xFA4B, {0x7891}}, {0xFA4C, {0x793E}}, {0xFA4D, {0x7949}}, {0xFA4E, {0x7948}}, {0xFA4F, {0x7950}}, {0xFA50, {0x7956}},
402
+ {0xFA51, {0x795D}}, {0xFA52, {0x798D}}, {0xFA53, {0x798E}}, {0xFA54, {0x7A40}}, {0xFA55, {0x7A81}}, {0xFA56, {0x7BC0}}, {0xFA57, {0x7DF4}}, {0xFA58, {0x7E09}}, {0xFA59, {0x7E41}}, {0xFA5A, {0x7F72}},
403
+ {0xFA5B, {0x8005}}, {0xFA5C, {0x81ED}}, {0xFA5D, {0x8279}}, {0xFA5E, {0x8279}}, {0xFA5F, {0x8457}}, {0xFA60, {0x8910}}, {0xFA61, {0x8996}}, {0xFA62, {0x8B01}}, {0xFA63, {0x8B39}}, {0xFA64, {0x8CD3}},
404
+ {0xFA65, {0x8D08}}, {0xFA66, {0x8FB6}}, {0xFA67, {0x9038}}, {0xFA68, {0x96E3}}, {0xFA69, {0x97FF}}, {0xFA6A, {0x983B}}, {0xFA6B, {0x6075}}, {0xFA6C, {0x242EE}}, {0xFA6D, {0x8218}}, {0xFA70, {0x4E26}},
405
+ {0xFA71, {0x51B5}}, {0xFA72, {0x5168}}, {0xFA73, {0x4F80}}, {0xFA74, {0x5145}}, {0xFA75, {0x5180}}, {0xFA76, {0x52C7}}, {0xFA77, {0x52FA}}, {0xFA78, {0x559D}}, {0xFA79, {0x5555}}, {0xFA7A, {0x5599}},
406
+ {0xFA7B, {0x55E2}}, {0xFA7C, {0x585A}}, {0xFA7D, {0x58B3}}, {0xFA7E, {0x5944}}, {0xFA7F, {0x5954}}, {0xFA80, {0x5A62}}, {0xFA81, {0x5B28}}, {0xFA82, {0x5ED2}}, {0xFA83, {0x5ED9}}, {0xFA84, {0x5F69}},
407
+ {0xFA85, {0x5FAD}}, {0xFA86, {0x60D8}}, {0xFA87, {0x614E}}, {0xFA88, {0x6108}}, {0xFA89, {0x618E}}, {0xFA8A, {0x6160}}, {0xFA8B, {0x61F2}}, {0xFA8C, {0x6234}}, {0xFA8D, {0x63C4}}, {0xFA8E, {0x641C}},
408
+ {0xFA8F, {0x6452}}, {0xFA90, {0x6556}}, {0xFA91, {0x6674}}, {0xFA92, {0x6717}}, {0xFA93, {0x671B}}, {0xFA94, {0x6756}}, {0xFA95, {0x6B79}}, {0xFA96, {0x6BBA}}, {0xFA97, {0x6D41}}, {0xFA98, {0x6EDB}},
409
+ {0xFA99, {0x6ECB}}, {0xFA9A, {0x6F22}}, {0xFA9B, {0x701E}}, {0xFA9C, {0x716E}}, {0xFA9D, {0x77A7}}, {0xFA9E, {0x7235}}, {0xFA9F, {0x72AF}}, {0xFAA0, {0x732A}}, {0xFAA1, {0x7471}}, {0xFAA2, {0x7506}},
410
+ {0xFAA3, {0x753B}}, {0xFAA4, {0x761D}}, {0xFAA5, {0x761F}}, {0xFAA6, {0x76CA}}, {0xFAA7, {0x76DB}}, {0xFAA8, {0x76F4}}, {0xFAA9, {0x774A}}, {0xFAAA, {0x7740}}, {0xFAAB, {0x78CC}}, {0xFAAC, {0x7AB1}},
411
+ {0xFAAD, {0x7BC0}}, {0xFAAE, {0x7C7B}}, {0xFAAF, {0x7D5B}}, {0xFAB0, {0x7DF4}}, {0xFAB1, {0x7F3E}}, {0xFAB2, {0x8005}}, {0xFAB3, {0x8352}}, {0xFAB4, {0x83EF}}, {0xFAB5, {0x8779}}, {0xFAB6, {0x8941}},
412
+ {0xFAB7, {0x8986}}, {0xFAB8, {0x8996}}, {0xFAB9, {0x8ABF}}, {0xFABA, {0x8AF8}}, {0xFABB, {0x8ACB}}, {0xFABC, {0x8B01}}, {0xFABD, {0x8AFE}}, {0xFABE, {0x8AED}}, {0xFABF, {0x8B39}}, {0xFAC0, {0x8B8A}},
413
+ {0xFAC1, {0x8D08}}, {0xFAC2, {0x8F38}}, {0xFAC3, {0x9072}}, {0xFAC4, {0x9199}}, {0xFAC5, {0x9276}}, {0xFAC6, {0x967C}}, {0xFAC7, {0x96E3}}, {0xFAC8, {0x9756}}, {0xFAC9, {0x97DB}}, {0xFACA, {0x97FF}},
414
+ {0xFACB, {0x980B}}, {0xFACC, {0x983B}}, {0xFACD, {0x9B12}}, {0xFACE, {0x9F9C}}, {0xFACF, {0x2284A}}, {0xFAD0, {0x22844}}, {0xFAD1, {0x233D5}}, {0xFAD2, {0x3B9D}}, {0xFAD3, {0x4018}},
415
+ {0xFAD4, {0x4039}}, {0xFAD5, {0x25249}}, {0xFAD6, {0x25CD0}}, {0xFAD7, {0x27ED3}}, {0xFAD8, {0x9F43}}, {0xFAD9, {0x9F8E}}, {0xFB1D, {0x5D9, 0x5B4}}, {0xFB1F, {0x5F2, 0x5B7}}, {0xFB2A, {0x5E9, 0x5C1}},
416
+ {0xFB2B, {0x5E9, 0x5C2}}, {0xFB2C, {0x5E9, 0x5BC, 0x5C1}}, {0xFB2D, {0x5E9, 0x5BC, 0x5C2}}, {0xFB2E, {0x5D0, 0x5B7}}, {0xFB2F, {0x5D0, 0x5B8}}, {0xFB30, {0x5D0, 0x5BC}}, {0xFB31, {0x5D1, 0x5BC}},
417
+ {0xFB32, {0x5D2, 0x5BC}}, {0xFB33, {0x5D3, 0x5BC}}, {0xFB34, {0x5D4, 0x5BC}}, {0xFB35, {0x5D5, 0x5BC}}, {0xFB36, {0x5D6, 0x5BC}}, {0xFB38, {0x5D8, 0x5BC}}, {0xFB39, {0x5D9, 0x5BC}},
418
+ {0xFB3A, {0x5DA, 0x5BC}}, {0xFB3B, {0x5DB, 0x5BC}}, {0xFB3C, {0x5DC, 0x5BC}}, {0xFB3E, {0x5DE, 0x5BC}}, {0xFB40, {0x5E0, 0x5BC}}, {0xFB41, {0x5E1, 0x5BC}}, {0xFB43, {0x5E3, 0x5BC}},
419
+ {0xFB44, {0x5E4, 0x5BC}}, {0xFB46, {0x5E6, 0x5BC}}, {0xFB47, {0x5E7, 0x5BC}}, {0xFB48, {0x5E8, 0x5BC}}, {0xFB49, {0x5E9, 0x5BC}}, {0xFB4A, {0x5EA, 0x5BC}}, {0xFB4B, {0x5D5, 0x5B9}},
420
+ {0xFB4C, {0x5D1, 0x5BF}}, {0xFB4D, {0x5DB, 0x5BF}}, {0xFB4E, {0x5E4, 0x5BF}}, {0x1109A, {0x11099, 0x110BA}}, {0x1109C, {0x1109B, 0x110BA}}, {0x110AB, {0x110A5, 0x110BA}},
421
+ {0x1112E, {0x11131, 0x11127}}, {0x1112F, {0x11132, 0x11127}}, {0x1134B, {0x11347, 0x1133E}}, {0x1134C, {0x11347, 0x11357}}, {0x114BB, {0x114B9, 0x114BA}}, {0x114BC, {0x114B9, 0x114B0}},
422
+ {0x114BE, {0x114B9, 0x114BD}}, {0x115BA, {0x115B8, 0x115AF}}, {0x115BB, {0x115B9, 0x115AF}}, {0x1D15E, {0x1D157, 0x1D165}}, {0x1D15F, {0x1D158, 0x1D165}}, {0x1D160, {0x1D158, 0x1D165, 0x1D16E}},
423
+ {0x1D161, {0x1D158, 0x1D165, 0x1D16F}}, {0x1D162, {0x1D158, 0x1D165, 0x1D170}}, {0x1D163, {0x1D158, 0x1D165, 0x1D171}}, {0x1D164, {0x1D158, 0x1D165, 0x1D172}}, {0x1D1BB, {0x1D1B9, 0x1D165}},
424
+ {0x1D1BC, {0x1D1BA, 0x1D165}}, {0x1D1BD, {0x1D1B9, 0x1D165, 0x1D16E}}, {0x1D1BE, {0x1D1BA, 0x1D165, 0x1D16E}}, {0x1D1BF, {0x1D1B9, 0x1D165, 0x1D16F}}, {0x1D1C0, {0x1D1BA, 0x1D165, 0x1D16F}},
425
+ {0x2F800, {0x4E3D}}, {0x2F801, {0x4E38}}, {0x2F802, {0x4E41}}, {0x2F803, {0x20122}}, {0x2F804, {0x4F60}}, {0x2F805, {0x4FAE}}, {0x2F806, {0x4FBB}}, {0x2F807, {0x5002}}, {0x2F808, {0x507A}},
426
+ {0x2F809, {0x5099}}, {0x2F80A, {0x50E7}}, {0x2F80B, {0x50CF}}, {0x2F80C, {0x349E}}, {0x2F80D, {0x2063A}}, {0x2F80E, {0x514D}}, {0x2F80F, {0x5154}}, {0x2F810, {0x5164}}, {0x2F811, {0x5177}},
427
+ {0x2F812, {0x2051C}}, {0x2F813, {0x34B9}}, {0x2F814, {0x5167}}, {0x2F815, {0x518D}}, {0x2F816, {0x2054B}}, {0x2F817, {0x5197}}, {0x2F818, {0x51A4}}, {0x2F819, {0x4ECC}}, {0x2F81A, {0x51AC}},
428
+ {0x2F81B, {0x51B5}}, {0x2F81C, {0x291DF}}, {0x2F81D, {0x51F5}}, {0x2F81E, {0x5203}}, {0x2F81F, {0x34DF}}, {0x2F820, {0x523B}}, {0x2F821, {0x5246}}, {0x2F822, {0x5272}}, {0x2F823, {0x5277}},
429
+ {0x2F824, {0x3515}}, {0x2F825, {0x52C7}}, {0x2F826, {0x52C9}}, {0x2F827, {0x52E4}}, {0x2F828, {0x52FA}}, {0x2F829, {0x5305}}, {0x2F82A, {0x5306}}, {0x2F82B, {0x5317}}, {0x2F82C, {0x5349}},
430
+ {0x2F82D, {0x5351}}, {0x2F82E, {0x535A}}, {0x2F82F, {0x5373}}, {0x2F830, {0x537D}}, {0x2F831, {0x537F}}, {0x2F832, {0x537F}}, {0x2F833, {0x537F}}, {0x2F834, {0x20A2C}}, {0x2F835, {0x7070}},
431
+ {0x2F836, {0x53CA}}, {0x2F837, {0x53DF}}, {0x2F838, {0x20B63}}, {0x2F839, {0x53EB}}, {0x2F83A, {0x53F1}}, {0x2F83B, {0x5406}}, {0x2F83C, {0x549E}}, {0x2F83D, {0x5438}}, {0x2F83E, {0x5448}},
432
+ {0x2F83F, {0x5468}}, {0x2F840, {0x54A2}}, {0x2F841, {0x54F6}}, {0x2F842, {0x5510}}, {0x2F843, {0x5553}}, {0x2F844, {0x5563}}, {0x2F845, {0x5584}}, {0x2F846, {0x5584}}, {0x2F847, {0x5599}},
433
+ {0x2F848, {0x55AB}}, {0x2F849, {0x55B3}}, {0x2F84A, {0x55C2}}, {0x2F84B, {0x5716}}, {0x2F84C, {0x5606}}, {0x2F84D, {0x5717}}, {0x2F84E, {0x5651}}, {0x2F84F, {0x5674}}, {0x2F850, {0x5207}},
434
+ {0x2F851, {0x58EE}}, {0x2F852, {0x57CE}}, {0x2F853, {0x57F4}}, {0x2F854, {0x580D}}, {0x2F855, {0x578B}}, {0x2F856, {0x5832}}, {0x2F857, {0x5831}}, {0x2F858, {0x58AC}}, {0x2F859, {0x214E4}},
435
+ {0x2F85A, {0x58F2}}, {0x2F85B, {0x58F7}}, {0x2F85C, {0x5906}}, {0x2F85D, {0x591A}}, {0x2F85E, {0x5922}}, {0x2F85F, {0x5962}}, {0x2F860, {0x216A8}}, {0x2F861, {0x216EA}}, {0x2F862, {0x59EC}},
436
+ {0x2F863, {0x5A1B}}, {0x2F864, {0x5A27}}, {0x2F865, {0x59D8}}, {0x2F866, {0x5A66}}, {0x2F867, {0x36EE}}, {0x2F868, {0x36FC}}, {0x2F869, {0x5B08}}, {0x2F86A, {0x5B3E}}, {0x2F86B, {0x5B3E}},
437
+ {0x2F86C, {0x219C8}}, {0x2F86D, {0x5BC3}}, {0x2F86E, {0x5BD8}}, {0x2F86F, {0x5BE7}}, {0x2F870, {0x5BF3}}, {0x2F871, {0x21B18}}, {0x2F872, {0x5BFF}}, {0x2F873, {0x5C06}}, {0x2F874, {0x5F53}},
438
+ {0x2F875, {0x5C22}}, {0x2F876, {0x3781}}, {0x2F877, {0x5C60}}, {0x2F878, {0x5C6E}}, {0x2F879, {0x5CC0}}, {0x2F87A, {0x5C8D}}, {0x2F87B, {0x21DE4}}, {0x2F87C, {0x5D43}}, {0x2F87D, {0x21DE6}},
439
+ {0x2F87E, {0x5D6E}}, {0x2F87F, {0x5D6B}}, {0x2F880, {0x5D7C}}, {0x2F881, {0x5DE1}}, {0x2F882, {0x5DE2}}, {0x2F883, {0x382F}}, {0x2F884, {0x5DFD}}, {0x2F885, {0x5E28}}, {0x2F886, {0x5E3D}},
440
+ {0x2F887, {0x5E69}}, {0x2F888, {0x3862}}, {0x2F889, {0x22183}}, {0x2F88A, {0x387C}}, {0x2F88B, {0x5EB0}}, {0x2F88C, {0x5EB3}}, {0x2F88D, {0x5EB6}}, {0x2F88E, {0x5ECA}}, {0x2F88F, {0x2A392}},
441
+ {0x2F890, {0x5EFE}}, {0x2F891, {0x22331}}, {0x2F892, {0x22331}}, {0x2F893, {0x8201}}, {0x2F894, {0x5F22}}, {0x2F895, {0x5F22}}, {0x2F896, {0x38C7}}, {0x2F897, {0x232B8}}, {0x2F898, {0x261DA}},
442
+ {0x2F899, {0x5F62}}, {0x2F89A, {0x5F6B}}, {0x2F89B, {0x38E3}}, {0x2F89C, {0x5F9A}}, {0x2F89D, {0x5FCD}}, {0x2F89E, {0x5FD7}}, {0x2F89F, {0x5FF9}}, {0x2F8A0, {0x6081}}, {0x2F8A1, {0x393A}},
443
+ {0x2F8A2, {0x391C}}, {0x2F8A3, {0x6094}}, {0x2F8A4, {0x226D4}}, {0x2F8A5, {0x60C7}}, {0x2F8A6, {0x6148}}, {0x2F8A7, {0x614C}}, {0x2F8A8, {0x614E}}, {0x2F8A9, {0x614C}}, {0x2F8AA, {0x617A}},
444
+ {0x2F8AB, {0x618E}}, {0x2F8AC, {0x61B2}}, {0x2F8AD, {0x61A4}}, {0x2F8AE, {0x61AF}}, {0x2F8AF, {0x61DE}}, {0x2F8B0, {0x61F2}}, {0x2F8B1, {0x61F6}}, {0x2F8B2, {0x6210}}, {0x2F8B3, {0x621B}},
445
+ {0x2F8B4, {0x625D}}, {0x2F8B5, {0x62B1}}, {0x2F8B6, {0x62D4}}, {0x2F8B7, {0x6350}}, {0x2F8B8, {0x22B0C}}, {0x2F8B9, {0x633D}}, {0x2F8BA, {0x62FC}}, {0x2F8BB, {0x6368}}, {0x2F8BC, {0x6383}},
446
+ {0x2F8BD, {0x63E4}}, {0x2F8BE, {0x22BF1}}, {0x2F8BF, {0x6422}}, {0x2F8C0, {0x63C5}}, {0x2F8C1, {0x63A9}}, {0x2F8C2, {0x3A2E}}, {0x2F8C3, {0x6469}}, {0x2F8C4, {0x647E}}, {0x2F8C5, {0x649D}},
447
+ {0x2F8C6, {0x6477}}, {0x2F8C7, {0x3A6C}}, {0x2F8C8, {0x654F}}, {0x2F8C9, {0x656C}}, {0x2F8CA, {0x2300A}}, {0x2F8CB, {0x65E3}}, {0x2F8CC, {0x66F8}}, {0x2F8CD, {0x6649}}, {0x2F8CE, {0x3B19}},
+ {0x2F8CF, {0x6691}}, {0x2F8D0, {0x3B08}}, {0x2F8D1, {0x3AE4}}, {0x2F8D2, {0x5192}}, {0x2F8D3, {0x5195}}, {0x2F8D4, {0x6700}}, {0x2F8D5, {0x669C}}, {0x2F8D6, {0x80AD}}, {0x2F8D7, {0x43D9}},
+ {0x2F8D8, {0x6717}}, {0x2F8D9, {0x671B}}, {0x2F8DA, {0x6721}}, {0x2F8DB, {0x675E}}, {0x2F8DC, {0x6753}}, {0x2F8DD, {0x233C3}}, {0x2F8DE, {0x3B49}}, {0x2F8DF, {0x67FA}}, {0x2F8E0, {0x6785}},
+ {0x2F8E1, {0x6852}}, {0x2F8E2, {0x6885}}, {0x2F8E3, {0x2346D}}, {0x2F8E4, {0x688E}}, {0x2F8E5, {0x681F}}, {0x2F8E6, {0x6914}}, {0x2F8E7, {0x3B9D}}, {0x2F8E8, {0x6942}}, {0x2F8E9, {0x69A3}},
+ {0x2F8EA, {0x69EA}}, {0x2F8EB, {0x6AA8}}, {0x2F8EC, {0x236A3}}, {0x2F8ED, {0x6ADB}}, {0x2F8EE, {0x3C18}}, {0x2F8EF, {0x6B21}}, {0x2F8F0, {0x238A7}}, {0x2F8F1, {0x6B54}}, {0x2F8F2, {0x3C4E}},
+ {0x2F8F3, {0x6B72}}, {0x2F8F4, {0x6B9F}}, {0x2F8F5, {0x6BBA}}, {0x2F8F6, {0x6BBB}}, {0x2F8F7, {0x23A8D}}, {0x2F8F8, {0x21D0B}}, {0x2F8F9, {0x23AFA}}, {0x2F8FA, {0x6C4E}}, {0x2F8FB, {0x23CBC}},
+ {0x2F8FC, {0x6CBF}}, {0x2F8FD, {0x6CCD}}, {0x2F8FE, {0x6C67}}, {0x2F8FF, {0x6D16}}, {0x2F900, {0x6D3E}}, {0x2F901, {0x6D77}}, {0x2F902, {0x6D41}}, {0x2F903, {0x6D69}}, {0x2F904, {0x6D78}},
+ {0x2F905, {0x6D85}}, {0x2F906, {0x23D1E}}, {0x2F907, {0x6D34}}, {0x2F908, {0x6E2F}}, {0x2F909, {0x6E6E}}, {0x2F90A, {0x3D33}}, {0x2F90B, {0x6ECB}}, {0x2F90C, {0x6EC7}}, {0x2F90D, {0x23ED1}},
+ {0x2F90E, {0x6DF9}}, {0x2F90F, {0x6F6E}}, {0x2F910, {0x23F5E}}, {0x2F911, {0x23F8E}}, {0x2F912, {0x6FC6}}, {0x2F913, {0x7039}}, {0x2F914, {0x701E}}, {0x2F915, {0x701B}}, {0x2F916, {0x3D96}},
+ {0x2F917, {0x704A}}, {0x2F918, {0x707D}}, {0x2F919, {0x7077}}, {0x2F91A, {0x70AD}}, {0x2F91B, {0x20525}}, {0x2F91C, {0x7145}}, {0x2F91D, {0x24263}}, {0x2F91E, {0x719C}}, {0x2F91F, {0x243AB}},
+ {0x2F920, {0x7228}}, {0x2F921, {0x7235}}, {0x2F922, {0x7250}}, {0x2F923, {0x24608}}, {0x2F924, {0x7280}}, {0x2F925, {0x7295}}, {0x2F926, {0x24735}}, {0x2F927, {0x24814}}, {0x2F928, {0x737A}},
+ {0x2F929, {0x738B}}, {0x2F92A, {0x3EAC}}, {0x2F92B, {0x73A5}}, {0x2F92C, {0x3EB8}}, {0x2F92D, {0x3EB8}}, {0x2F92E, {0x7447}}, {0x2F92F, {0x745C}}, {0x2F930, {0x7471}}, {0x2F931, {0x7485}},
+ {0x2F932, {0x74CA}}, {0x2F933, {0x3F1B}}, {0x2F934, {0x7524}}, {0x2F935, {0x24C36}}, {0x2F936, {0x753E}}, {0x2F937, {0x24C92}}, {0x2F938, {0x7570}}, {0x2F939, {0x2219F}}, {0x2F93A, {0x7610}},
+ {0x2F93B, {0x24FA1}}, {0x2F93C, {0x24FB8}}, {0x2F93D, {0x25044}}, {0x2F93E, {0x3FFC}}, {0x2F93F, {0x4008}}, {0x2F940, {0x76F4}}, {0x2F941, {0x250F3}}, {0x2F942, {0x250F2}}, {0x2F943, {0x25119}},
+ {0x2F944, {0x25133}}, {0x2F945, {0x771E}}, {0x2F946, {0x771F}}, {0x2F947, {0x771F}}, {0x2F948, {0x774A}}, {0x2F949, {0x4039}}, {0x2F94A, {0x778B}}, {0x2F94B, {0x4046}}, {0x2F94C, {0x4096}},
+ {0x2F94D, {0x2541D}}, {0x2F94E, {0x784E}}, {0x2F94F, {0x788C}}, {0x2F950, {0x78CC}}, {0x2F951, {0x40E3}}, {0x2F952, {0x25626}}, {0x2F953, {0x7956}}, {0x2F954, {0x2569A}}, {0x2F955, {0x256C5}},
+ {0x2F956, {0x798F}}, {0x2F957, {0x79EB}}, {0x2F958, {0x412F}}, {0x2F959, {0x7A40}}, {0x2F95A, {0x7A4A}}, {0x2F95B, {0x7A4F}}, {0x2F95C, {0x2597C}}, {0x2F95D, {0x25AA7}}, {0x2F95E, {0x25AA7}},
+ {0x2F95F, {0x7AEE}}, {0x2F960, {0x4202}}, {0x2F961, {0x25BAB}}, {0x2F962, {0x7BC6}}, {0x2F963, {0x7BC9}}, {0x2F964, {0x4227}}, {0x2F965, {0x25C80}}, {0x2F966, {0x7CD2}}, {0x2F967, {0x42A0}},
+ {0x2F968, {0x7CE8}}, {0x2F969, {0x7CE3}}, {0x2F96A, {0x7D00}}, {0x2F96B, {0x25F86}}, {0x2F96C, {0x7D63}}, {0x2F96D, {0x4301}}, {0x2F96E, {0x7DC7}}, {0x2F96F, {0x7E02}}, {0x2F970, {0x7E45}},
+ {0x2F971, {0x4334}}, {0x2F972, {0x26228}}, {0x2F973, {0x26247}}, {0x2F974, {0x4359}}, {0x2F975, {0x262D9}}, {0x2F976, {0x7F7A}}, {0x2F977, {0x2633E}}, {0x2F978, {0x7F95}}, {0x2F979, {0x7FFA}},
+ {0x2F97A, {0x8005}}, {0x2F97B, {0x264DA}}, {0x2F97C, {0x26523}}, {0x2F97D, {0x8060}}, {0x2F97E, {0x265A8}}, {0x2F97F, {0x8070}}, {0x2F980, {0x2335F}}, {0x2F981, {0x43D5}}, {0x2F982, {0x80B2}},
+ {0x2F983, {0x8103}}, {0x2F984, {0x440B}}, {0x2F985, {0x813E}}, {0x2F986, {0x5AB5}}, {0x2F987, {0x267A7}}, {0x2F988, {0x267B5}}, {0x2F989, {0x23393}}, {0x2F98A, {0x2339C}}, {0x2F98B, {0x8201}},
+ {0x2F98C, {0x8204}}, {0x2F98D, {0x8F9E}}, {0x2F98E, {0x446B}}, {0x2F98F, {0x8291}}, {0x2F990, {0x828B}}, {0x2F991, {0x829D}}, {0x2F992, {0x52B3}}, {0x2F993, {0x82B1}}, {0x2F994, {0x82B3}},
+ {0x2F995, {0x82BD}}, {0x2F996, {0x82E6}}, {0x2F997, {0x26B3C}}, {0x2F998, {0x82E5}}, {0x2F999, {0x831D}}, {0x2F99A, {0x8363}}, {0x2F99B, {0x83AD}}, {0x2F99C, {0x8323}}, {0x2F99D, {0x83BD}},
+ {0x2F99E, {0x83E7}}, {0x2F99F, {0x8457}}, {0x2F9A0, {0x8353}}, {0x2F9A1, {0x83CA}}, {0x2F9A2, {0x83CC}}, {0x2F9A3, {0x83DC}}, {0x2F9A4, {0x26C36}}, {0x2F9A5, {0x26D6B}}, {0x2F9A6, {0x26CD5}},
+ {0x2F9A7, {0x452B}}, {0x2F9A8, {0x84F1}}, {0x2F9A9, {0x84F3}}, {0x2F9AA, {0x8516}}, {0x2F9AB, {0x273CA}}, {0x2F9AC, {0x8564}}, {0x2F9AD, {0x26F2C}}, {0x2F9AE, {0x455D}}, {0x2F9AF, {0x4561}},
+ {0x2F9B0, {0x26FB1}}, {0x2F9B1, {0x270D2}}, {0x2F9B2, {0x456B}}, {0x2F9B3, {0x8650}}, {0x2F9B4, {0x865C}}, {0x2F9B5, {0x8667}}, {0x2F9B6, {0x8669}}, {0x2F9B7, {0x86A9}}, {0x2F9B8, {0x8688}},
+ {0x2F9B9, {0x870E}}, {0x2F9BA, {0x86E2}}, {0x2F9BB, {0x8779}}, {0x2F9BC, {0x8728}}, {0x2F9BD, {0x876B}}, {0x2F9BE, {0x8786}}, {0x2F9BF, {0x45D7}}, {0x2F9C0, {0x87E1}}, {0x2F9C1, {0x8801}},
+ {0x2F9C2, {0x45F9}}, {0x2F9C3, {0x8860}}, {0x2F9C4, {0x8863}}, {0x2F9C5, {0x27667}}, {0x2F9C6, {0x88D7}}, {0x2F9C7, {0x88DE}}, {0x2F9C8, {0x4635}}, {0x2F9C9, {0x88FA}}, {0x2F9CA, {0x34BB}},
+ {0x2F9CB, {0x278AE}}, {0x2F9CC, {0x27966}}, {0x2F9CD, {0x46BE}}, {0x2F9CE, {0x46C7}}, {0x2F9CF, {0x8AA0}}, {0x2F9D0, {0x8AED}}, {0x2F9D1, {0x8B8A}}, {0x2F9D2, {0x8C55}}, {0x2F9D3, {0x27CA8}},
+ {0x2F9D4, {0x8CAB}}, {0x2F9D5, {0x8CC1}}, {0x2F9D6, {0x8D1B}}, {0x2F9D7, {0x8D77}}, {0x2F9D8, {0x27F2F}}, {0x2F9D9, {0x20804}}, {0x2F9DA, {0x8DCB}}, {0x2F9DB, {0x8DBC}}, {0x2F9DC, {0x8DF0}},
+ {0x2F9DD, {0x208DE}}, {0x2F9DE, {0x8ED4}}, {0x2F9DF, {0x8F38}}, {0x2F9E0, {0x285D2}}, {0x2F9E1, {0x285ED}}, {0x2F9E2, {0x9094}}, {0x2F9E3, {0x90F1}}, {0x2F9E4, {0x9111}}, {0x2F9E5, {0x2872E}},
+ {0x2F9E6, {0x911B}}, {0x2F9E7, {0x9238}}, {0x2F9E8, {0x92D7}}, {0x2F9E9, {0x92D8}}, {0x2F9EA, {0x927C}}, {0x2F9EB, {0x93F9}}, {0x2F9EC, {0x9415}}, {0x2F9ED, {0x28BFA}}, {0x2F9EE, {0x958B}},
+ {0x2F9EF, {0x4995}}, {0x2F9F0, {0x95B7}}, {0x2F9F1, {0x28D77}}, {0x2F9F2, {0x49E6}}, {0x2F9F3, {0x96C3}}, {0x2F9F4, {0x5DB2}}, {0x2F9F5, {0x9723}}, {0x2F9F6, {0x29145}}, {0x2F9F7, {0x2921A}},
+ {0x2F9F8, {0x4A6E}}, {0x2F9F9, {0x4A76}}, {0x2F9FA, {0x97E0}}, {0x2F9FB, {0x2940A}}, {0x2F9FC, {0x4AB2}}, {0x2F9FD, {0x29496}}, {0x2F9FE, {0x980B}}, {0x2F9FF, {0x980B}}, {0x2FA00, {0x9829}},
+ {0x2FA01, {0x295B6}}, {0x2FA02, {0x98E2}}, {0x2FA03, {0x4B33}}, {0x2FA04, {0x9929}}, {0x2FA05, {0x99A7}}, {0x2FA06, {0x99C2}}, {0x2FA07, {0x99FE}}, {0x2FA08, {0x4BCE}}, {0x2FA09, {0x29B30}},
+ {0x2FA0A, {0x9B12}}, {0x2FA0B, {0x9C40}}, {0x2FA0C, {0x9CFD}}, {0x2FA0D, {0x4CCE}}, {0x2FA0E, {0x4CED}}, {0x2FA0F, {0x9D67}}, {0x2FA10, {0x2A0CE}}, {0x2FA11, {0x4CF8}}, {0x2FA12, {0x2A105}},
+ {0x2FA13, {0x2A20E}}, {0x2FA14, {0x2A291}}, {0x2FA15, {0x9EBB}}, {0x2FA16, {0x4D56}}, {0x2FA17, {0x9EF9}}, {0x2FA18, {0x9EFE}}, {0x2FA19, {0x9F05}}, {0x2FA1A, {0x9F0F}}, {0x2FA1B, {0x9F16}},
+ {0x2FA1D, {0x2A600}},
+ };
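+ // (the entries above map CJK Compatibility Ideographs to their canonical
+ // equivalents; U+2FA1D is the last codepoint of that supplement block)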
+
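+ // encodes a single codepoint as a 1-4 byte UTF-8 sequence, branching on the
+ // standard range boundaries (0x7f, 0x7ff, 0xffff)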
  static std::string codepoint_to_utf8(uint32_t cp) {
  std::string result;
  if (/* 0x00 <= cp && */ cp <= 0x7f) {
 
 
  static int codepoint_type(uint32_t cp) {
  static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
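+ // look up with find() so unknown codepoints do not insert new map entries;
+ // anything not present in the table is reported as CODEPOINT_TYPE_UNIDENTIFIED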
+ const auto it = codepoint_types.find(cp);
+ return it == codepoint_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : it->second;
  }
 
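+ // overload: classify a codepoint supplied as a UTF-8 encoded string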
  static int codepoint_type(const std::string & utf8) {