Spaces:
Running
Running
talk-llama : sync llama.cpp
Browse files- examples/talk-llama/llama.cpp +742 -403
- examples/talk-llama/llama.h +36 -4
- examples/talk-llama/unicode.h +264 -1
examples/talk-llama/llama.cpp
CHANGED
|
@@ -68,10 +68,12 @@
|
|
| 68 |
#include <cstdio>
|
| 69 |
#include <cstring>
|
| 70 |
#include <ctime>
|
|
|
|
| 71 |
#include <forward_list>
|
| 72 |
#include <fstream>
|
| 73 |
#include <functional>
|
| 74 |
#include <initializer_list>
|
|
|
|
| 75 |
#include <map>
|
| 76 |
#include <memory>
|
| 77 |
#include <mutex>
|
|
@@ -1550,8 +1552,9 @@ static const size_t MiB = 1024*kiB;
|
|
| 1550 |
static const size_t GiB = 1024*MiB;
|
| 1551 |
|
| 1552 |
struct llama_hparams {
|
| 1553 |
-
bool
|
| 1554 |
-
bool
|
|
|
|
| 1555 |
uint32_t n_vocab;
|
| 1556 |
uint32_t n_ctx_train; // context size the model was trained on
|
| 1557 |
uint32_t n_embd;
|
|
@@ -1580,7 +1583,8 @@ struct llama_hparams {
|
|
| 1580 |
bool causal_attn = true;
|
| 1581 |
bool need_kq_pos = false;
|
| 1582 |
|
| 1583 |
-
|
|
|
|
| 1584 |
|
| 1585 |
bool operator!=(const llama_hparams & other) const {
|
| 1586 |
if (this->vocab_only != other.vocab_only) return true;
|
|
@@ -1639,6 +1643,7 @@ struct llama_cparams {
|
|
| 1639 |
float yarn_attn_factor;
|
| 1640 |
float yarn_beta_fast;
|
| 1641 |
float yarn_beta_slow;
|
|
|
|
| 1642 |
|
| 1643 |
bool mul_mat_q;
|
| 1644 |
bool offload_kqv;
|
|
@@ -1707,11 +1712,20 @@ struct llama_kv_cell {
|
|
| 1707 |
bool has_seq_id(const llama_seq_id & id) const {
|
| 1708 |
return seq_id.find(id) != seq_id.end();
|
| 1709 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1710 |
};
|
| 1711 |
|
| 1712 |
// ring-buffer of cached KV data
|
| 1713 |
struct llama_kv_cache {
|
| 1714 |
bool has_shift = false;
|
|
|
|
| 1715 |
|
| 1716 |
// Note: The value of head isn't only used to optimize searching
|
| 1717 |
// for a free KV slot. llama_decode_internal also uses it, so it
|
|
@@ -1723,6 +1737,9 @@ struct llama_kv_cache {
|
|
| 1723 |
// computed before each graph build
|
| 1724 |
uint32_t n = 0;
|
| 1725 |
|
|
|
|
|
|
|
|
|
|
| 1726 |
std::vector<llama_kv_cell> cells;
|
| 1727 |
|
| 1728 |
std::vector<struct ggml_tensor *> k_l; // per layer
|
|
@@ -1958,8 +1975,8 @@ struct llama_context {
|
|
| 1958 |
static bool llama_kv_cache_init(
|
| 1959 |
struct llama_kv_cache & cache,
|
| 1960 |
const llama_model & model,
|
| 1961 |
-
ggml_type
|
| 1962 |
-
ggml_type
|
| 1963 |
uint32_t n_ctx,
|
| 1964 |
bool offload) {
|
| 1965 |
const struct llama_hparams & hparams = model.hparams;
|
|
@@ -1974,6 +1991,9 @@ static bool llama_kv_cache_init(
|
|
| 1974 |
cache.size = n_ctx;
|
| 1975 |
cache.used = 0;
|
| 1976 |
|
|
|
|
|
|
|
|
|
|
| 1977 |
cache.cells.clear();
|
| 1978 |
cache.cells.resize(n_ctx);
|
| 1979 |
|
|
@@ -2014,8 +2034,8 @@ static bool llama_kv_cache_init(
|
|
| 2014 |
|
| 2015 |
for (int i = 0; i < (int) n_layer; i++) {
|
| 2016 |
struct ggml_context * ctx = offload ? ctx_map.at(model.buft_layer[i].buft) : cache.ctxs.front();
|
| 2017 |
-
ggml_tensor * k = ggml_new_tensor_1d(ctx,
|
| 2018 |
-
ggml_tensor * v = ggml_new_tensor_1d(ctx,
|
| 2019 |
ggml_format_name(k, "cache_k_l%d", i);
|
| 2020 |
ggml_format_name(v, "cache_v_l%d", i);
|
| 2021 |
cache.k_l.push_back(k);
|
|
@@ -2099,7 +2119,7 @@ static bool llama_kv_cache_find_slot(
|
|
| 2099 |
// find how many cells are currently in use
|
| 2100 |
static int32_t llama_kv_cache_cell_max(const struct llama_kv_cache & cache) {
|
| 2101 |
for (uint32_t i = cache.size - 1; i > 0; --i) {
|
| 2102 |
-
if (cache.cells[i].pos >= 0 && !cache.cells[i].
|
| 2103 |
return i + 1;
|
| 2104 |
}
|
| 2105 |
}
|
|
@@ -2135,7 +2155,7 @@ static void llama_kv_cache_seq_rm(
|
|
| 2135 |
} else {
|
| 2136 |
continue;
|
| 2137 |
}
|
| 2138 |
-
if (cache.cells[i].
|
| 2139 |
// keep count of the number of used cells
|
| 2140 |
if (cache.cells[i].pos >= 0) cache.used--;
|
| 2141 |
|
|
@@ -2186,7 +2206,7 @@ static void llama_kv_cache_seq_keep(struct llama_kv_cache & cache, llama_seq_id
|
|
| 2186 |
if (new_head != cache.size && new_head < cache.head) cache.head = new_head;
|
| 2187 |
}
|
| 2188 |
|
| 2189 |
-
static void
|
| 2190 |
struct llama_kv_cache & cache,
|
| 2191 |
llama_seq_id seq_id,
|
| 2192 |
llama_pos p0,
|
|
@@ -2204,10 +2224,14 @@ static void llama_kv_cache_seq_shift(
|
|
| 2204 |
cache.cells[i].delta += delta;
|
| 2205 |
|
| 2206 |
if (cache.cells[i].pos < 0) {
|
| 2207 |
-
if (!cache.cells[i].
|
|
|
|
|
|
|
| 2208 |
cache.cells[i].pos = -1;
|
| 2209 |
cache.cells[i].seq_id.clear();
|
| 2210 |
-
if (new_head == cache.size)
|
|
|
|
|
|
|
| 2211 |
}
|
| 2212 |
}
|
| 2213 |
}
|
|
@@ -2239,6 +2263,22 @@ static void llama_kv_cache_seq_div(
|
|
| 2239 |
}
|
| 2240 |
}
|
| 2241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2242 |
//
|
| 2243 |
// model loading and saving
|
| 2244 |
//
|
|
@@ -2310,7 +2350,7 @@ namespace GGUFMeta {
|
|
| 2310 |
}
|
| 2311 |
};
|
| 2312 |
|
| 2313 |
-
struct ArrayInfo{
|
| 2314 |
const gguf_type gt;
|
| 2315 |
const size_t length;
|
| 2316 |
const void * data;
|
|
@@ -2329,7 +2369,7 @@ namespace GGUFMeta {
|
|
| 2329 |
};
|
| 2330 |
|
| 2331 |
template<typename T>
|
| 2332 |
-
class GKV: public GKV_Base<T> {
|
| 2333 |
GKV() = delete;
|
| 2334 |
|
| 2335 |
public:
|
|
@@ -2352,39 +2392,39 @@ namespace GGUFMeta {
|
|
| 2352 |
return "unknown";
|
| 2353 |
}
|
| 2354 |
|
| 2355 |
-
static bool validate_override(const llama_model_kv_override_type expected_type, const struct llama_model_kv_override *
|
| 2356 |
-
if (!
|
| 2357 |
-
if (
|
| 2358 |
LLAMA_LOG_INFO("%s: Using metadata override (%5s) '%s' = ",
|
| 2359 |
-
__func__, override_type_to_str(
|
| 2360 |
-
switch (
|
| 2361 |
case LLAMA_KV_OVERRIDE_TYPE_BOOL: {
|
| 2362 |
-
LLAMA_LOG_INFO("%s\n",
|
| 2363 |
} break;
|
| 2364 |
case LLAMA_KV_OVERRIDE_TYPE_INT: {
|
| 2365 |
-
LLAMA_LOG_INFO("%" PRId64 "\n",
|
| 2366 |
} break;
|
| 2367 |
case LLAMA_KV_OVERRIDE_TYPE_FLOAT: {
|
| 2368 |
-
LLAMA_LOG_INFO("%.6f\n",
|
| 2369 |
} break;
|
| 2370 |
default:
|
| 2371 |
// Shouldn't be possible to end up here, but just in case...
|
| 2372 |
throw std::runtime_error(
|
| 2373 |
format("Unsupported attempt to override %s type for metadata key %s\n",
|
| 2374 |
-
override_type_to_str(
|
| 2375 |
}
|
| 2376 |
return true;
|
| 2377 |
}
|
| 2378 |
LLAMA_LOG_WARN("%s: Warning: Bad metadata override type for key '%s', expected %s but got %s\n",
|
| 2379 |
-
__func__,
|
| 2380 |
return false;
|
| 2381 |
}
|
| 2382 |
|
| 2383 |
template<typename OT>
|
| 2384 |
static typename std::enable_if<std::is_same<OT, bool>::value, bool>::type
|
| 2385 |
-
try_override(OT & target, const struct llama_model_kv_override *
|
| 2386 |
-
if (validate_override(LLAMA_KV_OVERRIDE_TYPE_BOOL,
|
| 2387 |
-
target =
|
| 2388 |
return true;
|
| 2389 |
}
|
| 2390 |
return false;
|
|
@@ -2392,9 +2432,9 @@ namespace GGUFMeta {
|
|
| 2392 |
|
| 2393 |
template<typename OT>
|
| 2394 |
static typename std::enable_if<!std::is_same<OT, bool>::value && std::is_integral<OT>::value, bool>::type
|
| 2395 |
-
try_override(OT & target, const struct llama_model_kv_override *
|
| 2396 |
-
if (validate_override(LLAMA_KV_OVERRIDE_TYPE_INT,
|
| 2397 |
-
target =
|
| 2398 |
return true;
|
| 2399 |
}
|
| 2400 |
return false;
|
|
@@ -2402,9 +2442,9 @@ namespace GGUFMeta {
|
|
| 2402 |
|
| 2403 |
template<typename OT>
|
| 2404 |
static typename std::enable_if<std::is_floating_point<OT>::value, bool>::type
|
| 2405 |
-
try_override(T & target, const struct llama_model_kv_override *
|
| 2406 |
-
if (validate_override(LLAMA_KV_OVERRIDE_TYPE_FLOAT,
|
| 2407 |
-
target =
|
| 2408 |
return true;
|
| 2409 |
}
|
| 2410 |
return false;
|
|
@@ -2412,17 +2452,17 @@ namespace GGUFMeta {
|
|
| 2412 |
|
| 2413 |
template<typename OT>
|
| 2414 |
static typename std::enable_if<std::is_same<OT, std::string>::value, bool>::type
|
| 2415 |
-
try_override(T & target, const struct llama_model_kv_override *
|
| 2416 |
(void)target;
|
| 2417 |
-
(void)
|
| 2418 |
-
if (!
|
| 2419 |
// Currently, we should never end up here so it would be a bug if we do.
|
| 2420 |
throw std::runtime_error(format("Unsupported attempt to override string type for metadata key %s\n",
|
| 2421 |
-
|
| 2422 |
}
|
| 2423 |
|
| 2424 |
-
static bool set(const gguf_context * ctx, const int k, T & target, const struct llama_model_kv_override *
|
| 2425 |
-
if (try_override<T>(target,
|
| 2426 |
return true;
|
| 2427 |
}
|
| 2428 |
if (k < 0) { return false; }
|
|
@@ -2430,12 +2470,12 @@ namespace GGUFMeta {
|
|
| 2430 |
return true;
|
| 2431 |
}
|
| 2432 |
|
| 2433 |
-
static bool set(const gguf_context * ctx, const char * key, T & target, const struct llama_model_kv_override *
|
| 2434 |
-
return set(ctx, gguf_find_key(ctx, key), target,
|
| 2435 |
}
|
| 2436 |
|
| 2437 |
-
static bool set(const gguf_context * ctx, const std::string & key, T & target, const struct llama_model_kv_override *
|
| 2438 |
-
return set(ctx, key.c_str(), target,
|
| 2439 |
}
|
| 2440 |
};
|
| 2441 |
}
|
|
@@ -2542,9 +2582,11 @@ struct llama_model_loader {
|
|
| 2542 |
case GGML_TYPE_Q6_K: ftype = LLAMA_FTYPE_MOSTLY_Q6_K; break;
|
| 2543 |
case GGML_TYPE_IQ2_XXS: ftype = LLAMA_FTYPE_MOSTLY_IQ2_XXS; break;
|
| 2544 |
case GGML_TYPE_IQ2_XS: ftype = LLAMA_FTYPE_MOSTLY_IQ2_XS; break;
|
|
|
|
| 2545 |
case GGML_TYPE_IQ3_XXS: ftype = LLAMA_FTYPE_MOSTLY_IQ3_XXS; break;
|
| 2546 |
case GGML_TYPE_IQ1_S: ftype = LLAMA_FTYPE_MOSTLY_IQ1_S; break;
|
| 2547 |
case GGML_TYPE_IQ4_NL: ftype = LLAMA_FTYPE_MOSTLY_IQ4_NL; break;
|
|
|
|
| 2548 |
case GGML_TYPE_IQ3_S: ftype = LLAMA_FTYPE_MOSTLY_IQ3_S; break;
|
| 2549 |
default:
|
| 2550 |
{
|
|
@@ -2846,6 +2888,15 @@ struct llama_model_loader {
|
|
| 2846 |
}
|
| 2847 |
};
|
| 2848 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2849 |
//
|
| 2850 |
// load LLaMA models
|
| 2851 |
//
|
|
@@ -2887,10 +2938,13 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
|
|
| 2887 |
case LLAMA_FTYPE_MOSTLY_Q6_K: return "Q6_K";
|
| 2888 |
case LLAMA_FTYPE_MOSTLY_IQ2_XXS:return "IQ2_XXS - 2.0625 bpw";
|
| 2889 |
case LLAMA_FTYPE_MOSTLY_IQ2_XS: return "IQ2_XS - 2.3125 bpw";
|
| 2890 |
-
case
|
|
|
|
|
|
|
| 2891 |
case LLAMA_FTYPE_MOSTLY_IQ3_XXS:return "IQ3_XXS - 3.0625 bpw";
|
| 2892 |
case LLAMA_FTYPE_MOSTLY_IQ1_S :return "IQ1_S - 1.5625 bpw";
|
| 2893 |
case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw";
|
|
|
|
| 2894 |
case LLAMA_FTYPE_MOSTLY_IQ3_S: return "IQ3_S - 3.4375 bpw";
|
| 2895 |
case LLAMA_FTYPE_MOSTLY_IQ3_M: return "IQ3_S mix - 3.66 bpw";
|
| 2896 |
|
|
@@ -2926,16 +2980,16 @@ static const char * llama_model_type_name(e_model type) {
|
|
| 2926 |
default: return "?B";
|
| 2927 |
}
|
| 2928 |
}
|
|
|
|
| 2929 |
static const char * llama_model_vocab_type_name(enum llama_vocab_type type){
|
| 2930 |
switch (type) {
|
| 2931 |
-
case LLAMA_VOCAB_TYPE_SPM:
|
| 2932 |
-
case LLAMA_VOCAB_TYPE_BPE:
|
| 2933 |
-
case LLAMA_VOCAB_TYPE_WPM:
|
| 2934 |
-
default:
|
| 2935 |
}
|
| 2936 |
}
|
| 2937 |
|
| 2938 |
-
|
| 2939 |
static void llm_load_arch(llama_model_loader & ml, llama_model & model) {
|
| 2940 |
model.arch = ml.get_arch();
|
| 2941 |
if (model.arch == LLM_ARCH_UNKNOWN) {
|
|
@@ -3112,10 +3166,10 @@ static void llm_load_hparams(
|
|
| 3112 |
} break;
|
| 3113 |
case LLM_ARCH_BERT:
|
| 3114 |
{
|
| 3115 |
-
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS,
|
| 3116 |
-
ml.get_key(LLM_KV_ATTENTION_CAUSAL,
|
| 3117 |
ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
|
| 3118 |
-
ml.get_key(LLM_KV_POOLING_TYPE,
|
| 3119 |
|
| 3120 |
switch (hparams.n_layer) {
|
| 3121 |
case 3:
|
|
@@ -3133,10 +3187,10 @@ static void llm_load_hparams(
|
|
| 3133 |
} break;
|
| 3134 |
case LLM_ARCH_NOMIC_BERT:
|
| 3135 |
{
|
| 3136 |
-
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS,
|
| 3137 |
-
ml.get_key(LLM_KV_ATTENTION_CAUSAL,
|
| 3138 |
ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
|
| 3139 |
-
ml.get_key(LLM_KV_POOLING_TYPE,
|
| 3140 |
|
| 3141 |
if (hparams.n_layer == 12 && hparams.n_embd == 768) {
|
| 3142 |
model.type = e_model::MODEL_137M;
|
|
@@ -3275,6 +3329,8 @@ static void llm_load_hparams(
|
|
| 3275 |
if (hparams.f_max_alibi_bias > 0.0f) {
|
| 3276 |
hparams.need_kq_pos = true;
|
| 3277 |
}
|
|
|
|
|
|
|
| 3278 |
}
|
| 3279 |
|
| 3280 |
// TODO: This should probably be in llama.h
|
|
@@ -3577,6 +3633,8 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
|
|
| 3577 |
LLAMA_LOG_INFO("%s: n_ff = %u\n", __func__, hparams.n_ff);
|
| 3578 |
LLAMA_LOG_INFO("%s: n_expert = %u\n", __func__, hparams.n_expert);
|
| 3579 |
LLAMA_LOG_INFO("%s: n_expert_used = %u\n", __func__, hparams.n_expert_used);
|
|
|
|
|
|
|
| 3580 |
LLAMA_LOG_INFO("%s: rope scaling = %s\n", __func__, rope_scaling_type);
|
| 3581 |
LLAMA_LOG_INFO("%s: freq_base_train = %.1f\n", __func__, hparams.rope_freq_base_train);
|
| 3582 |
LLAMA_LOG_INFO("%s: freq_scale_train = %g\n", __func__, hparams.rope_freq_scale_train);
|
|
@@ -4598,12 +4656,6 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
|
|
| 4598 |
|
| 4599 |
using llm_build_cb = std::function<void(struct ggml_tensor * cur, const char * name, int nl)>;
|
| 4600 |
|
| 4601 |
-
enum llm_rope_type {
|
| 4602 |
-
LLM_ROPE,
|
| 4603 |
-
LLM_ROPE_NEOX,
|
| 4604 |
-
LLM_ROPE_GLM,
|
| 4605 |
-
};
|
| 4606 |
-
|
| 4607 |
enum llm_ffn_op_type {
|
| 4608 |
LLM_FFN_SILU,
|
| 4609 |
LLM_FFN_GELU,
|
|
@@ -4649,55 +4701,6 @@ static struct ggml_tensor * llm_build_inp_embd(
|
|
| 4649 |
return inpL;
|
| 4650 |
}
|
| 4651 |
|
| 4652 |
-
// Persimmon: n_rot = n_embd_head_k/2
|
| 4653 |
-
// Other: n_rot = n_embd_head_k
|
| 4654 |
-
static void llm_build_k_shift(
|
| 4655 |
-
struct ggml_context * ctx,
|
| 4656 |
-
const llama_hparams & hparams,
|
| 4657 |
-
const llama_cparams & cparams,
|
| 4658 |
-
const llama_kv_cache & kv,
|
| 4659 |
-
struct ggml_cgraph * graph,
|
| 4660 |
-
struct ggml_tensor * K_shift,
|
| 4661 |
-
llm_rope_type type,
|
| 4662 |
-
int64_t n_ctx,
|
| 4663 |
-
float freq_base,
|
| 4664 |
-
float freq_scale,
|
| 4665 |
-
const llm_build_cb & cb) {
|
| 4666 |
-
const int64_t n_layer = hparams.n_layer;
|
| 4667 |
-
const int64_t n_head_kv = hparams.n_head_kv;
|
| 4668 |
-
const int64_t n_embd_head_k = hparams.n_embd_head_k;
|
| 4669 |
-
const int64_t n_embd_k_gqa = hparams.n_embd_k_gqa();
|
| 4670 |
-
const int32_t n_rot = hparams.n_rot;
|
| 4671 |
-
const int32_t n_orig_ctx = cparams.n_yarn_orig_ctx;
|
| 4672 |
-
const float ext_factor = cparams.yarn_ext_factor;
|
| 4673 |
-
const float attn_factor = cparams.yarn_attn_factor;
|
| 4674 |
-
const float beta_fast = cparams.yarn_beta_fast;
|
| 4675 |
-
const float beta_slow = cparams.yarn_beta_slow;
|
| 4676 |
-
|
| 4677 |
-
int rope_type = 0;
|
| 4678 |
-
|
| 4679 |
-
switch (type) {
|
| 4680 |
-
case LLM_ROPE: rope_type = 0; break;
|
| 4681 |
-
case LLM_ROPE_NEOX: rope_type = 2; break;
|
| 4682 |
-
case LLM_ROPE_GLM: rope_type = 4; break;
|
| 4683 |
-
}
|
| 4684 |
-
|
| 4685 |
-
for (int il = 0; il < n_layer; ++il) {
|
| 4686 |
-
struct ggml_tensor * tmp =
|
| 4687 |
-
// we rotate only the first n_rot dimensions
|
| 4688 |
-
ggml_rope_custom_inplace(ctx,
|
| 4689 |
-
ggml_view_3d(ctx, kv.k_l[il],
|
| 4690 |
-
n_embd_head_k, n_head_kv, n_ctx,
|
| 4691 |
-
ggml_row_size(kv.k_l[il]->type, n_embd_head_k),
|
| 4692 |
-
ggml_row_size(kv.k_l[il]->type, n_embd_k_gqa),
|
| 4693 |
-
0),
|
| 4694 |
-
K_shift, n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 4695 |
-
ext_factor, attn_factor, beta_fast, beta_slow);
|
| 4696 |
-
cb(tmp, "K_shifted", il);
|
| 4697 |
-
ggml_build_forward_expand(graph, tmp);
|
| 4698 |
-
}
|
| 4699 |
-
}
|
| 4700 |
-
|
| 4701 |
static void llm_build_kv_store(
|
| 4702 |
struct ggml_context * ctx,
|
| 4703 |
const llama_hparams & hparams,
|
|
@@ -4899,8 +4902,8 @@ static struct ggml_tensor * llm_build_kqv(
|
|
| 4899 |
ggml_mul_mat_set_prec(kq, GGML_PREC_F32);
|
| 4900 |
}
|
| 4901 |
|
| 4902 |
-
#if defined(GGML_USE_VULKAN) || defined(GGML_USE_KOMPUTE)
|
| 4903 |
-
#pragma message("TODO: ALiBi support in ggml_soft_max_ext is not implemented for Vulkan,
|
| 4904 |
#pragma message(" Falling back to ggml_alibi(). Will become an error in Mar 2024")
|
| 4905 |
#pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5488")
|
| 4906 |
if (hparams.f_max_alibi_bias > 0.0f) {
|
|
@@ -5001,6 +5004,7 @@ struct llm_build_context {
|
|
| 5001 |
|
| 5002 |
const int64_t n_embd;
|
| 5003 |
const int64_t n_layer;
|
|
|
|
| 5004 |
const int64_t n_ctx; // user-specified context size (can be different from n_ctx_train)
|
| 5005 |
const int64_t n_head;
|
| 5006 |
const int64_t n_head_kv;
|
|
@@ -5025,8 +5029,8 @@ struct llm_build_context {
|
|
| 5025 |
const int32_t kv_head; // index of where we store new KV data in the cache
|
| 5026 |
const int32_t n_orig_ctx;
|
| 5027 |
|
| 5028 |
-
const
|
| 5029 |
-
const
|
| 5030 |
|
| 5031 |
const llm_build_cb & cb;
|
| 5032 |
|
|
@@ -5048,6 +5052,7 @@ struct llm_build_context {
|
|
| 5048 |
kv_self (lctx.kv_self),
|
| 5049 |
n_embd (hparams.n_embd),
|
| 5050 |
n_layer (hparams.n_layer),
|
|
|
|
| 5051 |
n_ctx (cparams.n_ctx),
|
| 5052 |
n_head (hparams.n_head),
|
| 5053 |
n_head_kv (hparams.n_head_kv),
|
|
@@ -5069,8 +5074,8 @@ struct llm_build_context {
|
|
| 5069 |
n_kv (worst_case ? n_ctx : kv_self.n),
|
| 5070 |
kv_head (worst_case ? n_ctx - n_tokens : kv_self.head),
|
| 5071 |
n_orig_ctx (cparams.n_yarn_orig_ctx),
|
| 5072 |
-
|
| 5073 |
-
|
| 5074 |
cb (cb),
|
| 5075 |
buf_compute_meta (lctx.buf_compute_meta) {
|
| 5076 |
// all initializations should be done in init()
|
|
@@ -5093,6 +5098,76 @@ struct llm_build_context {
|
|
| 5093 |
}
|
| 5094 |
}
|
| 5095 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5096 |
struct ggml_cgraph * build_llama() {
|
| 5097 |
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
|
| 5098 |
|
|
@@ -5114,11 +5189,6 @@ struct llm_build_context {
|
|
| 5114 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 5115 |
cb(KQ_mask, "KQ_mask", -1);
|
| 5116 |
|
| 5117 |
-
// shift the entire K-cache if needed
|
| 5118 |
-
if (do_rope_shift) {
|
| 5119 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
|
| 5120 |
-
}
|
| 5121 |
-
|
| 5122 |
for (int il = 0; il < n_layer; ++il) {
|
| 5123 |
struct ggml_tensor * inpSA = inpL;
|
| 5124 |
|
|
@@ -5154,14 +5224,14 @@ struct llm_build_context {
|
|
| 5154 |
|
| 5155 |
Qcur = ggml_rope_custom(
|
| 5156 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 5157 |
-
|
| 5158 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 5159 |
);
|
| 5160 |
cb(Qcur, "Qcur", il);
|
| 5161 |
|
| 5162 |
Kcur = ggml_rope_custom(
|
| 5163 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 5164 |
-
|
| 5165 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 5166 |
);
|
| 5167 |
cb(Kcur, "Kcur", il);
|
|
@@ -5302,11 +5372,6 @@ struct llm_build_context {
|
|
| 5302 |
struct ggml_tensor * KQ_pos = ggml_view_1d(ctx0, lctx.inp_KQ_pos, n_kv, 0);
|
| 5303 |
cb(KQ_pos, "KQ_pos", -1);
|
| 5304 |
|
| 5305 |
-
// shift the entire K-cache if needed
|
| 5306 |
-
if (do_rope_shift) {
|
| 5307 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
|
| 5308 |
-
}
|
| 5309 |
-
|
| 5310 |
for (int il = 0; il < n_layer; ++il) {
|
| 5311 |
struct ggml_tensor * inpSA = inpL;
|
| 5312 |
|
|
@@ -5330,12 +5395,12 @@ struct llm_build_context {
|
|
| 5330 |
case MODEL_7B:
|
| 5331 |
Qcur = ggml_rope_custom(
|
| 5332 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 5333 |
-
|
| 5334 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 5335 |
);
|
| 5336 |
Kcur = ggml_rope_custom(
|
| 5337 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 5338 |
-
|
| 5339 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 5340 |
);
|
| 5341 |
break;
|
|
@@ -5420,11 +5485,6 @@ struct llm_build_context {
|
|
| 5420 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 5421 |
cb(KQ_mask, "KQ_mask", -1);
|
| 5422 |
|
| 5423 |
-
// shift the entire K-cache if needed
|
| 5424 |
-
if (do_rope_shift) {
|
| 5425 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE_NEOX, n_ctx, freq_base, freq_scale, cb);
|
| 5426 |
-
}
|
| 5427 |
-
|
| 5428 |
for (int il = 0; il < n_layer; ++il) {
|
| 5429 |
struct ggml_tensor * attn_norm;
|
| 5430 |
|
|
@@ -5463,13 +5523,13 @@ struct llm_build_context {
|
|
| 5463 |
|
| 5464 |
// using mode = 2 for neox mode
|
| 5465 |
Qcur = ggml_rope_custom(
|
| 5466 |
-
ctx0, Qcur, inp_pos,
|
| 5467 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 5468 |
);
|
| 5469 |
cb(Qcur, "Qcur", il);
|
| 5470 |
|
| 5471 |
Kcur = ggml_rope_custom(
|
| 5472 |
-
ctx0, Kcur, inp_pos,
|
| 5473 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 5474 |
);
|
| 5475 |
cb(Kcur, "Kcur", il);
|
|
@@ -5639,10 +5699,6 @@ struct llm_build_context {
|
|
| 5639 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 5640 |
cb(KQ_mask, "KQ_mask", -1);
|
| 5641 |
|
| 5642 |
-
if (do_rope_shift) {
|
| 5643 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE_NEOX, n_ctx, freq_base, freq_scale, cb);
|
| 5644 |
-
}
|
| 5645 |
-
|
| 5646 |
for (int il = 0; il < n_layer; ++il) {
|
| 5647 |
struct ggml_tensor * residual = inpL;
|
| 5648 |
|
|
@@ -5700,7 +5756,7 @@ struct llm_build_context {
|
|
| 5700 |
|
| 5701 |
// RoPE the first n_rot of q/k, pass the other half, and concat.
|
| 5702 |
struct ggml_tensor * qrot = ggml_view_3d(
|
| 5703 |
-
ctx0, tmpq,
|
| 5704 |
ggml_element_size(tmpq) * n_embd_head,
|
| 5705 |
ggml_element_size(tmpq) * n_embd_head * n_head,
|
| 5706 |
0
|
|
@@ -5708,7 +5764,7 @@ struct llm_build_context {
|
|
| 5708 |
cb(qrot, "qrot", il);
|
| 5709 |
|
| 5710 |
struct ggml_tensor * krot = ggml_view_3d(
|
| 5711 |
-
ctx0, tmpk,
|
| 5712 |
ggml_element_size(tmpk) * n_embd_head,
|
| 5713 |
ggml_element_size(tmpk) * n_embd_head * n_head,
|
| 5714 |
0
|
|
@@ -5717,29 +5773,29 @@ struct llm_build_context {
|
|
| 5717 |
|
| 5718 |
// get the second half of tmpq, e.g tmpq[n_rot:, :, :]
|
| 5719 |
struct ggml_tensor * qpass = ggml_view_3d(
|
| 5720 |
-
ctx0, tmpq,
|
| 5721 |
ggml_element_size(tmpq) * n_embd_head,
|
| 5722 |
ggml_element_size(tmpq) * n_embd_head * n_head,
|
| 5723 |
-
ggml_element_size(tmpq) *
|
| 5724 |
);
|
| 5725 |
cb(qpass, "qpass", il);
|
| 5726 |
|
| 5727 |
struct ggml_tensor * kpass = ggml_view_3d(
|
| 5728 |
-
ctx0, tmpk,
|
| 5729 |
ggml_element_size(tmpk) * n_embd_head,
|
| 5730 |
ggml_element_size(tmpk) * n_embd_head * n_head,
|
| 5731 |
-
ggml_element_size(tmpk) *
|
| 5732 |
);
|
| 5733 |
cb(kpass, "kpass", il);
|
| 5734 |
|
| 5735 |
struct ggml_tensor * qrotated = ggml_rope_custom(
|
| 5736 |
-
ctx0, qrot, inp_pos,
|
| 5737 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 5738 |
);
|
| 5739 |
cb(qrotated, "qrotated", il);
|
| 5740 |
|
| 5741 |
struct ggml_tensor * krotated = ggml_rope_custom(
|
| 5742 |
-
ctx0, krot, inp_pos,
|
| 5743 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 5744 |
);
|
| 5745 |
cb(krotated, "krotated", il);
|
|
@@ -5991,14 +6047,14 @@ struct llm_build_context {
|
|
| 5991 |
|
| 5992 |
Qcur = ggml_rope_custom(
|
| 5993 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 5994 |
-
|
| 5995 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 5996 |
);
|
| 5997 |
cb(Qcur, "Qcur", il);
|
| 5998 |
|
| 5999 |
Kcur = ggml_rope_custom(
|
| 6000 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 6001 |
-
|
| 6002 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 6003 |
);
|
| 6004 |
cb(Kcur, "Kcur", il);
|
|
@@ -6287,11 +6343,6 @@ struct llm_build_context {
|
|
| 6287 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 6288 |
cb(KQ_mask, "KQ_mask", -1);
|
| 6289 |
|
| 6290 |
-
// shift the entire K-cache if needed
|
| 6291 |
-
if (do_rope_shift) {
|
| 6292 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE_NEOX, n_ctx, freq_base, freq_scale, cb);
|
| 6293 |
-
}
|
| 6294 |
-
|
| 6295 |
for (int il = 0; il < n_layer; ++il) {
|
| 6296 |
struct ggml_tensor * inpSA = inpL;
|
| 6297 |
|
|
@@ -6328,14 +6379,14 @@ struct llm_build_context {
|
|
| 6328 |
|
| 6329 |
Qcur = ggml_rope_custom(
|
| 6330 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 6331 |
-
|
| 6332 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 6333 |
);
|
| 6334 |
cb(Qcur, "Qcur", il);
|
| 6335 |
|
| 6336 |
Kcur = ggml_rope_custom(
|
| 6337 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 6338 |
-
|
| 6339 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 6340 |
);
|
| 6341 |
cb(Kcur, "Kcur", il);
|
|
@@ -6410,11 +6461,6 @@ struct llm_build_context {
|
|
| 6410 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 6411 |
cb(KQ_mask, "KQ_mask", -1);
|
| 6412 |
|
| 6413 |
-
// shift the entire K-cache if needed
|
| 6414 |
-
if (do_rope_shift) {
|
| 6415 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE_NEOX, n_ctx, freq_base, freq_scale, cb);
|
| 6416 |
-
}
|
| 6417 |
-
|
| 6418 |
for (int il = 0; il < n_layer; ++il) {
|
| 6419 |
struct ggml_tensor * inpSA = inpL;
|
| 6420 |
|
|
@@ -6444,13 +6490,13 @@ struct llm_build_context {
|
|
| 6444 |
|
| 6445 |
// using mode = 2 for neox mode
|
| 6446 |
Qcur = ggml_rope_custom(
|
| 6447 |
-
ctx0, Qcur, inp_pos,
|
| 6448 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 6449 |
);
|
| 6450 |
cb(Qcur, "Qcur", il);
|
| 6451 |
|
| 6452 |
Kcur = ggml_rope_custom(
|
| 6453 |
-
ctx0, Kcur, inp_pos,
|
| 6454 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 6455 |
);
|
| 6456 |
cb(Kcur, "Kcur", il);
|
|
@@ -6524,11 +6570,6 @@ struct llm_build_context {
|
|
| 6524 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 6525 |
cb(KQ_mask, "KQ_mask", -1);
|
| 6526 |
|
| 6527 |
-
// shift the entire K-cache if needed
|
| 6528 |
-
if (do_rope_shift) {
|
| 6529 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE_NEOX, n_ctx, freq_base, freq_scale, cb);
|
| 6530 |
-
}
|
| 6531 |
-
|
| 6532 |
for (int il = 0; il < n_layer; ++il) {
|
| 6533 |
struct ggml_tensor * inpSA = inpL;
|
| 6534 |
|
|
@@ -6564,14 +6605,14 @@ struct llm_build_context {
|
|
| 6564 |
|
| 6565 |
Qcur = ggml_rope_custom(
|
| 6566 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 6567 |
-
|
| 6568 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 6569 |
);
|
| 6570 |
cb(Qcur, "Qcur", il);
|
| 6571 |
|
| 6572 |
Kcur = ggml_rope_custom(
|
| 6573 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 6574 |
-
|
| 6575 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 6576 |
);
|
| 6577 |
cb(Kcur, "Kcur", il);
|
|
@@ -6645,11 +6686,6 @@ struct llm_build_context {
|
|
| 6645 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 6646 |
cb(KQ_mask, "KQ_mask", -1);
|
| 6647 |
|
| 6648 |
-
// shift the entire K-cache if needed
|
| 6649 |
-
if (do_rope_shift) {
|
| 6650 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE_NEOX, n_ctx, freq_base, freq_scale, cb);
|
| 6651 |
-
}
|
| 6652 |
-
|
| 6653 |
for (int il = 0; il < n_layer; ++il) {
|
| 6654 |
attn_norm_output = llm_build_norm(ctx0, inpL, hparams,
|
| 6655 |
model.layers[il].attn_norm,
|
|
@@ -6687,7 +6723,7 @@ struct llm_build_context {
|
|
| 6687 |
Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
|
| 6688 |
|
| 6689 |
Qcur = ggml_rope_custom(
|
| 6690 |
-
ctx0, Qcur, inp_pos,
|
| 6691 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 6692 |
);
|
| 6693 |
cb(Qcur, "Qcur", il);
|
|
@@ -6698,7 +6734,7 @@ struct llm_build_context {
|
|
| 6698 |
cb(Qcur, "Qcur", il);
|
| 6699 |
|
| 6700 |
Kcur = ggml_rope_custom(
|
| 6701 |
-
ctx0, Kcur, inp_pos,
|
| 6702 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 6703 |
);
|
| 6704 |
cb(Kcur, "Kcur", il);
|
|
@@ -6767,11 +6803,6 @@ struct llm_build_context {
|
|
| 6767 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 6768 |
cb(KQ_mask, "KQ_mask", -1);
|
| 6769 |
|
| 6770 |
-
// shift the entire K-cache if needed
|
| 6771 |
-
if (do_rope_shift) {
|
| 6772 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
|
| 6773 |
-
}
|
| 6774 |
-
|
| 6775 |
for (int il = 0; il < n_layer; ++il) {
|
| 6776 |
|
| 6777 |
// norm
|
|
@@ -6795,14 +6826,14 @@ struct llm_build_context {
|
|
| 6795 |
cb(Vcur, "Vcur", il);
|
| 6796 |
|
| 6797 |
Qcur = ggml_rope_custom(
|
| 6798 |
-
ctx0, ggml_reshape_3d(ctx0, Qcur,
|
| 6799 |
-
n_embd_head,
|
| 6800 |
ext_factor, attn_factor, beta_fast, beta_slow);
|
| 6801 |
cb(Qcur, "Qcur", il);
|
| 6802 |
|
| 6803 |
Kcur = ggml_rope_custom(
|
| 6804 |
-
ctx0, ggml_reshape_3d(ctx0, Kcur,
|
| 6805 |
-
n_embd_head,
|
| 6806 |
ext_factor, attn_factor, beta_fast, beta_slow);
|
| 6807 |
cb(Kcur, "Kcur", il);
|
| 6808 |
|
|
@@ -6972,11 +7003,6 @@ struct llm_build_context {
|
|
| 6972 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 6973 |
cb(KQ_mask, "KQ_mask", -1);
|
| 6974 |
|
| 6975 |
-
// shift the entire K-cache if needed
|
| 6976 |
-
if (do_rope_shift) {
|
| 6977 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
|
| 6978 |
-
}
|
| 6979 |
-
|
| 6980 |
for (int il = 0; il < n_layer; ++il) {
|
| 6981 |
cur = llm_build_norm(ctx0, inpL, hparams,
|
| 6982 |
model.layers[il].attn_norm,
|
|
@@ -7002,14 +7028,14 @@ struct llm_build_context {
|
|
| 7002 |
|
| 7003 |
struct ggml_tensor * Qcur = ggml_rope_custom(
|
| 7004 |
ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, n_tokens), inp_pos,
|
| 7005 |
-
|
| 7006 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7007 |
);
|
| 7008 |
cb(Qcur, "Qcur", il);
|
| 7009 |
|
| 7010 |
struct ggml_tensor * Kcur = ggml_rope_custom(
|
| 7011 |
ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 7012 |
-
|
| 7013 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7014 |
);
|
| 7015 |
cb(Kcur, "Kcur", il);
|
|
@@ -7080,11 +7106,6 @@ struct llm_build_context {
|
|
| 7080 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 7081 |
cb(KQ_mask, "KQ_mask", -1);
|
| 7082 |
|
| 7083 |
-
// shift the entire K-cache if needed
|
| 7084 |
-
if (do_rope_shift) {
|
| 7085 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
|
| 7086 |
-
}
|
| 7087 |
-
|
| 7088 |
for (int il = 0; il < n_layer; ++il) {
|
| 7089 |
struct ggml_tensor * inpSA = inpL;
|
| 7090 |
|
|
@@ -7120,14 +7141,14 @@ struct llm_build_context {
|
|
| 7120 |
|
| 7121 |
Qcur = ggml_rope_custom(
|
| 7122 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 7123 |
-
|
| 7124 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7125 |
);
|
| 7126 |
cb(Qcur, "Qcur", il);
|
| 7127 |
|
| 7128 |
Kcur = ggml_rope_custom(
|
| 7129 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 7130 |
-
|
| 7131 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7132 |
);
|
| 7133 |
cb(Kcur, "Kcur", il);
|
|
@@ -7199,11 +7220,6 @@ struct llm_build_context {
|
|
| 7199 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 7200 |
cb(KQ_mask, "KQ_mask", -1);
|
| 7201 |
|
| 7202 |
-
// shift the entire K-cache if needed
|
| 7203 |
-
if (do_rope_shift) {
|
| 7204 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
|
| 7205 |
-
}
|
| 7206 |
-
|
| 7207 |
for (int il = 0; il < n_layer; ++il) {
|
| 7208 |
struct ggml_tensor * inpSA = inpL;
|
| 7209 |
|
|
@@ -7239,14 +7255,14 @@ struct llm_build_context {
|
|
| 7239 |
|
| 7240 |
Qcur = ggml_rope_custom(
|
| 7241 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 7242 |
-
|
| 7243 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7244 |
);
|
| 7245 |
cb(Qcur, "Qcur", il);
|
| 7246 |
|
| 7247 |
Kcur = ggml_rope_custom(
|
| 7248 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 7249 |
-
|
| 7250 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7251 |
);
|
| 7252 |
cb(Kcur, "Kcur", il);
|
|
@@ -7331,11 +7347,6 @@ struct llm_build_context {
|
|
| 7331 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 7332 |
cb(KQ_mask, "KQ_mask", -1);
|
| 7333 |
|
| 7334 |
-
// shift the entire K-cache if needed
|
| 7335 |
-
if (do_rope_shift) {
|
| 7336 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
|
| 7337 |
-
}
|
| 7338 |
-
|
| 7339 |
for (int il = 0; il < n_layer; ++il) {
|
| 7340 |
struct ggml_tensor * inpSA = inpL;
|
| 7341 |
|
|
@@ -7371,14 +7382,14 @@ struct llm_build_context {
|
|
| 7371 |
|
| 7372 |
Qcur = ggml_rope_custom(
|
| 7373 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 7374 |
-
|
| 7375 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7376 |
);
|
| 7377 |
cb(Qcur, "Qcur", il);
|
| 7378 |
|
| 7379 |
Kcur = ggml_rope_custom(
|
| 7380 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 7381 |
-
|
| 7382 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7383 |
);
|
| 7384 |
cb(Kcur, "Kcur", il);
|
|
@@ -7467,11 +7478,6 @@ struct llm_build_context {
|
|
| 7467 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 7468 |
cb(KQ_mask, "KQ_mask", -1);
|
| 7469 |
|
| 7470 |
-
// shift the entire K-cache if needed
|
| 7471 |
-
if (do_rope_shift) {
|
| 7472 |
-
llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb);
|
| 7473 |
-
}
|
| 7474 |
-
|
| 7475 |
for (int il = 0; il < n_layer; ++il) {
|
| 7476 |
|
| 7477 |
// norm
|
|
@@ -7494,7 +7500,7 @@ struct llm_build_context {
|
|
| 7494 |
|
| 7495 |
Qcur = ggml_rope_custom(
|
| 7496 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head_k, n_head, n_tokens), inp_pos,
|
| 7497 |
-
n_embd_head_k,
|
| 7498 |
ext_factor, attn_factor, beta_fast, beta_slow);
|
| 7499 |
cb(Qcur, "Qcur", il);
|
| 7500 |
|
|
@@ -7503,7 +7509,7 @@ struct llm_build_context {
|
|
| 7503 |
|
| 7504 |
Kcur = ggml_rope_custom(
|
| 7505 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head_k, n_head_kv, n_tokens), inp_pos,
|
| 7506 |
-
n_embd_head_k,
|
| 7507 |
ext_factor, attn_factor, beta_fast, beta_slow);
|
| 7508 |
cb(Kcur, "Kcur", il);
|
| 7509 |
|
|
@@ -7556,6 +7562,40 @@ struct llm_build_context {
|
|
| 7556 |
}
|
| 7557 |
};
|
| 7558 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7559 |
static struct ggml_cgraph * llama_build_graph(
|
| 7560 |
llama_context & lctx,
|
| 7561 |
const llama_batch & batch,
|
|
@@ -7675,6 +7715,20 @@ static struct ggml_cgraph * llama_build_graph(
|
|
| 7675 |
return result;
|
| 7676 |
}
|
| 7677 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7678 |
static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
|
| 7679 |
//
|
| 7680 |
// set input data
|
|
@@ -7742,18 +7796,6 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
|
|
| 7742 |
}
|
| 7743 |
}
|
| 7744 |
|
| 7745 |
-
if (kv_self.has_shift) {
|
| 7746 |
-
const int64_t n_ctx = cparams.n_ctx;
|
| 7747 |
-
|
| 7748 |
-
assert(ggml_backend_buffer_is_host(lctx.inp_K_shift->buffer));
|
| 7749 |
-
|
| 7750 |
-
int32_t * data = (int32_t *) lctx.inp_K_shift->data;
|
| 7751 |
-
|
| 7752 |
-
for (int i = 0; i < n_ctx; ++i) {
|
| 7753 |
-
data[i] = lctx.kv_self.cells[i].delta;
|
| 7754 |
-
}
|
| 7755 |
-
}
|
| 7756 |
-
|
| 7757 |
if (cparams.do_pooling && hparams.pooling_type == LLAMA_POOLING_TYPE_MEAN) {
|
| 7758 |
const int64_t n_tokens = batch.n_tokens;
|
| 7759 |
|
|
@@ -7798,6 +7840,34 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
|
|
| 7798 |
}
|
| 7799 |
}
|
| 7800 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7801 |
// decode a batch of tokens by evaluating the transformer
|
| 7802 |
//
|
| 7803 |
// - lctx: llama context
|
|
@@ -7875,6 +7945,8 @@ static int llama_decode_internal(
|
|
| 7875 |
batch.seq_id = seq_id_arr.data();
|
| 7876 |
}
|
| 7877 |
|
|
|
|
|
|
|
| 7878 |
// if we have enough unused cells before the current head ->
|
| 7879 |
// better to start searching from the beginning of the cache, hoping to fill it
|
| 7880 |
if (kv_self.head > kv_self.used + 2*n_tokens) {
|
|
@@ -7899,8 +7971,9 @@ static int llama_decode_internal(
|
|
| 7899 |
ggml_cgraph * gf = llama_build_graph(lctx, batch, false);
|
| 7900 |
|
| 7901 |
// the output is always the last tensor in the graph
|
| 7902 |
-
struct ggml_tensor * res
|
| 7903 |
struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 2];
|
|
|
|
| 7904 |
if (strcmp(res->name, "result_output") == 0) {
|
| 7905 |
// the embeddings could be the second to last tensor, or the third to last tensor
|
| 7906 |
if (strcmp(embeddings->name, "result_norm") != 0) {
|
|
@@ -7927,40 +8000,12 @@ static int llama_decode_internal(
|
|
| 7927 |
n_threads = std::min(4, n_threads);
|
| 7928 |
}
|
| 7929 |
|
| 7930 |
-
#ifdef GGML_USE_MPI
|
| 7931 |
-
const int64_t n_layer = hparams.n_layer;
|
| 7932 |
-
ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
|
| 7933 |
-
#endif
|
| 7934 |
-
|
| 7935 |
-
#ifdef GGML_USE_METAL
|
| 7936 |
-
if (ggml_backend_is_metal(lctx.backend_metal)) {
|
| 7937 |
-
ggml_backend_metal_set_n_cb(lctx.backend_metal, n_threads);
|
| 7938 |
-
}
|
| 7939 |
-
#endif
|
| 7940 |
-
|
| 7941 |
-
if (lctx.backend_cpu != nullptr) {
|
| 7942 |
-
ggml_backend_cpu_set_n_threads(lctx.backend_cpu, n_threads);
|
| 7943 |
-
}
|
| 7944 |
-
|
| 7945 |
llama_set_inputs(lctx, batch);
|
| 7946 |
|
| 7947 |
-
|
| 7948 |
-
|
| 7949 |
-
// fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched));
|
| 7950 |
-
|
| 7951 |
-
#ifdef GGML_USE_MPI
|
| 7952 |
-
ggml_mpi_graph_compute_post(lctx.ctx_mpi, gf, n_layer);
|
| 7953 |
-
#endif
|
| 7954 |
|
| 7955 |
// update the kv ring buffer
|
| 7956 |
{
|
| 7957 |
-
if (kv_self.has_shift) {
|
| 7958 |
-
kv_self.has_shift = false;
|
| 7959 |
-
for (uint32_t i = 0; i < kv_self.size; ++i) {
|
| 7960 |
-
kv_self.cells[i].delta = 0;
|
| 7961 |
-
}
|
| 7962 |
-
}
|
| 7963 |
-
|
| 7964 |
kv_self.head += n_tokens;
|
| 7965 |
|
| 7966 |
// Ensure kv cache head points to a valid index.
|
|
@@ -7969,6 +8014,18 @@ static int llama_decode_internal(
|
|
| 7969 |
}
|
| 7970 |
}
|
| 7971 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7972 |
#ifdef GGML_PERF
|
| 7973 |
// print timing information per ggml operation (for debugging purposes)
|
| 7974 |
// requires GGML_PERF to be defined
|
|
@@ -8056,6 +8113,245 @@ static int llama_decode_internal(
|
|
| 8056 |
return 0;
|
| 8057 |
}
|
| 8058 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8059 |
//
|
| 8060 |
// tokenizer
|
| 8061 |
//
|
|
@@ -8647,37 +8943,46 @@ struct llm_tokenizer_wpm {
|
|
| 8647 |
}
|
| 8648 |
|
| 8649 |
std::vector<std::string> preprocess(const std::string & text) {
|
| 8650 |
-
|
| 8651 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8652 |
|
| 8653 |
-
//
|
| 8654 |
-
//
|
| 8655 |
-
std::vector<std::string> words;
|
| 8656 |
std::string new_str = "";
|
| 8657 |
-
|
| 8658 |
-
|
| 8659 |
-
|
| 8660 |
-
|
| 8661 |
-
|
| 8662 |
-
|
| 8663 |
-
|
| 8664 |
-
|
| 8665 |
}
|
| 8666 |
-
|
|
|
|
| 8667 |
new_str += " ";
|
| 8668 |
-
new_str +=
|
| 8669 |
new_str += " ";
|
| 8670 |
-
|
| 8671 |
-
|
| 8672 |
-
else {
|
| 8673 |
-
new_str += ori_str[i];
|
| 8674 |
-
i += 1;
|
| 8675 |
}
|
| 8676 |
}
|
| 8677 |
|
| 8678 |
// split by whitespace
|
| 8679 |
uint64_t l = 0;
|
| 8680 |
uint64_t r = 0;
|
|
|
|
| 8681 |
while (r < new_str.size()) {
|
| 8682 |
// if is whitespace
|
| 8683 |
if (isspace(new_str[r])) {
|
|
@@ -8695,47 +9000,20 @@ struct llm_tokenizer_wpm {
|
|
| 8695 |
return words;
|
| 8696 |
}
|
| 8697 |
|
| 8698 |
-
|
| 8699 |
-
|
| 8700 |
-
|
| 8701 |
-
|
| 8702 |
-
char c = text2[i];
|
| 8703 |
-
if (c >= 'A' && c <= 'Z') {
|
| 8704 |
-
text2[i] = c - 'A' + 'a';
|
| 8705 |
-
}
|
| 8706 |
}
|
| 8707 |
-
|
|
|
|
| 8708 |
}
|
| 8709 |
|
| 8710 |
-
bool
|
| 8711 |
-
|
| 8712 |
-
|
| 8713 |
-
|
| 8714 |
-
|
| 8715 |
-
unsigned char ch = static_cast<unsigned char>(str[i]);
|
| 8716 |
-
if (ch <= 0x7f) {
|
| 8717 |
-
codepoint = ch;
|
| 8718 |
-
num_bytes = 1;
|
| 8719 |
-
} else if ((ch >> 5) == 0x06) {
|
| 8720 |
-
codepoint = ch & 0x1f;
|
| 8721 |
-
num_bytes = 2;
|
| 8722 |
-
} else if ((ch >> 4) == 0x0e) {
|
| 8723 |
-
codepoint = ch & 0x0f;
|
| 8724 |
-
num_bytes = 3;
|
| 8725 |
-
} else if ((ch >> 3) == 0x1e) {
|
| 8726 |
-
codepoint = ch & 0x07;
|
| 8727 |
-
num_bytes = 4;
|
| 8728 |
-
}
|
| 8729 |
-
for (int j = 1; j < num_bytes; ++j) {
|
| 8730 |
-
if (i + j >= len) {
|
| 8731 |
-
return false; // incomplete UTF-8 character
|
| 8732 |
-
}
|
| 8733 |
-
unsigned char next_ch = static_cast<unsigned char>(str[i + j]);
|
| 8734 |
-
if ((next_ch >> 6) != 0x02) {
|
| 8735 |
-
return false; // invalid trailing byte
|
| 8736 |
-
}
|
| 8737 |
-
codepoint = (codepoint << 6) | (next_ch & 0x3f);
|
| 8738 |
-
}
|
| 8739 |
if ((codepoint >= 0x4E00 && codepoint <= 0x9FFF) ||
|
| 8740 |
(codepoint >= 0x3400 && codepoint <= 0x4DBF) ||
|
| 8741 |
(codepoint >= 0x20000 && codepoint <= 0x2A6DF) ||
|
|
@@ -8751,41 +9029,6 @@ struct llm_tokenizer_wpm {
|
|
| 8751 |
return false;
|
| 8752 |
}
|
| 8753 |
|
| 8754 |
-
std::string strip_accents(const std::string & input_string) {
|
| 8755 |
-
std::string resultString;
|
| 8756 |
-
std::map<std::string, char> accent_map = {
|
| 8757 |
-
{"À", 'A'}, {"Á", 'A'}, {"Â", 'A'}, {"Ã", 'A'}, {"Ä", 'A'}, {"Å", 'A'},
|
| 8758 |
-
{"à", 'a'}, {"á", 'a'}, {"â", 'a'}, {"ã", 'a'}, {"ä", 'a'}, {"å", 'a'},
|
| 8759 |
-
{"È", 'E'}, {"É", 'E'}, {"Ê", 'E'}, {"Ë", 'E'}, {"è", 'e'}, {"é", 'e'},
|
| 8760 |
-
{"ê", 'e'}, {"ë", 'e'}, {"Ì", 'I'}, {"Í", 'I'}, {"Î", 'I'}, {"Ï", 'I'},
|
| 8761 |
-
{"ì", 'i'}, {"í", 'i'}, {"î", 'i'}, {"ï", 'i'}, {"Ò", 'O'}, {"Ó", 'O'},
|
| 8762 |
-
{"Ô", 'O'}, {"Õ", 'O'}, {"Ö", 'O'}, {"ò", 'o'}, {"ó", 'o'}, {"ô", 'o'},
|
| 8763 |
-
{"õ", 'o'}, {"ö", 'o'}, {"Ù", 'U'}, {"Ú", 'U'}, {"Û", 'U'}, {"Ü", 'U'},
|
| 8764 |
-
{"ù", 'u'}, {"ú", 'u'}, {"û", 'u'}, {"ü", 'u'}, {"Ý", 'Y'}, {"ý", 'y'},
|
| 8765 |
-
{"Ç", 'C'}, {"ç", 'c'}, {"Ñ", 'N'}, {"ñ", 'n'},
|
| 8766 |
-
};
|
| 8767 |
-
|
| 8768 |
-
for (size_t i = 0; i < input_string.length();) {
|
| 8769 |
-
int len = utf8_len(input_string[i]);
|
| 8770 |
-
std::string curChar = input_string.substr(i, len);
|
| 8771 |
-
auto iter = accent_map.find(curChar);
|
| 8772 |
-
if (iter != accent_map.end()) {
|
| 8773 |
-
resultString += iter->second;
|
| 8774 |
-
} else {
|
| 8775 |
-
resultString += curChar;
|
| 8776 |
-
}
|
| 8777 |
-
i += len;
|
| 8778 |
-
}
|
| 8779 |
-
|
| 8780 |
-
return resultString;
|
| 8781 |
-
}
|
| 8782 |
-
|
| 8783 |
-
static size_t utf8_len(char src) {
|
| 8784 |
-
const size_t lookup[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4};
|
| 8785 |
-
uint8_t highbits = static_cast<uint8_t>(src) >> 4;
|
| 8786 |
-
return lookup[highbits];
|
| 8787 |
-
}
|
| 8788 |
-
|
| 8789 |
const llama_vocab & vocab;
|
| 8790 |
};
|
| 8791 |
|
|
@@ -10511,31 +10754,47 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|
| 10511 |
if (arch == LLM_ARCH_FALCON || nx % QK_K != 0) {
|
| 10512 |
new_type = GGML_TYPE_Q8_0;
|
| 10513 |
}
|
| 10514 |
-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype ==
|
|
|
|
| 10515 |
new_type = GGML_TYPE_Q5_K;
|
| 10516 |
}
|
| 10517 |
else if (new_type != GGML_TYPE_Q8_0) {
|
| 10518 |
new_type = GGML_TYPE_Q6_K;
|
| 10519 |
}
|
| 10520 |
} else if (name == "token_embd.weight") {
|
| 10521 |
-
if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS ||
|
|
|
|
| 10522 |
new_type = GGML_TYPE_Q2_K;
|
| 10523 |
}
|
|
|
|
|
|
|
|
|
|
| 10524 |
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
|
| 10525 |
-
new_type =
|
| 10526 |
}
|
| 10527 |
-
} else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S
|
|
|
|
| 10528 |
if (name.find("attn_v.weight") != std::string::npos) {
|
| 10529 |
if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q4_K;
|
| 10530 |
-
else new_type = GGML_TYPE_Q2_K;
|
| 10531 |
++qs.i_attention_wv;
|
| 10532 |
}
|
|
|
|
|
|
|
|
|
|
| 10533 |
else if (name.find("ffn_down") != std::string::npos) {
|
| 10534 |
-
if (qs.i_ffn_down < qs.n_ffn_down/8)
|
|
|
|
|
|
|
| 10535 |
++qs.i_ffn_down;
|
| 10536 |
}
|
| 10537 |
else if (name.find("attn_output.weight") != std::string::npos) {
|
| 10538 |
-
if (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10539 |
}
|
| 10540 |
} else if (name.find("attn_v.weight") != std::string::npos) {
|
| 10541 |
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) {
|
|
@@ -10545,7 +10804,13 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|
| 10545 |
new_type = GGML_TYPE_Q4_K;
|
| 10546 |
}
|
| 10547 |
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
|
| 10548 |
-
new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ?
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10549 |
}
|
| 10550 |
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) {
|
| 10551 |
new_type = GGML_TYPE_Q4_K;
|
|
@@ -10557,7 +10822,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|
| 10557 |
new_type = qs.i_attention_wv < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
|
| 10558 |
}
|
| 10559 |
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
|
| 10560 |
-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL && qs.model.hparams.n_gqa() >= 4) {
|
| 10561 |
new_type = GGML_TYPE_Q5_K;
|
| 10562 |
}
|
| 10563 |
else if ((ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) &&
|
|
@@ -10583,13 +10848,19 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|
| 10583 |
// TODO: explore better strategies
|
| 10584 |
new_type = GGML_TYPE_Q8_0;
|
| 10585 |
}
|
| 10586 |
-
else if (ftype ==
|
| 10587 |
new_type = GGML_TYPE_IQ3_XXS;
|
| 10588 |
}
|
|
|
|
|
|
|
|
|
|
| 10589 |
} else if (name.find("attn_q.weight") != std::string::npos) {
|
| 10590 |
-
if (ftype ==
|
| 10591 |
new_type = GGML_TYPE_IQ3_XXS;
|
| 10592 |
}
|
|
|
|
|
|
|
|
|
|
| 10593 |
} else if (name.find("ffn_down") != std::string::npos) {
|
| 10594 |
auto info = layer_info(qs.i_ffn_down, qs.n_ffn_down, name.c_str());
|
| 10595 |
int i_layer = info.first, n_layer = info.second;
|
|
@@ -10620,8 +10891,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|
| 10620 |
if (use_more_bits(i_layer, n_layer)) new_type = GGML_TYPE_Q6_K;
|
| 10621 |
}
|
| 10622 |
}
|
| 10623 |
-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL && !qs.has_imatrix) {
|
| 10624 |
-
|
| 10625 |
}
|
| 10626 |
else if (ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M && use_more_bits(i_layer, n_layer)) new_type = GGML_TYPE_Q6_K;
|
| 10627 |
else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S && arch != LLM_ARCH_FALCON && i_layer < n_layer/8) {
|
|
@@ -10638,15 +10909,15 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|
| 10638 |
} else if (name.find("attn_output.weight") != std::string::npos) {
|
| 10639 |
if (arch != LLM_ARCH_FALCON) {
|
| 10640 |
if (qs.model.hparams.n_expert == 8) {
|
| 10641 |
-
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype ==
|
| 10642 |
ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL ||
|
| 10643 |
ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S ||
|
| 10644 |
-
ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
|
| 10645 |
new_type = GGML_TYPE_Q5_K;
|
| 10646 |
}
|
| 10647 |
} else {
|
| 10648 |
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K ) new_type = GGML_TYPE_Q3_K;
|
| 10649 |
-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) new_type =
|
| 10650 |
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M ) new_type = GGML_TYPE_Q4_K;
|
| 10651 |
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L ) new_type = GGML_TYPE_Q5_K;
|
| 10652 |
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M ) new_type = GGML_TYPE_Q4_K;
|
|
@@ -10665,7 +10936,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|
| 10665 |
else if (name.find("ffn_gate") != std::string::npos) {
|
| 10666 |
auto info = layer_info(qs.i_ffn_gate, qs.n_ffn_gate, name.c_str());
|
| 10667 |
int i_layer = info.first, n_layer = info.second;
|
| 10668 |
-
if (ftype ==
|
| 10669 |
new_type = GGML_TYPE_IQ3_XXS;
|
| 10670 |
}
|
| 10671 |
++qs.i_ffn_gate;
|
|
@@ -10673,7 +10944,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|
| 10673 |
else if (name.find("ffn_up") != std::string::npos) {
|
| 10674 |
auto info = layer_info(qs.i_ffn_up, qs.n_ffn_up, name.c_str());
|
| 10675 |
int i_layer = info.first, n_layer = info.second;
|
| 10676 |
-
if (ftype ==
|
| 10677 |
new_type = GGML_TYPE_IQ3_XXS;
|
| 10678 |
}
|
| 10679 |
++qs.i_ffn_up;
|
|
@@ -10692,8 +10963,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|
| 10692 |
//}
|
| 10693 |
bool convert_incompatible_tensor = false;
|
| 10694 |
if (new_type == GGML_TYPE_Q2_K || new_type == GGML_TYPE_Q3_K || new_type == GGML_TYPE_Q4_K ||
|
| 10695 |
-
new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K ||
|
| 10696 |
-
new_type == GGML_TYPE_IQ2_XS || new_type == GGML_TYPE_IQ2_XXS ||
|
| 10697 |
new_type == GGML_TYPE_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || new_type == GGML_TYPE_IQ3_S) {
|
| 10698 |
int nx = tensor->ne[0];
|
| 10699 |
int ny = tensor->ne[1];
|
|
@@ -10708,14 +10979,16 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|
| 10708 |
switch (new_type) {
|
| 10709 |
case GGML_TYPE_IQ2_XXS:
|
| 10710 |
case GGML_TYPE_IQ2_XS:
|
|
|
|
| 10711 |
case GGML_TYPE_IQ3_XXS:
|
| 10712 |
case GGML_TYPE_IQ3_S:
|
| 10713 |
case GGML_TYPE_IQ1_S:
|
| 10714 |
case GGML_TYPE_Q2_K:
|
| 10715 |
-
case GGML_TYPE_Q3_K:
|
| 10716 |
-
case
|
| 10717 |
-
case
|
| 10718 |
-
case
|
|
|
|
| 10719 |
default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
|
| 10720 |
}
|
| 10721 |
LLAMA_LOG_WARN(" - using fallback quantization %s\n", ggml_type_name(new_type));
|
|
@@ -10741,7 +11014,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|
| 10741 |
// K-quants
|
| 10742 |
case LLAMA_FTYPE_MOSTLY_Q2_K_S:
|
| 10743 |
case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break;
|
| 10744 |
-
case
|
| 10745 |
case LLAMA_FTYPE_MOSTLY_Q3_K_S:
|
| 10746 |
case LLAMA_FTYPE_MOSTLY_Q3_K_M:
|
| 10747 |
case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break;
|
|
@@ -10752,9 +11025,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|
| 10752 |
case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break;
|
| 10753 |
case LLAMA_FTYPE_MOSTLY_IQ2_XXS: quantized_type = GGML_TYPE_IQ2_XXS; break;
|
| 10754 |
case LLAMA_FTYPE_MOSTLY_IQ2_XS: quantized_type = GGML_TYPE_IQ2_XS; break;
|
|
|
|
|
|
|
| 10755 |
case LLAMA_FTYPE_MOSTLY_IQ3_XXS: quantized_type = GGML_TYPE_IQ3_XXS; break;
|
| 10756 |
case LLAMA_FTYPE_MOSTLY_IQ1_S: quantized_type = GGML_TYPE_IQ1_S; break;
|
| 10757 |
case LLAMA_FTYPE_MOSTLY_IQ4_NL: quantized_type = GGML_TYPE_IQ4_NL; break;
|
|
|
|
| 10758 |
case LLAMA_FTYPE_MOSTLY_IQ3_S: quantized_type = GGML_TYPE_IQ3_S; break;
|
| 10759 |
case LLAMA_FTYPE_MOSTLY_IQ3_M: quantized_type = GGML_TYPE_IQ3_S; break;
|
| 10760 |
|
|
@@ -10886,7 +11162,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|
| 10886 |
quantize &= !params->only_copy;
|
| 10887 |
|
| 10888 |
// do not quantize expert gating tensors
|
| 10889 |
-
|
|
|
|
| 10890 |
|
| 10891 |
// do not quantize positional embeddings and token types (BERT)
|
| 10892 |
quantize &= name != LLM_TN(model.arch)(LLM_TENSOR_POS_EMBD, "weight");
|
|
@@ -10930,6 +11207,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|
| 10930 |
}
|
| 10931 |
if ((new_type == GGML_TYPE_IQ2_XXS ||
|
| 10932 |
new_type == GGML_TYPE_IQ2_XS ||
|
|
|
|
| 10933 |
new_type == GGML_TYPE_IQ1_S ||
|
| 10934 |
(new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
|
| 10935 |
LLAMA_LOG_ERROR("\n\n============================================================\n");
|
|
@@ -11385,6 +11663,7 @@ struct llama_context_params llama_context_default_params() {
|
|
| 11385 |
/*.yarn_beta_fast =*/ 32.0f,
|
| 11386 |
/*.yarn_beta_slow =*/ 1.0f,
|
| 11387 |
/*.yarn_orig_ctx =*/ 0,
|
|
|
|
| 11388 |
/*.cb_eval =*/ nullptr,
|
| 11389 |
/*.cb_eval_user_data =*/ nullptr,
|
| 11390 |
/*.type_k =*/ GGML_TYPE_F16,
|
|
@@ -11549,6 +11828,7 @@ struct llama_context * llama_new_context_with_model(
|
|
| 11549 |
cparams.yarn_attn_factor = params.yarn_attn_factor;
|
| 11550 |
cparams.yarn_beta_fast = params.yarn_beta_fast;
|
| 11551 |
cparams.yarn_beta_slow = params.yarn_beta_slow;
|
|
|
|
| 11552 |
cparams.mul_mat_q = params.mul_mat_q;
|
| 11553 |
cparams.offload_kqv = params.offload_kqv;
|
| 11554 |
cparams.do_pooling = params.do_pooling;
|
|
@@ -11671,8 +11951,7 @@ struct llama_context * llama_new_context_with_model(
|
|
| 11671 |
}
|
| 11672 |
ctx->backends.push_back(ctx->backend_cpu);
|
| 11673 |
|
| 11674 |
-
if (!llama_kv_cache_init(ctx->kv_self, ctx->model, type_k, type_v,
|
| 11675 |
-
cparams.n_ctx, cparams.offload_kqv)) {
|
| 11676 |
LLAMA_LOG_ERROR("%s: llama_kv_cache_init() failed for self-attention cache\n", __func__);
|
| 11677 |
llama_free(ctx);
|
| 11678 |
return nullptr;
|
|
@@ -11751,7 +12030,7 @@ struct llama_context * llama_new_context_with_model(
|
|
| 11751 |
}
|
| 11752 |
|
| 11753 |
// buffer used to store the computation graph and the tensor meta data
|
| 11754 |
-
ctx->buf_compute_meta.resize(ggml_tensor_overhead()*LLAMA_MAX_NODES +
|
| 11755 |
|
| 11756 |
ctx->sched = ggml_backend_sched_new(ctx->backends.data(), backend_buft.data(), ctx->backends.size(), LLAMA_MAX_NODES);
|
| 11757 |
|
|
@@ -11820,6 +12099,49 @@ enum llama_vocab_type llama_vocab_type(const struct llama_model * model) {
|
|
| 11820 |
return model->vocab.type;
|
| 11821 |
}
|
| 11822 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11823 |
int32_t llama_n_vocab(const struct llama_model * model) {
|
| 11824 |
return model->vocab.id_to_token.size();
|
| 11825 |
}
|
|
@@ -12062,12 +12384,12 @@ void llama_kv_cache_seq_keep(struct llama_context * ctx, llama_seq_id seq_id) {
|
|
| 12062 |
llama_kv_cache_seq_keep(ctx->kv_self, seq_id);
|
| 12063 |
}
|
| 12064 |
|
| 12065 |
-
void
|
| 12066 |
if (delta == 0) {
|
| 12067 |
return;
|
| 12068 |
}
|
| 12069 |
|
| 12070 |
-
|
| 12071 |
}
|
| 12072 |
|
| 12073 |
void llama_kv_cache_seq_div(struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, int d) {
|
|
@@ -12078,6 +12400,19 @@ void llama_kv_cache_seq_div(struct llama_context * ctx, llama_seq_id seq_id, lla
|
|
| 12078 |
llama_kv_cache_seq_div(ctx->kv_self, seq_id, p0, p1, d);
|
| 12079 |
}
|
| 12080 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12081 |
// Returns the *maximum* size of the state
|
| 12082 |
size_t llama_get_state_size(const struct llama_context * ctx) {
|
| 12083 |
// we don't know size of rng until we actually serialize it. so reserve more than enough memory for its serialized state.
|
|
@@ -12204,10 +12539,10 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
|
|
| 12204 |
const auto & hparams = ctx->model.hparams;
|
| 12205 |
const auto & cparams = ctx->cparams;
|
| 12206 |
|
| 12207 |
-
const
|
| 12208 |
-
const
|
| 12209 |
-
const
|
| 12210 |
-
const
|
| 12211 |
|
| 12212 |
const size_t kv_buf_size = kv_self.total_size();
|
| 12213 |
const uint32_t kv_head = kv_self.head;
|
|
@@ -12222,14 +12557,16 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
|
|
| 12222 |
if (kv_buf_size) {
|
| 12223 |
std::vector<uint8_t> tmp_buf;
|
| 12224 |
for (int il = 0; il < (int) n_layer; ++il) {
|
| 12225 |
-
size_t k_size = ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*kv_head);
|
|
|
|
| 12226 |
tmp_buf.resize(k_size);
|
| 12227 |
ggml_backend_tensor_get(kv_self.k_l[il], tmp_buf.data(), 0, tmp_buf.size());
|
| 12228 |
data_ctx->write(tmp_buf.data(), tmp_buf.size());
|
| 12229 |
|
| 12230 |
// v is not contiguous, copy row by row
|
| 12231 |
-
size_t v_row_size
|
| 12232 |
-
size_t v_row_stride = ggml_row_size(kv_self.v_l[il]->type, n_ctx);
|
|
|
|
| 12233 |
tmp_buf.resize(v_row_size);
|
| 12234 |
for (int ir = 0; ir < (int) n_embd_v_gqa; ++ir) {
|
| 12235 |
ggml_backend_tensor_get(kv_self.v_l[il], tmp_buf.data(), ir*v_row_stride, tmp_buf.size());
|
|
@@ -12316,10 +12653,10 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
|
|
| 12316 |
const auto & hparams = ctx->model.hparams;
|
| 12317 |
const auto & cparams = ctx->cparams;
|
| 12318 |
|
| 12319 |
-
const
|
| 12320 |
-
const
|
| 12321 |
-
const
|
| 12322 |
-
const
|
| 12323 |
|
| 12324 |
size_t kv_buf_size;
|
| 12325 |
uint32_t kv_head;
|
|
@@ -12335,13 +12672,15 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
|
|
| 12335 |
GGML_ASSERT(kv_self.total_size() == kv_buf_size);
|
| 12336 |
|
| 12337 |
for (int il = 0; il < (int) n_layer; ++il) {
|
| 12338 |
-
size_t k_size = ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*kv_head);
|
|
|
|
| 12339 |
ggml_backend_tensor_set(kv_self.k_l[il], inp, 0, k_size);
|
| 12340 |
inp += k_size;
|
| 12341 |
|
| 12342 |
// v is not contiguous, copy row by row
|
| 12343 |
-
size_t v_row_size
|
| 12344 |
-
size_t v_row_stride = ggml_row_size(kv_self.v_l[il]->type, n_ctx);
|
|
|
|
| 12345 |
for (int ir = 0; ir < (int) n_embd_v_gqa; ++ir) {
|
| 12346 |
ggml_backend_tensor_set(kv_self.v_l[il], inp, ir*v_row_stride, v_row_size);
|
| 12347 |
inp += v_row_size;
|
|
|
|
| 68 |
#include <cstdio>
|
| 69 |
#include <cstring>
|
| 70 |
#include <ctime>
|
| 71 |
+
#include <cwctype>
|
| 72 |
#include <forward_list>
|
| 73 |
#include <fstream>
|
| 74 |
#include <functional>
|
| 75 |
#include <initializer_list>
|
| 76 |
+
#include <locale>
|
| 77 |
#include <map>
|
| 78 |
#include <memory>
|
| 79 |
#include <mutex>
|
|
|
|
| 1552 |
static const size_t GiB = 1024*MiB;
|
| 1553 |
|
| 1554 |
struct llama_hparams {
|
| 1555 |
+
bool vocab_only;
|
| 1556 |
+
bool rope_finetuned;
|
| 1557 |
+
|
| 1558 |
uint32_t n_vocab;
|
| 1559 |
uint32_t n_ctx_train; // context size the model was trained on
|
| 1560 |
uint32_t n_embd;
|
|
|
|
| 1583 |
bool causal_attn = true;
|
| 1584 |
bool need_kq_pos = false;
|
| 1585 |
|
| 1586 |
+
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE;
|
| 1587 |
+
enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE;
|
| 1588 |
|
| 1589 |
bool operator!=(const llama_hparams & other) const {
|
| 1590 |
if (this->vocab_only != other.vocab_only) return true;
|
|
|
|
| 1643 |
float yarn_attn_factor;
|
| 1644 |
float yarn_beta_fast;
|
| 1645 |
float yarn_beta_slow;
|
| 1646 |
+
float defrag_thold;
|
| 1647 |
|
| 1648 |
bool mul_mat_q;
|
| 1649 |
bool offload_kqv;
|
|
|
|
| 1712 |
bool has_seq_id(const llama_seq_id & id) const {
|
| 1713 |
return seq_id.find(id) != seq_id.end();
|
| 1714 |
}
|
| 1715 |
+
|
| 1716 |
+
bool is_empty() const {
|
| 1717 |
+
return seq_id.empty();
|
| 1718 |
+
}
|
| 1719 |
+
|
| 1720 |
+
bool is_same_seq(const llama_kv_cell & other) const {
|
| 1721 |
+
return seq_id == other.seq_id;
|
| 1722 |
+
}
|
| 1723 |
};
|
| 1724 |
|
| 1725 |
// ring-buffer of cached KV data
|
| 1726 |
struct llama_kv_cache {
|
| 1727 |
bool has_shift = false;
|
| 1728 |
+
bool do_defrag = false;
|
| 1729 |
|
| 1730 |
// Note: The value of head isn't only used to optimize searching
|
| 1731 |
// for a free KV slot. llama_decode_internal also uses it, so it
|
|
|
|
| 1737 |
// computed before each graph build
|
| 1738 |
uint32_t n = 0;
|
| 1739 |
|
| 1740 |
+
ggml_type type_k = GGML_TYPE_F16;
|
| 1741 |
+
ggml_type type_v = GGML_TYPE_F16;
|
| 1742 |
+
|
| 1743 |
std::vector<llama_kv_cell> cells;
|
| 1744 |
|
| 1745 |
std::vector<struct ggml_tensor *> k_l; // per layer
|
|
|
|
| 1975 |
static bool llama_kv_cache_init(
|
| 1976 |
struct llama_kv_cache & cache,
|
| 1977 |
const llama_model & model,
|
| 1978 |
+
ggml_type type_k,
|
| 1979 |
+
ggml_type type_v,
|
| 1980 |
uint32_t n_ctx,
|
| 1981 |
bool offload) {
|
| 1982 |
const struct llama_hparams & hparams = model.hparams;
|
|
|
|
| 1991 |
cache.size = n_ctx;
|
| 1992 |
cache.used = 0;
|
| 1993 |
|
| 1994 |
+
cache.type_k = type_k;
|
| 1995 |
+
cache.type_v = type_v;
|
| 1996 |
+
|
| 1997 |
cache.cells.clear();
|
| 1998 |
cache.cells.resize(n_ctx);
|
| 1999 |
|
|
|
|
| 2034 |
|
| 2035 |
for (int i = 0; i < (int) n_layer; i++) {
|
| 2036 |
struct ggml_context * ctx = offload ? ctx_map.at(model.buft_layer[i].buft) : cache.ctxs.front();
|
| 2037 |
+
ggml_tensor * k = ggml_new_tensor_1d(ctx, type_k, n_embd_k_gqa*n_ctx);
|
| 2038 |
+
ggml_tensor * v = ggml_new_tensor_1d(ctx, type_v, n_embd_v_gqa*n_ctx);
|
| 2039 |
ggml_format_name(k, "cache_k_l%d", i);
|
| 2040 |
ggml_format_name(v, "cache_v_l%d", i);
|
| 2041 |
cache.k_l.push_back(k);
|
|
|
|
| 2119 |
// find how many cells are currently in use
|
| 2120 |
static int32_t llama_kv_cache_cell_max(const struct llama_kv_cache & cache) {
|
| 2121 |
for (uint32_t i = cache.size - 1; i > 0; --i) {
|
| 2122 |
+
if (cache.cells[i].pos >= 0 && !cache.cells[i].is_empty()) {
|
| 2123 |
return i + 1;
|
| 2124 |
}
|
| 2125 |
}
|
|
|
|
| 2155 |
} else {
|
| 2156 |
continue;
|
| 2157 |
}
|
| 2158 |
+
if (cache.cells[i].is_empty()) {
|
| 2159 |
// keep count of the number of used cells
|
| 2160 |
if (cache.cells[i].pos >= 0) cache.used--;
|
| 2161 |
|
|
|
|
| 2206 |
if (new_head != cache.size && new_head < cache.head) cache.head = new_head;
|
| 2207 |
}
|
| 2208 |
|
| 2209 |
+
static void llama_kv_cache_seq_add(
|
| 2210 |
struct llama_kv_cache & cache,
|
| 2211 |
llama_seq_id seq_id,
|
| 2212 |
llama_pos p0,
|
|
|
|
| 2224 |
cache.cells[i].delta += delta;
|
| 2225 |
|
| 2226 |
if (cache.cells[i].pos < 0) {
|
| 2227 |
+
if (!cache.cells[i].is_empty()) {
|
| 2228 |
+
cache.used--;
|
| 2229 |
+
}
|
| 2230 |
cache.cells[i].pos = -1;
|
| 2231 |
cache.cells[i].seq_id.clear();
|
| 2232 |
+
if (new_head == cache.size) {
|
| 2233 |
+
new_head = i;
|
| 2234 |
+
}
|
| 2235 |
}
|
| 2236 |
}
|
| 2237 |
}
|
|
|
|
| 2263 |
}
|
| 2264 |
}
|
| 2265 |
|
| 2266 |
+
// Scans every cell of the cache and returns the largest position found among
// the cells that carry seq_id. The running maximum starts at 0, so 0 is
// returned when no cell belongs to seq_id.
static llama_pos llama_kv_cache_seq_pos_max(struct llama_kv_cache & cache, llama_seq_id seq_id) {
    llama_pos pos_max = 0;

    for (uint32_t idx = 0; idx < cache.size; ++idx) {
        // cells that are not part of this sequence do not contribute
        if (!cache.cells[idx].has_seq_id(seq_id)) {
            continue;
        }

        if (pos_max < cache.cells[idx].pos) {
            pos_max = cache.cells[idx].pos;
        }
    }

    return pos_max;
}
|
| 2277 |
+
|
| 2278 |
+
// Requests a defragmentation of the KV cache by raising its do_defrag flag.
// This function only sets the flag; no cache data is moved here.
static void llama_kv_cache_defrag(struct llama_kv_cache & cache) {
|
| 2279 |
+
cache.do_defrag = true;
|
| 2280 |
+
}
|
| 2281 |
+
|
| 2282 |
//
|
| 2283 |
// model loading and saving
|
| 2284 |
//
|
|
|
|
| 2350 |
}
|
| 2351 |
};
|
| 2352 |
|
| 2353 |
+
struct ArrayInfo {
|
| 2354 |
const gguf_type gt;
|
| 2355 |
const size_t length;
|
| 2356 |
const void * data;
|
|
|
|
| 2369 |
};
|
| 2370 |
|
| 2371 |
template<typename T>
|
| 2372 |
+
class GKV : public GKV_Base<T> {
|
| 2373 |
GKV() = delete;
|
| 2374 |
|
| 2375 |
public:
|
|
|
|
| 2392 |
return "unknown";
|
| 2393 |
}
|
| 2394 |
|
| 2395 |
+
static bool validate_override(const llama_model_kv_override_type expected_type, const struct llama_model_kv_override * ovrd) {
|
| 2396 |
+
if (!ovrd) { return false; }
|
| 2397 |
+
if (ovrd->tag == expected_type) {
|
| 2398 |
LLAMA_LOG_INFO("%s: Using metadata override (%5s) '%s' = ",
|
| 2399 |
+
__func__, override_type_to_str(ovrd->tag), ovrd->key);
|
| 2400 |
+
switch (ovrd->tag) {
|
| 2401 |
case LLAMA_KV_OVERRIDE_TYPE_BOOL: {
|
| 2402 |
+
LLAMA_LOG_INFO("%s\n", ovrd->bool_value ? "true" : "false");
|
| 2403 |
} break;
|
| 2404 |
case LLAMA_KV_OVERRIDE_TYPE_INT: {
|
| 2405 |
+
LLAMA_LOG_INFO("%" PRId64 "\n", ovrd->int_value);
|
| 2406 |
} break;
|
| 2407 |
case LLAMA_KV_OVERRIDE_TYPE_FLOAT: {
|
| 2408 |
+
LLAMA_LOG_INFO("%.6f\n", ovrd->float_value);
|
| 2409 |
} break;
|
| 2410 |
default:
|
| 2411 |
// Shouldn't be possible to end up here, but just in case...
|
| 2412 |
throw std::runtime_error(
|
| 2413 |
format("Unsupported attempt to override %s type for metadata key %s\n",
|
| 2414 |
+
override_type_to_str(ovrd->tag), ovrd->key));
|
| 2415 |
}
|
| 2416 |
return true;
|
| 2417 |
}
|
| 2418 |
LLAMA_LOG_WARN("%s: Warning: Bad metadata override type for key '%s', expected %s but got %s\n",
|
| 2419 |
+
__func__, ovrd->key, override_type_to_str(expected_type), override_type_to_str(ovrd->tag));
|
| 2420 |
return false;
|
| 2421 |
}
|
| 2422 |
|
| 2423 |
template<typename OT>
|
| 2424 |
static typename std::enable_if<std::is_same<OT, bool>::value, bool>::type
|
| 2425 |
+
try_override(OT & target, const struct llama_model_kv_override * ovrd) {
|
| 2426 |
+
if (validate_override(LLAMA_KV_OVERRIDE_TYPE_BOOL, ovrd)) {
|
| 2427 |
+
target = ovrd->bool_value;
|
| 2428 |
return true;
|
| 2429 |
}
|
| 2430 |
return false;
|
|
|
|
| 2432 |
|
| 2433 |
template<typename OT>
|
| 2434 |
static typename std::enable_if<!std::is_same<OT, bool>::value && std::is_integral<OT>::value, bool>::type
|
| 2435 |
+
try_override(OT & target, const struct llama_model_kv_override * ovrd) {
|
| 2436 |
+
if (validate_override(LLAMA_KV_OVERRIDE_TYPE_INT, ovrd)) {
|
| 2437 |
+
target = ovrd->int_value;
|
| 2438 |
return true;
|
| 2439 |
}
|
| 2440 |
return false;
|
|
|
|
| 2442 |
|
| 2443 |
template<typename OT>
|
| 2444 |
static typename std::enable_if<std::is_floating_point<OT>::value, bool>::type
|
| 2445 |
+
try_override(T & target, const struct llama_model_kv_override * ovrd) {
|
| 2446 |
+
if (validate_override(LLAMA_KV_OVERRIDE_TYPE_FLOAT, ovrd)) {
|
| 2447 |
+
target = ovrd->float_value;
|
| 2448 |
return true;
|
| 2449 |
}
|
| 2450 |
return false;
|
|
|
|
| 2452 |
|
| 2453 |
template<typename OT>
|
| 2454 |
static typename std::enable_if<std::is_same<OT, std::string>::value, bool>::type
|
| 2455 |
+
try_override(T & target, const struct llama_model_kv_override * ovrd) {
|
| 2456 |
(void)target;
|
| 2457 |
+
(void)ovrd;
|
| 2458 |
+
if (!ovrd) { return false; }
|
| 2459 |
// Currently, we should never end up here so it would be a bug if we do.
|
| 2460 |
throw std::runtime_error(format("Unsupported attempt to override string type for metadata key %s\n",
|
| 2461 |
+
ovrd ? ovrd->key : "NULL"));
|
| 2462 |
}
|
| 2463 |
|
| 2464 |
+
static bool set(const gguf_context * ctx, const int k, T & target, const struct llama_model_kv_override * ovrd = nullptr) {
|
| 2465 |
+
if (try_override<T>(target, ovrd)) {
|
| 2466 |
return true;
|
| 2467 |
}
|
| 2468 |
if (k < 0) { return false; }
|
|
|
|
| 2470 |
return true;
|
| 2471 |
}
|
| 2472 |
|
| 2473 |
+
static bool set(const gguf_context * ctx, const char * key, T & target, const struct llama_model_kv_override * ovrd = nullptr) {
|
| 2474 |
+
return set(ctx, gguf_find_key(ctx, key), target, ovrd);
|
| 2475 |
}
|
| 2476 |
|
| 2477 |
+
// Convenience overload for std::string keys: forwards the key's C string to
// the `const char *` overload of set().
static bool set(const gguf_context * ctx, const std::string & key, T & target, const struct llama_model_kv_override * ovrd = nullptr) {
    const char * key_cstr = key.c_str();
    return set(ctx, key_cstr, target, ovrd);
}
|
| 2480 |
};
|
| 2481 |
}
|
|
|
|
| 2582 |
case GGML_TYPE_Q6_K: ftype = LLAMA_FTYPE_MOSTLY_Q6_K; break;
|
| 2583 |
case GGML_TYPE_IQ2_XXS: ftype = LLAMA_FTYPE_MOSTLY_IQ2_XXS; break;
|
| 2584 |
case GGML_TYPE_IQ2_XS: ftype = LLAMA_FTYPE_MOSTLY_IQ2_XS; break;
|
| 2585 |
+
case GGML_TYPE_IQ2_S: ftype = LLAMA_FTYPE_MOSTLY_IQ2_S; break;
|
| 2586 |
case GGML_TYPE_IQ3_XXS: ftype = LLAMA_FTYPE_MOSTLY_IQ3_XXS; break;
|
| 2587 |
case GGML_TYPE_IQ1_S: ftype = LLAMA_FTYPE_MOSTLY_IQ1_S; break;
|
| 2588 |
case GGML_TYPE_IQ4_NL: ftype = LLAMA_FTYPE_MOSTLY_IQ4_NL; break;
|
| 2589 |
+
case GGML_TYPE_IQ4_XS: ftype = LLAMA_FTYPE_MOSTLY_IQ4_XS; break;
|
| 2590 |
case GGML_TYPE_IQ3_S: ftype = LLAMA_FTYPE_MOSTLY_IQ3_S; break;
|
| 2591 |
default:
|
| 2592 |
{
|
|
|
|
| 2888 |
}
|
| 2889 |
};
|
| 2890 |
|
| 2891 |
+
template<>
|
| 2892 |
+
bool llama_model_loader::get_key(const enum llm_kv kid, enum llama_pooling_type & result, const bool required) {
|
| 2893 |
+
uint32_t tmp;
|
| 2894 |
+
const bool found = get_key(kid, tmp, required);
|
| 2895 |
+
result = (enum llama_pooling_type) tmp;
|
| 2896 |
+
return found;
|
| 2897 |
+
}
|
| 2898 |
+
|
| 2899 |
+
|
| 2900 |
//
|
| 2901 |
// load LLaMA models
|
| 2902 |
//
|
|
|
|
| 2938 |
case LLAMA_FTYPE_MOSTLY_Q6_K: return "Q6_K";
|
| 2939 |
case LLAMA_FTYPE_MOSTLY_IQ2_XXS:return "IQ2_XXS - 2.0625 bpw";
|
| 2940 |
case LLAMA_FTYPE_MOSTLY_IQ2_XS: return "IQ2_XS - 2.3125 bpw";
|
| 2941 |
+
case LLAMA_FTYPE_MOSTLY_IQ2_S: return "IQ2_S - 2.5 bpw";
|
| 2942 |
+
case LLAMA_FTYPE_MOSTLY_IQ2_M: return "IQ2_M - 2.7 bpw";
|
| 2943 |
+
case LLAMA_FTYPE_MOSTLY_IQ3_XS: return "IQ3_XS - 3.3 bpw";
|
| 2944 |
case LLAMA_FTYPE_MOSTLY_IQ3_XXS:return "IQ3_XXS - 3.0625 bpw";
|
| 2945 |
case LLAMA_FTYPE_MOSTLY_IQ1_S :return "IQ1_S - 1.5625 bpw";
|
| 2946 |
case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw";
|
| 2947 |
+
case LLAMA_FTYPE_MOSTLY_IQ4_XS: return "IQ4_XS - 4.25 bpw";
|
| 2948 |
case LLAMA_FTYPE_MOSTLY_IQ3_S: return "IQ3_S - 3.4375 bpw";
|
| 2949 |
case LLAMA_FTYPE_MOSTLY_IQ3_M: return "IQ3_S mix - 3.66 bpw";
|
| 2950 |
|
|
|
|
| 2980 |
default: return "?B";
|
| 2981 |
}
|
| 2982 |
}
|
| 2983 |
+
|
| 2984 |
static const char * llama_model_vocab_type_name(enum llama_vocab_type type){
|
| 2985 |
switch (type) {
|
| 2986 |
+
case LLAMA_VOCAB_TYPE_SPM: return "SPM";
|
| 2987 |
+
case LLAMA_VOCAB_TYPE_BPE: return "BPE";
|
| 2988 |
+
case LLAMA_VOCAB_TYPE_WPM: return "WPM";
|
| 2989 |
+
default: return "unknown";
|
| 2990 |
}
|
| 2991 |
}
|
| 2992 |
|
|
|
|
| 2993 |
static void llm_load_arch(llama_model_loader & ml, llama_model & model) {
|
| 2994 |
model.arch = ml.get_arch();
|
| 2995 |
if (model.arch == LLM_ARCH_UNKNOWN) {
|
|
|
|
| 3166 |
} break;
|
| 3167 |
case LLM_ARCH_BERT:
|
| 3168 |
{
|
| 3169 |
+
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
|
| 3170 |
+
ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
|
| 3171 |
ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
|
| 3172 |
+
ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type);
|
| 3173 |
|
| 3174 |
switch (hparams.n_layer) {
|
| 3175 |
case 3:
|
|
|
|
| 3187 |
} break;
|
| 3188 |
case LLM_ARCH_NOMIC_BERT:
|
| 3189 |
{
|
| 3190 |
+
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
|
| 3191 |
+
ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
|
| 3192 |
ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
|
| 3193 |
+
ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type);
|
| 3194 |
|
| 3195 |
if (hparams.n_layer == 12 && hparams.n_embd == 768) {
|
| 3196 |
model.type = e_model::MODEL_137M;
|
|
|
|
| 3329 |
if (hparams.f_max_alibi_bias > 0.0f) {
|
| 3330 |
hparams.need_kq_pos = true;
|
| 3331 |
}
|
| 3332 |
+
|
| 3333 |
+
hparams.rope_type = llama_rope_type(&model);
|
| 3334 |
}
|
| 3335 |
|
| 3336 |
// TODO: This should probably be in llama.h
|
|
|
|
| 3633 |
LLAMA_LOG_INFO("%s: n_ff = %u\n", __func__, hparams.n_ff);
|
| 3634 |
LLAMA_LOG_INFO("%s: n_expert = %u\n", __func__, hparams.n_expert);
|
| 3635 |
LLAMA_LOG_INFO("%s: n_expert_used = %u\n", __func__, hparams.n_expert_used);
|
| 3636 |
+
LLAMA_LOG_INFO("%s: pooling type = %d\n", __func__, hparams.pooling_type);
|
| 3637 |
+
LLAMA_LOG_INFO("%s: rope type = %d\n", __func__, hparams.rope_type);
|
| 3638 |
LLAMA_LOG_INFO("%s: rope scaling = %s\n", __func__, rope_scaling_type);
|
| 3639 |
LLAMA_LOG_INFO("%s: freq_base_train = %.1f\n", __func__, hparams.rope_freq_base_train);
|
| 3640 |
LLAMA_LOG_INFO("%s: freq_scale_train = %g\n", __func__, hparams.rope_freq_scale_train);
|
|
|
|
| 4656 |
|
| 4657 |
using llm_build_cb = std::function<void(struct ggml_tensor * cur, const char * name, int nl)>;
|
| 4658 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4659 |
enum llm_ffn_op_type {
|
| 4660 |
LLM_FFN_SILU,
|
| 4661 |
LLM_FFN_GELU,
|
|
|
|
| 4701 |
return inpL;
|
| 4702 |
}
|
| 4703 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4704 |
static void llm_build_kv_store(
|
| 4705 |
struct ggml_context * ctx,
|
| 4706 |
const llama_hparams & hparams,
|
|
|
|
| 4902 |
ggml_mul_mat_set_prec(kq, GGML_PREC_F32);
|
| 4903 |
}
|
| 4904 |
|
| 4905 |
+
#if defined(GGML_USE_VULKAN) || defined(GGML_USE_KOMPUTE)
|
| 4906 |
+
#pragma message("TODO: ALiBi support in ggml_soft_max_ext is not implemented for Vulkan, and Kompute")
|
| 4907 |
#pragma message(" Falling back to ggml_alibi(). Will become an error in Mar 2024")
|
| 4908 |
#pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5488")
|
| 4909 |
if (hparams.f_max_alibi_bias > 0.0f) {
|
|
|
|
| 5004 |
|
| 5005 |
const int64_t n_embd;
|
| 5006 |
const int64_t n_layer;
|
| 5007 |
+
const int64_t n_rot;
|
| 5008 |
const int64_t n_ctx; // user-specified context size (can be different from n_ctx_train)
|
| 5009 |
const int64_t n_head;
|
| 5010 |
const int64_t n_head_kv;
|
|
|
|
| 5029 |
const int32_t kv_head; // index of where we store new KV data in the cache
|
| 5030 |
const int32_t n_orig_ctx;
|
| 5031 |
|
| 5032 |
+
const enum llama_pooling_type pooling_type;
|
| 5033 |
+
const enum llama_rope_type rope_type;
|
| 5034 |
|
| 5035 |
const llm_build_cb & cb;
|
| 5036 |
|
|
|
|
| 5052 |
kv_self (lctx.kv_self),
|
| 5053 |
n_embd (hparams.n_embd),
|
| 5054 |
n_layer (hparams.n_layer),
|
| 5055 |
+
n_rot (hparams.n_rot),
|
| 5056 |
n_ctx (cparams.n_ctx),
|
| 5057 |
n_head (hparams.n_head),
|
| 5058 |
n_head_kv (hparams.n_head_kv),
|
|
|
|
| 5074 |
n_kv (worst_case ? n_ctx : kv_self.n),
|
| 5075 |
kv_head (worst_case ? n_ctx - n_tokens : kv_self.head),
|
| 5076 |
n_orig_ctx (cparams.n_yarn_orig_ctx),
|
| 5077 |
+
pooling_type (cparams.do_pooling ? hparams.pooling_type : LLAMA_POOLING_TYPE_NONE),
|
| 5078 |
+
rope_type (hparams.rope_type),
|
| 5079 |
cb (cb),
|
| 5080 |
buf_compute_meta (lctx.buf_compute_meta) {
|
| 5081 |
// all initializations should be done in init()
|
|
|
|
| 5098 |
}
|
| 5099 |
}
|
| 5100 |
|
| 5101 |
+
struct ggml_cgraph * build_k_shift() {
|
| 5102 |
+
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
|
| 5103 |
+
|
| 5104 |
+
for (int il = 0; il < n_layer; ++il) {
|
| 5105 |
+
struct ggml_tensor * tmp =
|
| 5106 |
+
// we rotate only the first n_rot dimensions
|
| 5107 |
+
ggml_rope_custom_inplace(ctx0,
|
| 5108 |
+
ggml_view_3d(ctx0, kv_self.k_l[il],
|
| 5109 |
+
n_embd_head_k, n_head_kv, n_ctx,
|
| 5110 |
+
ggml_row_size(kv_self.k_l[il]->type, n_embd_head_k),
|
| 5111 |
+
ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa),
|
| 5112 |
+
0),
|
| 5113 |
+
lctx.inp_K_shift, n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 5114 |
+
ext_factor, attn_factor, beta_fast, beta_slow);
|
| 5115 |
+
cb(tmp, "K_shifted", il);
|
| 5116 |
+
ggml_build_forward_expand(gf, tmp);
|
| 5117 |
+
}
|
| 5118 |
+
|
| 5119 |
+
return gf;
|
| 5120 |
+
}
|
| 5121 |
+
|
| 5122 |
+
struct ggml_cgraph * build_defrag(const std::vector<uint32_t> & ids) {
|
| 5123 |
+
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
|
| 5124 |
+
|
| 5125 |
+
for (uint32_t i = 0; i < ids.size(); ++i) {
|
| 5126 |
+
const uint32_t id = ids[i];
|
| 5127 |
+
|
| 5128 |
+
if (i == id || id == ids.size()) {
|
| 5129 |
+
continue;
|
| 5130 |
+
}
|
| 5131 |
+
|
| 5132 |
+
uint32_t nm = 1;
|
| 5133 |
+
|
| 5134 |
+
while (i + nm < ids.size() && ids[i + nm] == id + nm) {
|
| 5135 |
+
nm++;
|
| 5136 |
+
}
|
| 5137 |
+
|
| 5138 |
+
for (int il = 0; il < n_layer; ++il) {
|
| 5139 |
+
ggml_tensor * view_k_src = ggml_view_2d(ctx0, kv_self.k_l[il],
|
| 5140 |
+
n_embd_k_gqa, nm,
|
| 5141 |
+
ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa),
|
| 5142 |
+
ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*i));
|
| 5143 |
+
|
| 5144 |
+
ggml_tensor * view_k_dst = ggml_view_2d(ctx0, kv_self.k_l[il],
|
| 5145 |
+
n_embd_k_gqa, nm,
|
| 5146 |
+
ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa),
|
| 5147 |
+
ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*id));
|
| 5148 |
+
|
| 5149 |
+
ggml_tensor * view_v_src = ggml_view_2d(ctx0, kv_self.v_l[il],
|
| 5150 |
+
nm, n_embd_v_gqa,
|
| 5151 |
+
ggml_row_size(kv_self.v_l[il]->type, kv_self.size),
|
| 5152 |
+
ggml_row_size(kv_self.v_l[il]->type, i));
|
| 5153 |
+
|
| 5154 |
+
ggml_tensor * view_v_dst = ggml_view_2d(ctx0, kv_self.v_l[il],
|
| 5155 |
+
nm, n_embd_v_gqa,
|
| 5156 |
+
ggml_row_size(kv_self.v_l[il]->type, kv_self.size),
|
| 5157 |
+
ggml_row_size(kv_self.v_l[il]->type, id));
|
| 5158 |
+
|
| 5159 |
+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, view_k_src, view_k_dst));
|
| 5160 |
+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, view_v_src, view_v_dst));
|
| 5161 |
+
}
|
| 5162 |
+
|
| 5163 |
+
i += nm - 1;
|
| 5164 |
+
}
|
| 5165 |
+
|
| 5166 |
+
//LLAMA_LOG_INFO("gf->n_nodes = %d\n", gf->n_nodes);
|
| 5167 |
+
|
| 5168 |
+
return gf;
|
| 5169 |
+
}
|
| 5170 |
+
|
| 5171 |
struct ggml_cgraph * build_llama() {
|
| 5172 |
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
|
| 5173 |
|
|
|
|
| 5189 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 5190 |
cb(KQ_mask, "KQ_mask", -1);
|
| 5191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5192 |
for (int il = 0; il < n_layer; ++il) {
|
| 5193 |
struct ggml_tensor * inpSA = inpL;
|
| 5194 |
|
|
|
|
| 5224 |
|
| 5225 |
Qcur = ggml_rope_custom(
|
| 5226 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 5227 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 5228 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 5229 |
);
|
| 5230 |
cb(Qcur, "Qcur", il);
|
| 5231 |
|
| 5232 |
Kcur = ggml_rope_custom(
|
| 5233 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 5234 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 5235 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 5236 |
);
|
| 5237 |
cb(Kcur, "Kcur", il);
|
|
|
|
| 5372 |
struct ggml_tensor * KQ_pos = ggml_view_1d(ctx0, lctx.inp_KQ_pos, n_kv, 0);
|
| 5373 |
cb(KQ_pos, "KQ_pos", -1);
|
| 5374 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5375 |
for (int il = 0; il < n_layer; ++il) {
|
| 5376 |
struct ggml_tensor * inpSA = inpL;
|
| 5377 |
|
|
|
|
| 5395 |
case MODEL_7B:
|
| 5396 |
Qcur = ggml_rope_custom(
|
| 5397 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 5398 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 5399 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 5400 |
);
|
| 5401 |
Kcur = ggml_rope_custom(
|
| 5402 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 5403 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 5404 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 5405 |
);
|
| 5406 |
break;
|
|
|
|
| 5485 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 5486 |
cb(KQ_mask, "KQ_mask", -1);
|
| 5487 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5488 |
for (int il = 0; il < n_layer; ++il) {
|
| 5489 |
struct ggml_tensor * attn_norm;
|
| 5490 |
|
|
|
|
| 5523 |
|
| 5524 |
// using mode = 2 for neox mode
|
| 5525 |
Qcur = ggml_rope_custom(
|
| 5526 |
+
ctx0, Qcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
|
| 5527 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 5528 |
);
|
| 5529 |
cb(Qcur, "Qcur", il);
|
| 5530 |
|
| 5531 |
Kcur = ggml_rope_custom(
|
| 5532 |
+
ctx0, Kcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
|
| 5533 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 5534 |
);
|
| 5535 |
cb(Kcur, "Kcur", il);
|
|
|
|
| 5699 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 5700 |
cb(KQ_mask, "KQ_mask", -1);
|
| 5701 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5702 |
for (int il = 0; il < n_layer; ++il) {
|
| 5703 |
struct ggml_tensor * residual = inpL;
|
| 5704 |
|
|
|
|
| 5756 |
|
| 5757 |
// RoPE the first n_rot of q/k, pass the other half, and concat.
|
| 5758 |
struct ggml_tensor * qrot = ggml_view_3d(
|
| 5759 |
+
ctx0, tmpq, n_rot, n_head, n_tokens,
|
| 5760 |
ggml_element_size(tmpq) * n_embd_head,
|
| 5761 |
ggml_element_size(tmpq) * n_embd_head * n_head,
|
| 5762 |
0
|
|
|
|
| 5764 |
cb(qrot, "qrot", il);
|
| 5765 |
|
| 5766 |
struct ggml_tensor * krot = ggml_view_3d(
|
| 5767 |
+
ctx0, tmpk, n_rot, n_head, n_tokens,
|
| 5768 |
ggml_element_size(tmpk) * n_embd_head,
|
| 5769 |
ggml_element_size(tmpk) * n_embd_head * n_head,
|
| 5770 |
0
|
|
|
|
| 5773 |
|
| 5774 |
// get the second half of tmpq, e.g tmpq[n_rot:, :, :]
|
| 5775 |
struct ggml_tensor * qpass = ggml_view_3d(
|
| 5776 |
+
ctx0, tmpq, n_rot, n_head, n_tokens,
|
| 5777 |
ggml_element_size(tmpq) * n_embd_head,
|
| 5778 |
ggml_element_size(tmpq) * n_embd_head * n_head,
|
| 5779 |
+
ggml_element_size(tmpq) * n_rot
|
| 5780 |
);
|
| 5781 |
cb(qpass, "qpass", il);
|
| 5782 |
|
| 5783 |
struct ggml_tensor * kpass = ggml_view_3d(
|
| 5784 |
+
ctx0, tmpk, n_rot, n_head, n_tokens,
|
| 5785 |
ggml_element_size(tmpk) * n_embd_head,
|
| 5786 |
ggml_element_size(tmpk) * n_embd_head * n_head,
|
| 5787 |
+
ggml_element_size(tmpk) * n_rot
|
| 5788 |
);
|
| 5789 |
cb(kpass, "kpass", il);
|
| 5790 |
|
| 5791 |
struct ggml_tensor * qrotated = ggml_rope_custom(
|
| 5792 |
+
ctx0, qrot, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
|
| 5793 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 5794 |
);
|
| 5795 |
cb(qrotated, "qrotated", il);
|
| 5796 |
|
| 5797 |
struct ggml_tensor * krotated = ggml_rope_custom(
|
| 5798 |
+
ctx0, krot, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
|
| 5799 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 5800 |
);
|
| 5801 |
cb(krotated, "krotated", il);
|
|
|
|
| 6047 |
|
| 6048 |
Qcur = ggml_rope_custom(
|
| 6049 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 6050 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 6051 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 6052 |
);
|
| 6053 |
cb(Qcur, "Qcur", il);
|
| 6054 |
|
| 6055 |
Kcur = ggml_rope_custom(
|
| 6056 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 6057 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 6058 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 6059 |
);
|
| 6060 |
cb(Kcur, "Kcur", il);
|
|
|
|
| 6343 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 6344 |
cb(KQ_mask, "KQ_mask", -1);
|
| 6345 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6346 |
for (int il = 0; il < n_layer; ++il) {
|
| 6347 |
struct ggml_tensor * inpSA = inpL;
|
| 6348 |
|
|
|
|
| 6379 |
|
| 6380 |
Qcur = ggml_rope_custom(
|
| 6381 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 6382 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 6383 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 6384 |
);
|
| 6385 |
cb(Qcur, "Qcur", il);
|
| 6386 |
|
| 6387 |
Kcur = ggml_rope_custom(
|
| 6388 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 6389 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 6390 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 6391 |
);
|
| 6392 |
cb(Kcur, "Kcur", il);
|
|
|
|
| 6461 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 6462 |
cb(KQ_mask, "KQ_mask", -1);
|
| 6463 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6464 |
for (int il = 0; il < n_layer; ++il) {
|
| 6465 |
struct ggml_tensor * inpSA = inpL;
|
| 6466 |
|
|
|
|
| 6490 |
|
| 6491 |
// using mode = 2 for neox mode
|
| 6492 |
Qcur = ggml_rope_custom(
|
| 6493 |
+
ctx0, Qcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
|
| 6494 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 6495 |
);
|
| 6496 |
cb(Qcur, "Qcur", il);
|
| 6497 |
|
| 6498 |
Kcur = ggml_rope_custom(
|
| 6499 |
+
ctx0, Kcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
|
| 6500 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 6501 |
);
|
| 6502 |
cb(Kcur, "Kcur", il);
|
|
|
|
| 6570 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 6571 |
cb(KQ_mask, "KQ_mask", -1);
|
| 6572 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6573 |
for (int il = 0; il < n_layer; ++il) {
|
| 6574 |
struct ggml_tensor * inpSA = inpL;
|
| 6575 |
|
|
|
|
| 6605 |
|
| 6606 |
Qcur = ggml_rope_custom(
|
| 6607 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 6608 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 6609 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 6610 |
);
|
| 6611 |
cb(Qcur, "Qcur", il);
|
| 6612 |
|
| 6613 |
Kcur = ggml_rope_custom(
|
| 6614 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 6615 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 6616 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 6617 |
);
|
| 6618 |
cb(Kcur, "Kcur", il);
|
|
|
|
| 6686 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 6687 |
cb(KQ_mask, "KQ_mask", -1);
|
| 6688 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6689 |
for (int il = 0; il < n_layer; ++il) {
|
| 6690 |
attn_norm_output = llm_build_norm(ctx0, inpL, hparams,
|
| 6691 |
model.layers[il].attn_norm,
|
|
|
|
| 6723 |
Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
|
| 6724 |
|
| 6725 |
Qcur = ggml_rope_custom(
|
| 6726 |
+
ctx0, Qcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
|
| 6727 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 6728 |
);
|
| 6729 |
cb(Qcur, "Qcur", il);
|
|
|
|
| 6734 |
cb(Qcur, "Qcur", il);
|
| 6735 |
|
| 6736 |
Kcur = ggml_rope_custom(
|
| 6737 |
+
ctx0, Kcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
|
| 6738 |
freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
|
| 6739 |
);
|
| 6740 |
cb(Kcur, "Kcur", il);
|
|
|
|
| 6803 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 6804 |
cb(KQ_mask, "KQ_mask", -1);
|
| 6805 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6806 |
for (int il = 0; il < n_layer; ++il) {
|
| 6807 |
|
| 6808 |
// norm
|
|
|
|
| 6826 |
cb(Vcur, "Vcur", il);
|
| 6827 |
|
| 6828 |
Qcur = ggml_rope_custom(
|
| 6829 |
+
ctx0, ggml_reshape_3d(ctx0, Qcur, n_rot, n_head, n_tokens), inp_pos,
|
| 6830 |
+
n_embd_head, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 6831 |
ext_factor, attn_factor, beta_fast, beta_slow);
|
| 6832 |
cb(Qcur, "Qcur", il);
|
| 6833 |
|
| 6834 |
Kcur = ggml_rope_custom(
|
| 6835 |
+
ctx0, ggml_reshape_3d(ctx0, Kcur, n_rot, n_head_kv, n_tokens), inp_pos,
|
| 6836 |
+
n_embd_head, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 6837 |
ext_factor, attn_factor, beta_fast, beta_slow);
|
| 6838 |
cb(Kcur, "Kcur", il);
|
| 6839 |
|
|
|
|
| 7003 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 7004 |
cb(KQ_mask, "KQ_mask", -1);
|
| 7005 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7006 |
for (int il = 0; il < n_layer; ++il) {
|
| 7007 |
cur = llm_build_norm(ctx0, inpL, hparams,
|
| 7008 |
model.layers[il].attn_norm,
|
|
|
|
| 7028 |
|
| 7029 |
struct ggml_tensor * Qcur = ggml_rope_custom(
|
| 7030 |
ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, n_tokens), inp_pos,
|
| 7031 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 7032 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7033 |
);
|
| 7034 |
cb(Qcur, "Qcur", il);
|
| 7035 |
|
| 7036 |
struct ggml_tensor * Kcur = ggml_rope_custom(
|
| 7037 |
ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 7038 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 7039 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7040 |
);
|
| 7041 |
cb(Kcur, "Kcur", il);
|
|
|
|
| 7106 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 7107 |
cb(KQ_mask, "KQ_mask", -1);
|
| 7108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7109 |
for (int il = 0; il < n_layer; ++il) {
|
| 7110 |
struct ggml_tensor * inpSA = inpL;
|
| 7111 |
|
|
|
|
| 7141 |
|
| 7142 |
Qcur = ggml_rope_custom(
|
| 7143 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 7144 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 7145 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7146 |
);
|
| 7147 |
cb(Qcur, "Qcur", il);
|
| 7148 |
|
| 7149 |
Kcur = ggml_rope_custom(
|
| 7150 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 7151 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 7152 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7153 |
);
|
| 7154 |
cb(Kcur, "Kcur", il);
|
|
|
|
| 7220 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 7221 |
cb(KQ_mask, "KQ_mask", -1);
|
| 7222 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7223 |
for (int il = 0; il < n_layer; ++il) {
|
| 7224 |
struct ggml_tensor * inpSA = inpL;
|
| 7225 |
|
|
|
|
| 7255 |
|
| 7256 |
Qcur = ggml_rope_custom(
|
| 7257 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 7258 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 7259 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7260 |
);
|
| 7261 |
cb(Qcur, "Qcur", il);
|
| 7262 |
|
| 7263 |
Kcur = ggml_rope_custom(
|
| 7264 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 7265 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 7266 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7267 |
);
|
| 7268 |
cb(Kcur, "Kcur", il);
|
|
|
|
| 7347 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 7348 |
cb(KQ_mask, "KQ_mask", -1);
|
| 7349 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7350 |
for (int il = 0; il < n_layer; ++il) {
|
| 7351 |
struct ggml_tensor * inpSA = inpL;
|
| 7352 |
|
|
|
|
| 7382 |
|
| 7383 |
Qcur = ggml_rope_custom(
|
| 7384 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
|
| 7385 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 7386 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7387 |
);
|
| 7388 |
cb(Qcur, "Qcur", il);
|
| 7389 |
|
| 7390 |
Kcur = ggml_rope_custom(
|
| 7391 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
|
| 7392 |
+
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 7393 |
ext_factor, attn_factor, beta_fast, beta_slow
|
| 7394 |
);
|
| 7395 |
cb(Kcur, "Kcur", il);
|
|
|
|
| 7478 |
struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0);
|
| 7479 |
cb(KQ_mask, "KQ_mask", -1);
|
| 7480 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7481 |
for (int il = 0; il < n_layer; ++il) {
|
| 7482 |
|
| 7483 |
// norm
|
|
|
|
| 7500 |
|
| 7501 |
Qcur = ggml_rope_custom(
|
| 7502 |
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head_k, n_head, n_tokens), inp_pos,
|
| 7503 |
+
n_embd_head_k, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 7504 |
ext_factor, attn_factor, beta_fast, beta_slow);
|
| 7505 |
cb(Qcur, "Qcur", il);
|
| 7506 |
|
|
|
|
| 7509 |
|
| 7510 |
Kcur = ggml_rope_custom(
|
| 7511 |
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head_k, n_head_kv, n_tokens), inp_pos,
|
| 7512 |
+
n_embd_head_k, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
|
| 7513 |
ext_factor, attn_factor, beta_fast, beta_slow);
|
| 7514 |
cb(Kcur, "Kcur", il);
|
| 7515 |
|
|
|
|
| 7562 |
}
|
| 7563 |
};
|
| 7564 |
|
| 7565 |
+
// Build the compute graph that relocates KV cache cells.
//
// - lctx: llama context whose KV cache is being defragmented
// - ids:  destination table handed to llm_build_context::build_defrag()
//         (llama_kv_cache_defrag_internal fills it so that cell i moves to ids[i])
//
// Returns the graph produced by build_defrag().
static struct ggml_cgraph * llama_build_graph_defrag(llama_context & lctx, const std::vector<uint32_t> & ids) {
    // placeholder batch: only n_tokens is set, to zero
    llama_batch empty_batch;
    empty_batch.n_tokens = 0;

    // callback that intentionally does nothing
    llm_build_cb noop_cb = [&](struct ggml_tensor * , const char * , int ) { };

    struct llm_build_context builder(lctx, empty_batch, noop_cb, false);
    builder.init();

    struct ggml_cgraph * graph = builder.build_defrag(ids);

    // release the builder's resources before handing the graph back
    builder.free();

    return graph;
}
|
| 7581 |
+
|
| 7582 |
+
// Build the compute graph produced by llm_build_context::build_k_shift().
//
// - lctx: llama context the graph is built for
//
// Returns the graph produced by build_k_shift().
static struct ggml_cgraph * llama_build_graph_k_shift(llama_context & lctx) {
    // placeholder batch: only n_tokens is set, to zero
    llama_batch empty_batch;
    empty_batch.n_tokens = 0;

    // callback that intentionally does nothing
    llm_build_cb noop_cb = [&](struct ggml_tensor * , const char * , int ) { };

    struct llm_build_context builder(lctx, empty_batch, noop_cb, false);
    builder.init();

    struct ggml_cgraph * graph = builder.build_k_shift();

    // release the builder's resources before handing the graph back
    builder.free();

    return graph;
}
|
| 7598 |
+
|
| 7599 |
static struct ggml_cgraph * llama_build_graph(
|
| 7600 |
llama_context & lctx,
|
| 7601 |
const llama_batch & batch,
|
|
|
|
| 7715 |
return result;
|
| 7716 |
}
|
| 7717 |
|
| 7718 |
+
// Copy the per-cell position deltas of the KV cache into the K-shift input tensor.
//
// For every index in [0, cparams.n_ctx) the `delta` of the matching KV cell is
// written to lctx.inp_K_shift as an int32 value.
static void llama_set_k_shift(llama_context & lctx) {
    const int64_t n_cells = lctx.cparams.n_ctx;

    // the tensor data is written through a raw pointer, so it must live in host memory
    assert(ggml_backend_buffer_is_host(lctx.inp_K_shift->buffer));

    const auto & cells = lctx.kv_self.cells;
    int32_t * shift = (int32_t *) lctx.inp_K_shift->data;

    for (int idx = 0; idx < n_cells; ++idx) {
        shift[idx] = cells[idx].delta;
    }
}
|
| 7731 |
+
|
| 7732 |
static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
|
| 7733 |
//
|
| 7734 |
// set input data
|
|
|
|
| 7796 |
}
|
| 7797 |
}
|
| 7798 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7799 |
if (cparams.do_pooling && hparams.pooling_type == LLAMA_POOLING_TYPE_MEAN) {
|
| 7800 |
const int64_t n_tokens = batch.n_tokens;
|
| 7801 |
|
|
|
|
| 7840 |
}
|
| 7841 |
}
|
| 7842 |
|
| 7843 |
+
// Evaluate a compute graph through the context's backend scheduler.
//
// - lctx:      llama context providing the backends and the scheduler
// - gf:        graph to evaluate
// - n_threads: thread count applied to the CPU backend (and passed as the
//              n_cb value of the Metal backend when that backend is in use)
//
// When built with GGML_USE_MPI, the MPI pre/post hooks are invoked around the computation.
static void llama_graph_compute(
        llama_context & lctx,
          ggml_cgraph * gf,
                  int   n_threads) {
#ifdef GGML_USE_MPI
    const int64_t n_layer = lctx.model.hparams.n_layer;
    ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
#endif

#ifdef GGML_USE_METAL
    if (ggml_backend_is_metal(lctx.backend_metal)) {
        ggml_backend_metal_set_n_cb(lctx.backend_metal, n_threads);
    }
#endif

    // the CPU backend is optional
    if (lctx.backend_cpu) {
        ggml_backend_cpu_set_n_threads(lctx.backend_cpu, n_threads);
    }

    ggml_backend_sched_graph_compute(lctx.sched, gf);

    // fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched));

#ifdef GGML_USE_MPI
    // n_layer is the value declared in the GGML_USE_MPI section at the top
    ggml_mpi_graph_compute_post(lctx.ctx_mpi, gf, n_layer);
#endif
}
|
| 7870 |
+
|
| 7871 |
// decode a batch of tokens by evaluating the transformer
|
| 7872 |
//
|
| 7873 |
// - lctx: llama context
|
|
|
|
| 7945 |
batch.seq_id = seq_id_arr.data();
|
| 7946 |
}
|
| 7947 |
|
| 7948 |
+
llama_kv_cache_update(&lctx);
|
| 7949 |
+
|
| 7950 |
// if we have enough unused cells before the current head ->
|
| 7951 |
// better to start searching from the beginning of the cache, hoping to fill it
|
| 7952 |
if (kv_self.head > kv_self.used + 2*n_tokens) {
|
|
|
|
| 7971 |
ggml_cgraph * gf = llama_build_graph(lctx, batch, false);
|
| 7972 |
|
| 7973 |
// the output is always the last tensor in the graph
|
| 7974 |
+
struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1];
|
| 7975 |
struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 2];
|
| 7976 |
+
|
| 7977 |
if (strcmp(res->name, "result_output") == 0) {
|
| 7978 |
// the embeddings could be the second to last tensor, or the third to last tensor
|
| 7979 |
if (strcmp(embeddings->name, "result_norm") != 0) {
|
|
|
|
| 8000 |
n_threads = std::min(4, n_threads);
|
| 8001 |
}
|
| 8002 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8003 |
llama_set_inputs(lctx, batch);
|
| 8004 |
|
| 8005 |
+
llama_graph_compute(lctx, gf, n_threads);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8006 |
|
| 8007 |
// update the kv ring buffer
|
| 8008 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8009 |
kv_self.head += n_tokens;
|
| 8010 |
|
| 8011 |
// Ensure kv cache head points to a valid index.
|
|
|
|
| 8014 |
}
|
| 8015 |
}
|
| 8016 |
|
| 8017 |
+
// decide if we need to defrag the kv cache
|
| 8018 |
+
if (cparams.defrag_thold >= 0.0f) {
|
| 8019 |
+
const float fragmentation = kv_self.n >= 128 ? 1.0f - float(kv_self.used + n_tokens)/float(kv_self.n) : 0.0f;
|
| 8020 |
+
|
| 8021 |
+
// queue defragmentation for next llama_kv_cache_update
|
| 8022 |
+
if (fragmentation > cparams.defrag_thold) {
|
| 8023 |
+
//LLAMA_LOG_INFO("fragmentation: %.2f\n", fragmentation);
|
| 8024 |
+
|
| 8025 |
+
llama_kv_cache_defrag(kv_self);
|
| 8026 |
+
}
|
| 8027 |
+
}
|
| 8028 |
+
|
| 8029 |
#ifdef GGML_PERF
|
| 8030 |
// print timing information per ggml operation (for debugging purposes)
|
| 8031 |
// requires GGML_PERF to be defined
|
|
|
|
| 8113 |
return 0;
|
| 8114 |
}
|
| 8115 |
|
| 8116 |
+
// find holes from the beginning of the KV cache and fill them by moving data from the end of the cache
|
| 8117 |
+
static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
|
| 8118 |
+
auto & kv_self = lctx.kv_self;
|
| 8119 |
+
|
| 8120 |
+
const auto & hparams = lctx.model.hparams;
|
| 8121 |
+
|
| 8122 |
+
const uint32_t n_layer = hparams.n_layer;
|
| 8123 |
+
|
| 8124 |
+
const uint32_t n_kv = llama_kv_cache_cell_max(kv_self);
|
| 8125 |
+
const uint32_t n_used = kv_self.used;
|
| 8126 |
+
|
| 8127 |
+
assert(n_used <= n_kv);
|
| 8128 |
+
|
| 8129 |
+
//const int64_t t_start = ggml_time_us();
|
| 8130 |
+
|
| 8131 |
+
// number of cells moved
|
| 8132 |
+
uint32_t n_moves = 0;
|
| 8133 |
+
|
| 8134 |
+
// determine which KV cells to move where
|
| 8135 |
+
//
|
| 8136 |
+
// cell i moves to ids[i]
|
| 8137 |
+
//
|
| 8138 |
+
// if ids[i] == i || ids[i] == n_kv, then cell i is not moved
|
| 8139 |
+
//
|
| 8140 |
+
std::vector<uint32_t> ids(n_kv, n_kv);
|
| 8141 |
+
|
| 8142 |
+
for (uint32_t i0 = 0; i0 < n_used; ++i0) {
|
| 8143 |
+
const auto & cell0 = kv_self.cells[i0];
|
| 8144 |
+
|
| 8145 |
+
if (!cell0.is_empty()) {
|
| 8146 |
+
ids[i0] = i0;
|
| 8147 |
+
|
| 8148 |
+
continue;
|
| 8149 |
+
}
|
| 8150 |
+
|
| 8151 |
+
// found a hole - fill it with data from the end of the cache
|
| 8152 |
+
|
| 8153 |
+
uint32_t nh = 1;
|
| 8154 |
+
|
| 8155 |
+
// determine the size of the hole
|
| 8156 |
+
while (i0 + nh < n_used && kv_self.cells[i0 + nh].is_empty()) {
|
| 8157 |
+
nh++;
|
| 8158 |
+
}
|
| 8159 |
+
|
| 8160 |
+
// each move requires 6*n_layer tensors (see build_defrag)
|
| 8161 |
+
// - source view, destination view, copy operation
|
| 8162 |
+
// - x2 for keys and values
|
| 8163 |
+
//
|
| 8164 |
+
if (6*(n_moves + nh)*n_layer >= LLAMA_MAX_NODES) {
|
| 8165 |
+
// the graph is too big, we cannot move more cells
|
| 8166 |
+
break;
|
| 8167 |
+
}
|
| 8168 |
+
|
| 8169 |
+
uint32_t nf = 0;
|
| 8170 |
+
uint32_t is = n_kv - 1;
|
| 8171 |
+
|
| 8172 |
+
// starting from the end, find nh non-empty cells
|
| 8173 |
+
for (; is > i0; --is) {
|
| 8174 |
+
const auto & cell1 = kv_self.cells[is];
|
| 8175 |
+
|
| 8176 |
+
if (cell1.is_empty() || ids[is] != n_kv) {
|
| 8177 |
+
continue;
|
| 8178 |
+
}
|
| 8179 |
+
|
| 8180 |
+
// non-empty cell which is not yet moved
|
| 8181 |
+
nf++;
|
| 8182 |
+
|
| 8183 |
+
if (nf == nh) {
|
| 8184 |
+
break;
|
| 8185 |
+
}
|
| 8186 |
+
}
|
| 8187 |
+
|
| 8188 |
+
// this can only happen if `n_used` is not accurate, which would be a bug
|
| 8189 |
+
GGML_ASSERT(nf == nh && "KV defrag bug: nf != nh");
|
| 8190 |
+
|
| 8191 |
+
nf = 0;
|
| 8192 |
+
|
| 8193 |
+
uint32_t i1 = is;
|
| 8194 |
+
|
| 8195 |
+
// are we moving a continuous block of memory?
|
| 8196 |
+
bool cont = false;
|
| 8197 |
+
|
| 8198 |
+
// go back and move the nf cells to the hole
|
| 8199 |
+
for (; i1 < n_kv; ++i1) {
|
| 8200 |
+
auto & cell1 = kv_self.cells[i1];
|
| 8201 |
+
|
| 8202 |
+
if (cell1.is_empty() || ids[i1] != n_kv) {
|
| 8203 |
+
cont = false;
|
| 8204 |
+
continue;
|
| 8205 |
+
}
|
| 8206 |
+
|
| 8207 |
+
// this cell goes to (i0 + nf)
|
| 8208 |
+
ids[i1] = i0 + nf;
|
| 8209 |
+
|
| 8210 |
+
// move the cell meta data
|
| 8211 |
+
kv_self.cells[i0 + nf] = cell1;
|
| 8212 |
+
|
| 8213 |
+
// clear the old cell and move the head there
|
| 8214 |
+
cell1 = llama_kv_cell();
|
| 8215 |
+
kv_self.head = n_used;
|
| 8216 |
+
|
| 8217 |
+
if (!cont) {
|
| 8218 |
+
n_moves++;
|
| 8219 |
+
cont = true;
|
| 8220 |
+
}
|
| 8221 |
+
|
| 8222 |
+
nf++;
|
| 8223 |
+
|
| 8224 |
+
if (nf == nh) {
|
| 8225 |
+
break;
|
| 8226 |
+
}
|
| 8227 |
+
}
|
| 8228 |
+
|
| 8229 |
+
//LLAMA_LOG_INFO("(tmp log) KV defrag: move [%u, %u) to [%u, %u)\n", is, i1 + 1, i0, i0 + nh);
|
| 8230 |
+
|
| 8231 |
+
i0 += nh - 1;
|
| 8232 |
+
}
|
| 8233 |
+
|
| 8234 |
+
if (n_moves == 0) {
|
| 8235 |
+
return;
|
| 8236 |
+
}
|
| 8237 |
+
|
| 8238 |
+
//LLAMA_LOG_INFO("(tmp log) KV defrag cell moves: %u\n", n_moves);
|
| 8239 |
+
|
| 8240 |
+
//LLAMA_LOG_INFO("expected gf nodes: %u\n", 6*n_moves*n_layer);
|
| 8241 |
+
|
| 8242 |
+
#if 0
|
| 8243 |
+
// CPU defrag
|
| 8244 |
+
//
|
| 8245 |
+
// TODO: optimizations are possible:
|
| 8246 |
+
// - multiple threads
|
| 8247 |
+
// - avoid copying to the host memory when already there
|
| 8248 |
+
//
|
| 8249 |
+
// likely not worth the effort, as we have ggml_graph based defrag
|
| 8250 |
+
//
|
| 8251 |
+
|
| 8252 |
+
const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa();
|
| 8253 |
+
const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa();
|
| 8254 |
+
|
| 8255 |
+
const uint32_t kv_size = kv_self.size;
|
| 8256 |
+
|
| 8257 |
+
std::vector<uint8_t> buf_k;
|
| 8258 |
+
std::vector<uint8_t> buf_v;
|
| 8259 |
+
|
| 8260 |
+
for (uint32_t il = 0; il < n_layer; ++il) {
|
| 8261 |
+
const size_t k_size_row = ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa);
|
| 8262 |
+
const size_t k_size = ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*kv_size);
|
| 8263 |
+
|
| 8264 |
+
const size_t v_size_el = ggml_type_size(kv_self.v_l[il]->type);
|
| 8265 |
+
const size_t v_size = ggml_row_size (kv_self.v_l[il]->type, n_embd_v_gqa*kv_size);
|
| 8266 |
+
|
| 8267 |
+
buf_k.resize(k_size);
|
| 8268 |
+
buf_v.resize(v_size);
|
| 8269 |
+
|
| 8270 |
+
ggml_backend_tensor_get(kv_self.k_l[il], buf_k.data(), 0, buf_k.size());
|
| 8271 |
+
ggml_backend_tensor_get(kv_self.v_l[il], buf_v.data(), 0, buf_v.size());
|
| 8272 |
+
|
| 8273 |
+
// batch move [i, i+nm) to [id, id+nm)
|
| 8274 |
+
// note: cells can move only to a lower index
|
| 8275 |
+
for (uint32_t i = 0; i < n_kv; ++i) {
|
| 8276 |
+
const uint32_t id = ids[i];
|
| 8277 |
+
|
| 8278 |
+
if (i == id || id == n_kv) {
|
| 8279 |
+
continue;
|
| 8280 |
+
}
|
| 8281 |
+
|
| 8282 |
+
uint32_t nm = 1;
|
| 8283 |
+
|
| 8284 |
+
while (i + nm < n_kv && ids[i + nm] == id + nm) {
|
| 8285 |
+
nm++;
|
| 8286 |
+
}
|
| 8287 |
+
|
| 8288 |
+
// move keys
|
| 8289 |
+
{
|
| 8290 |
+
const int64_t os = i*k_size_row;
|
| 8291 |
+
const int64_t od = id*k_size_row;
|
| 8292 |
+
|
| 8293 |
+
memcpy(buf_k.data() + od, buf_k.data() + os, nm*k_size_row);
|
| 8294 |
+
}
|
| 8295 |
+
|
| 8296 |
+
// move values (note: they are transposed)
|
| 8297 |
+
{
|
| 8298 |
+
const int64_t os = i;
|
| 8299 |
+
const int64_t od = id;
|
| 8300 |
+
|
| 8301 |
+
for (uint32_t j = 0; j < n_embd_v_gqa; ++j) {
|
| 8302 |
+
memcpy(buf_v.data() + (od + j*kv_size)*v_size_el, buf_v.data() + (os + j*kv_size)*v_size_el, nm*v_size_el);
|
| 8303 |
+
}
|
| 8304 |
+
}
|
| 8305 |
+
|
| 8306 |
+
i += nm - 1;
|
| 8307 |
+
}
|
| 8308 |
+
|
| 8309 |
+
ggml_backend_tensor_set(kv_self.k_l[il], buf_k.data(), 0, buf_k.size());
|
| 8310 |
+
ggml_backend_tensor_set(kv_self.v_l[il], buf_v.data(), 0, buf_v.size());
|
| 8311 |
+
}
|
| 8312 |
+
#else
|
| 8313 |
+
// ggml_graph defrag
|
| 8314 |
+
|
| 8315 |
+
ggml_cgraph * gf = llama_build_graph_defrag(lctx, ids);
|
| 8316 |
+
|
| 8317 |
+
llama_graph_compute(lctx, gf, lctx.cparams.n_threads);
|
| 8318 |
+
#endif
|
| 8319 |
+
|
| 8320 |
+
//const int64_t t_end = ggml_time_us();
|
| 8321 |
+
|
| 8322 |
+
//LLAMA_LOG_INFO("(tmp log) KV defrag time: %.3f ms\n", (t_end - t_start)/1000.0);
|
| 8323 |
+
}
|
| 8324 |
+
|
| 8325 |
+
static void llama_kv_cache_update_internal(struct llama_context & lctx) {
|
| 8326 |
+
// apply K-shift if needed
|
| 8327 |
+
if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
|
| 8328 |
+
llama_set_k_shift(lctx);
|
| 8329 |
+
|
| 8330 |
+
{
|
| 8331 |
+
ggml_cgraph * gf = llama_build_graph_k_shift(lctx);
|
| 8332 |
+
|
| 8333 |
+
llama_graph_compute(lctx, gf, lctx.cparams.n_threads);
|
| 8334 |
+
}
|
| 8335 |
+
|
| 8336 |
+
{
|
| 8337 |
+
auto & kv_self = lctx.kv_self;
|
| 8338 |
+
|
| 8339 |
+
kv_self.has_shift = false;
|
| 8340 |
+
|
| 8341 |
+
for (uint32_t i = 0; i < kv_self.size; ++i) {
|
| 8342 |
+
kv_self.cells[i].delta = 0;
|
| 8343 |
+
}
|
| 8344 |
+
}
|
| 8345 |
+
}
|
| 8346 |
+
|
| 8347 |
+
// defragment the KV cache if needed
|
| 8348 |
+
if (lctx.kv_self.do_defrag) {
|
| 8349 |
+
llama_kv_cache_defrag_internal(lctx);
|
| 8350 |
+
|
| 8351 |
+
lctx.kv_self.do_defrag = false;
|
| 8352 |
+
}
|
| 8353 |
+
}
|
| 8354 |
+
|
| 8355 |
//
|
| 8356 |
// tokenizer
|
| 8357 |
//
|
|
|
|
| 8943 |
}
|
| 8944 |
|
| 8945 |
std::vector<std::string> preprocess(const std::string & text) {
|
| 8946 |
+
// normalization form D
|
| 8947 |
+
std::vector<uint32_t> codepoints = codepoints_from_utf8(text);
|
| 8948 |
+
std::vector<uint32_t> nfd_codepoints;
|
| 8949 |
+
for (uint32_t code : codepoints) {
|
| 8950 |
+
auto it = nfd_map.find(code);
|
| 8951 |
+
if (it != nfd_map.end()) {
|
| 8952 |
+
for (uint32_t c : it->second) {
|
| 8953 |
+
nfd_codepoints.push_back(c);
|
| 8954 |
+
}
|
| 8955 |
+
} else {
|
| 8956 |
+
nfd_codepoints.push_back(code);
|
| 8957 |
+
}
|
| 8958 |
+
}
|
| 8959 |
|
| 8960 |
+
// strip accents, strip control, uniformize whitespace,
|
| 8961 |
+
// to lowercase, pad chinese characters, pad punctuation
|
|
|
|
| 8962 |
std::string new_str = "";
|
| 8963 |
+
for (uint32_t code : nfd_codepoints) {
|
| 8964 |
+
int type = codepoint_type(code);
|
| 8965 |
+
if (type == CODEPOINT_TYPE_ACCENT_MARK || type == CODEPOINT_TYPE_CONTROL) {
|
| 8966 |
+
continue;
|
| 8967 |
+
}
|
| 8968 |
+
code = to_lower(code);
|
| 8969 |
+
if (type == CODEPOINT_TYPE_WHITESPACE) {
|
| 8970 |
+
code = ' ';
|
| 8971 |
}
|
| 8972 |
+
std::string s = codepoint_to_utf8(code);
|
| 8973 |
+
if (type == CODEPOINT_TYPE_PUNCTUATION || is_ascii_punct(code) || is_chinese_char(code)) {
|
| 8974 |
new_str += " ";
|
| 8975 |
+
new_str += s;
|
| 8976 |
new_str += " ";
|
| 8977 |
+
} else {
|
| 8978 |
+
new_str += s;
|
|
|
|
|
|
|
|
|
|
| 8979 |
}
|
| 8980 |
}
|
| 8981 |
|
| 8982 |
// split by whitespace
|
| 8983 |
uint64_t l = 0;
|
| 8984 |
uint64_t r = 0;
|
| 8985 |
+
std::vector<std::string> words;
|
| 8986 |
while (r < new_str.size()) {
|
| 8987 |
// if is whitespace
|
| 8988 |
if (isspace(new_str[r])) {
|
|
|
|
| 9000 |
return words;
|
| 9001 |
}
|
| 9002 |
|
| 9003 |
+
// Convert a Unicode codepoint to lower case.
//
// - code: codepoint to convert
//
// Returns the lower-case codepoint, or `code` unchanged when there is no mapping.
//
// The conversion uses the "en_US.UTF-8" locale. Constructing a named std::locale
// is expensive and throws std::runtime_error when the locale is not installed,
// so the locale is created only once and the classic "C" locale is used as a
// fallback (in that case only the characters known to the "C" locale are converted).
uint32_t to_lower(uint32_t code) {
#if defined(_WIN32)
    // wchar_t is 16 bits wide on Windows - larger codepoints cannot be converted
    if (code > 0xFFFF) {
        return code;
    }
#endif
    // initialized on first use; function-local statics are initialized thread-safely
    static const std::locale lower_locale = []() {
        try {
            return std::locale("en_US.UTF-8");
        } catch (const std::runtime_error &) {
            // locale not available on this system - degrade instead of crashing
            return std::locale::classic();
        }
    }();

    return std::tolower(wchar_t(code), lower_locale);
}
|
| 9011 |
|
| 9012 |
+
// Check whether a codepoint is a single-byte punctuation character.
//
// - code: codepoint to test
//
// Returns true when `code` is below 256 and ispunct() classifies it as punctuation.
bool is_ascii_punct(uint32_t code) {
    // values outside the single-byte range are never passed to ispunct()
    if (code >= 256) {
        return false;
    }

    return ispunct(code) != 0;
}
|
| 9015 |
+
|
| 9016 |
+
bool is_chinese_char(uint32_t codepoint) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9017 |
if ((codepoint >= 0x4E00 && codepoint <= 0x9FFF) ||
|
| 9018 |
(codepoint >= 0x3400 && codepoint <= 0x4DBF) ||
|
| 9019 |
(codepoint >= 0x20000 && codepoint <= 0x2A6DF) ||
|
|
|
|
| 9029 |
return false;
|
| 9030 |
}
|
| 9031 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9032 |
const llama_vocab & vocab;
|
| 9033 |
};
|
| 9034 |
|
|
|
|
| 10754 |
if (arch == LLM_ARCH_FALCON || nx % QK_K != 0) {
|
| 10755 |
new_type = GGML_TYPE_Q8_0;
|
| 10756 |
}
|
| 10757 |
+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
|
| 10758 |
+
ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
|
| 10759 |
new_type = GGML_TYPE_Q5_K;
|
| 10760 |
}
|
| 10761 |
else if (new_type != GGML_TYPE_Q8_0) {
|
| 10762 |
new_type = GGML_TYPE_Q6_K;
|
| 10763 |
}
|
| 10764 |
} else if (name == "token_embd.weight") {
|
| 10765 |
+
if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS ||
|
| 10766 |
+
ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) {
|
| 10767 |
new_type = GGML_TYPE_Q2_K;
|
| 10768 |
}
|
| 10769 |
+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
|
| 10770 |
+
new_type = GGML_TYPE_IQ3_S;
|
| 10771 |
+
}
|
| 10772 |
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
|
| 10773 |
+
new_type = GGML_TYPE_IQ3_S;
|
| 10774 |
}
|
| 10775 |
+
} else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
|
| 10776 |
+
ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
|
| 10777 |
if (name.find("attn_v.weight") != std::string::npos) {
|
| 10778 |
if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q4_K;
|
| 10779 |
+
else new_type = ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M ? GGML_TYPE_IQ3_S : GGML_TYPE_Q2_K;
|
| 10780 |
++qs.i_attention_wv;
|
| 10781 |
}
|
| 10782 |
+
else if (qs.model.hparams.n_expert == 8 && name.find("attn_k.weight") != std::string::npos) {
|
| 10783 |
+
new_type = GGML_TYPE_Q4_K;
|
| 10784 |
+
}
|
| 10785 |
else if (name.find("ffn_down") != std::string::npos) {
|
| 10786 |
+
if (qs.i_ffn_down < qs.n_ffn_down/8) {
|
| 10787 |
+
new_type = ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M ? GGML_TYPE_IQ3_S : GGML_TYPE_Q2_K;
|
| 10788 |
+
}
|
| 10789 |
++qs.i_ffn_down;
|
| 10790 |
}
|
| 10791 |
else if (name.find("attn_output.weight") != std::string::npos) {
|
| 10792 |
+
if (qs.model.hparams.n_expert == 8) {
|
| 10793 |
+
new_type = GGML_TYPE_Q5_K;
|
| 10794 |
+
} else {
|
| 10795 |
+
if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) new_type = GGML_TYPE_IQ2_XXS;
|
| 10796 |
+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) new_type = GGML_TYPE_IQ3_S;
|
| 10797 |
+
}
|
| 10798 |
}
|
| 10799 |
} else if (name.find("attn_v.weight") != std::string::npos) {
|
| 10800 |
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) {
|
|
|
|
| 10804 |
new_type = GGML_TYPE_Q4_K;
|
| 10805 |
}
|
| 10806 |
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
|
| 10807 |
+
new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
|
| 10808 |
+
}
|
| 10809 |
+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) {
|
| 10810 |
+
new_type = GGML_TYPE_Q4_K;
|
| 10811 |
+
}
|
| 10812 |
+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
|
| 10813 |
+
new_type = GGML_TYPE_Q4_K;
|
| 10814 |
}
|
| 10815 |
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) {
|
| 10816 |
new_type = GGML_TYPE_Q4_K;
|
|
|
|
| 10822 |
new_type = qs.i_attention_wv < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
|
| 10823 |
}
|
| 10824 |
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
|
| 10825 |
+
else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) && qs.model.hparams.n_gqa() >= 4) {
|
| 10826 |
new_type = GGML_TYPE_Q5_K;
|
| 10827 |
}
|
| 10828 |
else if ((ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) &&
|
|
|
|
| 10848 |
// TODO: explore better strategies
|
| 10849 |
new_type = GGML_TYPE_Q8_0;
|
| 10850 |
}
|
| 10851 |
+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
|
| 10852 |
new_type = GGML_TYPE_IQ3_XXS;
|
| 10853 |
}
|
| 10854 |
+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
|
| 10855 |
+
new_type = GGML_TYPE_IQ2_S;
|
| 10856 |
+
}
|
| 10857 |
} else if (name.find("attn_q.weight") != std::string::npos) {
|
| 10858 |
+
if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
|
| 10859 |
new_type = GGML_TYPE_IQ3_XXS;
|
| 10860 |
}
|
| 10861 |
+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
|
| 10862 |
+
new_type = GGML_TYPE_IQ2_S;
|
| 10863 |
+
}
|
| 10864 |
} else if (name.find("ffn_down") != std::string::npos) {
|
| 10865 |
auto info = layer_info(qs.i_ffn_down, qs.n_ffn_down, name.c_str());
|
| 10866 |
int i_layer = info.first, n_layer = info.second;
|
|
|
|
| 10891 |
if (use_more_bits(i_layer, n_layer)) new_type = GGML_TYPE_Q6_K;
|
| 10892 |
}
|
| 10893 |
}
|
| 10894 |
+
else if (i_layer < n_layer/8 && (ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) && !qs.has_imatrix) {
|
| 10895 |
+
new_type = GGML_TYPE_Q5_K;
|
| 10896 |
}
|
| 10897 |
else if (ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M && use_more_bits(i_layer, n_layer)) new_type = GGML_TYPE_Q6_K;
|
| 10898 |
else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S && arch != LLM_ARCH_FALCON && i_layer < n_layer/8) {
|
|
|
|
| 10909 |
} else if (name.find("attn_output.weight") != std::string::npos) {
|
| 10910 |
if (arch != LLM_ARCH_FALCON) {
|
| 10911 |
if (qs.model.hparams.n_expert == 8) {
|
| 10912 |
+
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
|
| 10913 |
ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL ||
|
| 10914 |
ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S ||
|
| 10915 |
+
ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) {
|
| 10916 |
new_type = GGML_TYPE_Q5_K;
|
| 10917 |
}
|
| 10918 |
} else {
|
| 10919 |
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K ) new_type = GGML_TYPE_Q3_K;
|
| 10920 |
+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) new_type = GGML_TYPE_IQ3_S;
|
| 10921 |
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M ) new_type = GGML_TYPE_Q4_K;
|
| 10922 |
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L ) new_type = GGML_TYPE_Q5_K;
|
| 10923 |
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M ) new_type = GGML_TYPE_Q4_K;
|
|
|
|
| 10936 |
else if (name.find("ffn_gate") != std::string::npos) {
|
| 10937 |
auto info = layer_info(qs.i_ffn_gate, qs.n_ffn_gate, name.c_str());
|
| 10938 |
int i_layer = info.first, n_layer = info.second;
|
| 10939 |
+
if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS && (i_layer >= n_layer/8 && i_layer < 7*n_layer/8)) {
|
| 10940 |
new_type = GGML_TYPE_IQ3_XXS;
|
| 10941 |
}
|
| 10942 |
++qs.i_ffn_gate;
|
|
|
|
| 10944 |
else if (name.find("ffn_up") != std::string::npos) {
|
| 10945 |
auto info = layer_info(qs.i_ffn_up, qs.n_ffn_up, name.c_str());
|
| 10946 |
int i_layer = info.first, n_layer = info.second;
|
| 10947 |
+
if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS && (i_layer >= n_layer/8 && i_layer < 7*n_layer/8)) {
|
| 10948 |
new_type = GGML_TYPE_IQ3_XXS;
|
| 10949 |
}
|
| 10950 |
++qs.i_ffn_up;
|
|
|
|
| 10963 |
//}
|
| 10964 |
bool convert_incompatible_tensor = false;
|
| 10965 |
if (new_type == GGML_TYPE_Q2_K || new_type == GGML_TYPE_Q3_K || new_type == GGML_TYPE_Q4_K ||
|
| 10966 |
+
new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K || new_type == GGML_TYPE_IQ4_XS ||
|
| 10967 |
+
new_type == GGML_TYPE_IQ2_XS || new_type == GGML_TYPE_IQ2_XXS || new_type == GGML_TYPE_IQ2_S ||
|
| 10968 |
new_type == GGML_TYPE_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || new_type == GGML_TYPE_IQ3_S) {
|
| 10969 |
int nx = tensor->ne[0];
|
| 10970 |
int ny = tensor->ne[1];
|
|
|
|
| 10979 |
switch (new_type) {
|
| 10980 |
case GGML_TYPE_IQ2_XXS:
|
| 10981 |
case GGML_TYPE_IQ2_XS:
|
| 10982 |
+
case GGML_TYPE_IQ2_S:
|
| 10983 |
case GGML_TYPE_IQ3_XXS:
|
| 10984 |
case GGML_TYPE_IQ3_S:
|
| 10985 |
case GGML_TYPE_IQ1_S:
|
| 10986 |
case GGML_TYPE_Q2_K:
|
| 10987 |
+
case GGML_TYPE_Q3_K:
|
| 10988 |
+
case GGML_TYPE_IQ4_XS: new_type = GGML_TYPE_IQ4_NL; break;
|
| 10989 |
+
case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
|
| 10990 |
+
case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break;
|
| 10991 |
+
case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;
|
| 10992 |
default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
|
| 10993 |
}
|
| 10994 |
LLAMA_LOG_WARN(" - using fallback quantization %s\n", ggml_type_name(new_type));
|
|
|
|
| 11014 |
// K-quants
|
| 11015 |
case LLAMA_FTYPE_MOSTLY_Q2_K_S:
|
| 11016 |
case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break;
|
| 11017 |
+
case LLAMA_FTYPE_MOSTLY_IQ3_XS: quantized_type = GGML_TYPE_IQ3_S; break;
|
| 11018 |
case LLAMA_FTYPE_MOSTLY_Q3_K_S:
|
| 11019 |
case LLAMA_FTYPE_MOSTLY_Q3_K_M:
|
| 11020 |
case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break;
|
|
|
|
| 11025 |
case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break;
|
| 11026 |
case LLAMA_FTYPE_MOSTLY_IQ2_XXS: quantized_type = GGML_TYPE_IQ2_XXS; break;
|
| 11027 |
case LLAMA_FTYPE_MOSTLY_IQ2_XS: quantized_type = GGML_TYPE_IQ2_XS; break;
|
| 11028 |
+
case LLAMA_FTYPE_MOSTLY_IQ2_S: quantized_type = GGML_TYPE_IQ2_XS; break;
|
| 11029 |
+
case LLAMA_FTYPE_MOSTLY_IQ2_M: quantized_type = GGML_TYPE_IQ2_S; break;
|
| 11030 |
case LLAMA_FTYPE_MOSTLY_IQ3_XXS: quantized_type = GGML_TYPE_IQ3_XXS; break;
|
| 11031 |
case LLAMA_FTYPE_MOSTLY_IQ1_S: quantized_type = GGML_TYPE_IQ1_S; break;
|
| 11032 |
case LLAMA_FTYPE_MOSTLY_IQ4_NL: quantized_type = GGML_TYPE_IQ4_NL; break;
|
| 11033 |
+
case LLAMA_FTYPE_MOSTLY_IQ4_XS: quantized_type = GGML_TYPE_IQ4_XS; break;
|
| 11034 |
case LLAMA_FTYPE_MOSTLY_IQ3_S: quantized_type = GGML_TYPE_IQ3_S; break;
|
| 11035 |
case LLAMA_FTYPE_MOSTLY_IQ3_M: quantized_type = GGML_TYPE_IQ3_S; break;
|
| 11036 |
|
|
|
|
| 11162 |
quantize &= !params->only_copy;
|
| 11163 |
|
| 11164 |
// do not quantize expert gating tensors
|
| 11165 |
+
// NOTE: can't use LLM_TN here because the layer number is not known
|
| 11166 |
+
quantize &= name.find("ffn_gate_inp.weight") == std::string::npos;
|
| 11167 |
|
| 11168 |
// do not quantize positional embeddings and token types (BERT)
|
| 11169 |
quantize &= name != LLM_TN(model.arch)(LLM_TENSOR_POS_EMBD, "weight");
|
|
|
|
| 11207 |
}
|
| 11208 |
if ((new_type == GGML_TYPE_IQ2_XXS ||
|
| 11209 |
new_type == GGML_TYPE_IQ2_XS ||
|
| 11210 |
+
new_type == GGML_TYPE_IQ2_S ||
|
| 11211 |
new_type == GGML_TYPE_IQ1_S ||
|
| 11212 |
(new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
|
| 11213 |
LLAMA_LOG_ERROR("\n\n============================================================\n");
|
|
|
|
| 11663 |
/*.yarn_beta_fast =*/ 32.0f,
|
| 11664 |
/*.yarn_beta_slow =*/ 1.0f,
|
| 11665 |
/*.yarn_orig_ctx =*/ 0,
|
| 11666 |
+
/*.defrag_thold =*/ -1.0f,
|
| 11667 |
/*.cb_eval =*/ nullptr,
|
| 11668 |
/*.cb_eval_user_data =*/ nullptr,
|
| 11669 |
/*.type_k =*/ GGML_TYPE_F16,
|
|
|
|
| 11828 |
cparams.yarn_attn_factor = params.yarn_attn_factor;
|
| 11829 |
cparams.yarn_beta_fast = params.yarn_beta_fast;
|
| 11830 |
cparams.yarn_beta_slow = params.yarn_beta_slow;
|
| 11831 |
+
cparams.defrag_thold = params.defrag_thold;
|
| 11832 |
cparams.mul_mat_q = params.mul_mat_q;
|
| 11833 |
cparams.offload_kqv = params.offload_kqv;
|
| 11834 |
cparams.do_pooling = params.do_pooling;
|
|
|
|
| 11951 |
}
|
| 11952 |
ctx->backends.push_back(ctx->backend_cpu);
|
| 11953 |
|
| 11954 |
+
if (!llama_kv_cache_init(ctx->kv_self, ctx->model, type_k, type_v, cparams.n_ctx, cparams.offload_kqv)) {
|
|
|
|
| 11955 |
LLAMA_LOG_ERROR("%s: llama_kv_cache_init() failed for self-attention cache\n", __func__);
|
| 11956 |
llama_free(ctx);
|
| 11957 |
return nullptr;
|
|
|
|
| 12030 |
}
|
| 12031 |
|
| 12032 |
// buffer used to store the computation graph and the tensor meta data
|
| 12033 |
+
ctx->buf_compute_meta.resize(ggml_tensor_overhead()*LLAMA_MAX_NODES + ggml_graph_overhead_custom(LLAMA_MAX_NODES, false));
|
| 12034 |
|
| 12035 |
ctx->sched = ggml_backend_sched_new(ctx->backends.data(), backend_buft.data(), ctx->backends.size(), LLAMA_MAX_NODES);
|
| 12036 |
|
|
|
|
| 12099 |
return model->vocab.type;
|
| 12100 |
}
|
| 12101 |
|
| 12102 |
+
enum llama_rope_type llama_rope_type(const struct llama_model * model) {
|
| 12103 |
+
switch (model->arch) {
|
| 12104 |
+
// these models do not use RoPE
|
| 12105 |
+
case LLM_ARCH_GPT2:
|
| 12106 |
+
case LLM_ARCH_GPTJ:
|
| 12107 |
+
case LLM_ARCH_GPTNEOX:
|
| 12108 |
+
case LLM_ARCH_MPT:
|
| 12109 |
+
case LLM_ARCH_REFACT:
|
| 12110 |
+
case LLM_ARCH_BLOOM:
|
| 12111 |
+
return LLAMA_ROPE_TYPE_NONE;
|
| 12112 |
+
|
| 12113 |
+
// use what we call a normal RoPE, operating on pairs of consecutive head values
|
| 12114 |
+
case LLM_ARCH_LLAMA:
|
| 12115 |
+
case LLM_ARCH_BAICHUAN:
|
| 12116 |
+
case LLM_ARCH_STARCODER:
|
| 12117 |
+
case LLM_ARCH_PLAMO:
|
| 12118 |
+
case LLM_ARCH_CODESHELL:
|
| 12119 |
+
case LLM_ARCH_ORION:
|
| 12120 |
+
case LLM_ARCH_INTERNLM2:
|
| 12121 |
+
case LLM_ARCH_MINICPM:
|
| 12122 |
+
return LLAMA_ROPE_TYPE_NORM;
|
| 12123 |
+
|
| 12124 |
+
// the pairs of head values are offset by n_rot/2
|
| 12125 |
+
case LLM_ARCH_FALCON:
|
| 12126 |
+
case LLM_ARCH_PERSIMMON:
|
| 12127 |
+
case LLM_ARCH_BERT:
|
| 12128 |
+
case LLM_ARCH_NOMIC_BERT:
|
| 12129 |
+
case LLM_ARCH_STABLELM:
|
| 12130 |
+
case LLM_ARCH_QWEN:
|
| 12131 |
+
case LLM_ARCH_QWEN2:
|
| 12132 |
+
case LLM_ARCH_PHI2:
|
| 12133 |
+
case LLM_ARCH_GEMMA:
|
| 12134 |
+
return LLAMA_ROPE_TYPE_NEOX;
|
| 12135 |
+
|
| 12136 |
+
// all model arches should be listed explicitly here
|
| 12137 |
+
case LLM_ARCH_UNKNOWN:
|
| 12138 |
+
GGML_ASSERT(false && "unknown architecture");
|
| 12139 |
+
break;
|
| 12140 |
+
}
|
| 12141 |
+
|
| 12142 |
+
return LLAMA_ROPE_TYPE_NONE;
|
| 12143 |
+
}
|
| 12144 |
+
|
| 12145 |
int32_t llama_n_vocab(const struct llama_model * model) {
|
| 12146 |
return model->vocab.id_to_token.size();
|
| 12147 |
}
|
|
|
|
| 12384 |
llama_kv_cache_seq_keep(ctx->kv_self, seq_id);
|
| 12385 |
}
|
| 12386 |
|
| 12387 |
+
void llama_kv_cache_seq_add(struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos delta) {
|
| 12388 |
if (delta == 0) {
|
| 12389 |
return;
|
| 12390 |
}
|
| 12391 |
|
| 12392 |
+
llama_kv_cache_seq_add(ctx->kv_self, seq_id, p0, p1, delta);
|
| 12393 |
}
|
| 12394 |
|
| 12395 |
void llama_kv_cache_seq_div(struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, int d) {
|
|
|
|
| 12400 |
llama_kv_cache_seq_div(ctx->kv_self, seq_id, p0, p1, d);
|
| 12401 |
}
|
| 12402 |
|
| 12403 |
+
llama_pos llama_kv_cache_seq_pos_max(struct llama_context * ctx, llama_seq_id seq_id) {
|
| 12404 |
+
return llama_kv_cache_seq_pos_max(ctx->kv_self, seq_id);
|
| 12405 |
+
}
|
| 12406 |
+
|
| 12407 |
+
void llama_kv_cache_defrag(struct llama_context * ctx) {
|
| 12408 |
+
llama_kv_cache_defrag(ctx->kv_self);
|
| 12409 |
+
}
|
| 12410 |
+
|
| 12411 |
+
void llama_kv_cache_update(struct llama_context * ctx) {
|
| 12412 |
+
llama_kv_cache_update_internal(*ctx);
|
| 12413 |
+
}
|
| 12414 |
+
|
| 12415 |
+
|
| 12416 |
// Returns the *maximum* size of the state
|
| 12417 |
size_t llama_get_state_size(const struct llama_context * ctx) {
|
| 12418 |
// we don't know size of rng until we actually serialize it. so reserve more than enough memory for its serialized state.
|
|
|
|
| 12539 |
const auto & hparams = ctx->model.hparams;
|
| 12540 |
const auto & cparams = ctx->cparams;
|
| 12541 |
|
| 12542 |
+
const uint32_t n_layer = hparams.n_layer;
|
| 12543 |
+
const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa();
|
| 12544 |
+
const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa();
|
| 12545 |
+
const uint32_t n_ctx = cparams.n_ctx;
|
| 12546 |
|
| 12547 |
const size_t kv_buf_size = kv_self.total_size();
|
| 12548 |
const uint32_t kv_head = kv_self.head;
|
|
|
|
| 12557 |
if (kv_buf_size) {
|
| 12558 |
std::vector<uint8_t> tmp_buf;
|
| 12559 |
for (int il = 0; il < (int) n_layer; ++il) {
|
| 12560 |
+
const size_t k_size = ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*kv_head);
|
| 12561 |
+
|
| 12562 |
tmp_buf.resize(k_size);
|
| 12563 |
ggml_backend_tensor_get(kv_self.k_l[il], tmp_buf.data(), 0, tmp_buf.size());
|
| 12564 |
data_ctx->write(tmp_buf.data(), tmp_buf.size());
|
| 12565 |
|
| 12566 |
// v is not contiguous, copy row by row
|
| 12567 |
+
const size_t v_row_size = ggml_row_size(kv_self.v_l[il]->type, kv_head);
|
| 12568 |
+
const size_t v_row_stride = ggml_row_size(kv_self.v_l[il]->type, n_ctx);
|
| 12569 |
+
|
| 12570 |
tmp_buf.resize(v_row_size);
|
| 12571 |
for (int ir = 0; ir < (int) n_embd_v_gqa; ++ir) {
|
| 12572 |
ggml_backend_tensor_get(kv_self.v_l[il], tmp_buf.data(), ir*v_row_stride, tmp_buf.size());
|
|
|
|
| 12653 |
const auto & hparams = ctx->model.hparams;
|
| 12654 |
const auto & cparams = ctx->cparams;
|
| 12655 |
|
| 12656 |
+
const uint32_t n_layer = hparams.n_layer;
|
| 12657 |
+
const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa();
|
| 12658 |
+
const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa();
|
| 12659 |
+
const uint32_t n_ctx = cparams.n_ctx;
|
| 12660 |
|
| 12661 |
size_t kv_buf_size;
|
| 12662 |
uint32_t kv_head;
|
|
|
|
| 12672 |
GGML_ASSERT(kv_self.total_size() == kv_buf_size);
|
| 12673 |
|
| 12674 |
for (int il = 0; il < (int) n_layer; ++il) {
|
| 12675 |
+
const size_t k_size = ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa*kv_head);
|
| 12676 |
+
|
| 12677 |
ggml_backend_tensor_set(kv_self.k_l[il], inp, 0, k_size);
|
| 12678 |
inp += k_size;
|
| 12679 |
|
| 12680 |
// v is not contiguous, copy row by row
|
| 12681 |
+
const size_t v_row_size = ggml_row_size(kv_self.v_l[il]->type, kv_head);
|
| 12682 |
+
const size_t v_row_stride = ggml_row_size(kv_self.v_l[il]->type, n_ctx);
|
| 12683 |
+
|
| 12684 |
for (int ir = 0; ir < (int) n_embd_v_gqa; ++ir) {
|
| 12685 |
ggml_backend_tensor_set(kv_self.v_l[il], inp, ir*v_row_stride, v_row_size);
|
| 12686 |
inp += v_row_size;
|
examples/talk-llama/llama.h
CHANGED
|
@@ -64,6 +64,15 @@ extern "C" {
|
|
| 64 |
LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece
|
| 65 |
};
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
enum llama_token_type {
|
| 68 |
LLAMA_TOKEN_TYPE_UNDEFINED = 0,
|
| 69 |
LLAMA_TOKEN_TYPE_NORMAL = 1,
|
|
@@ -98,12 +107,15 @@ extern "C" {
|
|
| 98 |
LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors
|
| 99 |
LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
|
| 100 |
LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
|
| 101 |
-
|
| 102 |
LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
|
| 103 |
LLAMA_FTYPE_MOSTLY_IQ1_S = 24, // except 1d tensors
|
| 104 |
LLAMA_FTYPE_MOSTLY_IQ4_NL = 25, // except 1d tensors
|
| 105 |
LLAMA_FTYPE_MOSTLY_IQ3_S = 26, // except 1d tensors
|
| 106 |
LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
|
| 109 |
};
|
|
@@ -234,6 +246,7 @@ extern "C" {
|
|
| 234 |
float yarn_beta_fast; // YaRN low correction dim
|
| 235 |
float yarn_beta_slow; // YaRN high correction dim
|
| 236 |
uint32_t yarn_orig_ctx; // YaRN original context size
|
|
|
|
| 237 |
|
| 238 |
ggml_backend_sched_eval_callback cb_eval;
|
| 239 |
void * cb_eval_user_data;
|
|
@@ -360,6 +373,7 @@ extern "C" {
|
|
| 360 |
LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
|
| 361 |
|
| 362 |
LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
|
|
|
|
| 363 |
|
| 364 |
LLAMA_API int32_t llama_n_vocab (const struct llama_model * model);
|
| 365 |
LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
|
|
@@ -514,10 +528,12 @@ extern "C" {
|
|
| 514 |
llama_seq_id seq_id);
|
| 515 |
|
| 516 |
// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
|
| 517 |
-
// If the KV cache is RoPEd, the KV data is updated accordingly
|
|
|
|
|
|
|
| 518 |
// p0 < 0 : [0, p1]
|
| 519 |
// p1 < 0 : [p0, inf)
|
| 520 |
-
LLAMA_API void
|
| 521 |
struct llama_context * ctx,
|
| 522 |
llama_seq_id seq_id,
|
| 523 |
llama_pos p0,
|
|
@@ -525,7 +541,9 @@ extern "C" {
|
|
| 525 |
llama_pos delta);
|
| 526 |
|
| 527 |
// Integer division of the positions by factor of `d > 1`
|
| 528 |
-
// If the KV cache is RoPEd, the KV data is updated accordingly
|
|
|
|
|
|
|
| 529 |
// p0 < 0 : [0, p1]
|
| 530 |
// p1 < 0 : [p0, inf)
|
| 531 |
LLAMA_API void llama_kv_cache_seq_div(
|
|
@@ -535,6 +553,20 @@ extern "C" {
|
|
| 535 |
llama_pos p1,
|
| 536 |
int d);
|
| 537 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 538 |
//
|
| 539 |
// State / sessions
|
| 540 |
//
|
|
|
|
| 64 |
LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece
|
| 65 |
};
|
| 66 |
|
| 67 |
+
// note: these values should be synchronized with ggml_rope
|
| 68 |
+
// TODO: maybe move this enum to ggml.h (ggml_rope_type)
|
| 69 |
+
enum llama_rope_type {
|
| 70 |
+
LLAMA_ROPE_TYPE_NONE = -1,
|
| 71 |
+
LLAMA_ROPE_TYPE_NORM = 0,
|
| 72 |
+
LLAMA_ROPE_TYPE_NEOX = 2,
|
| 73 |
+
LLAMA_ROPE_TYPE_GLM = 4,
|
| 74 |
+
};
|
| 75 |
+
|
| 76 |
enum llama_token_type {
|
| 77 |
LLAMA_TOKEN_TYPE_UNDEFINED = 0,
|
| 78 |
LLAMA_TOKEN_TYPE_NORMAL = 1,
|
|
|
|
| 107 |
LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors
|
| 108 |
LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
|
| 109 |
LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
|
| 110 |
+
LLAMA_FTYPE_MOSTLY_IQ3_XS = 22, // except 1d tensors
|
| 111 |
LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
|
| 112 |
LLAMA_FTYPE_MOSTLY_IQ1_S = 24, // except 1d tensors
|
| 113 |
LLAMA_FTYPE_MOSTLY_IQ4_NL = 25, // except 1d tensors
|
| 114 |
LLAMA_FTYPE_MOSTLY_IQ3_S = 26, // except 1d tensors
|
| 115 |
LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
|
| 116 |
+
LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
|
| 117 |
+
LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
|
| 118 |
+
LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors
|
| 119 |
|
| 120 |
LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
|
| 121 |
};
|
|
|
|
| 246 |
float yarn_beta_fast; // YaRN low correction dim
|
| 247 |
float yarn_beta_slow; // YaRN high correction dim
|
| 248 |
uint32_t yarn_orig_ctx; // YaRN original context size
|
| 249 |
+
float defrag_thold; // defragment the KV cache if holes/size > thold, < 0 disabled (default)
|
| 250 |
|
| 251 |
ggml_backend_sched_eval_callback cb_eval;
|
| 252 |
void * cb_eval_user_data;
|
|
|
|
| 373 |
LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
|
| 374 |
|
| 375 |
LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
|
| 376 |
+
LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model);
|
| 377 |
|
| 378 |
LLAMA_API int32_t llama_n_vocab (const struct llama_model * model);
|
| 379 |
LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
|
|
|
|
| 528 |
llama_seq_id seq_id);
|
| 529 |
|
| 530 |
// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
|
| 531 |
+
// If the KV cache is RoPEd, the KV data is updated accordingly:
|
| 532 |
+
// - lazily on next llama_decode()
|
| 533 |
+
// - explicitly with llama_kv_cache_update()
|
| 534 |
// p0 < 0 : [0, p1]
|
| 535 |
// p1 < 0 : [p0, inf)
|
| 536 |
+
LLAMA_API void llama_kv_cache_seq_add(
|
| 537 |
struct llama_context * ctx,
|
| 538 |
llama_seq_id seq_id,
|
| 539 |
llama_pos p0,
|
|
|
|
| 541 |
llama_pos delta);
|
| 542 |
|
| 543 |
// Integer division of the positions by factor of `d > 1`
|
| 544 |
+
// If the KV cache is RoPEd, the KV data is updated accordingly:
|
| 545 |
+
// - lazily on next llama_decode()
|
| 546 |
+
// - explicitly with llama_kv_cache_update()
|
| 547 |
// p0 < 0 : [0, p1]
|
| 548 |
// p1 < 0 : [p0, inf)
|
| 549 |
LLAMA_API void llama_kv_cache_seq_div(
|
|
|
|
| 553 |
llama_pos p1,
|
| 554 |
int d);
|
| 555 |
|
| 556 |
+
// Returns the largest position present in the KV cache for the specified sequence
|
| 557 |
+
LLAMA_API llama_pos llama_kv_cache_seq_pos_max(
|
| 558 |
+
struct llama_context * ctx,
|
| 559 |
+
llama_seq_id seq_id);
|
| 560 |
+
|
| 561 |
+
// Defragment the KV cache
|
| 562 |
+
// This will be applied:
|
| 563 |
+
// - lazily on next llama_decode()
|
| 564 |
+
// - explicitly with llama_kv_cache_update()
|
| 565 |
+
LLAMA_API void llama_kv_cache_defrag(struct llama_context * ctx);
|
| 566 |
+
|
| 567 |
+
// Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
|
| 568 |
+
LLAMA_API void llama_kv_cache_update(struct llama_context * ctx);
|
| 569 |
+
|
| 570 |
//
|
| 571 |
// State / sessions
|
| 572 |
//
|
examples/talk-llama/unicode.h
CHANGED
|
@@ -223,6 +223,268 @@ static const std::vector<std::pair<uint32_t, uint32_t>> control_ranges = {
|
|
| 223 |
{0x2B81E, 0x2B81F}, {0x2CEA2, 0x2CEAF}, {0x2EBE1, 0x2F7FF}, {0x2FA1E, 0x2FFFF}, {0x3134B, 0xE00FF}, {0xE01F0, 0x10FFFF},
|
| 224 |
};
|
| 225 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
static std::string codepoint_to_utf8(uint32_t cp) {
|
| 227 |
std::string result;
|
| 228 |
if (/* 0x00 <= cp && */ cp <= 0x7f) {
|
|
@@ -404,7 +666,8 @@ static std::unordered_map<uint32_t, int> codepoint_type_map() {
|
|
| 404 |
|
| 405 |
static int codepoint_type(uint32_t cp) {
|
| 406 |
static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
|
| 407 |
-
|
|
|
|
| 408 |
}
|
| 409 |
|
| 410 |
static int codepoint_type(const std::string & utf8) {
|
|
|
|
| 223 |
{0x2B81E, 0x2B81F}, {0x2CEA2, 0x2CEAF}, {0x2EBE1, 0x2F7FF}, {0x2FA1E, 0x2FFFF}, {0x3134B, 0xE00FF}, {0xE01F0, 0x10FFFF},
|
| 224 |
};
|
| 225 |
|
| 226 |
+
static const std::unordered_map<uint32_t, std::vector<uint32_t>> nfd_map = {
|
| 227 |
+
{0xC0, {0x41, 0x300}}, {0xC1, {0x41, 0x301}}, {0xC2, {0x41, 0x302}}, {0xC3, {0x41, 0x303}}, {0xC4, {0x41, 0x308}}, {0xC5, {0x41, 0x30A}}, {0xC7, {0x43, 0x327}}, {0xC8, {0x45, 0x300}},
|
| 228 |
+
{0xC9, {0x45, 0x301}}, {0xCA, {0x45, 0x302}}, {0xCB, {0x45, 0x308}}, {0xCC, {0x49, 0x300}}, {0xCD, {0x49, 0x301}}, {0xCE, {0x49, 0x302}}, {0xCF, {0x49, 0x308}}, {0xD1, {0x4E, 0x303}},
|
| 229 |
+
{0xD2, {0x4F, 0x300}}, {0xD3, {0x4F, 0x301}}, {0xD4, {0x4F, 0x302}}, {0xD5, {0x4F, 0x303}}, {0xD6, {0x4F, 0x308}}, {0xD9, {0x55, 0x300}}, {0xDA, {0x55, 0x301}}, {0xDB, {0x55, 0x302}},
|
| 230 |
+
{0xDC, {0x55, 0x308}}, {0xDD, {0x59, 0x301}}, {0xE0, {0x61, 0x300}}, {0xE1, {0x61, 0x301}}, {0xE2, {0x61, 0x302}}, {0xE3, {0x61, 0x303}}, {0xE4, {0x61, 0x308}}, {0xE5, {0x61, 0x30A}},
|
| 231 |
+
{0xE7, {0x63, 0x327}}, {0xE8, {0x65, 0x300}}, {0xE9, {0x65, 0x301}}, {0xEA, {0x65, 0x302}}, {0xEB, {0x65, 0x308}}, {0xEC, {0x69, 0x300}}, {0xED, {0x69, 0x301}}, {0xEE, {0x69, 0x302}},
|
| 232 |
+
{0xEF, {0x69, 0x308}}, {0xF1, {0x6E, 0x303}}, {0xF2, {0x6F, 0x300}}, {0xF3, {0x6F, 0x301}}, {0xF4, {0x6F, 0x302}}, {0xF5, {0x6F, 0x303}}, {0xF6, {0x6F, 0x308}}, {0xF9, {0x75, 0x300}},
|
| 233 |
+
{0xFA, {0x75, 0x301}}, {0xFB, {0x75, 0x302}}, {0xFC, {0x75, 0x308}}, {0xFD, {0x79, 0x301}}, {0xFF, {0x79, 0x308}}, {0x100, {0x41, 0x304}}, {0x101, {0x61, 0x304}}, {0x102, {0x41, 0x306}},
|
| 234 |
+
{0x103, {0x61, 0x306}}, {0x104, {0x41, 0x328}}, {0x105, {0x61, 0x328}}, {0x106, {0x43, 0x301}}, {0x107, {0x63, 0x301}}, {0x108, {0x43, 0x302}}, {0x109, {0x63, 0x302}}, {0x10A, {0x43, 0x307}},
|
| 235 |
+
{0x10B, {0x63, 0x307}}, {0x10C, {0x43, 0x30C}}, {0x10D, {0x63, 0x30C}}, {0x10E, {0x44, 0x30C}}, {0x10F, {0x64, 0x30C}}, {0x112, {0x45, 0x304}}, {0x113, {0x65, 0x304}}, {0x114, {0x45, 0x306}},
|
| 236 |
+
{0x115, {0x65, 0x306}}, {0x116, {0x45, 0x307}}, {0x117, {0x65, 0x307}}, {0x118, {0x45, 0x328}}, {0x119, {0x65, 0x328}}, {0x11A, {0x45, 0x30C}}, {0x11B, {0x65, 0x30C}}, {0x11C, {0x47, 0x302}},
|
| 237 |
+
{0x11D, {0x67, 0x302}}, {0x11E, {0x47, 0x306}}, {0x11F, {0x67, 0x306}}, {0x120, {0x47, 0x307}}, {0x121, {0x67, 0x307}}, {0x122, {0x47, 0x327}}, {0x123, {0x67, 0x327}}, {0x124, {0x48, 0x302}},
|
| 238 |
+
{0x125, {0x68, 0x302}}, {0x128, {0x49, 0x303}}, {0x129, {0x69, 0x303}}, {0x12A, {0x49, 0x304}}, {0x12B, {0x69, 0x304}}, {0x12C, {0x49, 0x306}}, {0x12D, {0x69, 0x306}}, {0x12E, {0x49, 0x328}},
|
| 239 |
+
{0x12F, {0x69, 0x328}}, {0x130, {0x49, 0x307}}, {0x134, {0x4A, 0x302}}, {0x135, {0x6A, 0x302}}, {0x136, {0x4B, 0x327}}, {0x137, {0x6B, 0x327}}, {0x139, {0x4C, 0x301}}, {0x13A, {0x6C, 0x301}},
|
| 240 |
+
{0x13B, {0x4C, 0x327}}, {0x13C, {0x6C, 0x327}}, {0x13D, {0x4C, 0x30C}}, {0x13E, {0x6C, 0x30C}}, {0x143, {0x4E, 0x301}}, {0x144, {0x6E, 0x301}}, {0x145, {0x4E, 0x327}}, {0x146, {0x6E, 0x327}},
|
| 241 |
+
{0x147, {0x4E, 0x30C}}, {0x148, {0x6E, 0x30C}}, {0x14C, {0x4F, 0x304}}, {0x14D, {0x6F, 0x304}}, {0x14E, {0x4F, 0x306}}, {0x14F, {0x6F, 0x306}}, {0x150, {0x4F, 0x30B}}, {0x151, {0x6F, 0x30B}},
|
| 242 |
+
{0x154, {0x52, 0x301}}, {0x155, {0x72, 0x301}}, {0x156, {0x52, 0x327}}, {0x157, {0x72, 0x327}}, {0x158, {0x52, 0x30C}}, {0x159, {0x72, 0x30C}}, {0x15A, {0x53, 0x301}}, {0x15B, {0x73, 0x301}},
|
| 243 |
+
{0x15C, {0x53, 0x302}}, {0x15D, {0x73, 0x302}}, {0x15E, {0x53, 0x327}}, {0x15F, {0x73, 0x327}}, {0x160, {0x53, 0x30C}}, {0x161, {0x73, 0x30C}}, {0x162, {0x54, 0x327}}, {0x163, {0x74, 0x327}},
|
| 244 |
+
{0x164, {0x54, 0x30C}}, {0x165, {0x74, 0x30C}}, {0x168, {0x55, 0x303}}, {0x169, {0x75, 0x303}}, {0x16A, {0x55, 0x304}}, {0x16B, {0x75, 0x304}}, {0x16C, {0x55, 0x306}}, {0x16D, {0x75, 0x306}},
|
| 245 |
+
{0x16E, {0x55, 0x30A}}, {0x16F, {0x75, 0x30A}}, {0x170, {0x55, 0x30B}}, {0x171, {0x75, 0x30B}}, {0x172, {0x55, 0x328}}, {0x173, {0x75, 0x328}}, {0x174, {0x57, 0x302}}, {0x175, {0x77, 0x302}},
|
| 246 |
+
{0x176, {0x59, 0x302}}, {0x177, {0x79, 0x302}}, {0x178, {0x59, 0x308}}, {0x179, {0x5A, 0x301}}, {0x17A, {0x7A, 0x301}}, {0x17B, {0x5A, 0x307}}, {0x17C, {0x7A, 0x307}}, {0x17D, {0x5A, 0x30C}},
|
| 247 |
+
{0x17E, {0x7A, 0x30C}}, {0x1A0, {0x4F, 0x31B}}, {0x1A1, {0x6F, 0x31B}}, {0x1AF, {0x55, 0x31B}}, {0x1B0, {0x75, 0x31B}}, {0x1CD, {0x41, 0x30C}}, {0x1CE, {0x61, 0x30C}}, {0x1CF, {0x49, 0x30C}},
|
| 248 |
+
{0x1D0, {0x69, 0x30C}}, {0x1D1, {0x4F, 0x30C}}, {0x1D2, {0x6F, 0x30C}}, {0x1D3, {0x55, 0x30C}}, {0x1D4, {0x75, 0x30C}}, {0x1D5, {0x55, 0x308, 0x304}}, {0x1D6, {0x75, 0x308, 0x304}},
|
| 249 |
+
{0x1D7, {0x55, 0x308, 0x301}}, {0x1D8, {0x75, 0x308, 0x301}}, {0x1D9, {0x55, 0x308, 0x30C}}, {0x1DA, {0x75, 0x308, 0x30C}}, {0x1DB, {0x55, 0x308, 0x300}}, {0x1DC, {0x75, 0x308, 0x300}},
|
| 250 |
+
{0x1DE, {0x41, 0x308, 0x304}}, {0x1DF, {0x61, 0x308, 0x304}}, {0x1E0, {0x41, 0x307, 0x304}}, {0x1E1, {0x61, 0x307, 0x304}}, {0x1E2, {0xC6, 0x304}}, {0x1E3, {0xE6, 0x304}}, {0x1E6, {0x47, 0x30C}},
|
| 251 |
+
{0x1E7, {0x67, 0x30C}}, {0x1E8, {0x4B, 0x30C}}, {0x1E9, {0x6B, 0x30C}}, {0x1EA, {0x4F, 0x328}}, {0x1EB, {0x6F, 0x328}}, {0x1EC, {0x4F, 0x328, 0x304}}, {0x1ED, {0x6F, 0x328, 0x304}},
|
| 252 |
+
{0x1EE, {0x1B7, 0x30C}}, {0x1EF, {0x292, 0x30C}}, {0x1F0, {0x6A, 0x30C}}, {0x1F4, {0x47, 0x301}}, {0x1F5, {0x67, 0x301}}, {0x1F8, {0x4E, 0x300}}, {0x1F9, {0x6E, 0x300}}, {0x1FA, {0x41, 0x30A, 0x301}},
|
| 253 |
+
{0x1FB, {0x61, 0x30A, 0x301}}, {0x1FC, {0xC6, 0x301}}, {0x1FD, {0xE6, 0x301}}, {0x1FE, {0xD8, 0x301}}, {0x1FF, {0xF8, 0x301}}, {0x200, {0x41, 0x30F}}, {0x201, {0x61, 0x30F}}, {0x202, {0x41, 0x311}},
|
| 254 |
+
{0x203, {0x61, 0x311}}, {0x204, {0x45, 0x30F}}, {0x205, {0x65, 0x30F}}, {0x206, {0x45, 0x311}}, {0x207, {0x65, 0x311}}, {0x208, {0x49, 0x30F}}, {0x209, {0x69, 0x30F}}, {0x20A, {0x49, 0x311}},
|
| 255 |
+
{0x20B, {0x69, 0x311}}, {0x20C, {0x4F, 0x30F}}, {0x20D, {0x6F, 0x30F}}, {0x20E, {0x4F, 0x311}}, {0x20F, {0x6F, 0x311}}, {0x210, {0x52, 0x30F}}, {0x211, {0x72, 0x30F}}, {0x212, {0x52, 0x311}},
|
| 256 |
+
{0x213, {0x72, 0x311}}, {0x214, {0x55, 0x30F}}, {0x215, {0x75, 0x30F}}, {0x216, {0x55, 0x311}}, {0x217, {0x75, 0x311}}, {0x218, {0x53, 0x326}}, {0x219, {0x73, 0x326}}, {0x21A, {0x54, 0x326}},
|
| 257 |
+
{0x21B, {0x74, 0x326}}, {0x21E, {0x48, 0x30C}}, {0x21F, {0x68, 0x30C}}, {0x226, {0x41, 0x307}}, {0x227, {0x61, 0x307}}, {0x228, {0x45, 0x327}}, {0x229, {0x65, 0x327}}, {0x22A, {0x4F, 0x308, 0x304}},
|
| 258 |
+
{0x22B, {0x6F, 0x308, 0x304}}, {0x22C, {0x4F, 0x303, 0x304}}, {0x22D, {0x6F, 0x303, 0x304}}, {0x22E, {0x4F, 0x307}}, {0x22F, {0x6F, 0x307}}, {0x230, {0x4F, 0x307, 0x304}},
|
| 259 |
+
{0x231, {0x6F, 0x307, 0x304}}, {0x232, {0x59, 0x304}}, {0x233, {0x79, 0x304}}, {0x340, {0x300}}, {0x341, {0x301}}, {0x343, {0x313}}, {0x344, {0x308, 0x301}}, {0x374, {0x2B9}}, {0x37E, {0x3B}},
|
| 260 |
+
{0x385, {0xA8, 0x301}}, {0x386, {0x391, 0x301}}, {0x387, {0xB7}}, {0x388, {0x395, 0x301}}, {0x389, {0x397, 0x301}}, {0x38A, {0x399, 0x301}}, {0x38C, {0x39F, 0x301}}, {0x38E, {0x3A5, 0x301}},
|
| 261 |
+
{0x38F, {0x3A9, 0x301}}, {0x390, {0x3B9, 0x308, 0x301}}, {0x3AA, {0x399, 0x308}}, {0x3AB, {0x3A5, 0x308}}, {0x3AC, {0x3B1, 0x301}}, {0x3AD, {0x3B5, 0x301}}, {0x3AE, {0x3B7, 0x301}},
|
| 262 |
+
{0x3AF, {0x3B9, 0x301}}, {0x3B0, {0x3C5, 0x308, 0x301}}, {0x3CA, {0x3B9, 0x308}}, {0x3CB, {0x3C5, 0x308}}, {0x3CC, {0x3BF, 0x301}}, {0x3CD, {0x3C5, 0x301}}, {0x3CE, {0x3C9, 0x301}},
|
| 263 |
+
{0x3D3, {0x3D2, 0x301}}, {0x3D4, {0x3D2, 0x308}}, {0x400, {0x415, 0x300}}, {0x401, {0x415, 0x308}}, {0x403, {0x413, 0x301}}, {0x407, {0x406, 0x308}}, {0x40C, {0x41A, 0x301}}, {0x40D, {0x418, 0x300}},
|
| 264 |
+
{0x40E, {0x423, 0x306}}, {0x419, {0x418, 0x306}}, {0x439, {0x438, 0x306}}, {0x450, {0x435, 0x300}}, {0x451, {0x435, 0x308}}, {0x453, {0x433, 0x301}}, {0x457, {0x456, 0x308}}, {0x45C, {0x43A, 0x301}},
|
| 265 |
+
{0x45D, {0x438, 0x300}}, {0x45E, {0x443, 0x306}}, {0x476, {0x474, 0x30F}}, {0x477, {0x475, 0x30F}}, {0x4C1, {0x416, 0x306}}, {0x4C2, {0x436, 0x306}}, {0x4D0, {0x410, 0x306}}, {0x4D1, {0x430, 0x306}},
|
| 266 |
+
{0x4D2, {0x410, 0x308}}, {0x4D3, {0x430, 0x308}}, {0x4D6, {0x415, 0x306}}, {0x4D7, {0x435, 0x306}}, {0x4DA, {0x4D8, 0x308}}, {0x4DB, {0x4D9, 0x308}}, {0x4DC, {0x416, 0x308}}, {0x4DD, {0x436, 0x308}},
|
| 267 |
+
{0x4DE, {0x417, 0x308}}, {0x4DF, {0x437, 0x308}}, {0x4E2, {0x418, 0x304}}, {0x4E3, {0x438, 0x304}}, {0x4E4, {0x418, 0x308}}, {0x4E5, {0x438, 0x308}}, {0x4E6, {0x41E, 0x308}}, {0x4E7, {0x43E, 0x308}},
|
| 268 |
+
{0x4EA, {0x4E8, 0x308}}, {0x4EB, {0x4E9, 0x308}}, {0x4EC, {0x42D, 0x308}}, {0x4ED, {0x44D, 0x308}}, {0x4EE, {0x423, 0x304}}, {0x4EF, {0x443, 0x304}}, {0x4F0, {0x423, 0x308}}, {0x4F1, {0x443, 0x308}},
|
| 269 |
+
{0x4F2, {0x423, 0x30B}}, {0x4F3, {0x443, 0x30B}}, {0x4F4, {0x427, 0x308}}, {0x4F5, {0x447, 0x308}}, {0x4F8, {0x42B, 0x308}}, {0x4F9, {0x44B, 0x308}}, {0x622, {0x627, 0x653}}, {0x623, {0x627, 0x654}},
|
| 270 |
+
{0x624, {0x648, 0x654}}, {0x625, {0x627, 0x655}}, {0x626, {0x64A, 0x654}}, {0x6C0, {0x6D5, 0x654}}, {0x6C2, {0x6C1, 0x654}}, {0x6D3, {0x6D2, 0x654}}, {0x929, {0x928, 0x93C}}, {0x931, {0x930, 0x93C}},
|
| 271 |
+
{0x934, {0x933, 0x93C}}, {0x958, {0x915, 0x93C}}, {0x959, {0x916, 0x93C}}, {0x95A, {0x917, 0x93C}}, {0x95B, {0x91C, 0x93C}}, {0x95C, {0x921, 0x93C}}, {0x95D, {0x922, 0x93C}}, {0x95E, {0x92B, 0x93C}},
|
| 272 |
+
{0x95F, {0x92F, 0x93C}}, {0x9CB, {0x9C7, 0x9BE}}, {0x9CC, {0x9C7, 0x9D7}}, {0x9DC, {0x9A1, 0x9BC}}, {0x9DD, {0x9A2, 0x9BC}}, {0x9DF, {0x9AF, 0x9BC}}, {0xA33, {0xA32, 0xA3C}}, {0xA36, {0xA38, 0xA3C}},
|
| 273 |
+
{0xA59, {0xA16, 0xA3C}}, {0xA5A, {0xA17, 0xA3C}}, {0xA5B, {0xA1C, 0xA3C}}, {0xA5E, {0xA2B, 0xA3C}}, {0xB48, {0xB47, 0xB56}}, {0xB4B, {0xB47, 0xB3E}}, {0xB4C, {0xB47, 0xB57}}, {0xB5C, {0xB21, 0xB3C}},
|
| 274 |
+
{0xB5D, {0xB22, 0xB3C}}, {0xB94, {0xB92, 0xBD7}}, {0xBCA, {0xBC6, 0xBBE}}, {0xBCB, {0xBC7, 0xBBE}}, {0xBCC, {0xBC6, 0xBD7}}, {0xC48, {0xC46, 0xC56}}, {0xCC0, {0xCBF, 0xCD5}}, {0xCC7, {0xCC6, 0xCD5}},
|
| 275 |
+
{0xCC8, {0xCC6, 0xCD6}}, {0xCCA, {0xCC6, 0xCC2}}, {0xCCB, {0xCC6, 0xCC2, 0xCD5}}, {0xD4A, {0xD46, 0xD3E}}, {0xD4B, {0xD47, 0xD3E}}, {0xD4C, {0xD46, 0xD57}}, {0xDDA, {0xDD9, 0xDCA}},
|
| 276 |
+
{0xDDC, {0xDD9, 0xDCF}}, {0xDDD, {0xDD9, 0xDCF, 0xDCA}}, {0xDDE, {0xDD9, 0xDDF}}, {0xF43, {0xF42, 0xFB7}}, {0xF4D, {0xF4C, 0xFB7}}, {0xF52, {0xF51, 0xFB7}}, {0xF57, {0xF56, 0xFB7}},
|
| 277 |
+
{0xF5C, {0xF5B, 0xFB7}}, {0xF69, {0xF40, 0xFB5}}, {0xF73, {0xF71, 0xF72}}, {0xF75, {0xF71, 0xF74}}, {0xF76, {0xFB2, 0xF80}}, {0xF78, {0xFB3, 0xF80}}, {0xF81, {0xF71, 0xF80}}, {0xF93, {0xF92, 0xFB7}},
|
| 278 |
+
{0xF9D, {0xF9C, 0xFB7}}, {0xFA2, {0xFA1, 0xFB7}}, {0xFA7, {0xFA6, 0xFB7}}, {0xFAC, {0xFAB, 0xFB7}}, {0xFB9, {0xF90, 0xFB5}}, {0x1026, {0x1025, 0x102E}}, {0x1B06, {0x1B05, 0x1B35}},
|
| 279 |
+
{0x1B08, {0x1B07, 0x1B35}}, {0x1B0A, {0x1B09, 0x1B35}}, {0x1B0C, {0x1B0B, 0x1B35}}, {0x1B0E, {0x1B0D, 0x1B35}}, {0x1B12, {0x1B11, 0x1B35}}, {0x1B3B, {0x1B3A, 0x1B35}}, {0x1B3D, {0x1B3C, 0x1B35}},
|
| 280 |
+
{0x1B40, {0x1B3E, 0x1B35}}, {0x1B41, {0x1B3F, 0x1B35}}, {0x1B43, {0x1B42, 0x1B35}}, {0x1E00, {0x41, 0x325}}, {0x1E01, {0x61, 0x325}}, {0x1E02, {0x42, 0x307}}, {0x1E03, {0x62, 0x307}},
|
| 281 |
+
{0x1E04, {0x42, 0x323}}, {0x1E05, {0x62, 0x323}}, {0x1E06, {0x42, 0x331}}, {0x1E07, {0x62, 0x331}}, {0x1E08, {0x43, 0x327, 0x301}}, {0x1E09, {0x63, 0x327, 0x301}}, {0x1E0A, {0x44, 0x307}},
|
| 282 |
+
{0x1E0B, {0x64, 0x307}}, {0x1E0C, {0x44, 0x323}}, {0x1E0D, {0x64, 0x323}}, {0x1E0E, {0x44, 0x331}}, {0x1E0F, {0x64, 0x331}}, {0x1E10, {0x44, 0x327}}, {0x1E11, {0x64, 0x327}}, {0x1E12, {0x44, 0x32D}},
|
| 283 |
+
{0x1E13, {0x64, 0x32D}}, {0x1E14, {0x45, 0x304, 0x300}}, {0x1E15, {0x65, 0x304, 0x300}}, {0x1E16, {0x45, 0x304, 0x301}}, {0x1E17, {0x65, 0x304, 0x301}}, {0x1E18, {0x45, 0x32D}},
|
| 284 |
+
{0x1E19, {0x65, 0x32D}}, {0x1E1A, {0x45, 0x330}}, {0x1E1B, {0x65, 0x330}}, {0x1E1C, {0x45, 0x327, 0x306}}, {0x1E1D, {0x65, 0x327, 0x306}}, {0x1E1E, {0x46, 0x307}}, {0x1E1F, {0x66, 0x307}},
|
| 285 |
+
{0x1E20, {0x47, 0x304}}, {0x1E21, {0x67, 0x304}}, {0x1E22, {0x48, 0x307}}, {0x1E23, {0x68, 0x307}}, {0x1E24, {0x48, 0x323}}, {0x1E25, {0x68, 0x323}}, {0x1E26, {0x48, 0x308}}, {0x1E27, {0x68, 0x308}},
|
| 286 |
+
{0x1E28, {0x48, 0x327}}, {0x1E29, {0x68, 0x327}}, {0x1E2A, {0x48, 0x32E}}, {0x1E2B, {0x68, 0x32E}}, {0x1E2C, {0x49, 0x330}}, {0x1E2D, {0x69, 0x330}}, {0x1E2E, {0x49, 0x308, 0x301}},
|
| 287 |
+
{0x1E2F, {0x69, 0x308, 0x301}}, {0x1E30, {0x4B, 0x301}}, {0x1E31, {0x6B, 0x301}}, {0x1E32, {0x4B, 0x323}}, {0x1E33, {0x6B, 0x323}}, {0x1E34, {0x4B, 0x331}}, {0x1E35, {0x6B, 0x331}},
|
| 288 |
+
{0x1E36, {0x4C, 0x323}}, {0x1E37, {0x6C, 0x323}}, {0x1E38, {0x4C, 0x323, 0x304}}, {0x1E39, {0x6C, 0x323, 0x304}}, {0x1E3A, {0x4C, 0x331}}, {0x1E3B, {0x6C, 0x331}}, {0x1E3C, {0x4C, 0x32D}},
|
| 289 |
+
{0x1E3D, {0x6C, 0x32D}}, {0x1E3E, {0x4D, 0x301}}, {0x1E3F, {0x6D, 0x301}}, {0x1E40, {0x4D, 0x307}}, {0x1E41, {0x6D, 0x307}}, {0x1E42, {0x4D, 0x323}}, {0x1E43, {0x6D, 0x323}}, {0x1E44, {0x4E, 0x307}},
|
| 290 |
+
{0x1E45, {0x6E, 0x307}}, {0x1E46, {0x4E, 0x323}}, {0x1E47, {0x6E, 0x323}}, {0x1E48, {0x4E, 0x331}}, {0x1E49, {0x6E, 0x331}}, {0x1E4A, {0x4E, 0x32D}}, {0x1E4B, {0x6E, 0x32D}},
|
| 291 |
+
{0x1E4C, {0x4F, 0x303, 0x301}}, {0x1E4D, {0x6F, 0x303, 0x301}}, {0x1E4E, {0x4F, 0x303, 0x308}}, {0x1E4F, {0x6F, 0x303, 0x308}}, {0x1E50, {0x4F, 0x304, 0x300}}, {0x1E51, {0x6F, 0x304, 0x300}},
|
| 292 |
+
{0x1E52, {0x4F, 0x304, 0x301}}, {0x1E53, {0x6F, 0x304, 0x301}}, {0x1E54, {0x50, 0x301}}, {0x1E55, {0x70, 0x301}}, {0x1E56, {0x50, 0x307}}, {0x1E57, {0x70, 0x307}}, {0x1E58, {0x52, 0x307}},
|
| 293 |
+
{0x1E59, {0x72, 0x307}}, {0x1E5A, {0x52, 0x323}}, {0x1E5B, {0x72, 0x323}}, {0x1E5C, {0x52, 0x323, 0x304}}, {0x1E5D, {0x72, 0x323, 0x304}}, {0x1E5E, {0x52, 0x331}}, {0x1E5F, {0x72, 0x331}},
|
| 294 |
+
{0x1E60, {0x53, 0x307}}, {0x1E61, {0x73, 0x307}}, {0x1E62, {0x53, 0x323}}, {0x1E63, {0x73, 0x323}}, {0x1E64, {0x53, 0x301, 0x307}}, {0x1E65, {0x73, 0x301, 0x307}}, {0x1E66, {0x53, 0x30C, 0x307}},
|
| 295 |
+
{0x1E67, {0x73, 0x30C, 0x307}}, {0x1E68, {0x53, 0x323, 0x307}}, {0x1E69, {0x73, 0x323, 0x307}}, {0x1E6A, {0x54, 0x307}}, {0x1E6B, {0x74, 0x307}}, {0x1E6C, {0x54, 0x323}}, {0x1E6D, {0x74, 0x323}},
|
| 296 |
+
{0x1E6E, {0x54, 0x331}}, {0x1E6F, {0x74, 0x331}}, {0x1E70, {0x54, 0x32D}}, {0x1E71, {0x74, 0x32D}}, {0x1E72, {0x55, 0x324}}, {0x1E73, {0x75, 0x324}}, {0x1E74, {0x55, 0x330}}, {0x1E75, {0x75, 0x330}},
|
| 297 |
+
{0x1E76, {0x55, 0x32D}}, {0x1E77, {0x75, 0x32D}}, {0x1E78, {0x55, 0x303, 0x301}}, {0x1E79, {0x75, 0x303, 0x301}}, {0x1E7A, {0x55, 0x304, 0x308}}, {0x1E7B, {0x75, 0x304, 0x308}},
|
| 298 |
+
{0x1E7C, {0x56, 0x303}}, {0x1E7D, {0x76, 0x303}}, {0x1E7E, {0x56, 0x323}}, {0x1E7F, {0x76, 0x323}}, {0x1E80, {0x57, 0x300}}, {0x1E81, {0x77, 0x300}}, {0x1E82, {0x57, 0x301}}, {0x1E83, {0x77, 0x301}},
|
| 299 |
+
{0x1E84, {0x57, 0x308}}, {0x1E85, {0x77, 0x308}}, {0x1E86, {0x57, 0x307}}, {0x1E87, {0x77, 0x307}}, {0x1E88, {0x57, 0x323}}, {0x1E89, {0x77, 0x323}}, {0x1E8A, {0x58, 0x307}}, {0x1E8B, {0x78, 0x307}},
|
| 300 |
+
{0x1E8C, {0x58, 0x308}}, {0x1E8D, {0x78, 0x308}}, {0x1E8E, {0x59, 0x307}}, {0x1E8F, {0x79, 0x307}}, {0x1E90, {0x5A, 0x302}}, {0x1E91, {0x7A, 0x302}}, {0x1E92, {0x5A, 0x323}}, {0x1E93, {0x7A, 0x323}},
|
| 301 |
+
{0x1E94, {0x5A, 0x331}}, {0x1E95, {0x7A, 0x331}}, {0x1E96, {0x68, 0x331}}, {0x1E97, {0x74, 0x308}}, {0x1E98, {0x77, 0x30A}}, {0x1E99, {0x79, 0x30A}}, {0x1E9B, {0x17F, 0x307}}, {0x1EA0, {0x41, 0x323}},
|
| 302 |
+
{0x1EA1, {0x61, 0x323}}, {0x1EA2, {0x41, 0x309}}, {0x1EA3, {0x61, 0x309}}, {0x1EA4, {0x41, 0x302, 0x301}}, {0x1EA5, {0x61, 0x302, 0x301}}, {0x1EA6, {0x41, 0x302, 0x300}},
|
| 303 |
+
{0x1EA7, {0x61, 0x302, 0x300}}, {0x1EA8, {0x41, 0x302, 0x309}}, {0x1EA9, {0x61, 0x302, 0x309}}, {0x1EAA, {0x41, 0x302, 0x303}}, {0x1EAB, {0x61, 0x302, 0x303}}, {0x1EAC, {0x41, 0x323, 0x302}},
|
| 304 |
+
{0x1EAD, {0x61, 0x323, 0x302}}, {0x1EAE, {0x41, 0x306, 0x301}}, {0x1EAF, {0x61, 0x306, 0x301}}, {0x1EB0, {0x41, 0x306, 0x300}}, {0x1EB1, {0x61, 0x306, 0x300}}, {0x1EB2, {0x41, 0x306, 0x309}},
|
| 305 |
+
{0x1EB3, {0x61, 0x306, 0x309}}, {0x1EB4, {0x41, 0x306, 0x303}}, {0x1EB5, {0x61, 0x306, 0x303}}, {0x1EB6, {0x41, 0x323, 0x306}}, {0x1EB7, {0x61, 0x323, 0x306}}, {0x1EB8, {0x45, 0x323}},
|
| 306 |
+
{0x1EB9, {0x65, 0x323}}, {0x1EBA, {0x45, 0x309}}, {0x1EBB, {0x65, 0x309}}, {0x1EBC, {0x45, 0x303}}, {0x1EBD, {0x65, 0x303}}, {0x1EBE, {0x45, 0x302, 0x301}}, {0x1EBF, {0x65, 0x302, 0x301}},
|
| 307 |
+
{0x1EC0, {0x45, 0x302, 0x300}}, {0x1EC1, {0x65, 0x302, 0x300}}, {0x1EC2, {0x45, 0x302, 0x309}}, {0x1EC3, {0x65, 0x302, 0x309}}, {0x1EC4, {0x45, 0x302, 0x303}}, {0x1EC5, {0x65, 0x302, 0x303}},
|
| 308 |
+
{0x1EC6, {0x45, 0x323, 0x302}}, {0x1EC7, {0x65, 0x323, 0x302}}, {0x1EC8, {0x49, 0x309}}, {0x1EC9, {0x69, 0x309}}, {0x1ECA, {0x49, 0x323}}, {0x1ECB, {0x69, 0x323}}, {0x1ECC, {0x4F, 0x323}},
|
| 309 |
+
{0x1ECD, {0x6F, 0x323}}, {0x1ECE, {0x4F, 0x309}}, {0x1ECF, {0x6F, 0x309}}, {0x1ED0, {0x4F, 0x302, 0x301}}, {0x1ED1, {0x6F, 0x302, 0x301}}, {0x1ED2, {0x4F, 0x302, 0x300}},
|
| 310 |
+
{0x1ED3, {0x6F, 0x302, 0x300}}, {0x1ED4, {0x4F, 0x302, 0x309}}, {0x1ED5, {0x6F, 0x302, 0x309}}, {0x1ED6, {0x4F, 0x302, 0x303}}, {0x1ED7, {0x6F, 0x302, 0x303}}, {0x1ED8, {0x4F, 0x323, 0x302}},
|
| 311 |
+
{0x1ED9, {0x6F, 0x323, 0x302}}, {0x1EDA, {0x4F, 0x31B, 0x301}}, {0x1EDB, {0x6F, 0x31B, 0x301}}, {0x1EDC, {0x4F, 0x31B, 0x300}}, {0x1EDD, {0x6F, 0x31B, 0x300}}, {0x1EDE, {0x4F, 0x31B, 0x309}},
|
| 312 |
+
{0x1EDF, {0x6F, 0x31B, 0x309}}, {0x1EE0, {0x4F, 0x31B, 0x303}}, {0x1EE1, {0x6F, 0x31B, 0x303}}, {0x1EE2, {0x4F, 0x31B, 0x323}}, {0x1EE3, {0x6F, 0x31B, 0x323}}, {0x1EE4, {0x55, 0x323}},
|
| 313 |
+
{0x1EE5, {0x75, 0x323}}, {0x1EE6, {0x55, 0x309}}, {0x1EE7, {0x75, 0x309}}, {0x1EE8, {0x55, 0x31B, 0x301}}, {0x1EE9, {0x75, 0x31B, 0x301}}, {0x1EEA, {0x55, 0x31B, 0x300}},
|
| 314 |
+
{0x1EEB, {0x75, 0x31B, 0x300}}, {0x1EEC, {0x55, 0x31B, 0x309}}, {0x1EED, {0x75, 0x31B, 0x309}}, {0x1EEE, {0x55, 0x31B, 0x303}}, {0x1EEF, {0x75, 0x31B, 0x303}}, {0x1EF0, {0x55, 0x31B, 0x323}},
|
| 315 |
+
{0x1EF1, {0x75, 0x31B, 0x323}}, {0x1EF2, {0x59, 0x300}}, {0x1EF3, {0x79, 0x300}}, {0x1EF4, {0x59, 0x323}}, {0x1EF5, {0x79, 0x323}}, {0x1EF6, {0x59, 0x309}}, {0x1EF7, {0x79, 0x309}},
|
| 316 |
+
{0x1EF8, {0x59, 0x303}}, {0x1EF9, {0x79, 0x303}}, {0x1F00, {0x3B1, 0x313}}, {0x1F01, {0x3B1, 0x314}}, {0x1F02, {0x3B1, 0x313, 0x300}}, {0x1F03, {0x3B1, 0x314, 0x300}}, {0x1F04, {0x3B1, 0x313, 0x301}},
|
| 317 |
+
{0x1F05, {0x3B1, 0x314, 0x301}}, {0x1F06, {0x3B1, 0x313, 0x342}}, {0x1F07, {0x3B1, 0x314, 0x342}}, {0x1F08, {0x391, 0x313}}, {0x1F09, {0x391, 0x314}}, {0x1F0A, {0x391, 0x313, 0x300}},
|
| 318 |
+
{0x1F0B, {0x391, 0x314, 0x300}}, {0x1F0C, {0x391, 0x313, 0x301}}, {0x1F0D, {0x391, 0x314, 0x301}}, {0x1F0E, {0x391, 0x313, 0x342}}, {0x1F0F, {0x391, 0x314, 0x342}}, {0x1F10, {0x3B5, 0x313}},
|
| 319 |
+
{0x1F11, {0x3B5, 0x314}}, {0x1F12, {0x3B5, 0x313, 0x300}}, {0x1F13, {0x3B5, 0x314, 0x300}}, {0x1F14, {0x3B5, 0x313, 0x301}}, {0x1F15, {0x3B5, 0x314, 0x301}}, {0x1F18, {0x395, 0x313}},
|
| 320 |
+
{0x1F19, {0x395, 0x314}}, {0x1F1A, {0x395, 0x313, 0x300}}, {0x1F1B, {0x395, 0x314, 0x300}}, {0x1F1C, {0x395, 0x313, 0x301}}, {0x1F1D, {0x395, 0x314, 0x301}}, {0x1F20, {0x3B7, 0x313}},
|
| 321 |
+
{0x1F21, {0x3B7, 0x314}}, {0x1F22, {0x3B7, 0x313, 0x300}}, {0x1F23, {0x3B7, 0x314, 0x300}}, {0x1F24, {0x3B7, 0x313, 0x301}}, {0x1F25, {0x3B7, 0x314, 0x301}}, {0x1F26, {0x3B7, 0x313, 0x342}},
|
| 322 |
+
{0x1F27, {0x3B7, 0x314, 0x342}}, {0x1F28, {0x397, 0x313}}, {0x1F29, {0x397, 0x314}}, {0x1F2A, {0x397, 0x313, 0x300}}, {0x1F2B, {0x397, 0x314, 0x300}}, {0x1F2C, {0x397, 0x313, 0x301}},
|
| 323 |
+
{0x1F2D, {0x397, 0x314, 0x301}}, {0x1F2E, {0x397, 0x313, 0x342}}, {0x1F2F, {0x397, 0x314, 0x342}}, {0x1F30, {0x3B9, 0x313}}, {0x1F31, {0x3B9, 0x314}}, {0x1F32, {0x3B9, 0x313, 0x300}},
|
| 324 |
+
{0x1F33, {0x3B9, 0x314, 0x300}}, {0x1F34, {0x3B9, 0x313, 0x301}}, {0x1F35, {0x3B9, 0x314, 0x301}}, {0x1F36, {0x3B9, 0x313, 0x342}}, {0x1F37, {0x3B9, 0x314, 0x342}}, {0x1F38, {0x399, 0x313}},
|
| 325 |
+
{0x1F39, {0x399, 0x314}}, {0x1F3A, {0x399, 0x313, 0x300}}, {0x1F3B, {0x399, 0x314, 0x300}}, {0x1F3C, {0x399, 0x313, 0x301}}, {0x1F3D, {0x399, 0x314, 0x301}}, {0x1F3E, {0x399, 0x313, 0x342}},
|
| 326 |
+
{0x1F3F, {0x399, 0x314, 0x342}}, {0x1F40, {0x3BF, 0x313}}, {0x1F41, {0x3BF, 0x314}}, {0x1F42, {0x3BF, 0x313, 0x300}}, {0x1F43, {0x3BF, 0x314, 0x300}}, {0x1F44, {0x3BF, 0x313, 0x301}},
|
| 327 |
+
{0x1F45, {0x3BF, 0x314, 0x301}}, {0x1F48, {0x39F, 0x313}}, {0x1F49, {0x39F, 0x314}}, {0x1F4A, {0x39F, 0x313, 0x300}}, {0x1F4B, {0x39F, 0x314, 0x300}}, {0x1F4C, {0x39F, 0x313, 0x301}},
|
| 328 |
+
{0x1F4D, {0x39F, 0x314, 0x301}}, {0x1F50, {0x3C5, 0x313}}, {0x1F51, {0x3C5, 0x314}}, {0x1F52, {0x3C5, 0x313, 0x300}}, {0x1F53, {0x3C5, 0x314, 0x300}}, {0x1F54, {0x3C5, 0x313, 0x301}},
|
| 329 |
+
{0x1F55, {0x3C5, 0x314, 0x301}}, {0x1F56, {0x3C5, 0x313, 0x342}}, {0x1F57, {0x3C5, 0x314, 0x342}}, {0x1F59, {0x3A5, 0x314}}, {0x1F5B, {0x3A5, 0x314, 0x300}}, {0x1F5D, {0x3A5, 0x314, 0x301}},
|
| 330 |
+
{0x1F5F, {0x3A5, 0x314, 0x342}}, {0x1F60, {0x3C9, 0x313}}, {0x1F61, {0x3C9, 0x314}}, {0x1F62, {0x3C9, 0x313, 0x300}}, {0x1F63, {0x3C9, 0x314, 0x300}}, {0x1F64, {0x3C9, 0x313, 0x301}},
|
| 331 |
+
{0x1F65, {0x3C9, 0x314, 0x301}}, {0x1F66, {0x3C9, 0x313, 0x342}}, {0x1F67, {0x3C9, 0x314, 0x342}}, {0x1F68, {0x3A9, 0x313}}, {0x1F69, {0x3A9, 0x314}}, {0x1F6A, {0x3A9, 0x313, 0x300}},
|
| 332 |
+
{0x1F6B, {0x3A9, 0x314, 0x300}}, {0x1F6C, {0x3A9, 0x313, 0x301}}, {0x1F6D, {0x3A9, 0x314, 0x301}}, {0x1F6E, {0x3A9, 0x313, 0x342}}, {0x1F6F, {0x3A9, 0x314, 0x342}}, {0x1F70, {0x3B1, 0x300}},
|
| 333 |
+
{0x1F71, {0x3B1, 0x301}}, {0x1F72, {0x3B5, 0x300}}, {0x1F73, {0x3B5, 0x301}}, {0x1F74, {0x3B7, 0x300}}, {0x1F75, {0x3B7, 0x301}}, {0x1F76, {0x3B9, 0x300}}, {0x1F77, {0x3B9, 0x301}},
|
| 334 |
+
{0x1F78, {0x3BF, 0x300}}, {0x1F79, {0x3BF, 0x301}}, {0x1F7A, {0x3C5, 0x300}}, {0x1F7B, {0x3C5, 0x301}}, {0x1F7C, {0x3C9, 0x300}}, {0x1F7D, {0x3C9, 0x301}}, {0x1F80, {0x3B1, 0x313, 0x345}},
|
| 335 |
+
{0x1F81, {0x3B1, 0x314, 0x345}}, {0x1F82, {0x3B1, 0x313, 0x300, 0x345}}, {0x1F83, {0x3B1, 0x314, 0x300, 0x345}}, {0x1F84, {0x3B1, 0x313, 0x301, 0x345}}, {0x1F85, {0x3B1, 0x314, 0x301, 0x345}},
|
| 336 |
+
{0x1F86, {0x3B1, 0x313, 0x342, 0x345}}, {0x1F87, {0x3B1, 0x314, 0x342, 0x345}}, {0x1F88, {0x391, 0x313, 0x345}}, {0x1F89, {0x391, 0x314, 0x345}}, {0x1F8A, {0x391, 0x313, 0x300, 0x345}},
|
| 337 |
+
{0x1F8B, {0x391, 0x314, 0x300, 0x345}}, {0x1F8C, {0x391, 0x313, 0x301, 0x345}}, {0x1F8D, {0x391, 0x314, 0x301, 0x345}}, {0x1F8E, {0x391, 0x313, 0x342, 0x345}}, {0x1F8F, {0x391, 0x314, 0x342, 0x345}},
|
| 338 |
+
{0x1F90, {0x3B7, 0x313, 0x345}}, {0x1F91, {0x3B7, 0x314, 0x345}}, {0x1F92, {0x3B7, 0x313, 0x300, 0x345}}, {0x1F93, {0x3B7, 0x314, 0x300, 0x345}}, {0x1F94, {0x3B7, 0x313, 0x301, 0x345}},
|
| 339 |
+
{0x1F95, {0x3B7, 0x314, 0x301, 0x345}}, {0x1F96, {0x3B7, 0x313, 0x342, 0x345}}, {0x1F97, {0x3B7, 0x314, 0x342, 0x345}}, {0x1F98, {0x397, 0x313, 0x345}}, {0x1F99, {0x397, 0x314, 0x345}},
|
| 340 |
+
{0x1F9A, {0x397, 0x313, 0x300, 0x345}}, {0x1F9B, {0x397, 0x314, 0x300, 0x345}}, {0x1F9C, {0x397, 0x313, 0x301, 0x345}}, {0x1F9D, {0x397, 0x314, 0x301, 0x345}}, {0x1F9E, {0x397, 0x313, 0x342, 0x345}},
|
| 341 |
+
{0x1F9F, {0x397, 0x314, 0x342, 0x345}}, {0x1FA0, {0x3C9, 0x313, 0x345}}, {0x1FA1, {0x3C9, 0x314, 0x345}}, {0x1FA2, {0x3C9, 0x313, 0x300, 0x345}}, {0x1FA3, {0x3C9, 0x314, 0x300, 0x345}},
|
| 342 |
+
{0x1FA4, {0x3C9, 0x313, 0x301, 0x345}}, {0x1FA5, {0x3C9, 0x314, 0x301, 0x345}}, {0x1FA6, {0x3C9, 0x313, 0x342, 0x345}}, {0x1FA7, {0x3C9, 0x314, 0x342, 0x345}}, {0x1FA8, {0x3A9, 0x313, 0x345}},
|
| 343 |
+
{0x1FA9, {0x3A9, 0x314, 0x345}}, {0x1FAA, {0x3A9, 0x313, 0x300, 0x345}}, {0x1FAB, {0x3A9, 0x314, 0x300, 0x345}}, {0x1FAC, {0x3A9, 0x313, 0x301, 0x345}}, {0x1FAD, {0x3A9, 0x314, 0x301, 0x345}},
|
| 344 |
+
{0x1FAE, {0x3A9, 0x313, 0x342, 0x345}}, {0x1FAF, {0x3A9, 0x314, 0x342, 0x345}}, {0x1FB0, {0x3B1, 0x306}}, {0x1FB1, {0x3B1, 0x304}}, {0x1FB2, {0x3B1, 0x300, 0x345}}, {0x1FB3, {0x3B1, 0x345}},
|
| 345 |
+
{0x1FB4, {0x3B1, 0x301, 0x345}}, {0x1FB6, {0x3B1, 0x342}}, {0x1FB7, {0x3B1, 0x342, 0x345}}, {0x1FB8, {0x391, 0x306}}, {0x1FB9, {0x391, 0x304}}, {0x1FBA, {0x391, 0x300}}, {0x1FBB, {0x391, 0x301}},
|
| 346 |
+
{0x1FBC, {0x391, 0x345}}, {0x1FBE, {0x3B9}}, {0x1FC1, {0xA8, 0x342}}, {0x1FC2, {0x3B7, 0x300, 0x345}}, {0x1FC3, {0x3B7, 0x345}}, {0x1FC4, {0x3B7, 0x301, 0x345}}, {0x1FC6, {0x3B7, 0x342}},
|
| 347 |
+
{0x1FC7, {0x3B7, 0x342, 0x345}}, {0x1FC8, {0x395, 0x300}}, {0x1FC9, {0x395, 0x301}}, {0x1FCA, {0x397, 0x300}}, {0x1FCB, {0x397, 0x301}}, {0x1FCC, {0x397, 0x345}}, {0x1FCD, {0x1FBF, 0x300}},
|
| 348 |
+
{0x1FCE, {0x1FBF, 0x301}}, {0x1FCF, {0x1FBF, 0x342}}, {0x1FD0, {0x3B9, 0x306}}, {0x1FD1, {0x3B9, 0x304}}, {0x1FD2, {0x3B9, 0x308, 0x300}}, {0x1FD3, {0x3B9, 0x308, 0x301}}, {0x1FD6, {0x3B9, 0x342}},
|
| 349 |
+
{0x1FD7, {0x3B9, 0x308, 0x342}}, {0x1FD8, {0x399, 0x306}}, {0x1FD9, {0x399, 0x304}}, {0x1FDA, {0x399, 0x300}}, {0x1FDB, {0x399, 0x301}}, {0x1FDD, {0x1FFE, 0x300}}, {0x1FDE, {0x1FFE, 0x301}},
|
| 350 |
+
{0x1FDF, {0x1FFE, 0x342}}, {0x1FE0, {0x3C5, 0x306}}, {0x1FE1, {0x3C5, 0x304}}, {0x1FE2, {0x3C5, 0x308, 0x300}}, {0x1FE3, {0x3C5, 0x308, 0x301}}, {0x1FE4, {0x3C1, 0x313}}, {0x1FE5, {0x3C1, 0x314}},
|
| 351 |
+
{0x1FE6, {0x3C5, 0x342}}, {0x1FE7, {0x3C5, 0x308, 0x342}}, {0x1FE8, {0x3A5, 0x306}}, {0x1FE9, {0x3A5, 0x304}}, {0x1FEA, {0x3A5, 0x300}}, {0x1FEB, {0x3A5, 0x301}}, {0x1FEC, {0x3A1, 0x314}},
|
| 352 |
+
{0x1FED, {0xA8, 0x300}}, {0x1FEE, {0xA8, 0x301}}, {0x1FEF, {0x60}}, {0x1FF2, {0x3C9, 0x300, 0x345}}, {0x1FF3, {0x3C9, 0x345}}, {0x1FF4, {0x3C9, 0x301, 0x345}}, {0x1FF6, {0x3C9, 0x342}},
|
| 353 |
+
{0x1FF7, {0x3C9, 0x342, 0x345}}, {0x1FF8, {0x39F, 0x300}}, {0x1FF9, {0x39F, 0x301}}, {0x1FFA, {0x3A9, 0x300}}, {0x1FFB, {0x3A9, 0x301}}, {0x1FFC, {0x3A9, 0x345}}, {0x1FFD, {0xB4}}, {0x2000, {0x2002}},
|
| 354 |
+
{0x2001, {0x2003}}, {0x2126, {0x3A9}}, {0x212A, {0x4B}}, {0x212B, {0x41, 0x30A}}, {0x219A, {0x2190, 0x338}}, {0x219B, {0x2192, 0x338}}, {0x21AE, {0x2194, 0x338}}, {0x21CD, {0x21D0, 0x338}},
|
| 355 |
+
{0x21CE, {0x21D4, 0x338}}, {0x21CF, {0x21D2, 0x338}}, {0x2204, {0x2203, 0x338}}, {0x2209, {0x2208, 0x338}}, {0x220C, {0x220B, 0x338}}, {0x2224, {0x2223, 0x338}}, {0x2226, {0x2225, 0x338}},
|
| 356 |
+
{0x2241, {0x223C, 0x338}}, {0x2244, {0x2243, 0x338}}, {0x2247, {0x2245, 0x338}}, {0x2249, {0x2248, 0x338}}, {0x2260, {0x3D, 0x338}}, {0x2262, {0x2261, 0x338}}, {0x226D, {0x224D, 0x338}},
|
| 357 |
+
{0x226E, {0x3C, 0x338}}, {0x226F, {0x3E, 0x338}}, {0x2270, {0x2264, 0x338}}, {0x2271, {0x2265, 0x338}}, {0x2274, {0x2272, 0x338}}, {0x2275, {0x2273, 0x338}}, {0x2278, {0x2276, 0x338}},
|
| 358 |
+
{0x2279, {0x2277, 0x338}}, {0x2280, {0x227A, 0x338}}, {0x2281, {0x227B, 0x338}}, {0x2284, {0x2282, 0x338}}, {0x2285, {0x2283, 0x338}}, {0x2288, {0x2286, 0x338}}, {0x2289, {0x2287, 0x338}},
|
| 359 |
+
{0x22AC, {0x22A2, 0x338}}, {0x22AD, {0x22A8, 0x338}}, {0x22AE, {0x22A9, 0x338}}, {0x22AF, {0x22AB, 0x338}}, {0x22E0, {0x227C, 0x338}}, {0x22E1, {0x227D, 0x338}}, {0x22E2, {0x2291, 0x338}},
|
| 360 |
+
{0x22E3, {0x2292, 0x338}}, {0x22EA, {0x22B2, 0x338}}, {0x22EB, {0x22B3, 0x338}}, {0x22EC, {0x22B4, 0x338}}, {0x22ED, {0x22B5, 0x338}}, {0x2329, {0x3008}}, {0x232A, {0x3009}},
|
| 361 |
+
{0x2ADC, {0x2ADD, 0x338}}, {0x304C, {0x304B, 0x3099}}, {0x304E, {0x304D, 0x3099}}, {0x3050, {0x304F, 0x3099}}, {0x3052, {0x3051, 0x3099}}, {0x3054, {0x3053, 0x3099}}, {0x3056, {0x3055, 0x3099}},
|
| 362 |
+
{0x3058, {0x3057, 0x3099}}, {0x305A, {0x3059, 0x3099}}, {0x305C, {0x305B, 0x3099}}, {0x305E, {0x305D, 0x3099}}, {0x3060, {0x305F, 0x3099}}, {0x3062, {0x3061, 0x3099}}, {0x3065, {0x3064, 0x3099}},
|
| 363 |
+
{0x3067, {0x3066, 0x3099}}, {0x3069, {0x3068, 0x3099}}, {0x3070, {0x306F, 0x3099}}, {0x3071, {0x306F, 0x309A}}, {0x3073, {0x3072, 0x3099}}, {0x3074, {0x3072, 0x309A}}, {0x3076, {0x3075, 0x3099}},
|
| 364 |
+
{0x3077, {0x3075, 0x309A}}, {0x3079, {0x3078, 0x3099}}, {0x307A, {0x3078, 0x309A}}, {0x307C, {0x307B, 0x3099}}, {0x307D, {0x307B, 0x309A}}, {0x3094, {0x3046, 0x3099}}, {0x309E, {0x309D, 0x3099}},
|
| 365 |
+
{0x30AC, {0x30AB, 0x3099}}, {0x30AE, {0x30AD, 0x3099}}, {0x30B0, {0x30AF, 0x3099}}, {0x30B2, {0x30B1, 0x3099}}, {0x30B4, {0x30B3, 0x3099}}, {0x30B6, {0x30B5, 0x3099}}, {0x30B8, {0x30B7, 0x3099}},
|
| 366 |
+
{0x30BA, {0x30B9, 0x3099}}, {0x30BC, {0x30BB, 0x3099}}, {0x30BE, {0x30BD, 0x3099}}, {0x30C0, {0x30BF, 0x3099}}, {0x30C2, {0x30C1, 0x3099}}, {0x30C5, {0x30C4, 0x3099}}, {0x30C7, {0x30C6, 0x3099}},
|
| 367 |
+
{0x30C9, {0x30C8, 0x3099}}, {0x30D0, {0x30CF, 0x3099}}, {0x30D1, {0x30CF, 0x309A}}, {0x30D3, {0x30D2, 0x3099}}, {0x30D4, {0x30D2, 0x309A}}, {0x30D6, {0x30D5, 0x3099}}, {0x30D7, {0x30D5, 0x309A}},
|
| 368 |
+
{0x30D9, {0x30D8, 0x3099}}, {0x30DA, {0x30D8, 0x309A}}, {0x30DC, {0x30DB, 0x3099}}, {0x30DD, {0x30DB, 0x309A}}, {0x30F4, {0x30A6, 0x3099}}, {0x30F7, {0x30EF, 0x3099}}, {0x30F8, {0x30F0, 0x3099}},
|
| 369 |
+
{0x30F9, {0x30F1, 0x3099}}, {0x30FA, {0x30F2, 0x3099}}, {0x30FE, {0x30FD, 0x3099}}, {0xF900, {0x8C48}}, {0xF901, {0x66F4}}, {0xF902, {0x8ECA}}, {0xF903, {0x8CC8}}, {0xF904, {0x6ED1}},
|
| 370 |
+
{0xF905, {0x4E32}}, {0xF906, {0x53E5}}, {0xF907, {0x9F9C}}, {0xF908, {0x9F9C}}, {0xF909, {0x5951}}, {0xF90A, {0x91D1}}, {0xF90B, {0x5587}}, {0xF90C, {0x5948}}, {0xF90D, {0x61F6}}, {0xF90E, {0x7669}},
|
| 371 |
+
{0xF90F, {0x7F85}}, {0xF910, {0x863F}}, {0xF911, {0x87BA}}, {0xF912, {0x88F8}}, {0xF913, {0x908F}}, {0xF914, {0x6A02}}, {0xF915, {0x6D1B}}, {0xF916, {0x70D9}}, {0xF917, {0x73DE}}, {0xF918, {0x843D}},
|
| 372 |
+
{0xF919, {0x916A}}, {0xF91A, {0x99F1}}, {0xF91B, {0x4E82}}, {0xF91C, {0x5375}}, {0xF91D, {0x6B04}}, {0xF91E, {0x721B}}, {0xF91F, {0x862D}}, {0xF920, {0x9E1E}}, {0xF921, {0x5D50}}, {0xF922, {0x6FEB}},
|
| 373 |
+
{0xF923, {0x85CD}}, {0xF924, {0x8964}}, {0xF925, {0x62C9}}, {0xF926, {0x81D8}}, {0xF927, {0x881F}}, {0xF928, {0x5ECA}}, {0xF929, {0x6717}}, {0xF92A, {0x6D6A}}, {0xF92B, {0x72FC}}, {0xF92C, {0x90CE}},
|
| 374 |
+
{0xF92D, {0x4F86}}, {0xF92E, {0x51B7}}, {0xF92F, {0x52DE}}, {0xF930, {0x64C4}}, {0xF931, {0x6AD3}}, {0xF932, {0x7210}}, {0xF933, {0x76E7}}, {0xF934, {0x8001}}, {0xF935, {0x8606}}, {0xF936, {0x865C}},
|
| 375 |
+
{0xF937, {0x8DEF}}, {0xF938, {0x9732}}, {0xF939, {0x9B6F}}, {0xF93A, {0x9DFA}}, {0xF93B, {0x788C}}, {0xF93C, {0x797F}}, {0xF93D, {0x7DA0}}, {0xF93E, {0x83C9}}, {0xF93F, {0x9304}}, {0xF940, {0x9E7F}},
|
| 376 |
+
{0xF941, {0x8AD6}}, {0xF942, {0x58DF}}, {0xF943, {0x5F04}}, {0xF944, {0x7C60}}, {0xF945, {0x807E}}, {0xF946, {0x7262}}, {0xF947, {0x78CA}}, {0xF948, {0x8CC2}}, {0xF949, {0x96F7}}, {0xF94A, {0x58D8}},
|
| 377 |
+
{0xF94B, {0x5C62}}, {0xF94C, {0x6A13}}, {0xF94D, {0x6DDA}}, {0xF94E, {0x6F0F}}, {0xF94F, {0x7D2F}}, {0xF950, {0x7E37}}, {0xF951, {0x964B}}, {0xF952, {0x52D2}}, {0xF953, {0x808B}}, {0xF954, {0x51DC}},
|
| 378 |
+
{0xF955, {0x51CC}}, {0xF956, {0x7A1C}}, {0xF957, {0x7DBE}}, {0xF958, {0x83F1}}, {0xF959, {0x9675}}, {0xF95A, {0x8B80}}, {0xF95B, {0x62CF}}, {0xF95C, {0x6A02}}, {0xF95D, {0x8AFE}}, {0xF95E, {0x4E39}},
|
| 379 |
+
{0xF95F, {0x5BE7}}, {0xF960, {0x6012}}, {0xF961, {0x7387}}, {0xF962, {0x7570}}, {0xF963, {0x5317}}, {0xF964, {0x78FB}}, {0xF965, {0x4FBF}}, {0xF966, {0x5FA9}}, {0xF967, {0x4E0D}}, {0xF968, {0x6CCC}},
|
| 380 |
+
{0xF969, {0x6578}}, {0xF96A, {0x7D22}}, {0xF96B, {0x53C3}}, {0xF96C, {0x585E}}, {0xF96D, {0x7701}}, {0xF96E, {0x8449}}, {0xF96F, {0x8AAA}}, {0xF970, {0x6BBA}}, {0xF971, {0x8FB0}}, {0xF972, {0x6C88}},
|
| 381 |
+
{0xF973, {0x62FE}}, {0xF974, {0x82E5}}, {0xF975, {0x63A0}}, {0xF976, {0x7565}}, {0xF977, {0x4EAE}}, {0xF978, {0x5169}}, {0xF979, {0x51C9}}, {0xF97A, {0x6881}}, {0xF97B, {0x7CE7}}, {0xF97C, {0x826F}},
|
| 382 |
+
{0xF97D, {0x8AD2}}, {0xF97E, {0x91CF}}, {0xF97F, {0x52F5}}, {0xF980, {0x5442}}, {0xF981, {0x5973}}, {0xF982, {0x5EEC}}, {0xF983, {0x65C5}}, {0xF984, {0x6FFE}}, {0xF985, {0x792A}}, {0xF986, {0x95AD}},
|
| 383 |
+
{0xF987, {0x9A6A}}, {0xF988, {0x9E97}}, {0xF989, {0x9ECE}}, {0xF98A, {0x529B}}, {0xF98B, {0x66C6}}, {0xF98C, {0x6B77}}, {0xF98D, {0x8F62}}, {0xF98E, {0x5E74}}, {0xF98F, {0x6190}}, {0xF990, {0x6200}},
|
| 384 |
+
{0xF991, {0x649A}}, {0xF992, {0x6F23}}, {0xF993, {0x7149}}, {0xF994, {0x7489}}, {0xF995, {0x79CA}}, {0xF996, {0x7DF4}}, {0xF997, {0x806F}}, {0xF998, {0x8F26}}, {0xF999, {0x84EE}}, {0xF99A, {0x9023}},
|
| 385 |
+
{0xF99B, {0x934A}}, {0xF99C, {0x5217}}, {0xF99D, {0x52A3}}, {0xF99E, {0x54BD}}, {0xF99F, {0x70C8}}, {0xF9A0, {0x88C2}}, {0xF9A1, {0x8AAA}}, {0xF9A2, {0x5EC9}}, {0xF9A3, {0x5FF5}}, {0xF9A4, {0x637B}},
|
| 386 |
+
{0xF9A5, {0x6BAE}}, {0xF9A6, {0x7C3E}}, {0xF9A7, {0x7375}}, {0xF9A8, {0x4EE4}}, {0xF9A9, {0x56F9}}, {0xF9AA, {0x5BE7}}, {0xF9AB, {0x5DBA}}, {0xF9AC, {0x601C}}, {0xF9AD, {0x73B2}}, {0xF9AE, {0x7469}},
|
| 387 |
+
{0xF9AF, {0x7F9A}}, {0xF9B0, {0x8046}}, {0xF9B1, {0x9234}}, {0xF9B2, {0x96F6}}, {0xF9B3, {0x9748}}, {0xF9B4, {0x9818}}, {0xF9B5, {0x4F8B}}, {0xF9B6, {0x79AE}}, {0xF9B7, {0x91B4}}, {0xF9B8, {0x96B8}},
|
| 388 |
+
{0xF9B9, {0x60E1}}, {0xF9BA, {0x4E86}}, {0xF9BB, {0x50DA}}, {0xF9BC, {0x5BEE}}, {0xF9BD, {0x5C3F}}, {0xF9BE, {0x6599}}, {0xF9BF, {0x6A02}}, {0xF9C0, {0x71CE}}, {0xF9C1, {0x7642}}, {0xF9C2, {0x84FC}},
|
| 389 |
+
{0xF9C3, {0x907C}}, {0xF9C4, {0x9F8D}}, {0xF9C5, {0x6688}}, {0xF9C6, {0x962E}}, {0xF9C7, {0x5289}}, {0xF9C8, {0x677B}}, {0xF9C9, {0x67F3}}, {0xF9CA, {0x6D41}}, {0xF9CB, {0x6E9C}}, {0xF9CC, {0x7409}},
|
| 390 |
+
{0xF9CD, {0x7559}}, {0xF9CE, {0x786B}}, {0xF9CF, {0x7D10}}, {0xF9D0, {0x985E}}, {0xF9D1, {0x516D}}, {0xF9D2, {0x622E}}, {0xF9D3, {0x9678}}, {0xF9D4, {0x502B}}, {0xF9D5, {0x5D19}}, {0xF9D6, {0x6DEA}},
|
| 391 |
+
{0xF9D7, {0x8F2A}}, {0xF9D8, {0x5F8B}}, {0xF9D9, {0x6144}}, {0xF9DA, {0x6817}}, {0xF9DB, {0x7387}}, {0xF9DC, {0x9686}}, {0xF9DD, {0x5229}}, {0xF9DE, {0x540F}}, {0xF9DF, {0x5C65}}, {0xF9E0, {0x6613}},
|
| 392 |
+
{0xF9E1, {0x674E}}, {0xF9E2, {0x68A8}}, {0xF9E3, {0x6CE5}}, {0xF9E4, {0x7406}}, {0xF9E5, {0x75E2}}, {0xF9E6, {0x7F79}}, {0xF9E7, {0x88CF}}, {0xF9E8, {0x88E1}}, {0xF9E9, {0x91CC}}, {0xF9EA, {0x96E2}},
|
| 393 |
+
{0xF9EB, {0x533F}}, {0xF9EC, {0x6EBA}}, {0xF9ED, {0x541D}}, {0xF9EE, {0x71D0}}, {0xF9EF, {0x7498}}, {0xF9F0, {0x85FA}}, {0xF9F1, {0x96A3}}, {0xF9F2, {0x9C57}}, {0xF9F3, {0x9E9F}}, {0xF9F4, {0x6797}},
|
| 394 |
+
{0xF9F5, {0x6DCB}}, {0xF9F6, {0x81E8}}, {0xF9F7, {0x7ACB}}, {0xF9F8, {0x7B20}}, {0xF9F9, {0x7C92}}, {0xF9FA, {0x72C0}}, {0xF9FB, {0x7099}}, {0xF9FC, {0x8B58}}, {0xF9FD, {0x4EC0}}, {0xF9FE, {0x8336}},
|
| 395 |
+
{0xF9FF, {0x523A}}, {0xFA00, {0x5207}}, {0xFA01, {0x5EA6}}, {0xFA02, {0x62D3}}, {0xFA03, {0x7CD6}}, {0xFA04, {0x5B85}}, {0xFA05, {0x6D1E}}, {0xFA06, {0x66B4}}, {0xFA07, {0x8F3B}}, {0xFA08, {0x884C}},
|
| 396 |
+
{0xFA09, {0x964D}}, {0xFA0A, {0x898B}}, {0xFA0B, {0x5ED3}}, {0xFA0C, {0x5140}}, {0xFA0D, {0x55C0}}, {0xFA10, {0x585A}}, {0xFA12, {0x6674}}, {0xFA15, {0x51DE}}, {0xFA16, {0x732A}}, {0xFA17, {0x76CA}},
|
| 397 |
+
{0xFA18, {0x793C}}, {0xFA19, {0x795E}}, {0xFA1A, {0x7965}}, {0xFA1B, {0x798F}}, {0xFA1C, {0x9756}}, {0xFA1D, {0x7CBE}}, {0xFA1E, {0x7FBD}}, {0xFA20, {0x8612}}, {0xFA22, {0x8AF8}}, {0xFA25, {0x9038}},
|
| 398 |
+
{0xFA26, {0x90FD}}, {0xFA2A, {0x98EF}}, {0xFA2B, {0x98FC}}, {0xFA2C, {0x9928}}, {0xFA2D, {0x9DB4}}, {0xFA2E, {0x90DE}}, {0xFA2F, {0x96B7}}, {0xFA30, {0x4FAE}}, {0xFA31, {0x50E7}}, {0xFA32, {0x514D}},
|
| 399 |
+
{0xFA33, {0x52C9}}, {0xFA34, {0x52E4}}, {0xFA35, {0x5351}}, {0xFA36, {0x559D}}, {0xFA37, {0x5606}}, {0xFA38, {0x5668}}, {0xFA39, {0x5840}}, {0xFA3A, {0x58A8}}, {0xFA3B, {0x5C64}}, {0xFA3C, {0x5C6E}},
|
| 400 |
+
{0xFA3D, {0x6094}}, {0xFA3E, {0x6168}}, {0xFA3F, {0x618E}}, {0xFA40, {0x61F2}}, {0xFA41, {0x654F}}, {0xFA42, {0x65E2}}, {0xFA43, {0x6691}}, {0xFA44, {0x6885}}, {0xFA45, {0x6D77}}, {0xFA46, {0x6E1A}},
|
| 401 |
+
{0xFA47, {0x6F22}}, {0xFA48, {0x716E}}, {0xFA49, {0x722B}}, {0xFA4A, {0x7422}}, {0xFA4B, {0x7891}}, {0xFA4C, {0x793E}}, {0xFA4D, {0x7949}}, {0xFA4E, {0x7948}}, {0xFA4F, {0x7950}}, {0xFA50, {0x7956}},
|
| 402 |
+
{0xFA51, {0x795D}}, {0xFA52, {0x798D}}, {0xFA53, {0x798E}}, {0xFA54, {0x7A40}}, {0xFA55, {0x7A81}}, {0xFA56, {0x7BC0}}, {0xFA57, {0x7DF4}}, {0xFA58, {0x7E09}}, {0xFA59, {0x7E41}}, {0xFA5A, {0x7F72}},
|
| 403 |
+
{0xFA5B, {0x8005}}, {0xFA5C, {0x81ED}}, {0xFA5D, {0x8279}}, {0xFA5E, {0x8279}}, {0xFA5F, {0x8457}}, {0xFA60, {0x8910}}, {0xFA61, {0x8996}}, {0xFA62, {0x8B01}}, {0xFA63, {0x8B39}}, {0xFA64, {0x8CD3}},
|
| 404 |
+
{0xFA65, {0x8D08}}, {0xFA66, {0x8FB6}}, {0xFA67, {0x9038}}, {0xFA68, {0x96E3}}, {0xFA69, {0x97FF}}, {0xFA6A, {0x983B}}, {0xFA6B, {0x6075}}, {0xFA6C, {0x242EE}}, {0xFA6D, {0x8218}}, {0xFA70, {0x4E26}},
|
| 405 |
+
{0xFA71, {0x51B5}}, {0xFA72, {0x5168}}, {0xFA73, {0x4F80}}, {0xFA74, {0x5145}}, {0xFA75, {0x5180}}, {0xFA76, {0x52C7}}, {0xFA77, {0x52FA}}, {0xFA78, {0x559D}}, {0xFA79, {0x5555}}, {0xFA7A, {0x5599}},
|
| 406 |
+
{0xFA7B, {0x55E2}}, {0xFA7C, {0x585A}}, {0xFA7D, {0x58B3}}, {0xFA7E, {0x5944}}, {0xFA7F, {0x5954}}, {0xFA80, {0x5A62}}, {0xFA81, {0x5B28}}, {0xFA82, {0x5ED2}}, {0xFA83, {0x5ED9}}, {0xFA84, {0x5F69}},
|
| 407 |
+
{0xFA85, {0x5FAD}}, {0xFA86, {0x60D8}}, {0xFA87, {0x614E}}, {0xFA88, {0x6108}}, {0xFA89, {0x618E}}, {0xFA8A, {0x6160}}, {0xFA8B, {0x61F2}}, {0xFA8C, {0x6234}}, {0xFA8D, {0x63C4}}, {0xFA8E, {0x641C}},
|
| 408 |
+
{0xFA8F, {0x6452}}, {0xFA90, {0x6556}}, {0xFA91, {0x6674}}, {0xFA92, {0x6717}}, {0xFA93, {0x671B}}, {0xFA94, {0x6756}}, {0xFA95, {0x6B79}}, {0xFA96, {0x6BBA}}, {0xFA97, {0x6D41}}, {0xFA98, {0x6EDB}},
|
| 409 |
+
{0xFA99, {0x6ECB}}, {0xFA9A, {0x6F22}}, {0xFA9B, {0x701E}}, {0xFA9C, {0x716E}}, {0xFA9D, {0x77A7}}, {0xFA9E, {0x7235}}, {0xFA9F, {0x72AF}}, {0xFAA0, {0x732A}}, {0xFAA1, {0x7471}}, {0xFAA2, {0x7506}},
|
| 410 |
+
{0xFAA3, {0x753B}}, {0xFAA4, {0x761D}}, {0xFAA5, {0x761F}}, {0xFAA6, {0x76CA}}, {0xFAA7, {0x76DB}}, {0xFAA8, {0x76F4}}, {0xFAA9, {0x774A}}, {0xFAAA, {0x7740}}, {0xFAAB, {0x78CC}}, {0xFAAC, {0x7AB1}},
|
| 411 |
+
{0xFAAD, {0x7BC0}}, {0xFAAE, {0x7C7B}}, {0xFAAF, {0x7D5B}}, {0xFAB0, {0x7DF4}}, {0xFAB1, {0x7F3E}}, {0xFAB2, {0x8005}}, {0xFAB3, {0x8352}}, {0xFAB4, {0x83EF}}, {0xFAB5, {0x8779}}, {0xFAB6, {0x8941}},
|
| 412 |
+
{0xFAB7, {0x8986}}, {0xFAB8, {0x8996}}, {0xFAB9, {0x8ABF}}, {0xFABA, {0x8AF8}}, {0xFABB, {0x8ACB}}, {0xFABC, {0x8B01}}, {0xFABD, {0x8AFE}}, {0xFABE, {0x8AED}}, {0xFABF, {0x8B39}}, {0xFAC0, {0x8B8A}},
|
| 413 |
+
{0xFAC1, {0x8D08}}, {0xFAC2, {0x8F38}}, {0xFAC3, {0x9072}}, {0xFAC4, {0x9199}}, {0xFAC5, {0x9276}}, {0xFAC6, {0x967C}}, {0xFAC7, {0x96E3}}, {0xFAC8, {0x9756}}, {0xFAC9, {0x97DB}}, {0xFACA, {0x97FF}},
|
| 414 |
+
{0xFACB, {0x980B}}, {0xFACC, {0x983B}}, {0xFACD, {0x9B12}}, {0xFACE, {0x9F9C}}, {0xFACF, {0x2284A}}, {0xFAD0, {0x22844}}, {0xFAD1, {0x233D5}}, {0xFAD2, {0x3B9D}}, {0xFAD3, {0x4018}},
|
| 415 |
+
{0xFAD4, {0x4039}}, {0xFAD5, {0x25249}}, {0xFAD6, {0x25CD0}}, {0xFAD7, {0x27ED3}}, {0xFAD8, {0x9F43}}, {0xFAD9, {0x9F8E}}, {0xFB1D, {0x5D9, 0x5B4}}, {0xFB1F, {0x5F2, 0x5B7}}, {0xFB2A, {0x5E9, 0x5C1}},
|
| 416 |
+
{0xFB2B, {0x5E9, 0x5C2}}, {0xFB2C, {0x5E9, 0x5BC, 0x5C1}}, {0xFB2D, {0x5E9, 0x5BC, 0x5C2}}, {0xFB2E, {0x5D0, 0x5B7}}, {0xFB2F, {0x5D0, 0x5B8}}, {0xFB30, {0x5D0, 0x5BC}}, {0xFB31, {0x5D1, 0x5BC}},
|
| 417 |
+
{0xFB32, {0x5D2, 0x5BC}}, {0xFB33, {0x5D3, 0x5BC}}, {0xFB34, {0x5D4, 0x5BC}}, {0xFB35, {0x5D5, 0x5BC}}, {0xFB36, {0x5D6, 0x5BC}}, {0xFB38, {0x5D8, 0x5BC}}, {0xFB39, {0x5D9, 0x5BC}},
|
| 418 |
+
{0xFB3A, {0x5DA, 0x5BC}}, {0xFB3B, {0x5DB, 0x5BC}}, {0xFB3C, {0x5DC, 0x5BC}}, {0xFB3E, {0x5DE, 0x5BC}}, {0xFB40, {0x5E0, 0x5BC}}, {0xFB41, {0x5E1, 0x5BC}}, {0xFB43, {0x5E3, 0x5BC}},
|
| 419 |
+
{0xFB44, {0x5E4, 0x5BC}}, {0xFB46, {0x5E6, 0x5BC}}, {0xFB47, {0x5E7, 0x5BC}}, {0xFB48, {0x5E8, 0x5BC}}, {0xFB49, {0x5E9, 0x5BC}}, {0xFB4A, {0x5EA, 0x5BC}}, {0xFB4B, {0x5D5, 0x5B9}},
|
| 420 |
+
{0xFB4C, {0x5D1, 0x5BF}}, {0xFB4D, {0x5DB, 0x5BF}}, {0xFB4E, {0x5E4, 0x5BF}}, {0x1109A, {0x11099, 0x110BA}}, {0x1109C, {0x1109B, 0x110BA}}, {0x110AB, {0x110A5, 0x110BA}},
|
| 421 |
+
{0x1112E, {0x11131, 0x11127}}, {0x1112F, {0x11132, 0x11127}}, {0x1134B, {0x11347, 0x1133E}}, {0x1134C, {0x11347, 0x11357}}, {0x114BB, {0x114B9, 0x114BA}}, {0x114BC, {0x114B9, 0x114B0}},
|
| 422 |
+
{0x114BE, {0x114B9, 0x114BD}}, {0x115BA, {0x115B8, 0x115AF}}, {0x115BB, {0x115B9, 0x115AF}}, {0x1D15E, {0x1D157, 0x1D165}}, {0x1D15F, {0x1D158, 0x1D165}}, {0x1D160, {0x1D158, 0x1D165, 0x1D16E}},
|
| 423 |
+
{0x1D161, {0x1D158, 0x1D165, 0x1D16F}}, {0x1D162, {0x1D158, 0x1D165, 0x1D170}}, {0x1D163, {0x1D158, 0x1D165, 0x1D171}}, {0x1D164, {0x1D158, 0x1D165, 0x1D172}}, {0x1D1BB, {0x1D1B9, 0x1D165}},
|
| 424 |
+
{0x1D1BC, {0x1D1BA, 0x1D165}}, {0x1D1BD, {0x1D1B9, 0x1D165, 0x1D16E}}, {0x1D1BE, {0x1D1BA, 0x1D165, 0x1D16E}}, {0x1D1BF, {0x1D1B9, 0x1D165, 0x1D16F}}, {0x1D1C0, {0x1D1BA, 0x1D165, 0x1D16F}},
|
| 425 |
+
{0x2F800, {0x4E3D}}, {0x2F801, {0x4E38}}, {0x2F802, {0x4E41}}, {0x2F803, {0x20122}}, {0x2F804, {0x4F60}}, {0x2F805, {0x4FAE}}, {0x2F806, {0x4FBB}}, {0x2F807, {0x5002}}, {0x2F808, {0x507A}},
|
| 426 |
+
{0x2F809, {0x5099}}, {0x2F80A, {0x50E7}}, {0x2F80B, {0x50CF}}, {0x2F80C, {0x349E}}, {0x2F80D, {0x2063A}}, {0x2F80E, {0x514D}}, {0x2F80F, {0x5154}}, {0x2F810, {0x5164}}, {0x2F811, {0x5177}},
|
| 427 |
+
{0x2F812, {0x2051C}}, {0x2F813, {0x34B9}}, {0x2F814, {0x5167}}, {0x2F815, {0x518D}}, {0x2F816, {0x2054B}}, {0x2F817, {0x5197}}, {0x2F818, {0x51A4}}, {0x2F819, {0x4ECC}}, {0x2F81A, {0x51AC}},
|
| 428 |
+
{0x2F81B, {0x51B5}}, {0x2F81C, {0x291DF}}, {0x2F81D, {0x51F5}}, {0x2F81E, {0x5203}}, {0x2F81F, {0x34DF}}, {0x2F820, {0x523B}}, {0x2F821, {0x5246}}, {0x2F822, {0x5272}}, {0x2F823, {0x5277}},
|
| 429 |
+
{0x2F824, {0x3515}}, {0x2F825, {0x52C7}}, {0x2F826, {0x52C9}}, {0x2F827, {0x52E4}}, {0x2F828, {0x52FA}}, {0x2F829, {0x5305}}, {0x2F82A, {0x5306}}, {0x2F82B, {0x5317}}, {0x2F82C, {0x5349}},
|
| 430 |
+
{0x2F82D, {0x5351}}, {0x2F82E, {0x535A}}, {0x2F82F, {0x5373}}, {0x2F830, {0x537D}}, {0x2F831, {0x537F}}, {0x2F832, {0x537F}}, {0x2F833, {0x537F}}, {0x2F834, {0x20A2C}}, {0x2F835, {0x7070}},
|
| 431 |
+
{0x2F836, {0x53CA}}, {0x2F837, {0x53DF}}, {0x2F838, {0x20B63}}, {0x2F839, {0x53EB}}, {0x2F83A, {0x53F1}}, {0x2F83B, {0x5406}}, {0x2F83C, {0x549E}}, {0x2F83D, {0x5438}}, {0x2F83E, {0x5448}},
|
| 432 |
+
{0x2F83F, {0x5468}}, {0x2F840, {0x54A2}}, {0x2F841, {0x54F6}}, {0x2F842, {0x5510}}, {0x2F843, {0x5553}}, {0x2F844, {0x5563}}, {0x2F845, {0x5584}}, {0x2F846, {0x5584}}, {0x2F847, {0x5599}},
|
| 433 |
+
{0x2F848, {0x55AB}}, {0x2F849, {0x55B3}}, {0x2F84A, {0x55C2}}, {0x2F84B, {0x5716}}, {0x2F84C, {0x5606}}, {0x2F84D, {0x5717}}, {0x2F84E, {0x5651}}, {0x2F84F, {0x5674}}, {0x2F850, {0x5207}},
|
| 434 |
+
{0x2F851, {0x58EE}}, {0x2F852, {0x57CE}}, {0x2F853, {0x57F4}}, {0x2F854, {0x580D}}, {0x2F855, {0x578B}}, {0x2F856, {0x5832}}, {0x2F857, {0x5831}}, {0x2F858, {0x58AC}}, {0x2F859, {0x214E4}},
|
| 435 |
+
{0x2F85A, {0x58F2}}, {0x2F85B, {0x58F7}}, {0x2F85C, {0x5906}}, {0x2F85D, {0x591A}}, {0x2F85E, {0x5922}}, {0x2F85F, {0x5962}}, {0x2F860, {0x216A8}}, {0x2F861, {0x216EA}}, {0x2F862, {0x59EC}},
|
| 436 |
+
{0x2F863, {0x5A1B}}, {0x2F864, {0x5A27}}, {0x2F865, {0x59D8}}, {0x2F866, {0x5A66}}, {0x2F867, {0x36EE}}, {0x2F868, {0x36FC}}, {0x2F869, {0x5B08}}, {0x2F86A, {0x5B3E}}, {0x2F86B, {0x5B3E}},
|
| 437 |
+
{0x2F86C, {0x219C8}}, {0x2F86D, {0x5BC3}}, {0x2F86E, {0x5BD8}}, {0x2F86F, {0x5BE7}}, {0x2F870, {0x5BF3}}, {0x2F871, {0x21B18}}, {0x2F872, {0x5BFF}}, {0x2F873, {0x5C06}}, {0x2F874, {0x5F53}},
|
| 438 |
+
{0x2F875, {0x5C22}}, {0x2F876, {0x3781}}, {0x2F877, {0x5C60}}, {0x2F878, {0x5C6E}}, {0x2F879, {0x5CC0}}, {0x2F87A, {0x5C8D}}, {0x2F87B, {0x21DE4}}, {0x2F87C, {0x5D43}}, {0x2F87D, {0x21DE6}},
|
| 439 |
+
{0x2F87E, {0x5D6E}}, {0x2F87F, {0x5D6B}}, {0x2F880, {0x5D7C}}, {0x2F881, {0x5DE1}}, {0x2F882, {0x5DE2}}, {0x2F883, {0x382F}}, {0x2F884, {0x5DFD}}, {0x2F885, {0x5E28}}, {0x2F886, {0x5E3D}},
|
| 440 |
+
{0x2F887, {0x5E69}}, {0x2F888, {0x3862}}, {0x2F889, {0x22183}}, {0x2F88A, {0x387C}}, {0x2F88B, {0x5EB0}}, {0x2F88C, {0x5EB3}}, {0x2F88D, {0x5EB6}}, {0x2F88E, {0x5ECA}}, {0x2F88F, {0x2A392}},
|
| 441 |
+
{0x2F890, {0x5EFE}}, {0x2F891, {0x22331}}, {0x2F892, {0x22331}}, {0x2F893, {0x8201}}, {0x2F894, {0x5F22}}, {0x2F895, {0x5F22}}, {0x2F896, {0x38C7}}, {0x2F897, {0x232B8}}, {0x2F898, {0x261DA}},
|
| 442 |
+
{0x2F899, {0x5F62}}, {0x2F89A, {0x5F6B}}, {0x2F89B, {0x38E3}}, {0x2F89C, {0x5F9A}}, {0x2F89D, {0x5FCD}}, {0x2F89E, {0x5FD7}}, {0x2F89F, {0x5FF9}}, {0x2F8A0, {0x6081}}, {0x2F8A1, {0x393A}},
|
| 443 |
+
{0x2F8A2, {0x391C}}, {0x2F8A3, {0x6094}}, {0x2F8A4, {0x226D4}}, {0x2F8A5, {0x60C7}}, {0x2F8A6, {0x6148}}, {0x2F8A7, {0x614C}}, {0x2F8A8, {0x614E}}, {0x2F8A9, {0x614C}}, {0x2F8AA, {0x617A}},
|
| 444 |
+
{0x2F8AB, {0x618E}}, {0x2F8AC, {0x61B2}}, {0x2F8AD, {0x61A4}}, {0x2F8AE, {0x61AF}}, {0x2F8AF, {0x61DE}}, {0x2F8B0, {0x61F2}}, {0x2F8B1, {0x61F6}}, {0x2F8B2, {0x6210}}, {0x2F8B3, {0x621B}},
|
| 445 |
+
{0x2F8B4, {0x625D}}, {0x2F8B5, {0x62B1}}, {0x2F8B6, {0x62D4}}, {0x2F8B7, {0x6350}}, {0x2F8B8, {0x22B0C}}, {0x2F8B9, {0x633D}}, {0x2F8BA, {0x62FC}}, {0x2F8BB, {0x6368}}, {0x2F8BC, {0x6383}},
|
| 446 |
+
{0x2F8BD, {0x63E4}}, {0x2F8BE, {0x22BF1}}, {0x2F8BF, {0x6422}}, {0x2F8C0, {0x63C5}}, {0x2F8C1, {0x63A9}}, {0x2F8C2, {0x3A2E}}, {0x2F8C3, {0x6469}}, {0x2F8C4, {0x647E}}, {0x2F8C5, {0x649D}},
|
| 447 |
+
{0x2F8C6, {0x6477}}, {0x2F8C7, {0x3A6C}}, {0x2F8C8, {0x654F}}, {0x2F8C9, {0x656C}}, {0x2F8CA, {0x2300A}}, {0x2F8CB, {0x65E3}}, {0x2F8CC, {0x66F8}}, {0x2F8CD, {0x6649}}, {0x2F8CE, {0x3B19}},
|
| 448 |
+
{0x2F8CF, {0x6691}}, {0x2F8D0, {0x3B08}}, {0x2F8D1, {0x3AE4}}, {0x2F8D2, {0x5192}}, {0x2F8D3, {0x5195}}, {0x2F8D4, {0x6700}}, {0x2F8D5, {0x669C}}, {0x2F8D6, {0x80AD}}, {0x2F8D7, {0x43D9}},
|
| 449 |
+
{0x2F8D8, {0x6717}}, {0x2F8D9, {0x671B}}, {0x2F8DA, {0x6721}}, {0x2F8DB, {0x675E}}, {0x2F8DC, {0x6753}}, {0x2F8DD, {0x233C3}}, {0x2F8DE, {0x3B49}}, {0x2F8DF, {0x67FA}}, {0x2F8E0, {0x6785}},
|
| 450 |
+
{0x2F8E1, {0x6852}}, {0x2F8E2, {0x6885}}, {0x2F8E3, {0x2346D}}, {0x2F8E4, {0x688E}}, {0x2F8E5, {0x681F}}, {0x2F8E6, {0x6914}}, {0x2F8E7, {0x3B9D}}, {0x2F8E8, {0x6942}}, {0x2F8E9, {0x69A3}},
|
| 451 |
+
{0x2F8EA, {0x69EA}}, {0x2F8EB, {0x6AA8}}, {0x2F8EC, {0x236A3}}, {0x2F8ED, {0x6ADB}}, {0x2F8EE, {0x3C18}}, {0x2F8EF, {0x6B21}}, {0x2F8F0, {0x238A7}}, {0x2F8F1, {0x6B54}}, {0x2F8F2, {0x3C4E}},
|
| 452 |
+
{0x2F8F3, {0x6B72}}, {0x2F8F4, {0x6B9F}}, {0x2F8F5, {0x6BBA}}, {0x2F8F6, {0x6BBB}}, {0x2F8F7, {0x23A8D}}, {0x2F8F8, {0x21D0B}}, {0x2F8F9, {0x23AFA}}, {0x2F8FA, {0x6C4E}}, {0x2F8FB, {0x23CBC}},
|
| 453 |
+
{0x2F8FC, {0x6CBF}}, {0x2F8FD, {0x6CCD}}, {0x2F8FE, {0x6C67}}, {0x2F8FF, {0x6D16}}, {0x2F900, {0x6D3E}}, {0x2F901, {0x6D77}}, {0x2F902, {0x6D41}}, {0x2F903, {0x6D69}}, {0x2F904, {0x6D78}},
|
| 454 |
+
{0x2F905, {0x6D85}}, {0x2F906, {0x23D1E}}, {0x2F907, {0x6D34}}, {0x2F908, {0x6E2F}}, {0x2F909, {0x6E6E}}, {0x2F90A, {0x3D33}}, {0x2F90B, {0x6ECB}}, {0x2F90C, {0x6EC7}}, {0x2F90D, {0x23ED1}},
|
| 455 |
+
{0x2F90E, {0x6DF9}}, {0x2F90F, {0x6F6E}}, {0x2F910, {0x23F5E}}, {0x2F911, {0x23F8E}}, {0x2F912, {0x6FC6}}, {0x2F913, {0x7039}}, {0x2F914, {0x701E}}, {0x2F915, {0x701B}}, {0x2F916, {0x3D96}},
|
| 456 |
+
{0x2F917, {0x704A}}, {0x2F918, {0x707D}}, {0x2F919, {0x7077}}, {0x2F91A, {0x70AD}}, {0x2F91B, {0x20525}}, {0x2F91C, {0x7145}}, {0x2F91D, {0x24263}}, {0x2F91E, {0x719C}}, {0x2F91F, {0x243AB}},
|
| 457 |
+
{0x2F920, {0x7228}}, {0x2F921, {0x7235}}, {0x2F922, {0x7250}}, {0x2F923, {0x24608}}, {0x2F924, {0x7280}}, {0x2F925, {0x7295}}, {0x2F926, {0x24735}}, {0x2F927, {0x24814}}, {0x2F928, {0x737A}},
|
| 458 |
+
{0x2F929, {0x738B}}, {0x2F92A, {0x3EAC}}, {0x2F92B, {0x73A5}}, {0x2F92C, {0x3EB8}}, {0x2F92D, {0x3EB8}}, {0x2F92E, {0x7447}}, {0x2F92F, {0x745C}}, {0x2F930, {0x7471}}, {0x2F931, {0x7485}},
|
| 459 |
+
{0x2F932, {0x74CA}}, {0x2F933, {0x3F1B}}, {0x2F934, {0x7524}}, {0x2F935, {0x24C36}}, {0x2F936, {0x753E}}, {0x2F937, {0x24C92}}, {0x2F938, {0x7570}}, {0x2F939, {0x2219F}}, {0x2F93A, {0x7610}},
|
| 460 |
+
{0x2F93B, {0x24FA1}}, {0x2F93C, {0x24FB8}}, {0x2F93D, {0x25044}}, {0x2F93E, {0x3FFC}}, {0x2F93F, {0x4008}}, {0x2F940, {0x76F4}}, {0x2F941, {0x250F3}}, {0x2F942, {0x250F2}}, {0x2F943, {0x25119}},
|
| 461 |
+
{0x2F944, {0x25133}}, {0x2F945, {0x771E}}, {0x2F946, {0x771F}}, {0x2F947, {0x771F}}, {0x2F948, {0x774A}}, {0x2F949, {0x4039}}, {0x2F94A, {0x778B}}, {0x2F94B, {0x4046}}, {0x2F94C, {0x4096}},
|
| 462 |
+
{0x2F94D, {0x2541D}}, {0x2F94E, {0x784E}}, {0x2F94F, {0x788C}}, {0x2F950, {0x78CC}}, {0x2F951, {0x40E3}}, {0x2F952, {0x25626}}, {0x2F953, {0x7956}}, {0x2F954, {0x2569A}}, {0x2F955, {0x256C5}},
|
| 463 |
+
{0x2F956, {0x798F}}, {0x2F957, {0x79EB}}, {0x2F958, {0x412F}}, {0x2F959, {0x7A40}}, {0x2F95A, {0x7A4A}}, {0x2F95B, {0x7A4F}}, {0x2F95C, {0x2597C}}, {0x2F95D, {0x25AA7}}, {0x2F95E, {0x25AA7}},
|
| 464 |
+
{0x2F95F, {0x7AEE}}, {0x2F960, {0x4202}}, {0x2F961, {0x25BAB}}, {0x2F962, {0x7BC6}}, {0x2F963, {0x7BC9}}, {0x2F964, {0x4227}}, {0x2F965, {0x25C80}}, {0x2F966, {0x7CD2}}, {0x2F967, {0x42A0}},
|
| 465 |
+
{0x2F968, {0x7CE8}}, {0x2F969, {0x7CE3}}, {0x2F96A, {0x7D00}}, {0x2F96B, {0x25F86}}, {0x2F96C, {0x7D63}}, {0x2F96D, {0x4301}}, {0x2F96E, {0x7DC7}}, {0x2F96F, {0x7E02}}, {0x2F970, {0x7E45}},
|
| 466 |
+
{0x2F971, {0x4334}}, {0x2F972, {0x26228}}, {0x2F973, {0x26247}}, {0x2F974, {0x4359}}, {0x2F975, {0x262D9}}, {0x2F976, {0x7F7A}}, {0x2F977, {0x2633E}}, {0x2F978, {0x7F95}}, {0x2F979, {0x7FFA}},
|
| 467 |
+
{0x2F97A, {0x8005}}, {0x2F97B, {0x264DA}}, {0x2F97C, {0x26523}}, {0x2F97D, {0x8060}}, {0x2F97E, {0x265A8}}, {0x2F97F, {0x8070}}, {0x2F980, {0x2335F}}, {0x2F981, {0x43D5}}, {0x2F982, {0x80B2}},
|
| 468 |
+
{0x2F983, {0x8103}}, {0x2F984, {0x440B}}, {0x2F985, {0x813E}}, {0x2F986, {0x5AB5}}, {0x2F987, {0x267A7}}, {0x2F988, {0x267B5}}, {0x2F989, {0x23393}}, {0x2F98A, {0x2339C}}, {0x2F98B, {0x8201}},
|
| 469 |
+
{0x2F98C, {0x8204}}, {0x2F98D, {0x8F9E}}, {0x2F98E, {0x446B}}, {0x2F98F, {0x8291}}, {0x2F990, {0x828B}}, {0x2F991, {0x829D}}, {0x2F992, {0x52B3}}, {0x2F993, {0x82B1}}, {0x2F994, {0x82B3}},
|
| 470 |
+
{0x2F995, {0x82BD}}, {0x2F996, {0x82E6}}, {0x2F997, {0x26B3C}}, {0x2F998, {0x82E5}}, {0x2F999, {0x831D}}, {0x2F99A, {0x8363}}, {0x2F99B, {0x83AD}}, {0x2F99C, {0x8323}}, {0x2F99D, {0x83BD}},
|
| 471 |
+
{0x2F99E, {0x83E7}}, {0x2F99F, {0x8457}}, {0x2F9A0, {0x8353}}, {0x2F9A1, {0x83CA}}, {0x2F9A2, {0x83CC}}, {0x2F9A3, {0x83DC}}, {0x2F9A4, {0x26C36}}, {0x2F9A5, {0x26D6B}}, {0x2F9A6, {0x26CD5}},
|
| 472 |
+
{0x2F9A7, {0x452B}}, {0x2F9A8, {0x84F1}}, {0x2F9A9, {0x84F3}}, {0x2F9AA, {0x8516}}, {0x2F9AB, {0x273CA}}, {0x2F9AC, {0x8564}}, {0x2F9AD, {0x26F2C}}, {0x2F9AE, {0x455D}}, {0x2F9AF, {0x4561}},
|
| 473 |
+
{0x2F9B0, {0x26FB1}}, {0x2F9B1, {0x270D2}}, {0x2F9B2, {0x456B}}, {0x2F9B3, {0x8650}}, {0x2F9B4, {0x865C}}, {0x2F9B5, {0x8667}}, {0x2F9B6, {0x8669}}, {0x2F9B7, {0x86A9}}, {0x2F9B8, {0x8688}},
|
| 474 |
+
{0x2F9B9, {0x870E}}, {0x2F9BA, {0x86E2}}, {0x2F9BB, {0x8779}}, {0x2F9BC, {0x8728}}, {0x2F9BD, {0x876B}}, {0x2F9BE, {0x8786}}, {0x2F9BF, {0x45D7}}, {0x2F9C0, {0x87E1}}, {0x2F9C1, {0x8801}},
|
| 475 |
+
{0x2F9C2, {0x45F9}}, {0x2F9C3, {0x8860}}, {0x2F9C4, {0x8863}}, {0x2F9C5, {0x27667}}, {0x2F9C6, {0x88D7}}, {0x2F9C7, {0x88DE}}, {0x2F9C8, {0x4635}}, {0x2F9C9, {0x88FA}}, {0x2F9CA, {0x34BB}},
|
| 476 |
+
{0x2F9CB, {0x278AE}}, {0x2F9CC, {0x27966}}, {0x2F9CD, {0x46BE}}, {0x2F9CE, {0x46C7}}, {0x2F9CF, {0x8AA0}}, {0x2F9D0, {0x8AED}}, {0x2F9D1, {0x8B8A}}, {0x2F9D2, {0x8C55}}, {0x2F9D3, {0x27CA8}},
|
| 477 |
+
{0x2F9D4, {0x8CAB}}, {0x2F9D5, {0x8CC1}}, {0x2F9D6, {0x8D1B}}, {0x2F9D7, {0x8D77}}, {0x2F9D8, {0x27F2F}}, {0x2F9D9, {0x20804}}, {0x2F9DA, {0x8DCB}}, {0x2F9DB, {0x8DBC}}, {0x2F9DC, {0x8DF0}},
|
| 478 |
+
{0x2F9DD, {0x208DE}}, {0x2F9DE, {0x8ED4}}, {0x2F9DF, {0x8F38}}, {0x2F9E0, {0x285D2}}, {0x2F9E1, {0x285ED}}, {0x2F9E2, {0x9094}}, {0x2F9E3, {0x90F1}}, {0x2F9E4, {0x9111}}, {0x2F9E5, {0x2872E}},
|
| 479 |
+
{0x2F9E6, {0x911B}}, {0x2F9E7, {0x9238}}, {0x2F9E8, {0x92D7}}, {0x2F9E9, {0x92D8}}, {0x2F9EA, {0x927C}}, {0x2F9EB, {0x93F9}}, {0x2F9EC, {0x9415}}, {0x2F9ED, {0x28BFA}}, {0x2F9EE, {0x958B}},
|
| 480 |
+
{0x2F9EF, {0x4995}}, {0x2F9F0, {0x95B7}}, {0x2F9F1, {0x28D77}}, {0x2F9F2, {0x49E6}}, {0x2F9F3, {0x96C3}}, {0x2F9F4, {0x5DB2}}, {0x2F9F5, {0x9723}}, {0x2F9F6, {0x29145}}, {0x2F9F7, {0x2921A}},
|
| 481 |
+
{0x2F9F8, {0x4A6E}}, {0x2F9F9, {0x4A76}}, {0x2F9FA, {0x97E0}}, {0x2F9FB, {0x2940A}}, {0x2F9FC, {0x4AB2}}, {0x2F9FD, {0x29496}}, {0x2F9FE, {0x980B}}, {0x2F9FF, {0x980B}}, {0x2FA00, {0x9829}},
|
| 482 |
+
{0x2FA01, {0x295B6}}, {0x2FA02, {0x98E2}}, {0x2FA03, {0x4B33}}, {0x2FA04, {0x9929}}, {0x2FA05, {0x99A7}}, {0x2FA06, {0x99C2}}, {0x2FA07, {0x99FE}}, {0x2FA08, {0x4BCE}}, {0x2FA09, {0x29B30}},
|
| 483 |
+
{0x2FA0A, {0x9B12}}, {0x2FA0B, {0x9C40}}, {0x2FA0C, {0x9CFD}}, {0x2FA0D, {0x4CCE}}, {0x2FA0E, {0x4CED}}, {0x2FA0F, {0x9D67}}, {0x2FA10, {0x2A0CE}}, {0x2FA11, {0x4CF8}}, {0x2FA12, {0x2A105}},
|
| 484 |
+
{0x2FA13, {0x2A20E}}, {0x2FA14, {0x2A291}}, {0x2FA15, {0x9EBB}}, {0x2FA16, {0x4D56}}, {0x2FA17, {0x9EF9}}, {0x2FA18, {0x9EFE}}, {0x2FA19, {0x9F05}}, {0x2FA1A, {0x9F0F}}, {0x2FA1B, {0x9F16}},
|
| 485 |
+
{0x2FA1D, {0x2A600}},
|
| 486 |
+
};
|
| 487 |
+
|
| 488 |
static std::string codepoint_to_utf8(uint32_t cp) {
|
| 489 |
std::string result;
|
| 490 |
if (/* 0x00 <= cp && */ cp <= 0x7f) {
|
|
|
|
| 666 |
|
| 667 |
static int codepoint_type(uint32_t cp) {
|
| 668 |
static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
|
| 669 |
+
const auto it = codepoint_types.find(cp);
|
| 670 |
+
return it == codepoint_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : it->second;
|
| 671 |
}
|
| 672 |
|
| 673 |
static int codepoint_type(const std::string & utf8) {
|