/usr/include/googlepinyin/dictdef.h is in libgooglepinyin0-dev 0.1.2-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | /*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef PINYINIME_INCLUDE_DICTDEF_H__
#define PINYINIME_INCLUDE_DICTDEF_H__
#include <stdlib.h>
#include <inttypes.h>
#include "./utf16char.h"
namespace ime_pinyin {
// Enable the following line when building the binary dictionary model.
// It is defined unconditionally in this header, so the build-only structures
// guarded by "#ifdef ___BUILD_MODEL___" further down are always compiled in.
#define ___BUILD_MODEL___
// Short project-wide aliases for the fixed-width integer types.
// Signed aliases, narrowest first.
typedef int8_t int8;
typedef int16_t int16;
typedef int32_t int32;
typedef int64_t int64;
// Unsigned aliases, narrowest first.
typedef uint8_t uint8;
typedef uint16_t uint16;
typedef uint32_t uint32;
typedef uint64_t uint64;
// Compile-time debug switches; all three are disabled here.
// NOTE(review): presumably each one gates a different level of debug
// printing -- confirm at the use sites.
const bool kPrintDebug0 = false;
const bool kPrintDebug1 = false;
const bool kPrintDebug2 = false;
// The max length of a lemma.
const size_t kMaxLemmaSize = 8;
// The max length of a Pinyin (spelling).
const size_t kMaxPinyinSize = 6;
// The number of half spelling ids. For Chinese Pinyin, there are 30 half ids.
// See SpellingTrie.h for details.
// NOTE(review): the comment says 30 while the constant below is 29; confirm
// against SpellingTrie.h which figure is intended (e.g. whether an unused
// id 0 is being counted).
const size_t kHalfSpellingIdNum = 29;
// The maximum number of full spellings. For Chinese Pinyin, there are only
// about 410 spellings.
// If this value is made bigger (so that it needs more bits), please also
// update other structures like SpellingNode, to make sure that a spelling id
// can still be stored.
// The -1 is because 0 is never used. The expression evaluates to 482.
const size_t kMaxSpellingNum = 512 - kHalfSpellingIdNum - 1;
// NOTE(review): presumably the upper bound on the number of steps taken
// while searching an input string -- confirm at the use sites.
const size_t kMaxSearchSteps = 40;
// One character predicts its following characters, so a prediction is at
// most one character shorter than the longest lemma.
const size_t kMaxPredictSize = (kMaxLemmaSize - 1);
// LemmaIdType must always be size_t.
typedef size_t LemmaIdType;
const size_t kLemmaIdSize = 3; // Actually, an Id occupies 3 bytes in storage.
// 0xffffff is the largest value that fits in kLemmaIdSize (3) bytes.
// NOTE(review): presumably a reserved id for the lemma currently being
// composed -- confirm at the use sites.
const size_t kLemmaIdComposing = 0xffffff;
// Scores are stored as 16-bit unsigned values.
typedef uint16 LmaScoreType;
typedef uint16 KeyScoreType;
// The number of highest-scoring items that are kept for prediction purposes.
const size_t kTopScoreLemmaNum = 10;
// NOTE(review): presumably the maximum number of predictions produced when
// the preceding text used for prediction is longer than 3, exactly 3, and
// exactly 2 characters respectively -- confirm at the use sites.
const size_t kMaxPredictNumByGt3 = 1;
const size_t kMaxPredictNumBy3 = 2;
const size_t kMaxPredictNumBy2 = 2;
// The last lemma id (included) for the system dictionary. The system
// dictionary's ids always start from 1.
const LemmaIdType kSysDictIdEnd = 500000;
// The first lemma id for the user dictionary. It directly follows
// kSysDictIdEnd, so the two id ranges are contiguous and do not overlap.
const LemmaIdType kUserDictIdStart = 500001;
// The last lemma id (included) for the user dictionary, which leaves room
// for 100000 user lemma ids.
const LemmaIdType kUserDictIdEnd = 600000;
// A spelling id made of two bit-fields that together use 16 bits:
// a 5-bit half spelling id and an 11-bit full spelling id.
// PSpellingId is the matching pointer type.
typedef struct {
uint16 half_splid:5;
uint16 full_splid:11;
} SpellingId, *PSpellingId;
/**
* We use different node types for different layers
* Statistical data of the building result for a testing dictionary:
* root, level 0, level 1, level 2, level 3
* max son num of one node: 406 280 41 2 -
* max homo num of one node: 0 90 23 2 2
* total node num of a layer: 1 406 31766 13516 993
* total homo num of a layer: 9 5674 44609 12667 995
*
* The node number for root and level 0 won't be larger than 500
* According to the information above, two kinds of nodes can be used; one for
* root and level 0, the other for these layers deeper than 0.
*
* LE = less than or equal,
* A node occupies 16 bytes, so in total less than 16 * 500 = 8K.
* NOTE(review): the 16-byte figure only holds when size_t is 4 bytes; with an
* 8-byte size_t the two offset fields alone already take 16 bytes.
*/
struct LmaNodeLE0 {
// NOTE(review): presumably the offset of this node's first son node --
// confirm against the dictionary trie code.
size_t son_1st_off;
// NOTE(review): presumably the offset of this node's first entry in the
// homo index buffer -- confirm against the dictionary trie code.
size_t homo_idx_buf_off;
// NOTE(review): presumably the spelling id this node stands for -- confirm.
uint16 spl_idx;
uint16 num_of_son; // number of son nodes
uint16 num_of_homo; // number of homo words
};
/**
* GE = greater than or equal
* A node occupies 8 bytes.
* NOTE(review): the fields declared below add up to 10 bytes
* (3 x uint16 + 4 x unsigned char), not 8 -- the figure above looks stale.
*
* The two offsets are each split into a 16-bit low part and an 8-bit high
* part instead of being stored in one wider integer.
*/
struct LmaNodeGE1 {
uint16 son_1st_off_l; // Low bits of the son_1st_off
uint16 homo_idx_buf_off_l; // Low bits of the homo_idx_buf_off
uint16 spl_idx;
unsigned char num_of_son; // number of son nodes
unsigned char num_of_homo; // number of homo words
unsigned char son_1st_off_h; // high bits of the son_1st_off
unsigned char homo_idx_buf_off_h; // high bits of the homo_idx_buf_off
};
#ifdef ___BUILD_MODEL___
// Record for one single character; only available when ___BUILD_MODEL___
// is defined.
struct SingleCharItem {
// NOTE(review): presumably the frequency of this character/spelling pair in
// the source data -- confirm against the dictionary builder.
float freq;
// The character itself as one UTF-16 code unit (presumably a Hanzi).
char16 hz;
// The spelling id attached to this character.
SpellingId splid;
};
// Record for one lemma; only available when ___BUILD_MODEL___ is defined.
struct LemmaEntry {
// NOTE(review): presumably the lemma's index when entries are ordered by
// Pinyin and by Hanzi respectively -- confirm against the dictionary builder.
LemmaIdType idx_by_py;
LemmaIdType idx_by_hz;
// The lemma text: up to kMaxLemmaSize characters plus one extra slot
// (presumably for a terminator).
char16 hanzi_str[kMaxLemmaSize + 1];
// The SingleCharItem id for each Hanzi.
uint16 hanzi_scis_ids[kMaxLemmaSize];
// Spelling ids, with one more slot than the maximum lemma length.
uint16 spl_idx_arr[kMaxLemmaSize + 1];
// One Pinyin string per character: up to kMaxPinyinSize chars plus one extra
// slot (presumably for the terminating NUL).
char pinyin_str[kMaxLemmaSize][kMaxPinyinSize + 1];
// NOTE(review): presumably the number of characters used in hanzi_str.
unsigned char hz_str_len;
// NOTE(review): presumably the lemma's frequency in the source data.
float freq;
};
#endif // ___BUILD_MODEL___
} // namespace ime_pinyin
#endif // PINYINIME_INCLUDE_DICTDEF_H__
|