This file is indexed.

/usr/include/googlepinyin/dicttrie.h is in libgooglepinyin0-dev 0.1.2-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef PINYINIME_INCLUDE_DICTTRIE_H__
#define PINYINIME_INCLUDE_DICTTRIE_H__

#include <stdlib.h>
#include "./atomdictbase.h"
#include "./dictdef.h"
#include "./dictlist.h"
#include "./searchutility.h"

namespace ime_pinyin {

// Dictionary trie. It holds the lemma node buffers (root_ / nodes_ge1_), the
// lemma id buffer (lma_idx_buf_), and the mile stone / parsing mark lists used
// to remember partial matches while a search is being extended.
//
// NOTE(review): no access specifier is written on the base class, so
// AtomDictBase is inherited privately (the C++ default for `class`) -- confirm
// that this is intended.
class DictTrie : AtomDictBase {
 private:
  // One parsing mark: a run of node_num nodes starting at node_offset.
  // Both fields are bit-fields (24 + 8 bits) declared on size_t.
  struct ParsingMark {
    // Offset of the first marked node -- presumably an index into root_ or
    // nodes_ge1_ depending on the layer; TODO confirm against the .cpp file.
    size_t node_offset:24;
    size_t node_num:8;           // Number of nodes with this spelling id given
                                 // by spl_id. If spl_id is a Shengmu, for nodes
                                 // in the first layer of DictTrie, it equals
                                 // SpellingTrie::shm2full_num(); but for those
                                 // nodes which are not in the first layer,
                                 // node_num < SpellingTrie::shm2full_num().
                                 // For a full spelling id, node_num = 1;
  };

  // Used to indicate an extended mile stone.
  // An extended mile stone is used to mark a partial match in the dictionary
  // trie to speed up further potential extending.
  // For example, when the user inputs "w", a mile stone is created to mark the
  // partial match status, so that when the user inputs another char 'm', it
  // will be faster to extend the search space based on this mile stone.
  //
  // For the partial match status of "wm", there can be more than one sub mile
  // stone, for example, "wm" can be matched to "wanm", "wom", ..., etc, so
  // there may be more than one parsing mark used to mark these partial
  // matches. A mile stone records the starting position in the mark list and
  // the number of marks.
  struct MileStone {
    uint16 mark_start;  // Position of the first mark in the parsing mark list.
    uint16 mark_num;    // Number of marks belonging to this mile stone.
  };

  // Dictionary list used together with this trie. free_resource() takes a
  // flag that selects whether it is freed as well.
  DictList* dict_list_;

  // Spelling trie used by this dictionary (read-only through this pointer).
  const SpellingTrie *spl_trie_;

  LmaNodeLE0* root_;        // Nodes for root and the first layer.
  LmaNodeGE1* nodes_ge1_;   // Nodes for other layers.

  // A quick index from spelling id to the LmaNodeLE0 node buffer, or
  // to the root_ buffer.
  // Index length:
  // SpellingTrie::get_instance().get_spelling_num() + 1. The last one is used
  // to get the end.
  // All Shengmu ids are not indexed because they will be converted into
  // corresponding full ids.
  // So, given an id splid, the son is:
  // root_[splid_le0_index_[splid - kFullSplIdStart]]
  uint16 *splid_le0_index_;

  // Node counts -- presumably the number of entries in root_ and nodes_ge1_
  // respectively; TODO confirm.
  size_t lma_node_num_le0_;
  size_t lma_node_num_ge1_;

  // The first part is for homophones, and the last top_lmas_num_ items are
  // lemmas with highest scores.
  unsigned char *lma_idx_buf_;
  size_t lma_idx_buf_len_;  // The total size of lma_idx_buf_ in byte.
  size_t total_lma_num_;    // Total number of lemmas in this dictionary.
  size_t top_lmas_num_;     // Number of lemma with highest scores.

  // Parsing mark list used to mark the detailed extended statuses.
  ParsingMark *parsing_marks_;
  // The position for next available mark.
  uint16 parsing_marks_pos_;

  // Mile stone list used to mark the extended status.
  MileStone *mile_stones_;
  // The position for the next available mile stone. We use positions (except 0)
  // as handles.
  MileStoneHandle mile_stones_pos_;

  // Get the offset of sons for a node.
  // NOTE(review): declared inline but not defined in this header, so the
  // definition must be visible in every translation unit that calls it.
  inline size_t get_son_offset(const LmaNodeGE1 *node);

  // Get the offset of homophone ids for a node.
  inline size_t get_homo_idx_buf_offset(const LmaNodeGE1 *node);

  // Get the lemma id by the offset.
  inline LemmaIdType get_lemma_id(size_t id_offset);

  // Release resources held by this object; free_dict_list selects whether
  // dict_list_ is released too (presumably -- confirm in the .cpp file).
  void free_resource(bool free_dict_list);

  // Load the dictionary from an already-open stream.
  // NOTE(review): FILE comes from <stdio.h>, but this header only includes
  // <stdlib.h> directly -- presumably one of the project headers pulls it in;
  // confirm.
  bool load_dict(FILE *fp);

  // Given a LmaNodeLE0 node, extract the lemmas specified by it, and fill
  // them into the lpi_items buffer.
  // This function is called by the search engine.
  size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size,
                         LmaNodeLE0 *node);

  // Given a LmaNodeGE1 node, extract the lemmas specified by it, and fill
  // them into the lpi_items buffer.
  // This function is called by inner functions extend_dict0(), extend_dict1()
  // and extend_dict2().
  size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size,
                         size_t homo_buf_off, LmaNodeGE1 *node,
                         uint16 lma_len);

  // Extend in the trie from level 0.
  MileStoneHandle extend_dict0(MileStoneHandle from_handle,
                               const DictExtPara *dep, LmaPsbItem *lpi_items,
                               size_t lpi_max, size_t *lpi_num);

  // Extend in the trie from level 1.
  MileStoneHandle extend_dict1(MileStoneHandle from_handle,
                               const DictExtPara *dep, LmaPsbItem *lpi_items,
                               size_t lpi_max, size_t *lpi_num);

  // Extend in the trie from level 2.
  MileStoneHandle extend_dict2(MileStoneHandle from_handle,
                               const DictExtPara *dep, LmaPsbItem *lpi_items,
                               size_t lpi_max, size_t *lpi_num);

  // Try to extend the given spelling id buffer, and if the given id_lemma can
  // be successfully gotten, return true;
  // The given spelling ids are all valid full ids.
  bool try_extend(const uint16 *splids, uint16 splid_num, LemmaIdType id_lemma);

#ifdef ___BUILD_MODEL___
  // Stream-based counterpart of the public save_dict(const char *); only
  // declared when building the model.
  bool save_dict(FILE *fp);
#endif  // ___BUILD_MODEL___

  // Limits for the mile stone and parsing mark lists -- presumably the
  // capacities of mile_stones_ and parsing_marks_; TODO confirm.
  static const int kMaxMileStone = 100;
  static const int kMaxParsingMark = 600;
  // Position 0 is not used as a handle (see mile_stones_pos_), so the first
  // valid handle is 1.
  static const MileStoneHandle kFirstValidMileStoneHandle = 1;

  // These classes get access to the private members above.
  friend class DictParser;
  friend class DictBuilder;

 public:

  DictTrie();
  ~DictTrie();

#ifdef ___BUILD_MODEL___
  // Construct the tree from the file fn_raw.
  // fn_validhzs provide the valid hanzi list. If fn_validhzs is
  // NULL, only chars in GB2312 will be included.
  bool build_dict(const char *fn_raw, const char *fn_validhzs);

  // Save the binary dictionary
  // Actually, the SpellingTrie/DictList instance will be also saved.
  bool save_dict(const char *filename);
#endif  // ___BUILD_MODEL___

  // In-place conversions on a buffer of str_len char16 items.
  // NOTE(review): the exact source/target encodings are not visible in this
  // header; see the implementation.
  void convert_to_hanzis(char16 *str, uint16 str_len);

  void convert_to_scis_ids(char16 *str, uint16 str_len);

  // Load a binary dictionary
  // The SpellingTrie instance/DictList will be also loaded
  bool load_dict(const char *filename, LemmaIdType start_id,
                 LemmaIdType end_id);
  // Same as above but from a file descriptor; start_offset and length
  // presumably delimit the dictionary's region inside that file -- TODO
  // confirm.
  bool load_dict_fd(int sys_fd, long start_offset, long length,
                    LemmaIdType start_id, LemmaIdType end_id);
  // Inline stub: does nothing and always reports success.
  bool close_dict() {return true;}
  // Inline stub: always returns 0 (it does not report total_lma_num_).
  size_t number_of_lemmas() {return 0;}

  // NOTE(review): the functions below appear to mirror the AtomDictBase
  // interface; their contracts are not visible in this header -- see
  // atomdictbase.h.
  void reset_milestones(uint16 from_step, MileStoneHandle from_handle);

  MileStoneHandle extend_dict(MileStoneHandle from_handle,
                              const DictExtPara *dep,
                              LmaPsbItem *lpi_items,
                              size_t lpi_max, size_t *lpi_num);

  size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
                  LmaPsbItem *lpi_items, size_t lpi_max);

  uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);

  uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
                          uint16 splids_max, bool arg_valid);

  size_t predict(const char16 *last_hzs, uint16 hzs_len,
                 NPredictItem *npre_items, size_t npre_max,
                 size_t b4_used);

  // The following inline members are stubs: each one ignores its arguments
  // and returns a fixed value (0 or false), or does nothing.
  LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[],
                        uint16 lemma_len, uint16 count) {return 0;}

  LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count,
                           bool selected) {return 0;}

  LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[],
                           uint16 lemma_len) {return 0;}

  LmaScoreType get_lemma_score(LemmaIdType lemma_id) {return 0;}

  LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[],
                        uint16 lemma_len) {return 0;}

  bool remove_lemma(LemmaIdType lemma_id) {return false;}

  size_t get_total_lemma_count() {return 0;}
  // Declared here and defined out of line, unlike the stubs above.
  void set_total_lemma_count_of_others(size_t count);

  // Inline stub: nothing to flush.
  void flush_cache() {}

  // Look up a lemma id from its string alone. This is a separate overload
  // from the three-argument get_lemma_id() stub above, which always returns 0.
  LemmaIdType get_lemma_id(const char16 lemma_str[], uint16 lemma_len);

  // Fill the lemmas with highest scores to the prediction buffer.
  // his_len is the history length to fill in the prediction buffer.
  size_t predict_top_lmas(size_t his_len, NPredictItem *npre_items,
                          size_t npre_max, size_t b4_used);
};
}

#endif  // PINYINIME_INCLUDE_DICTTRIE_H__