/usr/include/sunpinyin-2.0/lexicon/pytrie.h is in libsunpinyin-dev 2.0.3+git20130507-1ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
// -*- mode: c++ -*-
#ifndef __SUNPINYIN_PYTRIE_H__
#define __SUNPINYIN_PYTRIE_H__
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "../portability.h"
#include "pinyin/syllable.h"
#include <map>
// Width, in bits, of the word-id bit-field (CPinyinTrie::TWordIdInfo::m_id).
#define WORD_ID_WIDTH 24
/**
 * Read-only view over a pinyin trie held in one contiguous memory block
 * (m_mem).
 *
 * Layout as read by the accessors below: the block starts with three
 * `unsigned int` values (word count, node count, string offset), and the
 * root node follows immediately at byte offset 3 * sizeof(unsigned int).
 * Nodes are addressed by their byte offset from m_mem.
 *
 * NOTE(review): the destructor calls free(), yet copy construction and copy
 * assignment are left implicit, so a copy would share the same m_mem and
 * m_words pointers. Confirm that no caller copies instances of this class.
 */
class CPinyinTrie {
public:
    friend class CPinyinTrieMaker;

    /** One outgoing transition of a node: a syllable and the target offset. */
    struct TTransUnit {
        TSyllable m_Syllable;
        unsigned m_Offset;      // byte offset of the target node from m_mem
    };

    /**
     * Packed 32-bit word record. Converts implicitly to the word id.
     * The field order is mirrored for big-endian builds.
     */
    struct TWordIdInfo {
#ifdef WORDS_BIGENDIAN
        unsigned m_bSeen    : 1;
        unsigned m_cost     : 5;
        unsigned m_csLevel  : 2;
        unsigned m_id       : WORD_ID_WIDTH;
#else
        unsigned m_id       : WORD_ID_WIDTH;
        unsigned m_csLevel  : 2;
        unsigned m_cost     : 5;
        unsigned m_bSeen    : 1;
#endif

        /** Creates a record with every field cleared to zero. */
        TWordIdInfo() { memset(this, 0, sizeof(TWordIdInfo)); }

        /**
         * Creates a record from explicit field values; each value is
         * truncated to the width of its bit-field.
         *
         * @param id     word id
         * @param len    accepted for call compatibility; not stored
         * @param seen   value for m_bSeen
         * @param cost   value for m_cost
         * @param cslvl  value for m_csLevel
         */
        TWordIdInfo(unsigned id,
                    unsigned len = 0,
                    unsigned seen = 0,
                    unsigned cost = 0,
                    unsigned cslvl = 0)
        {
            (void) len;
            // Assigned in the body rather than a mem-initializer list so the
            // code is independent of the endian-specific declaration order.
            m_id = id;
            m_csLevel = cslvl;
            m_cost = cost;
            m_bSeen = seen;
        }

        operator unsigned int() const { return m_id; }
    };

    /**
     * Packed node header. In memory the header is followed directly by
     * m_nTransfer TTransUnit entries and then by the TWordIdInfo entries
     * (see getTrans(), getWordIdPtr() and size_for()).
     */
    struct TNode {
#ifdef WORDS_BIGENDIAN
        unsigned m_other                  : 5;
        unsigned m_bFullSyllableTransfer  : 1;
        unsigned m_csLevel                : 2;
        unsigned m_nTransfer              : 12;
        unsigned m_nWordId                : 12;
#else
        unsigned m_nWordId                : 12;
        unsigned m_nTransfer              : 12;
        unsigned m_csLevel                : 2;
        unsigned m_bFullSyllableTransfer  : 1;
        unsigned m_other                  : 5;
#endif

        /**
         * Number of bytes occupied by a node header plus its trailing
         * transition and word-id arrays.
         */
        static unsigned int size_for(unsigned int nTransfer,
                                     unsigned int nWordId)
        {
            return sizeof(TNode) + sizeof(TTransUnit) * nTransfer +
                   sizeof(TWordIdInfo) * nWordId;
        }

        /** Creates a node header with every field cleared to zero. */
        TNode() { memset(this, 0, sizeof(TNode)); }

        /** True when the node has more than one transition. */
        bool hasPinyinChild(void) const
        {
            return m_nTransfer > 1;
        }

        /** First entry of the transition array stored right after the header. */
        const TTransUnit* getTrans() const
        {
            return reinterpret_cast<const TTransUnit *>(this + 1);
        }

        /** First entry of the word-id array stored after the transition array. */
        const TWordIdInfo* getWordIdPtr() const
        {
            const char *afterHeader = reinterpret_cast<const char *>(this + 1);
            return reinterpret_cast<const TWordIdInfo *>(
                afterHeader + sizeof(TTransUnit) * m_nTransfer);
        }

        /**
         * Binary-searches the transition array for syllable @p s.
         * The search requires the array to be in ascending syllable order.
         *
         * @return the m_Offset of the matching transition, or 0 if none.
         */
        unsigned int transfer(unsigned s) const
        {
            const TTransUnit *ptrans = getTrans();
            unsigned int b = 0;
            unsigned int e = m_nTransfer;

            while (b < e) {
                // Index kept unsigned to match b and e.
                const unsigned int m = b + (e - b) / 2;
                if (ptrans[m].m_Syllable == s)
                    return ptrans[m].m_Offset;
                if (ptrans[m].m_Syllable < s)
                    b = m + 1;
                else
                    e = m;
            }
            return 0;
        }
    };

public:
    CPinyinTrie() : m_Size(0), m_mem(NULL), m_words(NULL) { }

    ~CPinyinTrie()
    {
        free();
    }

    bool
    load(const char* fileName);

    void
    free(void);

    bool
    isValid(const TNode* pnode, bool allowNonComplete, unsigned csLevel = 0);

    /** Byte offset of the root node: just past the three header counters. */
    unsigned int getRootOffset() const
    {
        return 3 * sizeof(unsigned int);
    }

    const TNode* getRootNode() const
    {
        return reinterpret_cast<const TNode *>(m_mem + getRootOffset());
    }

    /**
     * Maps a byte offset to a node pointer.
     *
     * @return NULL when @p offset lies before the root node (this includes
     *         the 0 returned by TNode::transfer() for "no transition");
     *         otherwise the node at m_mem + offset. No upper bound is checked.
     */
    const TNode* nodeFromOffset(unsigned int offset) const
    {
        if (offset < getRootOffset())
            return NULL;
        return reinterpret_cast<const TNode *>(m_mem + offset);
    }

    /** First header counter. */
    unsigned int getWordCount(void) const
    {
        return *reinterpret_cast<const unsigned int *>(m_mem);
    }

    /** Second header counter. */
    unsigned int getNodeCount(void) const
    {
        return *reinterpret_cast<const unsigned int *>(
            m_mem + sizeof(unsigned int));
    }

    /** Third header counter. */
    unsigned int getStringOffset(void) const
    {
        return *reinterpret_cast<const unsigned int *>(
            m_mem + 2 * sizeof(unsigned int));
    }

    /** Follows syllable @p s from @p pnode; NULL when there is no transition. */
    inline const TNode* transfer(const TNode* pnode, unsigned s) const
    {
        return nodeFromOffset(pnode->transfer(s));
    }

    /** Follows syllable @p s from the root node. */
    inline const TNode* transfer(unsigned s) const
    {
        return transfer(getRootNode(), s);
    }

    unsigned int
    getSymbolId(const TWCHAR* wstr);

    unsigned int
    getSymbolId(const wstring & wstr);

    /** Returns m_words[idx]; the index is not range-checked. */
    const TWCHAR* operator[](unsigned int idx) const
    {
        return m_words[idx];
    }

    int
    lengthAt(unsigned int idx) const;

    void
    print(FILE *fp) const;

protected:
    // Data member order is part of the binary layout; keep it unchanged.
    unsigned int m_Size;
    char *m_mem;
    TWCHAR **m_words;
    std::map<wstring, unsigned> m_SymbolMap;

    void
    print(const TNode* pRoot, std::string& prefix, FILE *fp) const;
};
#endif /* __SUNPINYIN_PYTRIE_H__*/
// -*- indent-tabs-mode: nil -*- vim:et:ts=4