/usr/include/sunpinyin-2.0/slm/slm.h is in libsunpinyin-dev 2.0.3+git20140127-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
// -*- mode: c++ -*-
/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
*
* Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
*
* The contents of this file are subject to the terms of either the GNU Lesser
* General Public License Version 2.1 only ("LGPL") or the Common Development and
* Distribution License ("CDDL")(collectively, the "License"). You may not use this
* file except in compliance with the License. You can obtain a copy of the CDDL at
* http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
* specific language governing permissions and limitations under the License. When
* distributing the software, include this License Header Notice in each file and
* include the full text of the License in the License file as well as the
* following notice:
*
* NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
* (CDDL)
* For Covered Software in this distribution, this License shall be governed by the
* laws of the State of California (excluding conflict-of-law provisions).
* Any litigation relating to this License shall be subject to the jurisdiction of
* the Federal Courts of the Northern District of California and the state courts
* of the State of California, with venue lying in Santa Clara County, California.
*
* Contributor(s):
*
* If you wish your version of this file to be governed by only the CDDL or only
* the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
* include this software in this distribution under the [CDDL or LGPL Version 2.1]
* license." If you don't indicate a single choice of license, a recipient has the
* option to distribute your version of this file under either the CDDL or the LGPL
* Version 2.1, or to extend the choice of license to its licensees as provided
* above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
* Version 2 license, then the option applies only if the new code is made subject
* to such option by the copyright holder.
*/
#ifndef _SUN_AGC_SLM_H
#define _SUN_AGC_SLM_H
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "../portability.h"
#include <stdio.h>
/**
* The threaded SLM makes the following modifications to a simple back-off
* language model:
* -# Word ids are limited to 18 bits (2^18 = 262144 distinct word ids)
* -# Compact all float value of -log(pr) into 65536 (16 bits)
* level and use a table to map the index to a float value;
* -# Compact all float values of the back-off weight (bow) into 16384
* (14 bits) levels and use a table to map the index to a float value;
* -# Threading information is embedded into the binary model file. Threading includes
* - bol(back-off-level) from current level
* - bon(back-off-node)'s index in the bol level array
* .
* The thread could be used:
* - when leaf node are arrived, it could use (bol,bon) as history for
* history node.
* - when a word cannot be found among the children of the current node
* (cl, cn), the search can be transferred directly to (bol, bon) and
* continue looking for the target word there
* -# Add a basic type TState in Language model, a state is pair of\n
* (level, array_idx_of_the level)
* -# Change all get-probability interfaces to\n
* double transfer(TState history, unsigned int wid, TState& result);
*/
class CThreadSlm {
public:
    /**
     * Bit widths used by the packed node records, plus a reserved word id.
     * BITS_BOW and BITS_PR have the same values as the widths of the
     * TNode::m_bow and TNode::m_pr bit-fields declared below.
     */
    enum {
        BITS_BOW = 14,
        BITS_PR = 16,
        ID_NOT_WORD = 69,
    };

    /**
     * (level:idx) locates a state in the language model.
     * Please note the pseudo unigram state, with level == 0 but idx > 0;
     * it is meant to be used with the bigram cache model.
     *
     * The pair is packed into a single unsigned int: 24 bits of idx and
     * 8 bits of level. m_all and anony alias the same storage, so the
     * comparison and conversion operators see both fields at once.
     * NOTE(review): reading m_all after writing anony relies on the compiler
     * supporting union-based type punning.
     */
    union TState {
        /** Copy the packed value of @p b. */
        TState(const TState &b) : m_all(b.m_all) {
        }

        /** Build a state from a level and an index; both default to 0. */
        TState(unsigned level = 0, unsigned idx = 0) {
            anony.m_Level = level;
            anony.m_Idx = idx;
        }

        /** Pre-increment: advance idx by one and return this state. */
        TState& operator++() { ++anony.m_Idx; return *this; }

        void setIdx(unsigned int idx) { anony.m_Idx = idx; }
        void setLevel(unsigned int lvl) { anony.m_Level = lvl; }

        unsigned int getLevel() const { return anony.m_Level; }
        unsigned int getIdx() const { return anony.m_Idx; }

        /** The whole packed (level, idx) value. */
        operator unsigned() const { return m_all; }

        /** True when idx is 0 or 1, whatever the level. */
        bool isTailState() const { return getIdx() <= 1; }

        /** Equal when both level and idx match (packed values compare equal). */
        bool operator==(const TState & b) const {
            return m_all == b.m_all;
        }

        /** Orders states by their packed unsigned value. */
        bool operator<(const TState & b) const {
            return unsigned(*this) < unsigned(b);
        }

    private:
        unsigned int m_all;
        /* Field order is swapped under WORDS_BIGENDIAN. */
#ifndef WORDS_BIGENDIAN
        struct TAnonymous {
            unsigned m_Idx : 24;
            unsigned m_Level : 8;
        } anony;
#else
        struct TAnonymous {
            unsigned m_Level : 8;
            unsigned m_Idx : 24;
        } anony;
#endif
    };

    /**
     * Packed inner-node record. Machine dependent: the record is a sequence
     * of bit-fields (18 + 14, 16 + 16, 23 + 2 + 7 bits) whose declaration
     * order is reversed under WORDS_BIGENDIAN.
     *
     * - wid: word id (18 bits)
     * - bow: 14-bit value; presumably an index into the bow table
     * - pr:  16-bit value; presumably an index into the pr table
     * - bon: back-off node index (23 bits)
     * - bol: back-off level (2 bits)
     * - ch:  23-bit value stored as a 16-bit low part and a 7-bit high part;
     *        presumably a child index -- TODO confirm against the loader
     */
    struct TNode {
    public:
        unsigned int wid() const { return m_wid; }
        unsigned int bow() const { return m_bow; }
        unsigned int pr() const { return m_pr; }
        unsigned int bon() const { return m_bon; }
        unsigned int bol() const { return m_bol; }

        /** Reassemble the 23-bit ch value from its high and low parts. */
        unsigned int ch() const {
            return (m_ch_hi << 16) + m_ch_lo;
        }

        void set_wid(unsigned int wid) { m_wid = wid; }
        void set_bow(unsigned int bow) { m_bow = bow; }
        void set_pr(unsigned int pr) { m_pr = pr; }
        void set_bon(unsigned int bon) { m_bon = bon; }
        void set_bol(unsigned int bol) { m_bol = bol; }

        /** Store the low 23 bits of @p ch; higher bits are discarded. */
        void set_ch(unsigned int ch) {
            m_ch_hi = ((ch >> 16) & 0x7F);
            m_ch_lo = (ch & 0xFFFF);
        }

    protected:
#ifndef WORDS_BIGENDIAN
        unsigned m_wid : 18;
        unsigned m_bow : 14;
        unsigned m_pr : 16;
        unsigned m_ch_lo : 16;
        unsigned m_bon : 23;
        unsigned m_bol : 2;
        unsigned m_ch_hi : 7;
#else
        unsigned m_ch_hi : 7;
        unsigned m_bol : 2;
        unsigned m_bon : 23;
        unsigned m_ch_lo : 16;
        unsigned m_pr : 16;
        unsigned m_bow : 14;
        unsigned m_wid : 18;
#endif
    };

    /**
     * Packed leaf record. Machine dependent: bit-fields of 18 + 14 and
     * 23 + 2 + 2 bits, declared in reverse order under WORDS_BIGENDIAN.
     *
     * Unlike TNode it has no bow and no ch field, and its 16-bit pr value
     * is split into a 14-bit low part and a 2-bit high part.
     */
    struct TLeaf {
    public:
        unsigned int wid() const { return m_wid; }
        unsigned int bon() const { return m_bon; }
        unsigned int bol() const { return m_bol; }

        /** Reassemble the 16-bit pr value from its high and low parts. */
        unsigned int pr() const { return (m_pr_hi << 14) + m_pr_lo; }

        void set_wid(unsigned int wid) { m_wid = wid; }
        void set_bon(unsigned int bon) { m_bon = bon; }
        void set_bol(unsigned int bol) { m_bol = bol; }

        /** Store the low 16 bits of @p pr; higher bits are discarded. */
        void set_pr(unsigned int pr) {
            m_pr_hi = ((pr >> 14) & 0x3);
            m_pr_lo = pr & 0x3FFF;
        }

    protected:
#ifndef WORDS_BIGENDIAN
        unsigned m_wid : 18;
        unsigned m_pr_lo : 14;
        unsigned m_bon : 23;
        unsigned m_bol : 2;
        unsigned m_pr_hi : 2;
#else
        unsigned m_pr_hi : 2;
        unsigned m_bol : 2;
        unsigned m_bon : 23;
        unsigned m_pr_lo : 14;
        unsigned m_wid : 18;
#endif
    };

public:
    /**
     * Create an empty model: every pointer is NULL and every counter is 0.
     * m_bufSize is initialised as well so that no member is left
     * indeterminate before load() is called.
     */
    CThreadSlm()
        : m_N(0), m_UseLogPr(0), m_Levels(NULL), m_LevelSizes(NULL),
          m_bowTable(NULL), m_prTable(NULL), m_bufSize(0), m_bMMap(false),
          m_buf(NULL) { }

    /**
     * Releases the model through free().
     * NOTE(review): copy construction and assignment are compiler-generated;
     * whether copying an instance is safe depends on free(), which is
     * defined outside this header.
     */
    ~CThreadSlm() { free(); }

    /**
     * Load a model from the file @p fname (implemented elsewhere).
     * @param MMap presumably requests memory-mapping of the file instead of
     *             reading it into a buffer -- TODO confirm in the .cpp
     * @return presumably true on success
     */
    bool
    load(const char* fname, bool MMap = false);

    /** Value of the m_UseLogPr flag. */
    unsigned isUseLogPr() const
    { return m_UseLogPr; }

    /** Release whatever load() acquired (implemented elsewhere). */
    void
    free();

    /**
     * Variant of transfer(); judging by the name it returns the negative
     * logarithm of the probability -- TODO confirm in the .cpp.
     */
    double
    transferNegLog(TState history, unsigned int wid, TState& result);

    /**
     * Get-probability interface: move from state @p history on word id
     * @p wid and store the resulting state in @p result.
     */
    double
    transfer(TState history, unsigned int wid, TState& result);

    /** Declared here, implemented elsewhere; maps a state to a history state. */
    TState
    history_state_of(TState st);

    /** In-place counterpart of history_state_of(); returns a TState reference. */
    TState&
    historify(TState& st);

    /** Declared here, implemented elsewhere; presumably the word id of @p st. */
    unsigned int
    lastWordId(TState st);

protected:
    /** Implemented elsewhere; presumably the shared core of the transfer calls. */
    double
    rawTransfer(TState history, unsigned int wid, TState& result);

protected:
    typedef void* PtrVoid;

    unsigned m_N;
    unsigned m_UseLogPr;
    void **m_Levels;
    unsigned *m_LevelSizes;
    float *m_bowTable;
    float *m_prTable;

private:
    ssize_t m_bufSize;
    bool m_bMMap;
    char *m_buf;
};
#endif
// -*- indent-tabs-mode: nil -*- vim:et:ts=4