/usr/include/hfst/HfstFlagDiacritics.h is in libhfst42-dev 3.9.0~r4595-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 | #ifndef _FLAG_DIACRITICS_H_
#define _FLAG_DIACRITICS_H_
#include <iostream>
#include <string>
#include <map>
#include <vector>
#include <cassert>
#include <utility>
#include "hfstdll.h"
/** @file HfstFlagDiacritics.h
\brief Class declarations for flag diacritic handling. */
namespace hfst {
enum FdOperator {Pop, Nop, Rop, Dop, Cop, Uop};
typedef unsigned short FdFeature;
typedef short FdValue;
class FdOperation
{
private:
FdOperator op;
FdFeature feature;
FdValue value;
std::string name;
public:
HFSTDLL FdOperation
(FdOperator op, FdFeature feat, FdValue val, const std::string& str):
op(op), feature(feat), value(val), name(str) {}
HFSTDLL FdOperator Operator(void) const { return op; }
HFSTDLL FdFeature Feature(void) const { return feature; }
HFSTDLL FdValue Value(void) const { return value; }
HFSTDLL std::string Name(void) const { return name; }
HFSTDLL static FdOperator char_to_operator(char c)
{
switch (c) {
case 'P': return Pop;
case 'N': return Nop;
case 'R': return Rop;
case 'D': return Dop;
case 'C': return Cop;
case 'U': return Uop;
default:
throw;
}
}
HFSTDLL static bool is_diacritic(const std::string& diacritic_str);
HFSTDLL static std::string::size_type find_diacritic
(const std::string& diacritic_str,
std::string::size_type& length);
HFSTDLL static std::string get_operator(const std::string& diacritic);
HFSTDLL static std::string get_feature(const std::string& diacritic);
HFSTDLL static std::string get_value(const std::string& diacritic);
HFSTDLL static bool has_value(const std::string& diacritic);
};
template<class T> class FdState;
/** \brief A collection of the flag diacritics from a symbol table indexed
by keys of type \a T
*/
template<class T>
class FdTable
{
private:
// Used for generating IDs that stand in for feature and value strings
std::map<std::string, FdFeature> feature_map;
std::map<std::string, FdValue> value_map;
std::map<T, FdOperation> operations;
std::map<std::string, T> symbol_map;
public:
FdTable(): feature_map(), value_map()
{ value_map[std::string()] = 0; } // empty value = neutral
void define_diacritic(T symbol, const std::string& str)
{
if(!FdOperation::is_diacritic(str))
throw;
FdOperator op = FdOperation::char_to_operator(str.at(1));
std::string feat;
std::string val;
// Third character is always the first fullstop.
size_t first_full_stop_pos = 2;
// Find the second full stop, if there is one.
size_t second_full_stop_pos = str.find('.',first_full_stop_pos+1);
size_t last_char_pos = str.size() - 1;
if(second_full_stop_pos == std::string::npos)
{
assert(op == Cop || op == Dop || op == Rop);
feat = str.substr(first_full_stop_pos+1,
last_char_pos-first_full_stop_pos-1);
}
else
{
feat = str.substr(first_full_stop_pos+1,
second_full_stop_pos-first_full_stop_pos-1);
val = str.substr(second_full_stop_pos+1,
last_char_pos-second_full_stop_pos-1);
}
if(feature_map.count(feat) == 0)
{
FdFeature next = feature_map.size();
feature_map[feat] = next;
}
if(value_map.count(val) == 0)
{
FdValue next = value_map.size()+1;
value_map[val] = next;
}
operations.insert
(std::pair<T,FdOperation>
(symbol,
FdOperation(op, feature_map[feat], value_map[val], str)));
symbol_map.insert(std::pair<std::string,T>(str, symbol));
}
FdFeature num_features() const { return feature_map.size(); }
bool is_diacritic(T symbol) const
{ return operations.find(symbol) != operations.end(); }
const FdOperation* get_operation(T symbol) const
{
// for some reason this fails to compile???
//std::map<T,FdOperation>::const_iterator i
// = operations.find(symbol);
//return (i==operations.end()) ? NULL : &(i->second);
return (operations.find(symbol)==operations.end()) ? NULL :
&(operations.find(symbol)->second);
}
const FdOperation* get_operation(const std::string& symbol) const
{
return (symbol_map.find(symbol)==symbol_map.end()) ? NULL :
get_operation(symbol_map.find(symbol)->second);
}
bool is_valid_string(const std::vector<T>& symbols) const
{
FdState<T> state(*this);
for(size_t i=0; i<symbols.size(); i++)
{
if(!state.apply_operation(symbols[i]))
break;
}
return !state.fails();
}
bool is_valid_string(const std::string& str) const
{
FdState<T> state(*this);
std::string remaining(str);
std::string::size_type length;
while(true)
{
std::string::size_type next_diacritic_pos
= FdOperation::find_diacritic(remaining, length);
if(next_diacritic_pos == std::string::npos)
break;
std::string diacritic = remaining.substr(0, length);
if(!state.apply_operation(diacritic))
break;
remaining = remaining.substr(length);
}
return !state.fails();
}
};
/** \brief Contains the values of each of the flag diacritic features from a
table. It allows for evaluating a series of diacritic operations
*/
template<class T>
class FdState
{
private:
const FdTable<T>* table;
// This is indexed with values of type FdFeature
typename std::vector<FdValue> values;
T num_features;
bool error_flag;
public:
FdState(const FdTable<T>& t):
table(&t), values(table->num_features()),
num_features(table->num_features()), error_flag(false)
{}
FdState():
table(NULL), values(), num_features(0), error_flag(false)
{}
const FdTable<T>& get_table() const {return *table;}
const std::vector<FdValue> & get_values(void) const
{ return values; }
void assign_values(std::vector<FdValue> const & vals)
{
values = vals;
if (values.size() != num_features) {
error_flag = true;
}
}
bool apply_operation(T symbol)
{
const FdOperation* op = table->get_operation(symbol);
if(op)
return apply_operation(*op);
return true; // if the symbol isn't a diacritic
}
bool apply_operation(const FdOperation& op)
{
switch(op.Operator()) {
case Pop: // positive set
values[op.Feature()] = op.Value();
return true;
case Nop: // negative set (literally, in this implementation)
values[op.Feature()] = -1*op.Value();
return true;
case Rop: // require
if (op.Value() == 0) // empty require
return (values[op.Feature()] != 0);
else // nonempty require
return (values[op.Feature()] == op.Value());
case Dop: // disallow
if (op.Value() == 0) // empty disallow
return (values[op.Feature()] == 0);
else // nonempty disallow
return (values[op.Feature()] != op.Value());
case Cop: // clear
values[op.Feature()] = 0;
return true;
case Uop: // unification
if(values[op.Feature()] == 0 || /* if the feature is unset or */
values[op.Feature()] == op.Value() || /* the feature is at
this value already
or */
(values[op.Feature()] < 0 &&
(values[op.Feature()]*(-1) != op.Value())) /* the feature is
negatively set
to something
else */
)
{
values[op.Feature()] = op.Value();
return true;
}
return false;
}
throw; // for the compiler's peace of mind
}
bool apply_operation(const std::string& symbol)
{
const FdOperation* op = table->get_operation(symbol);
if(op)
return apply_operation(*op);
return true;
}
bool fails() const {return error_flag;}
void reset()
{
error_flag = false;
values.clear();
values.insert(values.begin(), table->num_features(), 0);
}
};
}
#endif
|