This file is indexed.

/usr/include/fst/extensions/linear/linear-fst-data-builder.h is in libfst-dev 1.6.3-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

// See www.openfst.org for extensive documentation on this weighted
// finite-state transducer library.

#ifndef FST_EXTENSIONS_LINEAR_LINEAR_FST_DATA_BUILDER_H_
#define FST_EXTENSIONS_LINEAR_LINEAR_FST_DATA_BUILDER_H_

#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <vector>

#include <fst/compat.h>
#include <fst/log.h>
#include <fst/fst.h>
#include <fst/symbol-table.h>
#include <fst/util.h>

#include <fst/extensions/linear/linear-fst-data.h>

namespace fst {

// Forward declaration
template <class A>
class FeatureGroupBuilder;

// For logging purposes
inline string TranslateLabel(int64 label, const SymbolTable *syms);
template <class Iterator>
string JoinLabels(Iterator begin, Iterator end, const SymbolTable *syms);
template <class Label>
string JoinLabels(const std::vector<Label> &labels, const SymbolTable *syms);

// Guesses the appropriate boundary label (start- or end-of-sentence)
// for all labels equal to `boundary` and modifies the `sequence`
// in-place. Returns the number of positions that are still uncertain.
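//
// For example, with boundary label B, the sequence (B B a b B) is
// resolved in-place to (start start a b end) and 0 is returned; for
// (B B) neither occurrence can be resolved and 2 is returned.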
template <class A>
typename A::Label GuessStartOrEnd(std::vector<typename A::Label> *sequence,
                                  typename A::Label boundary);

// Builds a `LinearFstData` object by adding words and feature
// weights. A few conventions:
//
// - Input labels form a dense non-empty range from 1 to `MaxInputLabel()`.
// - Feature labels and output labels are > 0.
// - Because this is a discriminative linear model, only tropical
// semirings make sense.
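//
// A minimal usage sketch (illustrative only; the labels, group layout,
// and weights below are made up for the example):
//
//   LinearFstDataBuilder<StdArc> builder;
//   builder.AddWord(1, {1});          // word 1 carries feature 1
//   builder.AddWord(2, {2}, {1, 2});  // word 2 may only output 1 or 2
//   int group = builder.AddGroup(0);  // zero look-ahead
//   builder.AddWeight(group, {1}, {1}, TropicalWeight(0.5));
//   std::unique_ptr<LinearFstData<StdArc>> data(builder.Dump());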
template <class A>
class LinearFstDataBuilder {
 public:
  typedef typename A::Label Label;
  typedef typename A::Weight Weight;

  // Constructs a builder with associated symbol tables for diagnostic
  // output. Each of these symbol tables may be nullptr.
  explicit LinearFstDataBuilder(const SymbolTable *isyms = nullptr,
                                const SymbolTable *fsyms = nullptr,
                                const SymbolTable *osyms = nullptr)
      : error_(false),
        max_future_size_(0),
        max_input_label_(1),
        isyms_(isyms),
        fsyms_(fsyms),
        osyms_(osyms) {}

  // Tests whether the builder has encountered any error. No operation
  // is valid if the builder is already at error state. All other
  // public methods should check this before any actual operations.
  bool Error() const { return error_; }

  // Adds a word and its feature labels to the vocabulary; this
  // version allows the word to have any output label. Returns true
  // iff the word is added.
  //
  // This may fail if the word is added twice or if the feature labels
  // are non-positive.
  bool AddWord(Label word, const std::vector<Label> &features);

  // Adds a word and its feature labels to the vocabulary; this
  // version puts a constraint on the possible output labels the word
  // can have. `possible_output` must not be empty. Returns true iff
  // the word is added.
  //
  // In addition to the reasons above in the two-parameter version,
  // this may also fail if `possible_output` is empty or any output
  // label in it is non-positive.
  bool AddWord(Label word, const std::vector<Label> &word_features,
               const std::vector<Label> &possible_output);

  // Creates a new feature group with specified future size (size of
  // the look-ahead window), returns the group id to be used for
  // adding actual feature weights or a negative number when called at
  // error state.
  //
  // This does not fail unless called at error state.
  int AddGroup(size_t future_size);

  // Adds an instance of feature weight to the specified feature
  // group. If some weight has already been added with the same
  // feature, the product of the old and new weights is
  // stored. Returns true iff the weight is added. A weight is not
  // added when it has an ill-formed context involving start- or
  // end-of-sentence marks.
  //
  // For two features to be within the same group, they must satisfy
  // the following: (1) they have the same future size; (2) they either
  // have disjoint contexts or one is the back-off context of the
  // other. Furthermore, each feature in a group must have one and
  // only one context (not necessarily an active feature) that it
  // immediately backs off to (i.e. there is no other context that is
  // the back-off of the first and backs off to the second).
  //
  // Consider for example features with zero look-ahead of the form
  // (input, OUTPUT).
  //
  // - The following two features can be put in the same group because
  // their context is disjoint: (a a a, A A), (b, B B);
  //
  // - The following two features can be put in the same group because
  // one is the back-off context of the other: (a a a, A A), (a a, A
  // A);
  //
  // - The following two features can NOT be put in the same group
  // because there is overlap but neither is the other's back-off: (a
  // a a, A), (a a, A A);
  //
  // - Finally, the following three features cannot be in the same group
  // because the first one can immediately back off to either of the
  // rest: (a a a, A A), (a a, A A), (a a a, A).
  //
  // The easiest way to satisfy the constraints is to create a feature
  // group for each feature template. However, better feature grouping
  // may help improve speed.
  //
  // This may fail if any of input or output labels are non-positive,
  // or if any call to `FeatureGroupBuilder<>::AddWeight()` fails.
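  //
  // For instance, the first compatible pair above could be added to a
  // single group like this (a sketch; `a`, `b`, `A`, `B` stand for
  // positive labels and `w1`, `w2` for weights):
  //
  //   builder.AddWeight(g, {a, a, a}, {A, A}, w1);
  //   builder.AddWeight(g, {b}, {B, B}, w2);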
  bool AddWeight(size_t group, const std::vector<Label> &input,
                 const std::vector<Label> &output, Weight weight);

  // Returns a newly created `LinearFstData` object or nullptr in case
  // of failure. The caller takes ownership of the memory. No
  // other methods shall be called after this --- this is enforced by
  // putting the builder at error state, even when a
  // `LinearFstData<>` object is successfully built.
  //
  // This may fail if the call to any `FeatureGroupBuilder<>::Dump()`
  // fails.
  LinearFstData<A> *Dump();

 private:
  bool error_;
  CompactSet<Label, kNoLabel> all_output_labels_;
  std::map<Label, std::set<Label>> word_output_map_, word_feat_map_;
  std::map<Label, std::set<size_t>> feat_groups_;
  std::vector<std::unique_ptr<FeatureGroupBuilder<A>>> groups_;
  size_t max_future_size_;
  Label max_input_label_;
  const SymbolTable *isyms_, *fsyms_, *osyms_;

  LinearFstDataBuilder(const LinearFstDataBuilder &) = delete;
  LinearFstDataBuilder &operator=(const LinearFstDataBuilder &) = delete;
};

// Builds a LinearFstData tailored for a LinearClassifierFst. The
// major difference between an ordinary LinearFstData that works on
// taggers and a LinearFstData that works on classifiers is that
// feature groups are divided into sections by the prediction class
// label. For a prediction label `pred` and a logical group id
// `group`, the actual group id is `group * num_classes + pred -
// 1`.
//
// This layout saves us from recording output labels in each single
// FeatureGroup. Because there is no need for any delaying, stripping
// the output allows features with different shapes but the same
// feature label mapping to reside in a single FeatureGroup.
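//
// For example, with `num_classes` = 3, the weights of logical group 2
// for prediction 2 are stored in actual group 2 * 3 + 2 - 1 = 7.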
template <class A>
class LinearClassifierFstDataBuilder {
 public:
  typedef typename A::Label Label;
  typedef typename A::Weight Weight;

  // Constructs a builder for a `num_classes`-class classifier,
  // optionally with associated symbol tables for diagnostic
  // output. The output labels (i.e. predictions) must be in the
  // range [1, num_classes].
  explicit LinearClassifierFstDataBuilder(size_t num_classes,
                                          const SymbolTable *isyms = nullptr,
                                          const SymbolTable *fsyms = nullptr,
                                          const SymbolTable *osyms = nullptr)
      : error_(false),
        num_classes_(num_classes),
        num_groups_(0),
        builder_(isyms, fsyms, osyms) {}

  // Tests whether the builder has encountered any error. Similar to
  // LinearFstDataBuilder<>::Error().
  bool Error() const { return error_; }

  // Same as LinearFstDataBuilder<>::AddWord().
  bool AddWord(Label word, const std::vector<Label> &features);

  // Adds a logical feature group. Similar to
  // LinearFstDataBuilder<>::AddGroup(), with the exception that the
  // returned group id is the logical group id. Also there is no need
  // for "future" in a classifier.
  int AddGroup();

  // Adds an instance of feature weight to the specified logical
  // feature group. Instead of a vector of output, only a single
  // prediction is needed as the output.
  //
  // This may fail if `pred` is not in the range of [1, num_classes_].
  bool AddWeight(size_t group, const std::vector<Label> &input, Label pred,
                 Weight weight);

  // Returns a newly created `LinearFstData` object or nullptr in case of
  // failure.
  LinearFstData<A> *Dump();

 private:
  std::vector<Label> empty_;
  bool error_;
  size_t num_classes_, num_groups_;
  LinearFstDataBuilder<A> builder_;
};

// Builds a single feature group. Usually used in
// `LinearFstDataBuilder::AddWeight()`. See that method for the
// constraints on grouping features.
template <class A>
class FeatureGroupBuilder {
 public:
  typedef typename A::Label Label;
  typedef typename A::Weight Weight;

  // Constructs a builder with the given future size. All features
  // added to the group will have look-ahead windows of this size.
  FeatureGroupBuilder(size_t future_size, const SymbolTable *fsyms,
                      const SymbolTable *osyms)
      : error_(false), future_size_(future_size), fsyms_(fsyms), osyms_(osyms) {
    // This edge is special; see the doc of class `FeatureGroup` for
    // details.
    start_ = trie_.Insert(trie_.Root(), InputOutputLabel(kNoLabel, kNoLabel));
  }

  // Tests whether the builder has encountered any error. No operation
  // is valid if the builder is already at error state. All other
  // public methods should check this before any actual operations.
  bool Error() const { return error_; }

  // Adds a feature weight with the given context. Returns true iff
  // the weight is added. A weight is not added if it has an ill-formed
  // context involving start- or end-of-sentence marks.
  //
  // Note: `input` is the sequence of input features, not the
  // input labels themselves. `input` must be at
  // least as long as `future_size`; `output` may be empty, but
  // usually should be non-empty because an empty output context is
  // useless in discriminative modelling. All labels in both `input`
  // and `output` must be > 0 (this is checked in
  // `LinearFstDataBuilder::AddWeight()`). See
  // LinearFstDataBuilder<>::AddWeight for more details.
  //
  // This may fail if the input is smaller than the look-ahead window.
  bool AddWeight(const std::vector<Label> &input,
                 const std::vector<Label> &output, Weight weight);

  // Creates an actual FeatureGroup<> object. Connects back-off links;
  // pre-accumulates weights from back-off features. Returns nullptr if
  // there is any violation in unique immediate back-off
  // constraints.
  //
  // Regardless of whether the call succeeds or not, the error flag is
  // always set before this returns, to prevent repeated dumping.
  //
  // TODO(wuke): check overlapping top-level contexts (see
  // `DumpOverlappingContext()` in tests).
  FeatureGroup<A> *Dump(size_t max_future_size);

 private:
  typedef typename FeatureGroup<A>::InputOutputLabel InputOutputLabel;
  typedef typename FeatureGroup<A>::InputOutputLabelHash InputOutputLabelHash;
  typedef typename FeatureGroup<A>::WeightBackLink WeightBackLink;
  // Nested trie topology uses more memory but we can traverse a
  // node's children easily, which is required in `BuildBackLinks()`.
  typedef NestedTrieTopology<InputOutputLabel, InputOutputLabelHash> Topology;
  typedef MutableTrie<InputOutputLabel, WeightBackLink, Topology> Trie;

  // Finds the first node with an arc with `label` following the
  // back-off chain of `parent`. Returns the node index or
  // `kNoTrieNodeId` when not found. The number of hops is stored in
  // `hop` when it is not `nullptr`.
  //
  // This does not fail.
  int FindFirstMatch(InputOutputLabel label, int parent, int *hop) const;

  // Links each node to its immediate back-off. The root is linked to -1.
  //
  // This may fail when the unique immediate back-off constraint is
  // violated.
  void BuildBackLinks();

  // Traces back on the back-chain for each node to multiply the
  // weights from back-offs to the node itself.
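  //
  // For example, if node (a b) carries weight w1 and backs off to
  // (b) with weight w2 (itself backing off to the root, whose weight
  // is assumed to be semiring One), then after accumulation (a b)
  // stores Times(w1, w2).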
  //
  // This does not fail.
  void PreAccumulateWeights();

  // Reconstructs the path from the trie root to the given node for logging.
  bool TrieDfs(const Topology &topology, int cur, int target,
               std::vector<InputOutputLabel> *path) const;
  string TriePath(int node, const Topology &topology) const;

  bool error_;
  size_t future_size_;
  Trie trie_;
  int start_;
  const SymbolTable *fsyms_, *osyms_;

  FeatureGroupBuilder(const FeatureGroupBuilder &) = delete;
  FeatureGroupBuilder &operator=(const FeatureGroupBuilder &) = delete;
};

//
// Implementation of methods in `LinearFstDataBuilder`
//
template <class A>
bool LinearFstDataBuilder<A>::AddWord(Label word,
                                      const std::vector<Label> &features) {
  if (error_) {
    FSTERROR() << "Calling LinearFstDataBuilder<>::AddWord() at error state";
    return false;
  }
  if (word == LinearFstData<A>::kStartOfSentence ||
      word == LinearFstData<A>::kEndOfSentence) {
    LOG(WARNING) << "Ignored: adding boundary label: "
                 << TranslateLabel(word, isyms_)
                 << "(start-of-sentence=" << LinearFstData<A>::kStartOfSentence
                 << ", end-of-sentence=" << LinearFstData<A>::kEndOfSentence
                 << ")";
    return false;
  }
  if (word <= 0) {
    error_ = true;
    FSTERROR() << "Word label must be > 0; got " << word;
    return false;
  }
  if (word > max_input_label_) max_input_label_ = word;
  // Make sure the word hasn't been added before
  if (word_feat_map_.find(word) != word_feat_map_.end()) {
    error_ = true;
    FSTERROR() << "Input word " << TranslateLabel(word, isyms_)
               << " is added twice";
    return false;
  }
  // Store features
  std::set<Label> *feats = &word_feat_map_[word];
  for (size_t i = 0; i < features.size(); ++i) {
    Label feat = features[i];
    if (feat <= 0) {
      error_ = true;
      FSTERROR() << "Feature label must be > 0; got " << feat;
      return false;
    }
    feats->insert(feat);
  }
  return true;
}

template <class A>
bool LinearFstDataBuilder<A>::AddWord(
    Label word, const std::vector<Label> &word_features,
    const std::vector<Label> &possible_output) {
  if (error_) {
    FSTERROR() << "Calling LinearFstDataBuilder<>::AddWord() at error state";
    return false;
  }
  if (!AddWord(word, word_features)) return false;
  // Store possible output constraint
  if (possible_output.empty()) {
    error_ = true;
    FSTERROR() << "Empty possible output constraint; "
               << "use the two-parameter version if no constraint is need.";
    return false;
  }
  std::set<Label> *outputs = &word_output_map_[word];
  for (size_t i = 0; i < possible_output.size(); ++i) {
    Label output = possible_output[i];
    if (output == LinearFstData<A>::kStartOfSentence ||
        output == LinearFstData<A>::kEndOfSentence) {
      LOG(WARNING) << "Ignored: word = " << TranslateLabel(word, isyms_)
                   << ": adding boundary label as possible output: " << output
                   << "(start-of-sentence="
                   << LinearFstData<A>::kStartOfSentence
                   << ", end-of-sentence=" << LinearFstData<A>::kEndOfSentence
                   << ")";
      continue;
    }
    if (output <= 0) {
      error_ = true;
      FSTERROR() << "Output label must be > 0; got " << output;
      return false;
    }
    outputs->insert(output);
    all_output_labels_.Insert(output);
  }
  return true;
}

template <class A>
inline int LinearFstDataBuilder<A>::AddGroup(size_t future_size) {
  if (error_) {
    FSTERROR() << "Calling LinearFstDataBuilder<>::AddGroup() at error state";
    return -1;
  }
  size_t ret = groups_.size();
  groups_.emplace_back(new FeatureGroupBuilder<A>(future_size, fsyms_, osyms_));
  if (future_size > max_future_size_) max_future_size_ = future_size;
  return ret;
}

template <class A>
bool LinearFstDataBuilder<A>::AddWeight(size_t group,
                                        const std::vector<Label> &input,
                                        const std::vector<Label> &output,
                                        Weight weight) {
  if (error_) {
    FSTERROR() << "Calling LinearFstDataBuilder<>::AddWeight() at error state";
    return false;
  }
  // Check well-formedness of boundary marks on the input.
  {
    bool start_in_middle = false, end_in_middle = false;
    for (int i = 1; i < input.size(); ++i) {
      if (input[i] == LinearFstData<A>::kStartOfSentence &&
          input[i - 1] != LinearFstData<A>::kStartOfSentence)
        start_in_middle = true;
      if (input[i - 1] == LinearFstData<A>::kEndOfSentence &&
          input[i] != LinearFstData<A>::kEndOfSentence)
        end_in_middle = true;
    }
    if (start_in_middle) {
      LOG(WARNING) << "Ignored: start-of-sentence in the middle of the input!";
      LOG(WARNING) << "\tInput: " << JoinLabels(input, fsyms_);
      LOG(WARNING) << "\tOutput: " << JoinLabels(output, osyms_);
      return false;
    }
    if (end_in_middle) {
      LOG(WARNING) << "Ignored: end-of-sentence in the middle of the input!";
      LOG(WARNING) << "\tInput: " << JoinLabels(input, fsyms_);
      LOG(WARNING) << "\tOutput: " << JoinLabels(output, osyms_);
      return false;
    }
  }
  // Check well-formedness of boundary marks on the output.
  {
    bool non_first_start = false, non_last_end = false;
    for (int i = 1; i < output.size(); ++i) {
      if (output[i] == LinearFstData<A>::kStartOfSentence)
        non_first_start = true;
      if (output[i - 1] == LinearFstData<A>::kEndOfSentence)
        non_last_end = true;
    }
    if (non_first_start) {
      LOG(WARNING) << "Ignored: start-of-sentence not appearing "
                   << "as the first label in the output!";
      LOG(WARNING) << "\tInput: " << JoinLabels(input, fsyms_);
      LOG(WARNING) << "\tOutput: " << JoinLabels(output, osyms_);
      return false;
    }
    if (non_last_end) {
      LOG(WARNING) << "Ignored: end-of-sentence not appearing "
                   << "as the last label in the output!";
      LOG(WARNING) << "\tInput: " << JoinLabels(input, fsyms_);
      LOG(WARNING) << "\tOutput: " << JoinLabels(output, osyms_);
      return false;
    }
  }

  for (size_t i = 0; i < input.size(); ++i) {
    Label feat = input[i];
    if (feat != LinearFstData<A>::kStartOfSentence &&
        feat != LinearFstData<A>::kEndOfSentence && feat <= 0) {
      error_ = true;
      FSTERROR() << "Feature label must be > 0; got " << feat;
      return false;
    }
    feat_groups_[feat].insert(group);
  }
  for (size_t i = 0; i < output.size(); ++i) {
    Label label = output[i];
    if (label != LinearFstData<A>::kStartOfSentence &&
        label != LinearFstData<A>::kEndOfSentence && label <= 0) {
      error_ = true;
      FSTERROR() << "Output label must be > 0; got " << label;
      return false;
    }
    if (label != LinearFstData<A>::kStartOfSentence &&
        label != LinearFstData<A>::kEndOfSentence)
      all_output_labels_.Insert(label);
  }

  // Everything looks good at this point (more checks on the way in
  // the feature group). Add this feature weight.
  bool added = groups_[group]->AddWeight(input, output, weight);
  if (groups_[group]->Error()) {
    error_ = true;
    FSTERROR() << "FeatureGroupBuilder<>::AddWeight() failed";
    return false;
  }
  return added;
}

template <class A>
LinearFstData<A> *LinearFstDataBuilder<A>::Dump() {
  if (error_) {
    FSTERROR() << "Calling LinearFstDataBuilder<>::Dump() at error state";
    return nullptr;
  }

  std::unique_ptr<LinearFstData<A>> data(new LinearFstData<A>());
  data->max_future_size_ = max_future_size_;
  data->max_input_label_ = max_input_label_;

  // Feature groups; free each builder after its group is dumped.
  data->groups_.resize(groups_.size());
  for (int group = 0; group != groups_.size(); ++group) {
    FeatureGroup<A> *new_group = groups_[group]->Dump(max_future_size_);
    if (new_group == nullptr) {
      error_ = true;
      FSTERROR() << "Error in dumping group " << group;
      return nullptr;
    }
    data->groups_[group].reset(new_group);
    groups_[group].reset();
    VLOG(1) << "Group " << group << ": " << new_group->Stats();
  }

  // Per-group feature mapping
  data->group_feat_map_.Init(data->NumGroups(), max_input_label_ + 1);
  for (Label word = 1; word <= max_input_label_; ++word) {
    typename std::map<Label, std::set<Label>>::const_iterator it =
        word_feat_map_.find(word);
    if (it == word_feat_map_.end()) continue;
    for (typename std::set<Label>::const_iterator oit = it->second.begin();
         oit != it->second.end(); ++oit) {
      Label feat = *oit;
      typename std::map<Label, std::set<size_t>>::const_iterator jt =
          feat_groups_.find(feat);
      if (jt == feat_groups_.end()) continue;
      for (std::set<size_t>::const_iterator git = jt->second.begin();
           git != jt->second.end(); ++git) {
        size_t group_id = *git;
        if (!data->group_feat_map_.Set(group_id, word, feat)) {
          error_ = true;
          return nullptr;
        }
      }
    }
  }

  // Possible output labels
  {
    std::vector<typename LinearFstData<A>::InputAttribute> *input_attribs =
        &data->input_attribs_;
    std::vector<Label> *output_pool = &data->output_pool_;
    input_attribs->resize(max_input_label_ + 1);
    for (Label word = 0; word <= max_input_label_; ++word) {
      typename std::map<Label, std::set<Label>>::const_iterator it =
          word_output_map_.find(word);
      if (it == word_output_map_.end()) {
        (*input_attribs)[word].output_begin = 0;
        (*input_attribs)[word].output_length = 0;
      } else {
        (*input_attribs)[word].output_begin = output_pool->size();
        (*input_attribs)[word].output_length = it->second.size();
        for (typename std::set<Label>::const_iterator oit = it->second.begin();
             oit != it->second.end(); ++oit) {
          Label olabel = *oit;
          output_pool->push_back(olabel);
        }
      }
    }
  }

  for (typename CompactSet<Label, kNoLabel>::const_iterator it =
           all_output_labels_.Begin();
       it != all_output_labels_.End(); ++it)
    data->output_set_.push_back(*it);

  error_ = true;  // prevent future calls on this object
  return data.release();
}

//
// Implementation of methods in `LinearClassifierFstDataBuilder`
//
template <class A>
inline bool LinearClassifierFstDataBuilder<A>::AddWord(
    Label word, const std::vector<Label> &features) {
  if (error_) {
    FSTERROR() << "Calling LinearClassifierFstDataBuilder<>::AddWord() at "
                  "error state";
    return false;
  }
  bool added = builder_.AddWord(word, features);
  if (builder_.Error()) error_ = true;
  return added;
}

template <class A>
inline int LinearClassifierFstDataBuilder<A>::AddGroup() {
  if (error_) {
    FSTERROR() << "Calling LinearClassifierFstDataBuilder<>::AddGroup() at "
                  "error state";
    return -1;
  }
  for (int i = 0; i < num_classes_; ++i) builder_.AddGroup(0);
  if (builder_.Error()) {
    error_ = true;
    return -1;
  }
  return num_groups_++;
}

template <class A>
inline bool LinearClassifierFstDataBuilder<A>::AddWeight(
    size_t group, const std::vector<Label> &input, Label pred, Weight weight) {
  if (error_) {
    FSTERROR() << "Calling LinearClassifierFstDataBuilder<>::AddWeight() at "
                  "error state";
    return false;
  }
  if (pred <= 0 || pred > num_classes_) {
    FSTERROR() << "Out-of-range prediction label: " << pred
               << " (num classes = " << num_classes_ << ")";
    error_ = true;
    return false;
  }
  size_t real_group = group * num_classes_ + pred - 1;
  bool added = builder_.AddWeight(real_group, input, empty_, weight);
  if (builder_.Error()) error_ = true;
  return added;
}

template <class A>
inline LinearFstData<A> *LinearClassifierFstDataBuilder<A>::Dump() {
  if (error_) {
    FSTERROR()
        << "Calling LinearClassifierFstDataBuilder<>::Dump() at error state";
    return nullptr;
  }
  LinearFstData<A> *data = builder_.Dump();
  error_ = true;
  return data;
}

//
// Implementation of methods in `FeatureGroupBuilder`
//
template <class A>
bool FeatureGroupBuilder<A>::AddWeight(const std::vector<Label> &input,
                                       const std::vector<Label> &output,
                                       Weight weight) {
  if (error_) {
    FSTERROR() << "Calling FeatureGroupBuilder<>::AddWeight() at error state";
    return false;
  }

  // `LinearFstDataBuilder<>::AddWeight()` ensures prefix/suffix
  // properties for us. We can directly count.
  int num_input_start = 0;
  while (num_input_start < input.size() &&
         input[num_input_start] == LinearFstData<A>::kStartOfSentence)
    ++num_input_start;
  int num_output_start = 0;
  while (num_output_start < output.size() &&
         output[num_output_start] == LinearFstData<A>::kStartOfSentence)
    ++num_output_start;
  int num_input_end = 0;
  for (int i = input.size() - 1;
       i >= 0 && input[i] == LinearFstData<A>::kEndOfSentence; --i)
    ++num_input_end;
  int num_output_end = 0;
  for (int i = output.size() - 1;
       i >= 0 && output[i] == LinearFstData<A>::kEndOfSentence; --i)
    ++num_output_end;

  DCHECK_LE(num_output_end, 1);

  if (input.size() - num_input_start < future_size_) {
    LOG(WARNING) << "Ignored: start-of-sentence in the future!";
    LOG(WARNING) << "\tInput: " << JoinLabels(input, fsyms_);
    LOG(WARNING) << "\tOutput: " << JoinLabels(output, fsyms_);
    return false;
  }
  if (num_input_start > 0 &&
      input.size() - future_size_ - num_input_start <
          output.size() - num_output_start) {
    LOG(WARNING) << "Ignored: matching start-of-sentence with actual output!";
    LOG(WARNING) << "\tInput: " << JoinLabels(input, fsyms_);
    LOG(WARNING) << "\tOutput: " << JoinLabels(output, osyms_);
    return false;
  }
  if (num_output_start > 0 &&
      input.size() - future_size_ - num_input_start >
          output.size() - num_output_start) {
    LOG(WARNING) << "Ignored: matching start-of-sentence with actual input!";
    LOG(WARNING) << "\tInput: " << JoinLabels(input, fsyms_);
    LOG(WARNING) << "\tOutput: " << JoinLabels(output, osyms_);
    return false;
  }
  // The following two require `num_output_end` <= 1.
  if (num_input_end > future_size_ && num_input_end - future_size_ != 1) {
    LOG(WARNING) << "Ignored: matching end-of-sentence with actual output!";
    LOG(WARNING) << "\tInput: " << JoinLabels(input, fsyms_);
    LOG(WARNING) << "\tOutput: " << JoinLabels(output, osyms_);
    return false;
  }
  if (num_output_end > 0 &&
      ((input.size() == future_size_ && future_size_ != num_input_end) ||
       (input.size() > future_size_ &&
        num_input_end != future_size_ + num_output_end))) {
    LOG(WARNING) << "Ignored: matching end-of-sentence with actual input!";
    LOG(WARNING) << "\tInput: " << JoinLabels(input, fsyms_);
    LOG(WARNING) << "\tOutput: " << JoinLabels(output, osyms_);
    return false;
  }
  // Check if the context has no other labels than boundary marks
  // (such features are useless).
  if (num_input_start + num_input_end == input.size() &&
      num_output_start + num_output_end == output.size()) {
    LOG(WARNING)
        << "Ignored: feature context consisting of only boundary marks!";
    LOG(WARNING) << "\tInput: " << JoinLabels(input, fsyms_);
    LOG(WARNING) << "\tOutput: " << JoinLabels(output, osyms_);
    return false;
  }

  // Start point for insertion in the trie. Insert at `start_` iff the
  // beginning of the context is non-consumed start-of-sentence.
  int cur = (num_input_start == 0 && num_output_start <= future_size_)
                ? trie_.Root()
                : start_;
  // Skip all input start-of-sentence marks
  size_t ipos = num_input_start;
  // Skip to keep at most `future_size_` start-of-sentence marks
  size_t opos =
      num_output_start <= future_size_ ? 0 : num_output_start - future_size_;
  // Skip `num_output_end` end-of-sentence marks on both input and output
  size_t iend = !input.empty() ? input.size() - num_output_end : 0,
         oend = output.size() - num_output_end;
  // Further, when output is empty, keep at most `future_size_`
  // end-of-sentence marks on input.
  if (output.empty() && num_input_end > future_size_)
    iend = input.size() - num_input_end + future_size_;

  // Actual feature context is (input[ipos:iend], output[opos:oend]).

  // Pad `kNoLabel` as don't cares on the shorter of actual `input`
  // and `output`.
  const size_t effective_input_size = iend - ipos,
               effective_output_size = oend - opos;
  if (effective_input_size > effective_output_size) {
    for (size_t pad = effective_input_size - effective_output_size; pad != 0;
         --pad, ++ipos)
      cur = trie_.Insert(cur, InputOutputLabel(input[ipos], kNoLabel));
  } else if (effective_input_size < effective_output_size) {
    for (size_t pad = effective_output_size - effective_input_size; pad != 0;
         --pad, ++opos)
      cur = trie_.Insert(cur, InputOutputLabel(kNoLabel, output[opos]));
  }
  CHECK_EQ(iend - ipos, oend - opos);
  for (; ipos != iend; ++ipos, ++opos)
    cur = trie_.Insert(cur, InputOutputLabel(input[ipos], output[opos]));
  // We only need to attach final weight when there is an output
  // end-of-sentence. When there is only end-of-sentence on the input,
  // they are all consumed as the end-of-sentence paddings from
  // `LinearFstImpl<>::ShiftBuffer()`. `LinearFstImpl<>::Expand()`
  // and `LinearFstImpl<>::MatchInput()` ensure no other
  // transition takes place after consuming the padding.
  if (num_output_end > 0 || (output.empty() && num_input_end > future_size_))
    trie_[cur].final_weight = Times(trie_[cur].final_weight, weight);
  else
    trie_[cur].weight = Times(trie_[cur].weight, weight);

  return true;
}

template <class A>
FeatureGroup<A> *FeatureGroupBuilder<A>::Dump(size_t max_future_size) {
  if (error_) {
    FSTERROR() << "Calling FeatureGroupBuilder<>::PreAccumulateWeights() "
               << "at error state";
    return nullptr;
  }

  if (max_future_size < future_size_) {
    error_ = true;
    FSTERROR() << "max_future_size (= " << max_future_size
               << ") is smaller the builder's future_size (= " << future_size_
               << ")";
    return nullptr;
  }

  BuildBackLinks();
  if (error_) return nullptr;
  PreAccumulateWeights();  // does not fail

  FeatureGroup<A> *ret =
      new FeatureGroup<A>(max_future_size - future_size_, start_);

  // Walk around the trie to compute next states
  ret->next_state_.resize(trie_.NumNodes());
  const Topology &topology = trie_.TrieTopology();
  for (int i = 0; i < topology.NumNodes(); ++i) {
    int next = i;
    while (next != topology.Root() && topology.ChildrenOf(next).empty() &&
           trie_[next].final_weight ==
               trie_[trie_[next].back_link].final_weight)
      next = trie_[next].back_link;
    ret->next_state_[i] = next;
  }

  // Copy the trie
  typename FeatureGroup<A>::Trie store_trie(trie_);
  ret->trie_.swap(store_trie);

  // Put the builder at error state to prevent repeated call of `Dump()`.
  error_ = true;
  return ret;
}

template <class A>
int FeatureGroupBuilder<A>::FindFirstMatch(InputOutputLabel label, int parent,
                                           int *hop) const {
  int hop_count = 0;
  int ret = kNoTrieNodeId;
  for (; parent >= 0; parent = trie_[parent].back_link, ++hop_count) {
    int next = trie_.Find(parent, label);
    if (next != kNoTrieNodeId) {
      ret = next;
      break;
    }
  }
  if (hop != nullptr) *hop = hop_count;
  return ret;
}

template <class A>
void FeatureGroupBuilder<A>::BuildBackLinks() {
  // Breadth first search from the root. In the case where we only
  // have the input label, the immediate back-off is simply the longest
  // suffix of the current node that is also in the trie. For a node
  // reached from its parent with label L, we can simply walk through
  // the parent's back-off chain to find the first state with an arc
  // of the same label L. The uniqueness is always
  // guaranteed. However, in the case with both input and output
  // labels, it is possible to back off by removing first labels from
  // either side, which in general causes non-uniqueness.
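  //
  // For example, with input-only features, a node reached by (a b c)
  // backs off to the longest proper suffix present in the trie, e.g.
  // (b c), which may in turn back off to (c), and finally to the
  // root (the empty context).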

  const Topology &topology = trie_.TrieTopology();
  std::queue<int> q;  // all enqueued or visited nodes have known links

  // Note: nodes have back link initialized to -1 in their
  // constructor.
  q.push(trie_.Root());
  while (!error_ && !q.empty()) {
    int parent = q.front();
    q.pop();
    // Find links for every child
    const typename Topology::NextMap &children = topology.ChildrenOf(parent);
    for (typename Topology::NextMap::const_iterator eit = children.begin();
         eit != children.end(); ++eit) {
      const std::pair<InputOutputLabel, int> &edge = *eit;
      InputOutputLabel label = edge.first;
      int child = edge.second;
      if (label.input == kNoLabel || label.output == kNoLabel) {
        // Label pairs from root to here all have one and only one
        // `kNoLabel` on the same side; equivalent to the
        // "longest-suffix" case.
        trie_[child].back_link =
            FindFirstMatch(label, trie_[parent].back_link, nullptr);
      } else {
        // Neither side is `kNoLabel` at this point, so there are
        // three possible ways to back off: if the parent backs
        // off to some context with only one side non-empty, the
        // empty side may remain empty; or else an exact match of
        // both sides is needed. Try to find all three possible
        // back-offs and look for the closest one (in terms of hops
        // along the parent's back-off chain).
        int only_input_hop, only_output_hop, full_hop;
        int only_input_link =
                FindFirstMatch(InputOutputLabel(label.input, kNoLabel), parent,
                               &only_input_hop),
            only_output_link =
                FindFirstMatch(InputOutputLabel(kNoLabel, label.output), parent,
                               &only_output_hop),
            full_link =
                FindFirstMatch(label, trie_[parent].back_link, &full_hop);
        if (only_input_link != -1 && only_output_link != -1) {
          error_ = true;
          FSTERROR() << "Branching back-off chain:\n"
                     << "\tnode " << child << ": " << TriePath(child, topology)
                     << "\n"
                     << "\tcan back-off to node " << only_input_link << ": "
                     << TriePath(only_input_link, topology) << "\n"
                     << "\tcan back-off to node " << only_output_link << ": "
                     << TriePath(only_output_link, topology);
          return;
        } else if (full_link != -1) {
          ++full_hop;
          if (full_hop <= only_input_hop && full_hop <= only_output_hop) {
            trie_[child].back_link = full_link;
          } else {
            error_ = true;
            int problem_link = only_input_link != kNoTrieNodeId
                                   ? only_input_link
                                   : only_output_link;
            CHECK_NE(problem_link, kNoTrieNodeId);
            FSTERROR() << "Branching back-off chain:\n"
                       << "\tnode " << child << ": "
                       << TriePath(child, topology) << "\n"
                       << "\tcan back-off to node " << full_link << ": "
                       << TriePath(full_link, topology) << "\n"
                       << "tcan back-off to node " << problem_link << ": "
                       << TriePath(problem_link, topology);
            return;
          }
        } else {
          trie_[child].back_link =
              only_input_link != -1 ? only_input_link : only_output_link;
        }
      }
      if (error_) break;
      // Point to empty context (root) when no back-off can be found
      if (trie_[child].back_link == -1) trie_[child].back_link = 0;
      q.push(child);
    }
  }
}

template <class A>
void FeatureGroupBuilder<A>::PreAccumulateWeights() {
  std::vector<bool> visited(trie_.NumNodes(), false);
  visited[trie_.Root()] = true;

  for (size_t i = 0; i != trie_.NumNodes(); ++i) {
    std::stack<int> back_offs;
    for (int j = i; !visited[j]; j = trie_[j].back_link) back_offs.push(j);
    while (!back_offs.empty()) {
      int j = back_offs.top();
      back_offs.pop();
      WeightBackLink &node = trie_[j];
      node.weight = Times(node.weight, trie_[node.back_link].weight);
      node.final_weight =
          Times(node.final_weight, trie_[node.back_link].final_weight);
      visited[j] = true;
    }
  }
}

template <class A>
bool FeatureGroupBuilder<A>::TrieDfs(
    const Topology &topology, int cur, int target,
    std::vector<InputOutputLabel> *path) const {
  if (cur == target) return true;
  const typename Topology::NextMap &children = topology.ChildrenOf(cur);
  for (typename Topology::NextMap::const_iterator eit = children.begin();
       eit != children.end(); ++eit) {
    const std::pair<InputOutputLabel, int> &edge = *eit;
    path->push_back(edge.first);
    if (TrieDfs(topology, edge.second, target, path)) return true;
    path->pop_back();
  }
  return false;
}

template <class A>
string FeatureGroupBuilder<A>::TriePath(int node,
                                        const Topology &topology) const {
  std::vector<InputOutputLabel> labels;
  TrieDfs(topology, topology.Root(), node, &labels);
  bool first = true;
  std::ostringstream strm;
  for (typename std::vector<InputOutputLabel>::const_iterator it =
           labels.begin();
       it != labels.end(); ++it) {
    InputOutputLabel i = *it;
    if (first)
      first = false;
    else
      strm << ", ";
    strm << "(" << TranslateLabel(i.input, fsyms_) << ", "
         << TranslateLabel(i.output, osyms_) << ")";
  }
  return strm.str();
}

inline string TranslateLabel(int64 label, const SymbolTable *syms) {
  string ret;
  if (syms != nullptr) ret += syms->Find(label);
  if (ret.empty()) {
    std::ostringstream strm;
    strm << '<' << label << '>';
    ret = strm.str();
  }
  return ret;
}

template <class Iterator>
string JoinLabels(Iterator begin, Iterator end, const SymbolTable *syms) {
  if (begin == end) return "<empty>";
  std::ostringstream strm;
  bool first = true;
  for (Iterator it = begin; it != end; ++it) {
    if (first)
      first = false;
    else
      strm << '|';
    strm << TranslateLabel(*it, syms);
  }
  return strm.str();
}

template <class Label>
string JoinLabels(const std::vector<Label> &labels, const SymbolTable *syms) {
  return JoinLabels(labels.begin(), labels.end(), syms);
}

template <class A>
typename A::Label GuessStartOrEnd(std::vector<typename A::Label> *sequence,
                                  typename A::Label boundary) {
  const size_t length = sequence->size();
  std::vector<bool> non_boundary_on_left(length, false),
      non_boundary_on_right(length, false);
  for (size_t i = 1; i < length; ++i) {
    non_boundary_on_left[i] =
        non_boundary_on_left[i - 1] || (*sequence)[i - 1] != boundary;
    non_boundary_on_right[length - 1 - i] = non_boundary_on_right[length - i] ||
                                            (*sequence)[length - i] != boundary;
  }
  int unresolved = 0;
  for (size_t i = 0; i < length; ++i) {
    if ((*sequence)[i] != boundary) continue;
    const bool left = non_boundary_on_left[i], right = non_boundary_on_right[i];
    if (left && right) {
      // Boundary in the middle
      LOG(WARNING) << "Boundary label in the middle of the sequence! position: "
                   << i << "; boundary: " << boundary
                   << "; sequence: " << JoinLabels(*sequence, nullptr);
      LOG(WARNING)
          << "This is an invalid sequence anyway so I will set it to start.";
      (*sequence)[i] = LinearFstData<A>::kStartOfSentence;
    } else if (left && !right) {
      // Can only be end
      (*sequence)[i] = LinearFstData<A>::kEndOfSentence;
    } else if (!left && right) {
      // Can only be start
      (*sequence)[i] = LinearFstData<A>::kStartOfSentence;
    } else {
      // !left && !right; can't really tell
      ++unresolved;
    }
  }
  return unresolved;
}

}  // namespace fst

#endif  // FST_EXTENSIONS_LINEAR_LINEAR_FST_DATA_BUILDER_H_