This file is indexed.

/usr/include/mlpack/methods/hmm/hmm.hpp is in libmlpack-dev 2.2.5-1build1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
/**
 * @file hmm.hpp
 * @author Ryan Curtin
 * @author Tran Quoc Long
 * @author Michael Fox
 *
 * Definition of HMM class.
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license.  You should have received a copy of the
 * 3-clause BSD license along with mlpack.  If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */
#ifndef MLPACK_METHODS_HMM_HMM_HPP
#define MLPACK_METHODS_HMM_HMM_HPP

#include <mlpack/prereqs.hpp>
#include <mlpack/core/dists/discrete_distribution.hpp>

namespace mlpack {
namespace hmm /** Hidden Markov Models. */ {

/**
 * A class that represents a Hidden Markov Model with an arbitrary type of
 * emission distribution.  This HMM class supports training (supervised and
 * unsupervised), prediction of state sequences via the Viterbi algorithm,
 * estimation of state probabilities, generation of random sequences, and
 * calculation of the log-likelihood of a given sequence.
 *
 * The template parameter, Distribution, specifies the distribution which the
 * emissions follow.  The class should implement the following functions:
 *
 * @code
 * class Distribution
 * {
 *  public:
 *   // The type of observation used by this distribution.
 *   typedef something DataType;
 *
 *   // Return the probability of the given observation.
 *   double Probability(const DataType& observation) const;
 *
 *   // Estimate the distribution based on the given observations.
 *   void Train(const std::vector<DataType>& observations);
 *
 *   // Estimate the distribution based on the given observations, given also
 *   // the probability of each observation coming from this distribution.
 *   void Train(const std::vector<DataType>& observations,
 *              const std::vector<double>& probabilities);
 * };
 * @endcode
 *
 * See the mlpack::distribution::DiscreteDistribution class for an example.  One
 * would use the DiscreteDistribution class when the observations are
 * non-negative integers.  Other distributions could be Gaussians, a mixture of
 * Gaussians (GMM), or any other probability distribution implementing the
 * four Distribution functions.
 *
 * Usage of the HMM class generally involves either training an HMM or loading
 * an already-known HMM and taking probability measurements of sequences.
 * Example code for supervised training of a Gaussian HMM (that is, where the
 * emission output distribution is a single Gaussian for each hidden state) is
 * given below.
 *
 * @code
 * extern arma::mat observations; // Each column is an observation.
 * extern arma::Row<size_t> states; // Hidden states for each observation.
 * // Create an untrained HMM with 5 hidden states and default (N(0, 1))
 * // Gaussian distributions with the dimensionality of the dataset.
 * HMM<GaussianDistribution> hmm(5, GaussianDistribution(observations.n_rows));
 *
 * // Train the HMM (the labels could be omitted to perform unsupervised
 * // training).
 * hmm.Train(observations, states);
 * @endcode
 *
 * Once initialized, the HMM can evaluate the probability of a certain sequence
 * (with LogLikelihood()), predict the most likely sequence of hidden states
 * (with Predict()), generate a sequence (with Generate()), or estimate the
 * probabilities of each state for a sequence of observations (with Train()).
 *
 * @tparam Distribution Type of emission distribution for this HMM.
 */
template<typename Distribution = distribution::DiscreteDistribution>
class HMM
{
 public:
  /**
   * Create the Hidden Markov Model with the given number of hidden states and
   * the given default distribution for emissions.  The dimensionality of the
   * observations is taken from the emissions variable, so it is important that
   * the given default emission distribution is set with the correct
   * dimensionality.  Alternately, set the dimensionality with Dimensionality().
   * Optionally, the tolerance for convergence of the Baum-Welch algorithm can
   * be set.
   *
   * By default, the transition matrix and initial probability vector are set to
   * contain equal probability for each state.
   *
   * @param states Number of states.
   * @param emissions Default distribution for emissions.
   * @param tolerance Tolerance for convergence of training algorithm
   *      (Baum-Welch).
   */
  HMM(const size_t states = 0,
      const Distribution emissions = Distribution(),
      const double tolerance = 1e-5);

  /**
   * Create the Hidden Markov Model with the given initial probability vector,
   * the given transition matrix, and the given emission distributions.  The
   * dimensionality of the observations of the HMM are taken from the given
   * emission distributions.  Alternately, the dimensionality can be set with
   * Dimensionality().
   *
   * The initial state probability vector should have length equal to the number
   * of states, and each entry represents the probability of being in the given
   * state at time T = 0 (the beginning of a sequence).
   *
   * The transition matrix should be such that T(i, j) is the probability of
   * transition to state i from state j.  The columns of the matrix should sum
   * to 1.
   *
   * The emission matrix should be such that E(i, j) is the probability of
   * emission i while in state j.  The columns of the matrix should sum to 1.
   *
   * Optionally, the tolerance for convergence of the Baum-Welch algorithm can
   * be set.
   *
   * @param initial Initial state probabilities.
   * @param transition Transition matrix.
   * @param emission Emission distributions.
   * @param tolerance Tolerance for convergence of training algorithm
   *      (Baum-Welch).
   */
  HMM(const arma::vec& initial,
      const arma::mat& transition,
      const std::vector<Distribution>& emission,
      const double tolerance = 1e-5);

  /**
   * Train the model using the Baum-Welch algorithm, with only the given
   * unlabeled observations.  Instead of giving a guess transition and emission
   * matrix here, do that in the constructor.  Each matrix in the vector of data
   * sequences holds an individual data sequence; each point in each individual
   * data sequence should be a column in the matrix.  The number of rows in each
   * matrix should be equal to the dimensionality of the HMM (which is set in
   * the constructor).
   *
   * It is preferable to use the other overload of Train(), with labeled data.
   * That will produce much better results.  However, if labeled data is
   * unavailable, this will work.  In addition, it is possible to use Train()
   * with labeled data first, and then continue to train the model using this
   * overload of Train() with unlabeled data.
   *
   * The tolerance of the Baum-Welch algorithm can be set either in the
   * constructor or with the Tolerance() method.  When the change in
   * log-likelihood of the model between iterations is less than the tolerance,
   * the Baum-Welch algorithm terminates.
   *
   * @note
   * Train() can be called multiple times with different sequences; each time it
   * is called, it uses the current parameters of the HMM as a starting point
   * for training.
   * @endnote
   *
   * @param dataSeq Vector of observation sequences.
   */
  void Train(const std::vector<arma::mat>& dataSeq);

  /**
   * Train the model using the given labeled observations; the transition and
   * emission matrices are directly estimated.  Each matrix in the vector of
   * data sequences corresponds to a vector in the vector of state sequences.
   * Each point in each individual data sequence should be a column in the
   * matrix, and its state should be the corresponding element in the state
   * sequence vector.  For instance, dataSeq[0].col(3) corresponds to the fourth
   * observation in the first data sequence, and its state is stateSeq[0][3].
   * The number of rows in each matrix should be equal to the dimensionality of
   * the HMM (which is set in the constructor).
   *
   * @note
   * Train() can be called multiple times with different sequences; each time it
   * is called, it uses the current parameters of the HMM as a starting point
   * for training.
   * @endnote
   *
   * @param dataSeq Vector of observation sequences.
   * @param stateSeq Vector of state sequences, corresponding to each
   *     observation.
   */
  void Train(const std::vector<arma::mat>& dataSeq,
             const std::vector<arma::Row<size_t> >& stateSeq);

  /**
   * Estimate the probabilities of each hidden state at each time step for each
   * given data observation, using the Forward-Backward algorithm.  Each matrix
   * which is returned has columns equal to the number of data observations, and
   * rows equal to the number of hidden states in the model.  The log-likelihood
   * of the most probable sequence is returned.
   *
   * @param dataSeq Sequence of observations.
   * @param stateProb Matrix in which the probabilities of each state at each
   *    time interval will be stored.
   * @param forwardProb Matrix in which the forward probabilities of each state
   *    at each time interval will be stored.
   * @param backwardProb Matrix in which the backward probabilities of each
   *    state at each time interval will be stored.
   * @param scales Vector in which the scaling factors at each time interval
   *    will be stored.
   * @return Log-likelihood of most likely state sequence.
   */
  double Estimate(const arma::mat& dataSeq,
                  arma::mat& stateProb,
                  arma::mat& forwardProb,
                  arma::mat& backwardProb,
                  arma::vec& scales) const;

  /**
   * Estimate the probabilities of each hidden state at each time step of each
   * given data observation, using the Forward-Backward algorithm.  The returned
   * matrix of state probabilities has columns equal to the number of data
   * observations, and rows equal to the number of hidden states in the model.
   * The log-likelihood of the most probable sequence is returned.
   *
   * @param dataSeq Sequence of observations.
   * @param stateProb Probabilities of each state at each time interval.
   * @return Log-likelihood of most likely state sequence.
   */
  double Estimate(const arma::mat& dataSeq,
                  arma::mat& stateProb) const;

  /**
   * Generate a random data sequence of the given length.  The data sequence is
   * stored in the dataSequence parameter, and the state sequence is stored in
   * the stateSequence parameter.  Each column of dataSequence represents a
   * random observation.
   *
   * @param length Length of random sequence to generate.
   * @param dataSequence Vector to store data in.
   * @param stateSequence Vector to store states in.
   * @param startState Hidden state to start sequence in (default 0).
   */
  void Generate(const size_t length,
                arma::mat& dataSequence,
                arma::Row<size_t>& stateSequence,
                const size_t startState = 0) const;

  /**
   * Compute the most probable hidden state sequence for the given data
   * sequence, using the Viterbi algorithm, returning the log-likelihood of the
   * most likely state sequence.
   *
   * @param dataSeq Sequence of observations.
   * @param stateSeq Vector in which the most probable state sequence will be
   *    stored.
   * @return Log-likelihood of most probable state sequence.
   */
  double Predict(const arma::mat& dataSeq,
                 arma::Row<size_t>& stateSeq) const;

  /**
   * Compute the log-likelihood of the given data sequence.
   *
   * @param dataSeq Data sequence to evaluate the likelihood of.
   * @return Log-likelihood of the given sequence.
   */
  double LogLikelihood(const arma::mat& dataSeq) const;

  /**
   * HMM filtering. Computes the k-step-ahead expected emission at each time
   * conditioned only on prior observations. That is
   * E{ Y[t+k] | Y[0], ..., Y[t] }.
   * The returned matrix has columns equal to the number of observations. Note
   * that the expectation may not be meaningful for discrete emissions.
   *
   * @param dataSeq Sequence of observations.
   * @param filterSeq Vector in which the expected emission sequence will be
   *    stored.
   * @param ahead Number of steps ahead (k) for expectations.
   */
  void Filter(const arma::mat& dataSeq,
              arma::mat& filterSeq,
              size_t ahead = 0) const;

  /**
   * HMM smoothing. Computes expected emission at each time conditioned on all
   * observations. That is
   * E{ Y[t] | Y[0], ..., Y[T] }.
   * The returned matrix has columns equal to the number of observations. Note
   * that the expectation may not be meaningful for discrete emissions.
   *
   * @param dataSeq Sequence of observations.
   * @param smoothSeq Vector in which the expected emission sequence will be
   *    stored.
   */
  void Smooth(const arma::mat& dataSeq,
              arma::mat& smoothSeq) const;

  //! Return the vector of initial state probabilities.
  const arma::vec& Initial() const { return initial; }
  //! Modify the vector of initial state probabilities.
  arma::vec& Initial() { return initial; }

  //! Return the transition matrix.
  const arma::mat& Transition() const { return transition; }
  //! Return a modifiable transition matrix reference.
  arma::mat& Transition() { return transition; }

  //! Return the emission distributions.
  const std::vector<Distribution>& Emission() const { return emission; }
  //! Return a modifiable emission probability matrix reference.
  std::vector<Distribution>& Emission() { return emission; }

  //! Get the dimensionality of observations.
  size_t Dimensionality() const { return dimensionality; }
  //! Set the dimensionality of observations.
  size_t& Dimensionality() { return dimensionality; }

  //! Get the tolerance of the Baum-Welch algorithm.
  double Tolerance() const { return tolerance; }
  //! Modify the tolerance of the Baum-Welch algorithm.
  double& Tolerance() { return tolerance; }

  /**
   * Serialize the object.
   */
  template<typename Archive>
  void Serialize(Archive& ar, const unsigned int version);

 protected:
  // Helper functions.
  /**
   * The Forward algorithm (part of the Forward-Backward algorithm).  Computes
   * forward probabilities for each state for each observation in the given data
   * sequence.  The returned matrix has rows equal to the number of hidden
   * states and columns equal to the number of observations.
   *
   * @param dataSeq Data sequence to compute probabilities for.
   * @param scales Vector in which scaling factors will be saved.
   * @param forwardProb Matrix in which forward probabilities will be saved.
   */
  void Forward(const arma::mat& dataSeq,
               arma::vec& scales,
               arma::mat& forwardProb) const;

  /**
   * The Backward algorithm (part of the Forward-Backward algorithm).  Computes
   * backward probabilities for each state for each observation in the given
   * data sequence, using the scaling factors found (presumably) by Forward().
   * The returned matrix has rows equal to the number of hidden states and
   * columns equal to the number of observations.
   *
   * @param dataSeq Data sequence to compute probabilities for.
   * @param scales Vector of scaling factors.
   * @param backwardProb Matrix in which backward probabilities will be saved.
   */
  void Backward(const arma::mat& dataSeq,
                const arma::vec& scales,
                arma::mat& backwardProb) const;

  //! Set of emission probability distributions; one for each state.
  std::vector<Distribution> emission;

  //! Transition probability matrix.
  arma::mat transition;

 private:
  //! Initial state probability vector.
  arma::vec initial;

  //! Dimensionality of observations.
  size_t dimensionality;

  //! Tolerance of Baum-Welch algorithm.
  double tolerance;
};

} // namespace hmm
} // namespace mlpack

// Include implementation.
#include "hmm_impl.hpp"

#endif