This file is indexed.

/usr/include/sphinx3/cont_mgau.h is in libs3decoder-dev 0.8-0ubuntu1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* ====================================================================
 * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer. 
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * This work was supported in part by funding from the Defense Advanced 
 * Research Projects Agency and the National Science Foundation of the 
 * United States of America, and the CMU Sphinx Speech Consortium.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 *
 */
/*
 * cont_mgau.h -- Mixture Gaussians for continuous HMM models.
 *
 * **********************************************
 * CMU ARPA Speech Project
 *
 * Copyright (c) 1997 Carnegie Mellon University.
 * ALL RIGHTS RESERVED.
 * **********************************************
 *
 * HISTORY
 * $Log$
 * Revision 1.1  2006/04/05  20:27:30  dhdfu
 * A Great Reorganzation of header files and executables
 * 
 * Revision 1.15  2006/02/22 16:32:40  arthchan2003
 * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: 1, Dave's change in 1.18 for removing temp_hack is followed 2, The logic of uninit_compact is changed, by default the code will remove a Gaussian if it has zero mean AND zero variance. The old behavior (removal if Gaussian has zero mean.) could be retained if specifying -remove_zero_var_gau=1, 3, Fix issue in  .
 *
 *
 * Revision 1.13.4.3  2005/07/05 21:28:57  arthchan2003
 * 1, Merged from HEAD. 2, Remove redundant keyword in cont_mgau.
 *
 * Revision 1.13.4.2  2005/07/05 06:48:54  arthchan2003
 * Merged from HEAD.
 *
 * Revision 1.14  2005/07/04 20:57:53  dhdfu
 * Finally remove the "temporary hack" for the endpointer, and do
 * everything in logs3 domain.  Should make it faster and less likely to
 * crash on Alphas.
 *
 * Actually it kind of duplicates the existing GMM computation functions,
 * but it is slightly different (see the comment in classify.c).  I don't
 * know the rationale for this.
 *
 * Revision 1.13.4.1  2005/07/05 05:47:59  arthchan2003
 * Fixed dox-doc. struct level of documentation are included.
 *
 *
 * Revision 1.13  2005/06/21 18:06:45  arthchan2003
 *
 * Log. 1, Fixed Doxygen documentation. 2, Added $Log$
 * Revision 1.1  2006/04/05  20:27:30  dhdfu
 * A Great Reorganzation of header files and executables
 * 
 * Log. 1, Fixed Doxygen documentation. 2, Added Revision 1.15  2006/02/22 16:32:40  arthchan2003
 * Log. 1, Fixed Doxygen documentation. 2, Added Merged from SPHINX3_5_2_RCI_IRII_BRANCH: 1, Dave's change in 1.18 for removing temp_hack is followed 2, The logic of uninit_compact is changed, by default the code will remove a Gaussian if it has zero mean AND zero variance. The old behavior (removal if Gaussian has zero mean.) could be retained if specifying -remove_zero_var_gau=1, 3, Fix issue in  .
 * Log. 1, Fixed Doxygen documentation. 2, Added
 *
 * Revision 1.4  2005/06/13 04:02:55  archan
 * Fixed most doxygen-style documentation under libs3decoder.
 *
 * Revision 1.3  2005/03/30 01:22:46  archan
 * Fixed mistakes in last updates. Add
 *
 * 
 * 20.Apr.2001  RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
 *              Added mgau_free to free memory allocated by mgau_init()
 * 15-Dec-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
 * 		Added mgau_model_t.{frm_sen_eval,frm_gau_eval}.
 * 		Added mgau_var_nzvec_floor().

 * 
 * 28-Mar-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
 * 		Started.
 */


#ifndef _S3_CONT_MGAU_H_
#define _S3_CONT_MGAU_H_

#include <s3types.h>
#include <logmath.h>

/** \file cont_mgau.h
 *  \brief Interface of full GMM computation with integer value of log likelihood. 
 */
#ifdef __cplusplus
extern "C" {
#endif
#if 0
} /* Fool Emacs into not indenting things. */
#endif

/**
 * Specification of sets of GMM 
 */

#define CONTHMM  10001 /**< (NOT USED, for backward compatibility only) Fully continuous HMM */
#define SEMIHMM  10002 /**< (NOT USED, for backward compatibility only) Semi-continuous HMM */

#define FULL_INT_COMP 20001  /**< (NOT USED) Use full integer computation */
#define FULL_FLOAT_COMP 20002 /**< (NOT USED) Use full floating point computation */
#define MIX_INT_FLOAT_COMP 20003 /**< (NOT USED) Use mixture of integer and floating point computation */

#define MGAU_MEAN		1 /**< Constant specifying the mean is used */
#define MGAU_VAR		2 /**< Constant specifying the variance is used */
#define MGAU_FULLVAR		3 /**< Constant specifying the full covariance is used */

/* Negative constants are parenthesized so that they expand as a single operand
 * in any surrounding expression. */
#define NO_BSTIDX               (-1) /**< When there is no best index */
#define NOT_UPDATED              (-100) /**< Constant indicating that a Gaussian is not updated. */

/**
 * Mixture Gaussians: Weighted set of Gaussian densities, each with its own mean vector and
 * diagonal covariance matrix.  Specialized for continuous HMMs to improve speed performance.
 * So, a separate mixture Gaussian, with its own mixture weights, for each HMM state.  Also,
 * a single feature stream assumed.  (In other words, the mgau_t structure below represents
 * a senone in a fully continuous HMM model.)
 * 
 * Given a Gaussian density with mean vector m and diagonal variance vector v, and some
 * input vector x, all of length n, the likelihood of x under that Gaussian (loosely
 * referred to as the "Mahalanobis distance" of x from the mean m throughout this file)
 * is given by:
 *     {1/sqrt((2pi)^n * det(v))} * exp{-Sum((x[i] - m[i])^2 / (2v[i]))}
 * To speed up this evaluation, the first sub-expression ({1/sqrt...}) can be precomputed at
 * initialization, and so can 1/2v[i] in the second sub-expression.  Secondly, recognition
 * systems work with log-likelihood values, so these distances or likelihood values are
 * computed in log-domain.  Finally, float32 operations are costlier than int32 ones, so
 * the log-values are converted to logs3 domain (see libmisc/logs3.h) (but before the mixing
 * weights are applied).  Thus, to reiterate, the final scores are (int32) logs3 values.
 */

/* 20040826 ARCHAN: 
 * Introduced hook to the GMM definition to allow gaussian computation using full float 
 * operations. Also added another hook that allows potential use of full covariance matrix. 
 * At this point, full covariance matrix computation was not fully implemented. 
 * If comp_type = MIX_INT_FLOAT_COMP, then the integer pointer will be used to store the 
 * mixture weight.  It is also important to initialize the logs3 routine separately.
 * If comp_type = FULL_FLOAT_COMP, then the floating point pointer will be used to store the mixture
 * weights. 
 */

/**
 * \struct mgau_t
 * \brief A single mixture-Gaussian model for one senone (see above comment).
 */
typedef struct {
    int32 n_comp;	/**< Number of component Gaussians in this mixture.  NOTE: May be 0 (for the
			   untrained states). */

    int32 bstidx;         /**< Index of the most likely Gaussian component in this mixture. It persists through time. */ 
    int32 bstscr;         /**< Score of the most likely Gaussian component in this mixture. */
    int32 updatetime;     /**< Update time.  NOTE(review): presumably the frame number at which
                             this mixture was last updated, or NOT_UPDATED -- confirm against
                             the implementation. */

    /* Definition for mean */
    float32 **mean;	/**< The n_comp means of the Gaussians, i.e. the mean vectors for a single
			   mixture-Gaussian model for one senone. Dimension: n_comp * dimension */

    /* Definition for variances */
    float32 **var;	/**< The n_comp (diagonal) variances of the Gaussians.  Could be
			   converted to 1/(2*var) for faster computation (see above comment).
			   The diagonal variance vectors for a single mixture-Gaussian model
			   for one senone. Dimension: n_comp * dimension */

    float32 ***fullvar;   /**< The n_comp (full) variances of the Gaussians, i.e. a full
                             co-variance matrix per component for a single mixture-Gaussian
                             model for one senone.
                             Dimension: n_comp * dimension * dimension */

    /* Definition for the log reciprocal terms */
    float32 *lrd;	        /**< Log(Reciprocal(Determinant (variance))).  (Then there is also a
				   (2pi)^(veclen) involved...) */

    /* Definitions for the mixture weights */
    int32 *mixw;	        /**< Mixture weights for the n_comp components (int32 instead of float32
				   because these values are in logs3 domain)*/
    float32 *mixw_f;      /**< (ONLY USED IN ENDPOINTER) mixture weights for the n_comp
                             components in float32 */
} mgau_t;



/**
 * \struct mgau_model_t
 * \brief The set of mixture-Gaussians in an acoustic model used in Sphinx 3.X family of tool
 */
typedef struct {
    int32 n_mgau;	/**< Number of mixture Gaussians in this model (i.e., number of senones) */
    int32 max_comp;	/**< Max components in any mixture */
    int32 veclen;	/**< Vector length of the Gaussian density means (and diagonal vars) */
    mgau_t *mgau;	/**< The n_mgau mixture Gaussians */
    float64 distfloor;	/**< Mahalanobis distances can underflow when finally converted to
                           logs3 values.  To prevent this, floor the log values first. */
    int32 comp_type;  /**< Type of computation used in this set of mixture-Gaussians */		     
    int32 verbose;    /**< Whether to display information */

    /* Used only in the flat lexicon decoder, statistics */
    int32 frm_sen_eval;		/**< Number of senones evaluated in the most recent frame */
    int32 frm_gau_eval;		/**< Number of Gaussian densities evaluated in the most recent frame */
    int32 frm_ci_sen_eval;        /**< Number of CI senones evaluated in the most recent frame */
    int32 frm_ci_gau_eval;        /**< Number of CI Gaussian densities evaluated in the most recent frame */

    int32 gau_type; /**< gau_type=CONTHMM if it is a fully continuous HMM, 
                       gau_type=SEMIHMM if it is a semi-continuous HMM. Currently SEMIHMM is not supported. */

    logmath_t *logmath;		/**< The logmath_t structure */
} mgau_model_t;


/** Access macros.
    In all of them g is a pointer to an mgau_model_t, m is an index into
    g->mgau (a mixture, i.e. a senone) and c is an index of a component
    within that mixture.
 */
/** \def mgau_n_mgau
    Access the number of GMMs in the model
    \def mgau_max_comp
    Access the maximum number of components. It can be different across GMMs
    \def mgau_veclen
    Access the vector length of the Gaussian means (and diagonal variances)
    \def mgau_n_comp
    Access the number of components of mixture m
    \def mgau_mean 
    Access the mean vector of component c of mixture m
    \def mgau_var
    Access the (diagonal) variance vector of component c of mixture m
    \def mgau_fullvar
    Access the full variance (matrix) of component c of mixture m
    \def mgau_mixw
    Access the mixture weight (int32, logs3 domain) of component c of mixture m
    \def mgau_mixw_f
    Access the float32 mixture weight of component c of mixture m
    \def mgau_lrd
    Access the floating point version of the Gaussian constant
    \def mgau_lrdi
    Access the integer version of the Gaussian constant.
    NOTE(review): this expands to a member named lrdi, which the mgau_t
    declared in this header does not have, so any use of this macro will
    fail to compile unless such a member is added.
    \def mgau_frm_sen_eval
    Number of senones evaluated in this frame
    \def mgau_frm_gau_eval
    Number of gaussians evaluated in this frame
    \def mgau_frm_cisen_eval
    Number of CI senones evaluated in this frame
    \def mgau_frm_cigau_eval
    Number of CI gaussians evaluated in this frame

*/
#define mgau_n_mgau(g)		((g)->n_mgau)
#define mgau_max_comp(g)	((g)->max_comp)
#define mgau_veclen(g)		((g)->veclen)
#define mgau_n_comp(g,m)	((g)->mgau[m].n_comp)
#define mgau_mean(g,m,c)	((g)->mgau[m].mean[c])
#define mgau_var(g,m,c)		((g)->mgau[m].var[c])
#define mgau_fullvar(g,m,c)	((g)->mgau[m].fullvar[c])
#define mgau_mixw(g,m,c)	((g)->mgau[m].mixw[c])
#define mgau_mixw_f(g,m,c)	((g)->mgau[m].mixw_f[c])
#define mgau_lrd(g,m,c)		((g)->mgau[m].lrd[c])
#define mgau_lrdi(g,m,c)	((g)->mgau[m].lrdi[c])
#define mgau_frm_sen_eval(g)	((g)->frm_sen_eval)
#define mgau_frm_gau_eval(g)	((g)->frm_gau_eval)
#define mgau_frm_cisen_eval(g)	((g)->frm_ci_sen_eval)
#define mgau_frm_cigau_eval(g)	((g)->frm_ci_gau_eval)

/**
 * Create a new mixture Gaussian model from the given files (Sphinx3 format).  Optionally,
 * apply the precomputations mentioned in the main comment above (precomputing mgau_t.lrd
 * and transforming each variance value to 1/(2*var)).
 * @see mgau_file_read
 * @see mgau_mixw_read
 * @see mgau_uninit_compact
 * @see mgau_var_floor
 * @see mgau_precomp
 * @see mgau_free
 * @return pointer to the model created if successful; NULL if error.
 */
S3DECODER_EXPORT
mgau_model_t *
mgau_init (const char *meanfile,	/**< In: File containing means of mixture gaussians */
	   const char *varfile,		/**< In: File containing variances of mixture gaussians */
	   float64 varfloor,		/**< In: Floor value applied to variances; e.g., 0.0001 */
	   const char *mixwfile,	/**< In: File containing mixture weights */
	   float64 mixwfloor,		/**< In: Floor value for mixture weights; e.g., 0.0000001 */
	   int32 precomp,		/**< In: If TRUE, create and precompute mgau_t.lrd and also
						   transform each var value to 1/(2*var).  (If FALSE, one
						   cannot use the evaluation routines provided here.) */
	   const char* senmgau,		/**< In: Type of the Gaussian distributions, .cont. or .semi.
						   FIXME: This is confusing! */
	   int32 comp_type,		/**< In: Type of computation in this set of gaussian mixtures. */
	   logmath_t *logmath);		/**< In: The logmath_t structure.  NOTE(review): presumably
						   the one kept in mgau_model_t.logmath -- confirm
						   against the implementation. */
				

/**
 * Apply a floor to the values of any variance vector that is non-zero (as a vector).
 * NOTE(review): all-zero variance vectors are presumably left untouched -- confirm
 * against the implementation.
 * @return Number of variance VALUES floored.
 */
int32 mgau_var_nzvec_floor (mgau_model_t *g,  /**< In: A mixture of Gaussian components */
			    float64 floor /**< In: The floor value */
    );


/**
 * Evaluate a single mixture Gaussian at the given vector x; i.e., compute the Mahalanobis
 * distance of x from each mean in the mixture, and combine them using the mixture weights.
 * NOTE: if the specified mixture is empty, S3_LOGPROB_ZERO is returned (see
 * libmisc/libmisc.h).
 * @return the senone score, i.e. the final score from this evaluation (a logs3 domain value).
 */
/* The hybrid integer and floating point implementation of GMM computation */

S3DECODER_EXPORT
int32 mgau_eval (mgau_model_t *g,	/**< In: The entire mixture Gaussian model */
	   int32 m,		/**< In: The chosen mixture in the model (i.e., g->mgau[m]) */
	   int32 *active_comp,	/**< In: An optional, -1 terminated list of active component
				   indices; if non-NULL, only the specified components are
				   used in the evaluation. */
	   float32 *x, /**< In: Input observation vector (of length g->veclen). */
	   int32 fr,    /**< In: Frame number where GMM m is updated */
	   int32 bUpdBstIdx    /**< In: Whether the best index for the GMM will be updated or not */
    );		

/**
 * Like mgau_eval, but return the scores of the individual components, instead of combining
 * them into a senone score.  
 * @return Best component score.
 */
S3DECODER_EXPORT
int32 mgau_comp_eval (mgau_model_t *g,	/**< In: Set of mixture Gaussians */
		      int32 m,		/**< In: Mixture being considered */
		      float32 *x,	/**< In: Input vector being compared to the components */
		      int32 *score);	/**< Out: Array of scores for each component */

/**
 * A routine that dumps all mean or variance parameters of a set of Gaussian distributions,
 * depending on the requested type.
 * @return always 0
 */

int32 mgau_dump (mgau_model_t *g,  /**< In: Set of mixture Gaussians */
		 int32 type      /**< In: Type of output, MGAU_MEAN
				    for mean or MGAU_VAR for
				    variance.  */
    );

/** RAH
 * Free the memory allocated by mgau_init.
 */
S3DECODER_EXPORT
void mgau_free (mgau_model_t *g /**< In: The model (set of mixture Gaussians) to free */
    );


/** 
 * Reload the means of an existing model. This is particularly useful for speaker adaptation. 
 * NOTE(review): the meaning of the int32 return value is not documented in this header --
 * confirm against the implementation.
 */
S3DECODER_EXPORT
int32 mgau_mean_reload(mgau_model_t *g,  /**< In/Out: The model whose means will be reset */
		       const char* mean_file_name /**< In: The mean file */
    );

#if 0
{ /* Stop indent from complaining */
#endif
#ifdef __cplusplus
}
#endif


#endif