This file is indexed.

/usr/include/clustalo/squid/msa.h is in libclustalo-dev 1.2.1-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
/*****************************************************************
 * SQUID - a library of functions for biological sequence analysis
 * Copyright (C) 1992-2002 Washington University School of Medicine
 * 
 *     This source code is freely distributed under the terms of the
 *     GNU General Public License. See the files COPYRIGHT and LICENSE
 *     for details.
 *****************************************************************/

#ifndef SQUID_MSA_INCLUDED
#define SQUID_MSA_INCLUDED

/* msa.h
 * SRE, Mon May 17 10:24:30 1999
 * 
 * Header file for SQUID's multiple sequence alignment 
 * manipulation code.
 * 
 * RCS $Id: msa.h 291 2014-02-27 18:20:54Z fabian $ (Original squid RCS Id: msa.h,v 1.12 2002/10/12 04:40:35 eddy Exp)
 */

#include <stdio.h>		/* FILE support */
#include "gki.h"		/* hash table support */
#include "ssi.h"		/* sequence file index support */
#include "squid.h"		/* need SQINFO */

/****************************************************
 * Obsolete alignment information, AINFO
 * Superseded by MSA structure further below; but we
 * need AINFO for the near future for backwards
 * compatibility.
 ****************************************************/
/* Structure: aliinfo_s
 * 
 * Purpose:   Optional information returned from an alignment file.
 * 
 *            flags: always used. Flags for which info is valid/alloced.
 *       
 *            alen: mandatory. Alignments are always flushed right
 *                  with gaps so that all aseqs are the same length, alen.
 *                  Available for all alignment formats.
 *
 *            nseq: mandatory. Aligned seqs are indexed 0..nseq-1. 
 *                  
 *            wgt:  0..nseq-1 vector of sequence weights. Mandatory.
 *                  If not explicitly set, weights are initialized to 1.0.
 *
 *            cs:   0..alen-1, just like the alignment. Contains single-letter
 *                  secondary structure codes for consensus structure; "<>^+"
 *                  for RNA, "EHL." for protein. May be NULL if unavailable
 *                  from seqfile. Only available for SELEX format files.
 *                  
 *            rf:   0..alen-1, just like the alignment. rf is an arbitrary string
 *                  of characters, used for annotating columns. Blanks are
 *                  interpreted as non-canonical columns and anything else is
 *                  considered canonical. Only available from SELEX files.
 *                  
 *            sqinfo: mandatory. Array of 0..nseq-1 
 *                  per-sequence information structures, carrying
 *                  name, id, accession, coords.
 *                  
 */
/* AINFO: legacy per-alignment annotation record. Member order and types
 * are part of the binary layout and must not be rearranged.
 */
typedef struct aliinfo_s {
  /* Core alignment bookkeeping. */
  int    flags;                 /* bitmask of AINFO_* bits: which info is valid */
  int    alen;                  /* number of alignment columns                  */
  int    nseq;                  /* number of sequences in the alignment         */
  float *wgt;                   /* per-sequence weights, [0..nseq-1]            */
  char  *cs;                    /* consensus secondary structure string         */
  char  *rf;                    /* reference coordinate system                  */
  struct seqinfo_s *sqinfo;     /* per-sequence name, id, coord info            */

  /* Pfam/HMMER pick-ups. */
  char  *name;                  /* name of alignment                            */
  char  *desc;                  /* description of alignment                     */
  char  *acc;                   /* accession of alignment                       */
  char  *au;                    /* "author" information                         */
  float  tc1;                   /* trusted score cutoff, per-seq                */
  float  tc2;                   /* trusted score cutoff, per-domain             */
  float  nc1;                   /* noise score cutoff, per-seq                  */
  float  nc2;                   /* noise score cutoff, per-domain               */
  float  ga1;                   /* gathering cutoff (first of the pair)         */
  float  ga2;                   /* gathering cutoff (second of the pair)        */
} AINFO;

/* Bit values for AINFO.flags.
 * NOTE(review): presumably each bit marks the matching cutoff pair
 * (tc1/tc2, nc1/nc2, ga1/ga2) as valid -- confirm against the parsers.
 */
#define AINFO_TC      (1 << 0)
#define AINFO_NC      (1 << 1)
#define AINFO_GA      (1 << 2)

/*****************************************************************
 * MSA  
 * SRE, Sun Jun 27 15:03:35 1999 [TW 723 over Greenland]
 * 
 * Defines the new data structure and API for multiple
 * sequence alignment i/o.
 *****************************************************************/

/* The following constants define the Pfam/Rfam cutoff set we'll propagate
 * from msa's into HMMER and Infernal models.
 */
/* Slot numbers for the cutoff set; MSA_MAXCUTOFFS is the slot count and
 * sizes the cutoff[] and cutoff_is_set[] arrays in the MSA structure.
 * TC = trusted, GA = gathering, NC = noise cutoff.
 * NOTE(review): by analogy with AINFO's tc1/tc2 and nc1/nc2 fields, the
 * 1/2 suffixes presumably mean per-sequence / per-domain -- confirm.
 */
#define MSA_CUTOFF_TC1 0	/* trusted cutoff, first of pair    */
#define MSA_CUTOFF_TC2 1	/* trusted cutoff, second of pair   */
#define MSA_CUTOFF_GA1 2	/* gathering cutoff, first of pair  */
#define MSA_CUTOFF_GA2 3	/* gathering cutoff, second of pair */
#define MSA_CUTOFF_NC1 4	/* noise cutoff, first of pair      */
#define MSA_CUTOFF_NC2 5	/* noise cutoff, second of pair     */
#define MSA_MAXCUTOFFS 6	/* number of cutoff slots           */

/* Structure: MSA
 * SRE, Tue May 18 11:33:08 1999
 * 
 * Our object for a multiple sequence alignment.
 */
typedef struct msa_struct {
  /* Mandatory information associated with the alignment.
   */
  char **aseq;                  /* the alignment itself, [0..nseq-1][0..alen-1] */
  char **sqname;                /* names of sequences, [0..nseq-1][]; one string
                                 * per sequence (not alen columns wide)         */
  float *wgt;	                /* sequence weights [0..nseq-1]                 */
  int    alen;			/* length of alignment (columns)                */
  int    nseq;			/* number of seqs in alignment                  */

  /* Optional information that we understand, and might have.
   */
  int    flags;			/* flags for what optional info is valid
                                 * (bit values such as MSA_SET_WGT)         */
  int    type;			/* kOtherSeq, kRNA/hmmNUCLEIC, or kAmino/hmmAMINO */
  char  *name;             	/* name of alignment, or NULL */
  char  *desc;	                /* description of alignment, or NULL */
  char  *acc;	                /* accession of alignment, or NULL */
  char  *au;		        /* "author" information, or NULL */
  char  *ss_cons;		/* consensus secondary structure string, or NULL */
  char  *sa_cons;               /* consensus surface accessibility string, or NULL */
  char  *rf;                    /* reference coordinate system, or NULL */
  char **sqacc;			/* accession numbers for individual sequences */
  char **sqdesc;		/* description lines for individual sequences */
  char **ss;                    /* per-seq secondary structure annotation, or NULL */
  char **sa;                    /* per-seq surface accessibility annotation, or NULL */
  /* The two arrays below are sized by MSA_MAXCUTOFFS; presumably indexed
   * by the MSA_CUTOFF_* constants -- confirm in msa.c.
   */
  float  cutoff[MSA_MAXCUTOFFS];       /* NC, TC, GA cutoffs propagated to Pfam/Rfam */
  int    cutoff_is_set[MSA_MAXCUTOFFS];/* TRUE if a cutoff is set; else FALSE */

  /* Optional information that we don't understand.
   * That is, we know what type of information it is, but it's
   * either (interpreted as) free-text comment, or it's Stockholm 
   * markup with unfamiliar tags.
   */
  char  **comment;              /* free text comments, or NULL      */
  int     ncomment;		/* number of comment lines          */
  int     alloc_ncomment;	/* number of comment lines alloc'ed */

  char  **gf_tag;               /* markup tags for unparsed #=GF lines  */
  char  **gf;                   /* annotations for unparsed #=GF lines  */
  int     ngf;			/* number of unparsed #=GF lines        */
  int     alloc_ngf;		/* number of gf lines alloc'ed          */

  char  **gs_tag;               /* markup tags for unparsed #=GS lines     */
  char ***gs;                   /* [0..ngs-1][0..nseq-1][free text] markup */
  GKI    *gs_idx;               /* hash of #=GS tag types                  */
  int     ngs;                  /* number of #=GS tag types                */
  
  char  **gc_tag;               /* markup tags for unparsed #=GC lines  */
  char  **gc;                   /* [0..ngc-1][0..alen-1] markup         */
  GKI    *gc_idx;               /* hash of #=GC tag types               */
  int     ngc;                  /* number of #=GC tag types             */

  char  **gr_tag;               /* markup tags for unparsed #=GR lines     */
  char ***gr;                   /* [0..ngr-1][0..nseq-1][0..alen-1] markup */
  GKI    *gr_idx;               /* hash of #=GR tag types                  */
  int     ngr;			/* number of #=GR tag types                */

  /* Stuff we need for our own maintenance of the data structure
   */
  GKI   *index;		        /* name ->seqidx hash table */
  int    nseqalloc;		/* number of seqs currently allocated for   */
  int    nseqlump;		/* lump size for dynamic expansions of nseq */
  int   *sqlen;                 /* individual sequence lengths during parsing */
  int   *sslen;                 /* individual ss lengths during parsing       */
  int   *salen;                 /* individual sa lengths during parsing       */
  int    lastidx;		/* last index we saw; use for guessing next   */
} MSA;
/* Bit value for MSA.flags. */
#define MSA_SET_WGT     (1 << 0)  /* track whether wgts were set, or left at default 1.0 */

                                     
/* Structure: MSAFILE
 * SRE, Tue May 18 11:36:54 1999
 * 
 * Defines an alignment file that's open for reading.
 */
typedef struct msafile_struct {
  /* Underlying stream and position bookkeeping. */
  FILE *f;                      /* open file pointer                         */
  char *fname;			/* name of file. used for diagnostic output  */
  int   linenumber;		/* what line are we on in the file           */

  /* Line buffer; buflen tracks the current allocation of buf. */
  char *buf;			/* buffer for line input w/ sre_fgets() */
  int   buflen;			/* current allocated length for buf     */

  SSIFILE *ssi;		        /* open SSI index file; or NULL, if none. */

  /* do_gzip and do_stdin record how f was obtained, which determines how
   * it has to be released: pclose() for a gzip pipe, nothing for stdin.
   */
  int   do_gzip;		/* TRUE if f is a pipe from gzip -dc (need pclose(f))  */
  int   do_stdin;		/* TRUE if f is stdin (don't close f, not our problem) */
  int   format;			/* format of alignment file we're reading
                                 * (presumably an MSAFILE_* code -- confirm) */
} MSAFILE;


/* Alignment file formats.
 * Must coexist with sqio.c/squid.h unaligned file format codes.
 * Rules:
 *     - 0 is an unknown/unassigned format 
 *     - <100 reserved for unaligned formats
 *     - >100 reserved for aligned formats
 */
#define MSAFILE_UNKNOWN   0	/* unknown format                          */
#define MSAFILE_STOCKHOLM 101	/* Pfam/HMMER's Stockholm format           */
#define MSAFILE_SELEX	  102	/* Obsolete(!): old HMMER/SELEX format     */
#define MSAFILE_MSF	  103	/* GCG MSF format                          */
#define MSAFILE_CLUSTAL	  104	/* Clustal V/W format                      */
#define MSAFILE_A2M	  105	/* aligned FASTA (A2M is UCSC terminology) */
#define MSAFILE_PHYLIP    106	/* Felsenstein's PHYLIP format             */
#define MSAFILE_EPS       107	/* Encapsulated PostScript (output only)   */
/* MSAFILE_VIENNA exists only when CLUSTALO is defined at include time. */
#ifdef CLUSTALO
#define MSAFILE_VIENNA    108	/* Vienna: concatenated fasta   */
#endif

/* Nonzero when fmt is strictly greater than 100, i.e. inside the range
 * reserved for aligned formats; MSAFILE_UNKNOWN (0) and the value 100
 * itself both yield 0. The argument is evaluated exactly once.
 */
#define IsAlignmentFormat(fmt)  ((fmt) > 100)


/* from msa.c
 */
/* Alignment file lifecycle: open an MSAFILE, read MSA objects from it,
 * close it, and free an MSA.
 * NOTE(review): `format` is presumably one of the MSAFILE_* codes and
 * `env` the name of an environment variable used to locate the file --
 * confirm against msa.c.
 */
extern MSAFILE *MSAFileOpen(char *filename, int format, char *env);
extern MSA     *MSAFileRead(MSAFILE *afp);
extern void     MSAFileClose(MSAFILE *afp);
extern void     MSAFree(MSA *msa);
/* MSAFileWrite() has two incompatible prototypes selected by CLUSTALO:
 * the CLUSTALO variant takes three extra ints (iWrap, bResno, iSeqtype).
 * Code including this header must define (or not define) CLUSTALO the
 * same way the library was compiled; otherwise calls are made with the
 * wrong argument list.
 */
#ifdef CLUSTALO
extern void     MSAFileWrite(FILE *fp, MSA *msa, int outfmt, int do_oneline, int iWrap, int bResno, int iSeqtype);
#else
extern void     MSAFileWrite(FILE *fp, MSA *msa, int outfmt, int do_oneline);
#endif

/* Repositioning an open alignment file: back to the start, or to a
 * record selected by key or by index. All three return int.
 * NOTE(review): the meaning of the return value is not visible in this
 * header -- check msa.c before testing it.
 */
extern int MSAFileRewind(MSAFILE *afp);
extern int MSAFilePositionByKey(MSAFILE *afp, char *key);
extern int MSAFilePositionByIndex(MSAFILE *afp, int idx);

/* MSA construction and the setters/appenders that fill it in. The
 * AddGF/AddGS/AppendGC/AppendGR names match the #=GF, #=GS, #=GC and
 * #=GR markup members of the MSA structure.
 * NOTE(review): whether the char* arguments are copied or retained is
 * not visible here; they are not const-qualified in these prototypes.
 */
extern int   MSAFileFormat(MSAFILE *afp);
extern MSA  *MSAAlloc(int nseq, int alen);
extern void  MSAExpand(MSA *msa);
extern char *MSAFileGetLine(MSAFILE *afp);
extern void  MSASetSeqAccession(MSA *msa, int seqidx, char *acc);
extern void  MSASetSeqDescription(MSA *msa, int seqidx, char *desc);
extern void  MSAAddComment(MSA *msa, char *s);
extern void  MSAAddGF(MSA *msa, char *tag, char *value);
extern void  MSAAddGS(MSA *msa, char *tag, int seqidx, char *value);
extern void  MSAAppendGC(MSA *msa, char *tag, char *value);
extern char *MSAGetGC(MSA *msa, char *tag);
extern void  MSAAppendGR(MSA *msa, char *tag, int seqidx, char *value);
extern void  MSAVerifyParse(MSA *msa);
extern int   MSAGetSeqidx(MSA *msa, char *name, int guess);

/* Bridge from the obsolete AINFO representation to an MSA. */
extern MSA  *MSAFromAINFO(char **aseq, AINFO *ainfo);   

/* Alignment reshaping. The first three take only the MSA (plus a
 * `useme` int array) and return void; MSASmallerAlignment() hands its
 * result back through ret_new.
 * NOTE(review): what `useme` is indexed by (columns vs. sequences) is
 * not visible in this header -- confirm in msa.c.
 */
extern void  MSAMingap(MSA *msa);
extern void  MSANogap(MSA *msa);
extern void  MSAShorterAlignment(MSA *msa, int *useme);
extern void  MSASmallerAlignment(MSA *msa, int *useme, MSA **ret_new);

/* Per-sequence annotation getters, selected by sequence index idx.
 * NOTE(review): ownership of the returned char* and behaviour when the
 * annotation is absent are not visible here -- confirm before freeing
 * or dereferencing.
 */
extern char *MSAGetSeqAccession(MSA *msa, int idx);
extern char *MSAGetSeqDescription(MSA *msa, int idx);
extern char *MSAGetSeqSS(MSA *msa, int idx);
extern char *MSAGetSeqSA(MSA *msa, int idx);

extern float MSAAverageSequenceLength(MSA *msa);

/* from a2m.c
 */
/* A2M reader/writer. WriteA2M() gains an int iWrap parameter when
 * CLUSTALO is defined, so includers must use the same CLUSTALO setting
 * as the compiled library. (An earlier CLUSTALO variant took
 * `int vienna` in place of `int iWrap`.)
 */
extern MSA  *ReadA2M(MSAFILE *afp);
#ifdef CLUSTALO
extern void  WriteA2M(FILE *fp, MSA *msa, int iWrap);
#else
extern void  WriteA2M(FILE *fp, MSA *msa);
#endif
/* from clustal.c
 *
 * WriteClustal() likewise has a CLUSTALO-only signature with three
 * extra ints (iWrap, bResno, iSeqType).
 */
extern MSA  *ReadClustal(MSAFILE *afp);
#ifdef CLUSTALO
extern void  WriteClustal(FILE *fp, MSA *msa, int iWrap, int bResno, int iSeqType);
#else
extern void  WriteClustal(FILE *fp, MSA *msa);
#endif

/* from eps.c
 */
/* EPS has a writer only; no reader is declared for it. */
extern void EPSWriteSmallMSA(FILE *fp, MSA *msa);

/* The remaining per-format entry points share one shape: Read*() takes
 * an open MSAFILE and returns an MSA pointer, Write*() takes a FILE and
 * an MSA and returns void.
 * NOTE(review): end-of-file/error signalling of the readers and
 * ownership of the returned MSA are not visible in this header --
 * presumably the caller releases it with MSAFree(); confirm.
 */

/* from msf.c
 */
extern MSA  *ReadMSF(MSAFILE *afp);
extern void  WriteMSF(FILE *fp, MSA *msa);

/* from phylip.c
 */
extern MSA  *ReadPhylip(MSAFILE *afp);
extern void  WritePhylip(FILE *fp, MSA *msa);

/* from selex.c
 */
extern MSA  *ReadSELEX(MSAFILE *afp);
extern void  WriteSELEX(FILE *fp, MSA *msa);
extern void  WriteSELEXOneBlock(FILE *fp, MSA *msa);

/* from stockholm.c
 */
extern MSA  *ReadStockholm(MSAFILE *afp);
extern void  WriteStockholm(FILE *fp, MSA *msa);
extern void  WriteStockholmOneBlock(FILE *fp, MSA *msa);

#endif /*SQUID_MSA_INCLUDED*/