This file is indexed.

/usr/include/ncbi/seed.h is in libncbi6-dev 6.1.20120620-8.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
/* $Id: seed.h,v 6.22 2008/01/09 19:04:04 merezhuk Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*/
 
/*****************************************************************************
 
File name: seed.h
Author: Alejandro Schaffer
 
Contents: header file for PHI-BLAST and pseed3.

$Revision: 6.22 $

$Log: seed.h,v $
Revision 6.22  2008/01/09 19:04:04  merezhuk
add extra hitArraySz parameter and provide error message if number of hits exceed it.

Revision 6.21  2007/03/13 20:41:20  madden
  - In the prototype for seedEngineCore, the searchSpEff parameter
    should be a Nlm_FloatHi, not an Nlm_FloatLo
  [from Mike Gertz]

Revision 6.20  2005/07/28 14:57:10  coulouri
remove dead code

Revision 6.19  2004/04/01 13:43:08  lavr
Spell "occurred", "occurrence", and "occurring"

Revision 6.18  2002/08/28 13:37:09  madden
Lower MAX_HIT to 20000 again (for LINUX)

Revision 6.17  2002/08/09 17:32:09  madden
Raise MAX_HIT to 20000

Revision 6.16  2000/08/01 17:21:13  shavirin
Added protection for using C++ compiler.

Revision 6.15  1999/10/18 19:54:43  shavirin
Removed unused definition.

Revision 6.14  1999/10/05 19:36:54  shavirin
Changed to use functions from blast.c: BlastGetDbChunk and BlastTickProc.
Removed unused functions.

Revision 6.13  1999/09/22 17:50:18  shavirin
Now functions will collect messages in ValNodePtr before printing out.
 
 
*****************************************************************************/

#if !defined(SEED__H)
#define SEED__H


#ifdef __cplusplus
extern "C" {
#endif

/* Sizes of alphabets, buffers and bit-packed words used by the
   structures declared later in this header. */
/*dimension of the per-letter arrays (charMultiple, standardProb, pchars)
  in seedSearchItems*/
#define ALPHABET_SIZE 25
#define DNA_ALPHABET_SIZE 4
/*the revision log at the top of this file records this value being raised to
  20000 and later "lowered to 20000 again (for LINUX)"*/
#define MAX_HIT 20000
/*dimension of pat_space in seedSearchItems*/
#define PATTERN_SPACE_SIZE 1000
/*dimension of name_space in seedSearchItems and of match_maskL in
  patternSearchItems*/
#define BUF_SIZE 100
#define PATTERN_BUF_SIZE 2000
#define PATTERN_NAME_SIZE 200
#define SeqIdBufferSize   64 /*buffer size for SeqIdWrite*/
/*number of pattern bits kept in one word; also used to derive MaxP*/
#define BITS_PACKED_PER_WORD 30
/*the single bit just above the BITS_PACKED_PER_WORD packed bits.
  NOTE(review): this shift and the one in allone are done on a plain int
  literal, so they need an int wider than 16 bits; the header also has a
  WIN16 branch -- confirm the int width used for that build*/
#define OVERFLOW1  (1 << BITS_PACKED_PER_WORD)
/*mask with the low ALPHABET_SIZE bits set*/
#define allone  ((1 << ALPHABET_SIZE) - 1)

#define SEED_INFINITY 1000000 /*large score for array sentinel*/

#define MAX_EVALUE 1000 /*maximum e-value allowed as threshold*/


/*The following 4 flags define the options for running the program.
  NOTE(review): presumably these are the values carried by the program_flag
  parameters and returned by convertProgramToFlag -- confirm in seed.c*/
#define SEED_FLAG 1
#define PATTERN_FLAG 2
#define PAT_SEED_FLAG 3
#define PAT_MATCH_FLAG 4

/*MaxW is the second dimension of bitPatternByLetter in patternSearchItems*/
#define MaxW   11
/*MaxP is the dimension of inputPatternMasked in patternSearchItems*/
#define MaxP   (BITS_PACKED_PER_WORD * MaxW) /*threshold pattern length*/
/*MAX_WORDS_IN_PATTERN sizes the per-word arrays of patternSearchItems
  (SLL, DNAprefixSLL, DNAsuffixSLL, numPlacesInWord, spacing);
  a smaller value is used when WIN16 is defined*/
#ifdef WIN16
#define MAX_WORDS_IN_PATTERN 16
#else
#define MAX_WORDS_IN_PATTERN 100
#endif
/*dimension of the tables indexed by character code*/
#define ASCII_SIZE 256

/*NOTE(review): presumably the default values for paramC and paramLambda of
  seedSearchItems -- confirm where they are assigned*/
#define DEFAULT_PARAM_C 0.6
#define DEFAULT_PARAM_LAMBDA 0.270

/*Classification of a pattern by the number of words it needs.
  NOTE(review): presumably the values stored in
  patternSearchItems.flagPatternLength -- confirm*/
#define ONE_WORD_PATTERN  0
#define MULTI_WORD_PATTERN 1
#define PATTERN_TOO_LONG  2

#define PAT_PROB_THRESH  0.002  /*upper threshold for pattern probabilities*/
#define EXPECT_MATCH_THRESH 20000 /*upper threshold for number of occurrences*/
#define WILDCARD_THRESH  30 /*threshold for product of variable-length wildcards*/

/*band amounts for banded alignment: lower and upper limits of the band;
  BAND_LOW is parenthesized because it is negative*/
#define BAND_LOW (-5)
#define BAND_HIGH 5

/*Limit on length of DNA sequence*/
#define MAXDNA 200000

/*   The following integer codes are used for trace back in align.
     Each node must implicitly store three pointers to decide where a CC, DD,
     or e value comes from. For example, the CC value can come from a sub, del
     or ins edge. A DD can come from the extension of a gap or the initiation
     of a new gap.  So three flags are needed.
     One flag has three states; the other two have two states each.
     The flags are packed into one integer. Let the flags be s1, s2, s3.
     The integer state is then s3*20+s2*10+s1, where s1={0,1,2} and s2=s3={0,1}.
     The numbers below come from this packing: by value, DELETE_CODE is the
     weight of s3, INSERT_CODE is the weight of s2, and DIAGONAL_DELETE and
     DIAGONAL_INSERT are the two non-zero states of s1. */

#define DELETE_CODE 20
#define INSERT_CODE 10
#define DIAGONAL_DELETE 2
#define DIAGONAL_INSERT 1

/* #define BLAST_DB_CHUNK_SIZE 500
   #define BLAST_NTICKS 50 */

/* One hit, kept as a node of a singly linked list (see next).
   hit_node is the struct type; hit_ptr is a pointer to it. */
typedef struct hit_str {
    Int4 score;   /*NOTE(review): score of this hit; exact definition is
                    not visible in this header -- confirm*/
    Int4 l_score; /*NOTE(review): a second score value; its meaning is not
                    visible in this header -- confirm*/
    Nlm_FloatHi mul; /*multiplier for scores of characters*/
    Int4 hit_pos, hit_end; /*NOTE(review): presumably start and end position
                             of the hit -- confirm which sequence they index*/
    Int4 bi, bj, ei, ej; /*beginning and end of pattern occurrence
                           in query sequence
                           and database sequence, respectively*/
    struct hit_str *next;  /*next hit in linked list*/
} *hit_ptr, hit_node;

/* Node of a singly linked list of sequences, each carrying its own list of
   hits; seedResultItems.listOfMatchingSequences points at such a list.
   store_node is the struct type; store_ptr is a pointer to it. */
typedef struct store_str {
    Int4 l_score;     /*NOTE(review): score stored for this sequence; exact
                        definition not visible in this header -- confirm*/
    Uint1Ptr seq;     /*sequence data for this entry*/
    Char *header;     /*NOTE(review): presumably the sequence's header/
                        description text -- confirm ownership*/
    Int4 seqno;       /*sequence number*/
    hit_ptr hit_list; /*linked list of hits for this sequence*/
    struct store_str *next; /*next entry in the list*/
} store_node, *store_ptr;

/* A query sequence held as three pieces, each a pointer plus a length
   (lseq/llen, rseq/rlen, sseq/slen). seedParallelItems calls this a
   "multi-piece representation of query sequence".
   NOTE(review): presumably l = part left of the pattern occurrence,
   r = part right of it, s = the seed/pattern part -- confirm against
   split_target_seq. */
typedef struct qseq {
     Uint1Ptr lseq, rseq, sseq;
     Int4 llen, rlen, slen;
} query_seq, *qseq_ptr;

/* Search-wide parameters and tables: statistical parameters, per-letter
   tables, and the name and text of the current pattern. */
typedef struct seedSearchItems {

    Nlm_FloatHi  charMultiple[ALPHABET_SIZE]; /*one multiplier per letter;
                          NOTE(review): presumably related to hit_str.mul
                          -- confirm*/
    Nlm_FloatHi  paramC; /*used in e-value computation*/
    Nlm_FloatHi  paramLambda; /*used in e-value computation*/
    Nlm_FloatHi  paramK; /*used in the bit score computation*/
    Int4         cutoffScore; /*lower bound for what is a hit*/
    Nlm_FloatHi  standardProb[ALPHABET_SIZE]; /*probability of each letter*/
    Char         order[ASCII_SIZE]; /*table indexed by character code;
                          NOTE(review): presumably filled by init_order
                          -- confirm contents*/
    Char         pchars[ALPHABET_SIZE+1]; /*one Char per letter plus one
                          extra slot; NOTE(review): presumably a
                          NUL-terminated list of the alphabet letters
                          -- confirm*/
    Char         name_space[BUF_SIZE];  /*name of a pattern*/
    Char         pat_space[PATTERN_SPACE_SIZE];  /*string description
                                                   of pattern*/
} seedSearchItems;

/* Results of a search: the head of a linked list of store_node entries. */
typedef struct seedResultItems {

    store_ptr    listOfMatchingSequences; /*head of the list of matching
                                            sequences*/
} seedResultItems;

/* Bit-mask representation of one input pattern, with separate tables for
   short (one-word) and long (multi-word) patterns and for protein and DNA. */
typedef struct patternSearchItems {
  
   Int4   numWords;  /*number of words needed to hold bit representation
                       of pattern*/
   Int4   match_mask;   /*bit mask representation of input pattern
                          for patterns that fit in a word*/
   /*NOTE(review): match_maskL is sized with BUF_SIZE, whereas the other
     per-word arrays below use MAX_WORDS_IN_PATTERN; the two values are equal
     (100) unless WIN16 is defined -- confirm this is intended*/
   Int4   match_maskL[BUF_SIZE]; /*bit mask representation of input pattern
                                   for long patterns*/
  /*which positions can a character occur in for long patterns*/
   Int4   bitPatternByLetter[ASCII_SIZE][MaxW]; 
   Int4   *whichPositionPtr; /*used to pass a piece a row of the arrays*/
   Uint4   *DNAwhichPrefixPosPtr, *DNAwhichSuffixPosPtr; /*similar for DNA patterns*/
  /*which positions can a character occur in for short patterns*/
   Int4   whichPositionsByCharacter[ASCII_SIZE];
   Uint4   DNAwhichPrefixPositions[ASCII_SIZE]; /*for DNA sequence where
                             prefix of DNA 4-mer matches pattern*/
   Uint4   DNAwhichSuffixPositions[ASCII_SIZE]; /*similar to above for suffixes*/
    /*for each letter in the alphabet and each word in the masked
      pattern representation, holds a bit pattern saying for which
      positions the letter will match*/
   Int4   SLL[MAX_WORDS_IN_PATTERN][ASCII_SIZE];  /*similar to
                  whichPositionsByCharacter for many-word patterns*/
   Uint4   DNAprefixSLL[MAX_WORDS_IN_PATTERN][ASCII_SIZE];
  /*similar to DNAwhichPrefixPositions for many word patterns*/
   Uint4   DNAsuffixSLL[MAX_WORDS_IN_PATTERN][ASCII_SIZE];
  /*similar to DNAwhichSuffixPositions for many word patterns*/
   Char   flagPatternLength; /*indicates if pattern fits in 1 word,
                               some words, or is too long*/
   Nlm_FloatHi  patternProbability;  /*probability of this letter
                                        combination*/
   Int4   whichMostSpecific; /*which word in an extra long pattern
                               has the lowest probability of a match*/
   Int4   numPlacesInWord[MAX_WORDS_IN_PATTERN]; /*when pattern has more than 7
             words keep track of how many places of pattern in each word of 
             the  representation; was called lening */
   Int4   spacing[MAX_WORDS_IN_PATTERN]; /*spaces until next word due to
                                          wildcard*/
   Int4   inputPatternMasked[MaxP]; /*one entry per pattern place, up to the
                                      MaxP threshold pattern length*/
   Int4   highestPlace; /*number of places in pattern representation
                          as computed in input_pattern; was called num*/
  Int4   minPatternMatchLength; /*minimum length of string to match this pattern*/
  Int4   wildcardProduct; /*product of wildcard lengths*/
} patternSearchItems;

/* Scoring parameters for alignment: the score matrix and the gap costs. */
typedef struct alignSearchItems {
   Int4** matrix;   /*score matrix*/
   Int4   gapOpen;  /*penalty to open a gap*/
   Int4   gapExtend; /*penalty to extend a gap one position*/
   Int4   gapCost;   /*gapOpen + gapExtend*/
} alignSearchItems;

/* Per-thread bundle for a search: handles to the database, query, pattern
   and search settings, plus the results and counters of this thread. */
typedef struct seedParallelItems {
    ReadDBFILEPtr rdpt; /*pointer to database*/
    qseq_ptr query_seq; /*multi-piece representation of query sequence*/
    Int4 lenPatMatch;  /*number of characters in the pattern occurrence*/
    GapAlignBlkPtr gap_align; /*structure for description of the gapped
                                alignment*/
    Boolean is_dna;  /*is this DNA or protein data*/
    patternSearchItems * patternSearch; /*holds items about the pattern*/
    seedResultItems * seedResults; /*holds the results for this thread*/
     seedSearchItems * seedSearch; /*holds preprocessing info about the
                                     search*/
    Int4 totalOccurrences;  /*total number of pattern occurrences 
                              found in this thread*/
    Int4 matchIndex; /* total number of matches with reportable score in
                        this thread; every match is an occurrence, but
                        not every occurrence is a match */
    /* threadInfoItems *threadInfo; */
    BlastThrInfoPtr thr_info; /*BLAST thread-info handle; replaces the
                                commented-out threadInfo field above*/
} seedParallelItems;


/* Takes a ValNode list of messages and an output stream.
   NOTE(review): presumably prints the text messages collected in info_vnp
   to fd (the revision log says messages are collected in a ValNodePtr
   before printing) -- confirm whether the list is freed. */
void PGPOutTextMessages(ValNodePtr info_vnp, FILE *fd);

/* Takes a string and returns a Char pointer.
   NOTE(review): presumably returns a newly allocated copy of s that the
   caller must free -- confirm in the implementation. */
Char * LIBCALL strsave PROTO((Char *s));
/* Main entry point of the seed search as declared here: takes the BLAST
   search block and options, the query (filtered and unfiltered), the
   database name, the pattern file (name and open FILE), the program flag,
   DNA/protein and reverse-database switches, the seedSearchItems, two
   thresholds/sizes (posEThresh, searchSpEff), position-specific search
   items, an output SeqLoc pointer and a message list. Returns a ValNodePtr.
   searchSpEff is an Nlm_FloatHi (corrected from Nlm_FloatLo per the
   revision log).
   NOTE(review): presumably the returned list is what
   SeedPruneHitsFromSeedReturn accepts as seedReturn -- confirm. */
ValNodePtr LIBCALL  seedEngineCore PROTO((BlastSearchBlkPtr search, 
  BLAST_OptionsBlkPtr options, Uint1Ptr query, Uint1Ptr unfilter_query,
  CharPtr database, CharPtr patfile, Int4 program_flag,  FILE * patfp, 
  Boolean is_dna, Boolean reverseDb, seedSearchItems *seedSearch,
   Nlm_FloatHi posEThresh, Nlm_FloatHi searchSpEff,
   posSearchItems * posSearch, SeqLocPtr *seed_seq_loc, Boolean showDiagnostics, ValNodePtr PNTR info_vnp));
/* Takes a score matrix, the program flag, the DNA/protein switch and the
   seedSearchItems to operate on; returns nothing.
   NOTE(review): presumably fills seedSearch->order (and related tables)
   -- confirm in the implementation. */
void LIBCALL init_order PROTO((Int4 **matrix, Int4 program_flag, Boolean is_dna,
   seedSearchItems *seedSearch));

/* Takes a program name string and a Boolean out-parameter; returns an Int4.
   NOTE(review): presumably maps the name to one of SEED_FLAG, PATTERN_FLAG,
   PAT_SEED_FLAG or PAT_MATCH_FLAG and sets *is_dna -- confirm, including
   the result for an unknown name. */
Int4 LIBCALL convertProgramToFlag PROTO((Char * program, Boolean * is_dna));

/* Takes the seedSearchItems to operate on; returns nothing.
   NOTE(review): presumably initializes seedSearch->standardProb
   -- confirm in the implementation. */
void LIBCALL initProbs PROTO((seedSearchItems * seedSearch));

/* Takes an output hit array, a sequence with its length, the DNA/protein
   switch and the pattern description; returns an Int4.
   NOTE(review): presumably writes hit positions into hitArray and returns
   how many entries were written; no capacity is passed, so the caller must
   size hitArray adequately -- confirm. */
Int4 LIBCALL find_hits PROTO((Int4 *hitArray, Uint1Ptr seq, Int4 len, Boolean is_dna,  patternSearchItems * patternSearch));

/* Takes the pattern text, the DNA/protein switch, the patternSearchItems
   and seedSearchItems to operate on, and a ValNode out-parameter for errors;
   returns an Int4.
   NOTE(review): presumably fills patternSearch from pattern; the meaning of
   the return value is not visible here -- confirm. */
Int4 LIBCALL init_pattern PROTO((Uint1 *pattern, Boolean is_dna, patternSearchItems * patternSearch,  seedSearchItems *seedSearch, ValNodePtr * error_return));


/* Takes a query and a database sequence with their lengths, an alignment
   script buffer, a trace-back matrix, the gapped-alignment block, two
   out-parameters (useful_score, multiple) and the pattern/search items;
   returns an Int4.
   NOTE(review): presumably aligns the pattern occurrence in the two
   sequences and returns a score -- confirm. */
Int4 LIBCALL align_of_pattern PROTO((Uint1 *querySeq, Uint1 *dbSeq, Int4 lenQuerySeq,  Int4 lenDbSeq, Int4 *alignScript,  Int4 **tback,  GapAlignBlkPtr gap_align, Int4 *useful_score,  Nlm_FloatHi *multiple, patternSearchItems *patternSearch, seedSearchItems * seedSearch));

/* Takes a sequence, a begin and end position, the pattern description and
   a message list; returns nothing.
   NOTE(review): presumably appends a description of the pattern occurrence
   in seq[begin..end] to *info_vnp -- confirm whether end is inclusive. */
void LIBCALL pat_output PROTO((Uint1 *seq, Int4 begin, Int4 end, patternSearchItems *patternSearch, ValNodePtr PNTR info_vnp));

/* Takes a sequence, a seed position, the pattern length and the query
   length; returns a qseq_ptr (the three-piece query representation).
   NOTE(review): presumably allocates the returned structure; ownership
   and how it is freed are not visible here -- confirm. */
qseq_ptr LIBCALL split_target_seq PROTO((Uint1 *seq, Int4 seed, Int4 len_pat, Int4 len_query));

/* Takes the split query, the pattern length, a database sequence with its
   length, the gapped-alignment block, the DNA/protein switch, the
   pattern/search items and an Int4 out-parameter; returns a hit_ptr
   (head of a linked list of hit_node).
   NOTE(review): presumably *newOccurrences receives the number of pattern
   occurrences found in seq_db -- confirm, including who frees the list. */
hit_ptr LIBCALL get_hits PROTO((qseq_ptr qp, Int4 len_of_pat, 
		 Uint1Ptr seq_db, Int4 len_seq_db, GapAlignBlkPtr gap_align, 
		 Boolean is_dna, patternSearchItems * patternSearch,
                 seedSearchItems * seedSearch, Int4 * newOccurrences));
/* Takes a database handle, a pattern file name, the DNA/protein switch,
   the search and pattern items, an error out-parameter and a message list;
   returns nothing.
   NOTE(review): presumably searches the database for the patterns in the
   named file and reports through info_vnp -- confirm. */
void LIBCALL search_pat PROTO((ReadDBFILEPtr rdpt, Char *patternFileName, Boolean is_dna, seedSearchItems *seedSearch, patternSearchItems *patternSearch, ValNodePtr * error_return, ValNodePtr PNTR info_vnp));

/* Takes the database handle, a score-only switch, the query (whole and
   split), lengths, the gapped-alignment block, occurrence counts,
   the search/result/pattern items, e-value thresholds, the query id,
   position-specific search items, an Int4 out-parameter
   (totalBelowEThresh) and a message list; returns a SeqAlignPtr.
   NOTE(review): presumably builds the alignments for the stored matches
   and counts those under the threshold in *totalBelowEThresh -- confirm,
   including what is returned when score_only is set. */
SeqAlignPtr LIBCALL output_hits PROTO((ReadDBFILEPtr rdpt,
	    Boolean score_only, Uint1 *seq1, qseq_ptr qp, 
	    Int4 len, Nlm_FloatHi dbLength, GapAlignBlkPtr gap_align, Boolean is_dna,
            Int4 effectiveOccurrences, seedSearchItems *seedSearch, seedResultItems *seedResults, 
            patternSearchItems * patternSearch, Boolean reverse, 
            Int4 numOccurrences, Nlm_FloatHi eThresh,
            SeqIdPtr query_id, Nlm_FloatHi posEthresh, 
            posSearchItems *posSearch, Int4 numMatches, 
            Int4 *totalBelowEThresh, Boolean showDiagnostics,
            ValNodePtr PNTR info_vnp));

/* Takes an open pattern file, a name out-parameter, two hit arrays, two
   occurrence-count out-parameters, the program flag, the query (unfiltered
   and filtered) with its length, the DNA/protein switch, the pattern/search
   items, a diagnostics switch, an error out-parameter, a message list and
   hitArraySz; returns a Char pointer.
   Per the revision log, hitArraySz was added so that an error message is
   produced if the number of hits exceeds it.
   NOTE(review): presumably reads the next pattern from fp and returns its
   text, with NULL at end of file -- confirm. */
Char*  LIBCALL get_a_pat PROTO((FILE *fp, Char **name, Int4Ptr hitArray, Int4Ptr fullHitArray, 
   Int4 * numPatOccur, Int4 *numEffectiveOccurrences, Int4 program_flag, 
   Uint1Ptr unfilter_seq, Uint1Ptr seq, Int4 len, Boolean is_dna,
   patternSearchItems *patternSearch, seedSearchItems * seedSearch,
   Boolean showDiagnostics, ValNodePtr * error_return, 
   ValNodePtr PNTR info_vnp, Int4 hitArraySz));


/* Takes a sequence count and the seedResultItems to operate on; returns
   nothing.
   NOTE(review): presumably sorts seedResults->listOfMatchingSequences;
   the sort key and order are not visible here -- confirm. */
void LIBCALL quicksort_hits PROTO((Int4 no_of_seq, seedResultItems *seedResults));

/* Takes an e-value threshold, the database length, the seedSearchItems,
   the number of occurrences and the pattern probability; returns an Int4.
   NOTE(review): presumably returns the score cutoff that corresponds to
   eThresh -- confirm. */
Int4 LIBCALL eValueFit PROTO((Nlm_FloatHi eThresh, Nlm_FloatHi dbLength, 
        seedSearchItems *seedSearch, Int4 numOccurrences, 
        Nlm_FloatHi patternProbability));

/* Takes a hit list, a sequence number, the sequence data and the
   seedResultItems to operate on; returns nothing. The first three
   parameters correspond by name to fields of store_node.
   NOTE(review): presumably adds one store_node to
   seedResults->listOfMatchingSequences -- confirm whether seq is copied. */
void LIBCALL storeOneMatch PROTO((hit_ptr hit_list, Int4 seqno, Uint1Ptr seq, 
	      seedResultItems *seedResults));

/* Takes the seedResultItems to operate on; returns nothing.
   NOTE(review): presumably frees the stored matches and their hit lists;
   whether seedResults itself is freed is not visible here -- confirm. */
void LIBCALL seed_free_all PROTO((seedResultItems *seedResults));

/* Takes a ValNode list and a number of descriptions; returns a ValNodePtr.
   NOTE(review): presumably trims seedReturn to at most
   number_of_descriptions hits and returns the resulting list -- confirm
   whether the input list is modified in place. */
ValNodePtr  LIBCALL SeedPruneHitsFromSeedReturn PROTO((ValNodePtr seedReturn, Int4 number_of_descriptions));


#ifdef __cplusplus
}
#endif

#endif /*define SEED__H*/