This file is indexed.

/usr/include/kmer/seqStore.H is in libmeryl-dev 0~20150903+r2013-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#ifndef SEQSTORE_H
#define SEQSTORE_H

#include "util++.H"
#include "seqCache.H"

//  A binary fasta file.
//
//  HEADER
//    magic number
//    number of sequences
//    optional - alphabet size
//    optional - alphabet map (0x00 -> 'a', etc)
//    position of index start
//    position of data start
//  DATA
//  INDEX
//    position of sequence start in DATA
//    header length
//    sequence length
//  MAP
//    name to IID mapping

//  Fixed-size header record of a seqStore file.
//
//  The per-field notes are inferred from the field names and types
//  only; the code that reads and writes this record is not part of
//  this header -- confirm against the implementation before relying
//  on them.  Do not reorder or resize fields: the declaration order
//  fixes the record layout.
//
struct seqStoreHeader {
  uint64  _magic[2];             //  Magic number, stored as two 64-bit words
  uint32  _pad;                  //  Padding; together with the next field fills one 64-bit slot
  uint32  _numberOfSequences;    //  Count of sequences in the store
  uint64  _numberOfACGT;         //  Presumably the total number of ACGT bases -- confirm
  uint32  _numberOfBlocksACGT;   //  Presumably the number of blocks that carry ACGT data -- confirm
  uint32  _numberOfBlocksGAP;    //  Presumably the number of gap (N) blocks -- confirm
  uint32  _numberOfBlocks;       //  Presumably the total number of blocks -- confirm
  uint32  _namesLength;          //  Presumably the length of the names (defline) data -- confirm units

  uint64  _indexStart;           //  Presumably the file position where the index begins -- confirm
  uint64  _blockStart;           //  Presumably the file position where the block data begins -- confirm
  uint64  _namesStart;           //  Presumably the file position where the names begin -- confirm
};


//  Index record that locates one complete sequence: where its defline
//  lives in the names data, where its bases live in the bit-packed
//  sequence data, and which block it begins in.
//
//  seqStore keeps an array of these in its _index member (presumably
//  one entry per sequence, addressed by IID -- confirm).
//
struct seqStoreIndex {
  uint32  _hdrPosition;  //  Offset into _names for the defline
  uint32  _hdrLength;    //  Length of the defline
  uint64  _seqPosition;  //  Offset into _bpf for the sequence data
  uint32  _seqLength;    //  Length, in bases, of the sequence
  uint32  _block;        //  The seqStoreBlock that starts this sequence
};


//  This index allows us to seek to a specific base in the
//  file of sequences.  Each block is either:
//    ACGT - and has data
//    N    - no data
//  It maps a specific ACGT location to the sequence that contains it
//  (the sequence ID and the position within that sequence).
//
//  NOTE(review): the bit-field widths below sum to 128 bits, but
//  _isACGT + _pos already use 33 bits, so _iid (32 bits) cannot also
//  fit inside the first 64-bit unit.  Bit-field allocation is
//  implementation-defined; on ABIs that do not let a bit-field straddle
//  its storage unit this struct is likely larger than 16 bytes.
//  Confirm sizeof(seqStoreBlock) before assuming a packed layout, and
//  do not change any width without checking compatibility with existing
//  store files.
//
struct seqStoreBlock {
  uint64      _isACGT:1;    // block is acgt
  uint64      _pos:32;      // position in sequence
  uint64      _iid:32;      // iid of the sequence we are in
  uint64      _len:23;      // length of block
  uint64      _bpf:40;      // position in the bit file of sequence
};

//  Limits for the _pos, _iid and _len bit-fields of seqStoreBlock; the
//  mask widths here must be kept equal to the field widths declared
//  above.  uint64MASK is not defined in this header (presumably it
//  comes from util++.H and yields a mask of the low N bits -- confirm).
//
#define SEQSTOREBLOCK_MAXPOS uint64MASK(32)
#define SEQSTOREBLOCK_MAXIID uint64MASK(32)
#define SEQSTOREBLOCK_MAXLEN uint64MASK(23)

//  A seqFile implementation backed by a seqStore (binary sequence) file.
//
//  Both constructors are protected and seqFactory is declared a friend,
//  so instances are presumably created through seqFactory rather than
//  directly -- confirm.  The notes on individual members are inferred
//  from the declarations only; the definitions are not in this header.
//
class seqStore : public seqFile {
protected:
  seqStore(const char *filename);
  seqStore();

public:
  ~seqStore();

protected:
  //  Presumably the hook seqFactory uses to try opening 'filename' as a
  //  seqStore -- confirm what is returned when the file is not a seqStore.
  seqFile            *openFile(const char *filename);

public:
  //  Look up a sequence by name.  Returns a uint32, presumably the IID of
  //  the matching sequence -- confirm, including the not-found value.
  uint32              find(const char *sequencename);

  //  Length of sequence 'iid' (presumably in bases, cf. seqStoreIndex::_seqLength).
  uint32              getSequenceLength(uint32 iid);

  //  Fetch the whole of sequence 'iid'.  The h*/s* arguments are taken by
  //  reference; presumably h/hLen/hMax describe the defline buffer, its
  //  used length and its allocated size, and s/sLen/sMax do the same for
  //  the bases, with the callee free to reallocate -- confirm ownership.
  bool                getSequence(uint32 iid,
                                  char *&h, uint32 &hLen, uint32 &hMax,
                                  char *&s, uint32 &sLen, uint32 &sMax);

  //  Fetch part of sequence 'iid' into the caller-supplied buffer 's'.
  //  Presumably 'bgn' and 'end' bound the requested bases -- confirm
  //  whether 'end' is inclusive and whether 's' is NUL-terminated.
  bool                getSequence(uint32 iid,
                                  uint32 bgn, uint32 end, char *s);

private:
  void                clear(void);
  void                loadIndex(void);

  //  Bit-packed file holding the sequence data (seqStoreIndex::_seqPosition
  //  is documented as an offset into _bpf).
  bitPackedFile     *_bpf;

  seqStoreHeader     _header;

  //  In-memory tables; seqStoreIndex::_hdrPosition is documented as an
  //  offset into _names.  Presumably these are filled by loadIndex() -- confirm.
  seqStoreIndex     *_index;
  seqStoreBlock     *_block;
  char              *_names;

  //  Presumably the bit-packed files the three tables above are read from -- confirm.
  bitPackedFile     *_indexBPF;
  bitPackedFile     *_blockBPF;
  bitPackedFile     *_namesBPF;

  //  NOTE(review): name suggests the IID of the most recently loaded
  //  sequence; its exact use is not visible from this header.
  uint32             _lastIIDloaded;

  friend class seqFactory;
};


//  Construct a new seqStore named 'filename' from the sequences
//  supplied by 'inputseq'.
//
//    filename  - name of the seqStore to create.  Declared as a
//                non-const char*; presumably it is not modified --
//                confirm before passing a string literal.
//    inputseq  - source of the input sequences, given as a seqCache
//                object (not a file name).
//
//  Returns nothing; how failures are reported is not visible from
//  this declaration.
//
void
constructSeqStore(char *filename,
                  seqCache *inputseq);


#endif  //  SEQSTORE_H