This file is indexed.

/usr/share/pyshared/qiime/barcode.py is in qiime 1.8.0+dfsg-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
#!/usr/bin/env python
import numpy

__author__ = "Justin Kuczynski"
__copyright__ = "Copyright 2011, The QIIME Project" 
__credits__ = ["Justin Kuczynski"] #remember to add yourself
__license__ = "GPL"
__version__ = "1.8.0"
__maintainer__ = "Justin Kuczynski"
__email__ = "justinak@gmail.com"

"""
provides functions, used to assign a possibly error-fraught DNA
barcode to a list of original barcodes.  correct_barcode uses edit distance 
of the DNA, not the bit encoding of the DNA.  correct_barcode_bitwise
uses bit encoding to determine closest match
"""
DEFAULT_GOLAY_NT_TO_BITS = { "A":"11",  "C":"00", "T":"10", "G":"01"}
DEFAULT_HAMMING_NT_TO_BITS = { "A":"11",  "C":"10", "T":"00", "G":"01"}

def correct_barcode(query_seq, seq_possibilities):
    """ finds closest (by nt seq edit distance) match to query_seq

    assumes:
    all sequences are same length
    no sequence appears twice in seq_possibilities

    returns (best_hit, min_dist)
    * best_hit is closest sequence from seq_possibilities, or None if a tie
    * min_dist is the edit distance between the query_seq and the best hit
    cw1 = AAACCCGGGTTT (12 nucleotides)
    cw2 = AAACCCGGGTTA (edit distance of 1 from cw1)
    cw3 = AAACCCGGGTTG (edit distance of 1 from cw1)
    cw4 = AAACCCGGGTTC (edit distance of 1 from cw1)    
    cw5 = AAACCCGGGTGG (edit distance of 2 from cw1)
    """
    dists = [_edit_dist(query_seq, seq) for seq in seq_possibilities]
    min_dist = min(dists)
    number_mins = dists.count(min_dist)
    if number_mins > 1:
        return None, min_dist
    else:
        best_hit = seq_possibilities[dists.index(min_dist)]
        return best_hit, min_dist


def _edit_dist(s1, s2):
    """ computes edit (hamming) between to strings of equal len

    designed for strings of nucleotides, not bits"""
    dist = 0
    for i in range(len(s1)):
        if s1[i] != s2[i]:
            dist += 1
    return dist

def correct_barcode_bitwise(query_seq, seq_possibilities, 
    nt_to_bits=DEFAULT_GOLAY_NT_TO_BITS):
    """ finds closest (by bit distance) match to query_seq

    assumes:
    all sequences are same length
    no sequence appears twice in seq_possibilities

    returns (best_hit, min_dist)
    * best_hit is closest sequence from seq_possibilities, or None if a tie
    * min_dist is the bitwise hamming distance between the query_seq and the 
    best hit 

    assuming default nt_to_bits of 
    { "A":"11",  "C":"00", "T":"10", "G":"01"}, examples:
    cw1 = AAACCCGGGTTT (12 nucleotides)
    cw2 = AAACCCGGGTTA (hamming distance of 1 from cw1)
    cw3 = AAACCCGGGTTG (hamming distance of 1 from cw1)
    cw4 = AAACCCGGGTTC (hamming distance of 2 from cw1)
    cw4 = AAACCCGGGTCC (hamming distance of 4 from cw1)
    """
    if nt_to_bits == None:
        nt_to_bits = DEFAULT_NT_TO_BITS
    dists = []
    query_seq_bits = seq_to_bits(query_seq, nt_to_bits)
    for seq in seq_possibilities:
        possible_seq_bits = seq_to_bits(seq, nt_to_bits)
        dists.append(hamming_dist(query_seq_bits, possible_seq_bits))
    min_dist = min(dists)
    number_mins = dists.count(min_dist)
    if number_mins > 1:
        return None, min_dist
    else:
        best_hit = seq_possibilities[dists.index(min_dist)]
        return best_hit, min_dist

def hamming_dist(v1,v2):
    """ two binary arrays, equal len.  for bits, not nucleotide strings"""
    edits = (v1!=v2)
    return edits.sum()

def seq_to_bits(seq, nt_to_bits):
    """ e.g.: "AAG" -> array([1,1,1,1,0,1]), for a specific nt_to_bits
    """
    bitstring = ""
    for nt in seq:
        bitstring += nt_to_bits[nt]
    bits = numpy.array(map(int,bitstring))
    return bits