This file is indexed.

/usr/bin/last-postmask is in last-align 921-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#! /usr/bin/env python

# Copyright 2014 Martin C. Frith

# Read MAF-format alignments, and write those that have a segment with
# score >= threshold, with gentle masking of lowercase letters.  There
# must be a lastal header with score parameters.

# Gentle masking is described in: MC Frith, PLoS One 2011;6(12):e28819
# "Gentle masking of low-complexity sequences improves homology search"

# Limitations: doesn't (yet) handle sequence quality data,
# frameshifts, or generalized affine gaps.

import gzip
import itertools, optparse, os, signal, sys

def myOpen(fileName):
    if fileName == "-":
        return sys.stdin
    if fileName.endswith(".gz"):
        return gzip.open(fileName)
    return open(fileName)

def getScoreMatrix(rowHeads, colHeads, matrix, deleteCost, insertCost):
    defaultScore = min(map(min, matrix))
    scoreMatrix = [[defaultScore for i in range(128)] for j in range(128)]
    for i, x in enumerate(rowHeads):
        for j, y in enumerate(colHeads):
            xu = ord(x.upper())
            xl = ord(x.lower())
            yu = ord(y.upper())
            yl = ord(y.lower())
            score = matrix[i][j]
            maskScore = min(score, 0)
            scoreMatrix[xu][yu] = score
            scoreMatrix[xu][yl] = maskScore
            scoreMatrix[xl][yu] = maskScore
            scoreMatrix[xl][yl] = maskScore
    for i in range(128):
        scoreMatrix[i][ord("-")] = -deleteCost
        scoreMatrix[ord("-")][i] = -insertCost
    return scoreMatrix

def isGoodAlignment(seqs, scoreMatrix, delOpenCost, insOpenCost, minScore):
    """Does the alignment have a segment with score >= minScore?"""
    r, q = seqs
    score = 0
    xOld = " "
    yOld = " "
    for x, y in itertools.izip(r, q):
        score += scoreMatrix[ord(x)][ord(y)]
        if score >= minScore: return True
        if x == "-" and xOld != "-": score -= insOpenCost
        if y == "-" and yOld != "-": score -= delOpenCost
        if score < 0: score = 0
        xOld = x
        yOld = y
    return False

def printIfGood(maf, seqs, scoreMatrix, delOpenCost, insOpenCost, minScore):
    if isGoodAlignment(seqs, scoreMatrix, delOpenCost, insOpenCost, minScore):
        for line in maf:
            print line,
        print

def doOneFile(lines):
    scoreMatrix = []
    maf = []
    seqs = []

    for line in lines:
        if line[0] == "#":
            print line,
            w = line.split()
            for i in w:
                if i.startswith("a="): aDel = int(i[2:])
                if i.startswith("b="): bDel = int(i[2:])
                if i.startswith("A="): aIns = int(i[2:])
                if i.startswith("B="): bIns = int(i[2:])
                if i.startswith("e="): minScore = int(i[2:])
            if len(w) > 1 and max(map(len, w)) == 1:
                colHeads = w[1:]
                rowHeads = []
                matrix = []
            elif len(w) > 2 and len(w[1]) == 1:
                rowHeads.append(w[1])
                matrix.append(map(int, w[2:]))
        elif line.isspace():
            if seqs: printIfGood(maf, seqs, scoreMatrix, aDel, aIns, minScore)
            maf = []
            seqs = []
        else:
            if not scoreMatrix:
                scoreMatrix = getScoreMatrix(rowHeads, colHeads, matrix,
                                             bDel, bIns)
            maf.append(line)
            if line[0] == "s": seqs.append(line.split()[6])
    if seqs: printIfGood(maf, seqs, scoreMatrix, aDel, aIns, minScore)

def lastPostmask(args):
    if not args:
        args = ["-"]
    for i in args:
        f = myOpen(i)
        doOneFile(f)
        f.close()

if __name__ == "__main__":
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # avoid silly error message

    usage = "%prog in.maf > out.maf"
    description = "Get alignments that have a segment with score >= threshold, with gentle masking of lowercase letters."
    op = optparse.OptionParser(usage=usage, description=description)
    (opts, args) = op.parse_args()

    try: lastPostmask(args)
    except KeyboardInterrupt: pass  # avoid silly error message
    except Exception, e:
        prog = os.path.basename(sys.argv[0])
        sys.exit(prog + ": error: " + str(e))