/usr/lib/python2.7/dist-packages/GenomicConsensus/variants.py is in python-pbgenomicconsensus 2.1.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#################################################################################
# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Pacific Biosciences nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#################################################################################
# Author: David Alexander
from __future__ import absolute_import
from .utils import CommonEqualityMixin
__all__ = [ "Variant" ]
class Variant(CommonEqualityMixin):
    """
    Variant objects represent homozygous/haploid OR heterozygous
    variants corresponding to a fixed window of a reference genome

    Internally we use Python-style half-open intervals zero-based
    [start, end) to delineate reference ranges.  An insertion has
    start==end, a SNP has start+1==end, etc.

    GFF files use 1-based indexing and closed intervals [start, end].
    In a GFF both insertions and SNPs have start==end, which doesn't
    make too much sense to me, but so be it.

    VCF files use 1-based indexing as well, but do not record the
    "end"
    """
    def __init__(self, refId, refStart, refEnd, refSeq, readSeq1,
                 readSeq2=None, confidence=None, coverage=None,
                 frequency1=None, frequency2=None, annotations=None):
        """
        Store the reference window (refId, refStart, refEnd, refSeq),
        the observed allele(s) and the optional supporting fields.

        readSeq2 is None for a homozygous/haploid call; any other
        value (including the empty string) marks the variant as
        heterozygous.  annotations, when given, is used as the list
        that `annotate` appends (key, value) pairs to.
        """
        self.refId = refId
        self.refStart = refStart
        self.refEnd = refEnd
        self.refSeq = refSeq
        self.readSeq1 = readSeq1
        self.readSeq2 = readSeq2
        self.confidence = confidence
        self.coverage = coverage
        self.frequency1 = frequency1
        self.frequency2 = frequency2
        self.annotations = annotations

    @property
    def isHeterozygous(self):
        """True when a second allele (readSeq2) has been recorded."""
        return self.readSeq2 is not None

    @property
    def variantType(self):
        """
        Classify the variant by comparing sequence lengths:

          - "Insertion"    if the reference sequence is empty
          - "Deletion"     if either read allele is empty
          - "Substitution" if every allele has the reference length
          - "Variant"      otherwise
        """
        lr = len(self.refSeq)
        l1 = len(self.readSeq1)
        # Test against None rather than truthiness: an empty second
        # allele ("") is a real allele of length 0 and must be able to
        # trigger the "Deletion" branch below.
        l2 = len(self.readSeq2) if self.readSeq2 is not None else None
        if lr == 0:
            return "Insertion"
        elif l1 == 0 or l2 == 0:
            return "Deletion"
        elif (l1 == lr) and (l2 is None or l2 == lr):
            return "Substitution"
        else:
            return "Variant"

    def __str__(self):
        """
        Render as "<type>@<refId>:<start>-<end> <ref> -> <alleles>",
        writing "." for an empty sequence and joining the two alleles
        of a heterozygous variant with "/".
        """
        refSeq_ = self.refSeq or "."
        if self.isHeterozygous:
            readAlleles = "%s/%s" % (self.readSeq1 or ".",
                                     self.readSeq2 or ".")
        else:
            readAlleles = "%s" % (self.readSeq1 or ".")
        return "%s@%s:%d-%d %s -> %s" % \
            (self.variantType,
             self.refId,
             self.refStart,
             self.refEnd,
             refSeq_,
             readAlleles)

    def __repr__(self):
        """Same text as str(self)."""
        return str(self)

    def __lt__(self, other):
        """Order by (refId, refStart, refEnd, readSeq1)."""
        return ((self.refId, self.refStart, self.refEnd, self.readSeq1) <
                (other.refId, other.refStart, other.refEnd, other.readSeq1))

    def annotate(self, key, value):
        """
        Append the pair (key, value) to self.annotations, creating the
        list first if no annotations have been stored yet.
        """
        if self.annotations is None:
            self.annotations = []
        self.annotations.append((key, value))
def filterVariants(minCoverage, minConfidence, variants):
    """
    Return a new list holding, in their original order, the variants
    whose coverage is at least minCoverage and whose confidence is at
    least minConfidence.  The input iterable is not modified.
    """
    kept = []
    for candidate in variants:
        # Coverage is checked first; confidence is only consulted for
        # candidates that already pass the coverage threshold.
        if not (candidate.coverage >= minCoverage):
            continue
        if not (candidate.confidence >= minConfidence):
            continue
        kept.append(candidate)
    return kept
def annotateVariants(variants, alns):
    """
    Annotate every variant with a "rows" entry whose value is the
    comma-separated `rowNumber` of each alignment in alns.

    Operates in place: each variant's `annotate` method is called and
    nothing is returned.
    """
    # The row list is the same for every variant, so build it once.
    # This also keeps the result correct when alns is a one-shot
    # iterator, which would otherwise be exhausted after the first
    # variant and leave the remaining ones with an empty string.
    rows = ",".join(str(a.rowNumber) for a in alns)
    for v in variants:
        v.annotate("rows", rows)
|