This file is indexed.

/usr/share/pyshared/cogent/struct/rna2d.py is in python-cogent 1.5.3-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
  15
  16
  17
  18
  19
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
#!/usr/bin/env python
"""Code for handling RNA secondary structure.

RNA secondary structures can be represented in many different ways. The
representation makes a large difference to the efficiency of different
algorithms, so several different structural representations (and the means
to interconvert them) are provided.

Provides following classes:  
    Stem: representation of a stem in a secondary structure
    Partners: list holding partner of each position.
    Pairs: list of base pairs in a structure.
    StructureString: string holding a secondary structure representation.
    ViennaStructure: representation of Vienna-format RNA structure.
    WussStructure: Wash U secondary structure format, handles pseudoknots.
    StructureNode: for tree representation of nested RNA structure.
"""
from numpy import zeros, put
from cogent.util.transform import make_trans, float_from_string
from cogent.core.tree import TreeNode, TreeError
from cogent.util.misc import not_none, flatten

__author__ = "Rob Knight"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Rob Knight", "Sandra Smit", "Peter Maxwell"]
__license__ = "GPL"
__version__ = "1.5.3"
__maintainer__ = "Rob Knight"
__email__ = "rob@spot.colorado.edu"
__status__ = "Production"

class PairError(ValueError):
    """Base class for errors in pairing."""
    pass

class Stem(object):
    """Holds a Start, an End, and a Length.
    
    Note that the Start and the End pair with each other, thus spanning the 
    range. Successive pairs count up from the Start and down from the End.

    Stem is intended to be _very_ lightweight and does no error checking. In
    other words, it will let you specify a Start that's after the End, a Length
    that can't exist because there's insufficient separation between the Start
    and the End, and so on. The same principle applies to __getitem__ (and hence
    __iter__): you can iterate through pairs in a stem that can't exist, e.g.
    Stem(6, 7, 10) will give you the 10 items from Stem(6,7,1) through 
    Stem(15, -2, 1) -- often not what you want. 
    
    Similarly, when you initialize a Stem it detects whether Start and End are 
    both set, and makes a pair accordingly, but updating the Start or End will 
    not re-do the check to see if a pair has formed or broken. The alternative
    is to make Start, End and Length into properties that reset the state of
    the others when updated, but this makes Stem too slow for applications such
    as BayesFold.
    """
    __slots__ = ['Start', 'End', 'Length']
    def __init__(self, Start=None, End=None, Length=0):
        """Returns a new Stem object."""
        self.Start = Start
        self.End = End
        #set length if specified
        if Length:
            self.Length = Length
        #otherwise, set to 1 if paired and 0 if unpaired
        elif Start is None or End is None:
            self.Length = 0
        else:
            self.Length = 1

    def __len__(self):
        """Returns self.Length."""
        return self.Length

    def __getitem__(self, item):
        """Masquerades as a list of single-base stems."""
        length = self.Length
        #bounds check
        if item < 0:
            item = length - item
        if (item < 0) or (item >= length):
            raise IndexError, "Index %s out of range." % item
        #return appropriate base pair
        return Stem(self.Start + item, self.End - item)
        
    def __cmp__(self, other):
        """Sorts by start, then by end, then by length (if possible)."""
        return cmp(self.Start, other.Start) or cmp(self.End, other.End) \
            or cmp(self.Length, other.Length)
                
    def extract(self, seq):
        """Returns bases in pairs as list of tuples.
        
        Note: always returns list, even if only one base pair.
        """
        if self.Length > 1:
            return flatten([p.extract(seq) for p in self])
        else:
            if self.Start is not None:
                start = seq[self.Start]
            else:
                start = None
            if self.End is not None:
                end = seq[self.End]
            else:
                end = None
            return [(start, end)]
    
    def __hash__(self):
        """Hashes the same as a tuple of (start, end, length).
        
        WARNING: if you change any of the values once an object is in a
        dict, you'll get unpleasant results. Don't do it!
        """
        return hash((self.Start, self.End, self.Length))

    def __str__(self):
        """String representation contains Start,End,Length."""
        return '(%s,%s,%s)' % (self.Start, self.End, self.Length)
    
    def __nonzero__(self):
        """Nonzero if length > 0."""
        return self.Length > 0

class Partners(list):
    """Holds list p such that p[i] is the index of the partner of i, or None.
    
    Primarily useful for testing whether a specified base is paired and, if so,
    extracting its partner.

    Each base may have precisely 0 or 1 partners. If A pairs with B, B must
    pair with A.
    All inconsistencies will be removed by setting previous partners to None.
    Checking for conflicts and raising errors should be done in method that 
    constructs the Partners.

    If constructing by hand, should initialize with list of [None] * seq_length.
    Typically, Partners will be constructed by code from some other data. Use
    the EmptyPartners(n) factory function to get an empty Partners list of
    length n.
    """
    def __setitem__(self, index, item):
        """Sets self[index] to item, enforcing integrity constraints."""
        if index == item:
            raise ValueError, "Cannot set base %s to pair with itself." % item
        #if item already paired, raise Error or make partner unpaired
        if item and self[item]:
            self[self[item]] = None
        #if already paired, need to make partner unpaired
        curr_partner = self[index]
        if curr_partner is not None:
            list.__setitem__(self, curr_partner, None)
        #set self[index] to item    
        list.__setitem__(self, index, item)
        #if item is not None, set self[item] to index
        if item is not None:
            list.__setitem__(self, item, index)
                
    def toPairs(self):
        """Converts the partners to sorted list of pairs."""
        result = Pairs()
        for first, second in enumerate(self):
            if first < second:
                result.append((first, second))
        return result

    def _not_implemented(self, *args, **kwargs):
        """Raises NotImplementedError for 'naughty' methods.
        
        Not allowed any methods that insert/remove items or that change the
        order of the items, including things like sort or reverse.
        """
        raise NotImplementedError
    
    __delitem__ = __delslice__ = __iadd__ = __imul__ = __setslice__ = append \
    = extend = insert = pop = remove = reverse = sort = _not_implemented

def EmptyPartners(length):
    """Returns empty list of Partners with specified length."""
    return Partners([None] * length)


class Pairs(list):
    """Holds list of base pairs, each of which is a 2-element sequence.
    
    This is a very lightweight object for storing base pairs, and does not
    perform any validation. Useful as an intermediate in many different
    calculations.
    """
    def toPartners(self, length, offset=0, strict=True):
        """Returns a Partners object, if possible.
        
        length of resulting sequence must be specified.
        offset is optional, and is added to each index.
        strict specifies whether collisions cause fatal errors. if not strict
        conflicts will be removed by the Partners object.
        """
        result = EmptyPartners(length)
        for up, down in self:
            upstream = up + offset
            downstream = down + offset
            
            if result[upstream] or result[downstream]:
                if strict:
                    raise ValueError, "Pairs contain conflicting partners: %s"\
                        % self
            result[upstream] = downstream
        return result
            
    def toVienna(self, length, offset=0, strict=True):
        """Returns a Vienna structure string, if possible.

        length of resulting sequence must be specified.
            Instead of parsing the sequence length, you can also throw in
            an object that has the required length (such as the sequence that
            the structure corresponds to).
        offset is optional, and is added to each index.
        strict specifies whether collisions cause fatal errors.
        """
        if self.hasPseudoknots():
            raise PairError, "Pairs contains pseudoknots %s"%(self)
        try:
            length = int(length)
        except ValueError: #raised when length can't be converted to int
            length = len(length)
        
        p = self.directed()
        result = ['.'] * length
        for up, down in p:
            try:
                upstream = up + offset
                downstream = down + offset
            except TypeError:
                continue

            if strict:
                if (result[upstream] != '.') or (result[downstream] != '.'):
                    raise ValueError, "Pairs contain conflicting partners: %s"\
                        % self
            result[upstream] = '('
            result[downstream] = ')'
        return ViennaStructure(''.join(result))

    def tuples(self):
        """Converts all pairs in self to tuples, in place.

        Useful for constructing dicts and for sorting (otherwise, pairs of
        different types, e.g. lists and tuples, will sort according to type
        rather than to position).
        """
        self[:] = map(tuple, self)

    def unique(self):
        """Returns copy of self omitting duplicate pairs, preserving order.

        Keeps the first occurrence of each pair.
        """
        seen = {}
        result = []
        for p in map(tuple, self):
            if p not in seen:
                seen[p] = True
                result.append(p)
        return Pairs(result)

    def directed(self):
        """Returns copy of self where all pairs are (upstream, downstream).
        
        Omits any unpaired bases and any duplicates. Result is in arbitrary 
        order.
        """
        seen = {}
        for up, down in self:
            if (up is None) or (down is None):
                continue    #omit unpaired bases
            if up > down:
                up, down = down, up
            seen[(up, down)] = True
        result = seen.keys()
        return Pairs(result)

    def symmetric(self):
        """Retruns copy of self where  each up, down pair has a down, up pair.
        
        Result is in arbitrary order. Double pairs and pairs containing None 
        are left out.
        """
        result = self.directed()
        result.extend([(down, up) for up, down in result])
        return Pairs(result)

    def paired(self):
        """Returns copy of self omitting items where a 'partner' is None."""
        return Pairs(filter(not_none, self))

        
    def hasPseudoknots(self):
        """Returns True if the pair list contains pseudoknots.
        
        (good_up,good_down) <=> (checked_up,checked_down)
        pseudoknot if checked_up<good_down and checked_down>good_down
        """
        pairs = self.directed()
        seen = [] # list of pairs against which you compare each time
        pairs.sort()
        for pair in pairs:
            if not seen:
                seen.append(pair)
            else:
                lastseen_up, lastseen_down = seen[-1]
                while pair[0] > lastseen_down:
                    seen.pop()
                    if not seen:
                        break
                    else:
                        lastseen_up,lastseen_down = seen[-1]
                if not seen:
                    seen.append(pair)
                    continue
                if pair[1]>lastseen_down:
                    #pseudoknot found
                    return True
                else:
                    #good pair
                    seen.append(pair)
        return False


    def hasConflicts(self):
        """Returns True if the pair list contains conflicts.

        Conflict occurs when a base has two different partners, or is asserted
        to be both paired and unpaired.
        """
        partners = {}
        for first, second in self:
            if first is None:
                if second is None:
                    continue    #no pairing info
                else:
                    first, second = second, first   #swap order so None is 2nd
            if second is None: #check first isn't paired
                if partners.get(first, None) is not None:
                    return True
                else:
                    partners[first] = None
            else:   #first and second were both non-empty: check partners
                if first in partners:
                    if partners[first] != second:
                        return True
                if second in partners:
                    if partners[second] != first:
                        return True
                #add current pair to the list of constraints
                partners[first] = second
                partners[second] = first
        #can only get here if there weren't conflicts
        return False
            
    def mismatches(self, sequence, pairs=None):
        """Counts pairs that can't form in sequence.

        Sequence must have a Pairs property that acts like a dictionary
        containing a 2-element tuple for each valid pair. Can also pass in
        the pairs explicitly.
        """
        mismatches = 0
        if pairs is None:
            try:
                pairs = sequence.Alphabet.Pairs
            except AttributeError:
                pairs = sequence.Pairs
            
        for up, down in self.directed():
            curr = (sequence[up], sequence[down])
            if curr not in pairs:
                mismatches += 1
        return mismatches
    
        
wuss_to_vienna_table = make_trans('<([{>)]} ', '(((()))) ', '.')

def wuss_to_vienna(data):
    """Converts WUSS format string to Vienna format.

    Any pseudoknots or unrecognized chars will convert to unpaired bases.
    Spaces will be preserved.
    """
    return ViennaStructure(data.translate(wuss_to_vienna_table))


class StructureString(str):
    """Base class for ViennaStructure and WussStructure. Immutable.
    
    StructureString holds a structure and a energy. By default energy is 
    set to None. 
    If you compare two StructureStrings the structure is the only important
    thing, since the energy is relative to the associated sequence.
    """
    Alphabet=None
    StartSymbols = ''      #dict of symbols that start base pairs
    EndSymbols = ''        #dict of symbols that end base pairs
    
    def __new__(cls, Structure, Energy=None):
        """Returns new StructureString."""
        a = cls.Alphabet
        if a:
            for i in Structure:
                if i not in a:
                    raise ValueError,\
                    "Tried to include unknown symbol '%s'" % i
        
        return str.__new__(cls,Structure)

    def __init__(self, Structure='', Energy=None):
        """Initializes StructureString with Structure and optionally Energy."""
        self.Energy = Energy
        self.toPartners()

    def __str__(self):
        """Returns string representaion of structure and energy, if known.

        Energy = 0 is different from Energy = None. Latter case is not printed.
        """
        if not self.Energy == None:
            return self + ' (' + str(self.Energy) + ')'
        else:
            return str.__str__(self)

    def toPartners(self):
        """Makes list containing partner of each position.
        
        Constructs a list from 0 to the number of bases, where each position
        contains the index of its pair (or None if it is unpaired).

        Note that the numbering starts at 0 for the first position.

        The algorithm here relies on the fact that any completely nested
        base-paired structure (no pseudoknots!) can be formally represented
        as a tree. Consequently, when you hit a closed base pair, you know
        that it it must pair with the last base pair you opened.
        """
        num_bases = len(self) #number of bases
        result = [None] * len(self) #array of None, one for each base
        stack = []
        start = self.StartSymbols
        end = self.EndSymbols
        for i, symbol in enumerate(self):
           if symbol in start:       #open a pair
               stack.append(i)
           elif symbol in end:     #close a pair
               curr = stack.pop()  #return and delete last element
               result[i] = curr #make i pair with the last element...
               result[curr] = i #...and the last element pair with i
               
        #test whether there are any open pairs left unaccounted for        
        if stack:
           raise IndexError, \
           "Too many open pairs in structure:\n%s" % self
        return Partners(result)

    def toPairs(self):
        """Makes list of (upstream,downstream) partners.
        
        Note that the numbering starts at 0 for the first position.
        Key will always be smaller than value.

        Result is in arbitrary order.
        """
        result = {}
        stack = []
        start = self.StartSymbols
        end = self.EndSymbols
        for i, symbol in enumerate(self):
           if symbol in start:       #open a pair
               stack.append(i)
           elif symbol in end:     #close a pair
               result[stack.pop()] = i
        #test whether there are any open pairs left unaccounted for        
        if stack:
           raise IndexError, \
           "Too many open pairs in structure:\n%s" % self
        return Pairs([(key,result[key]) for key in result])

    def toTree(self):
        """Returns tree version of structure.
        
        Each node in the tree corresponds to a loop. Runs of nodes with single
        non-leaf children correspond to stems.
        """
        root = StructureNode()
        curr_node = root
        start = self.StartSymbols
        end = self.EndSymbols
        for index, symbol in enumerate(self):
            if symbol in end:
                curr_node.End = index
                curr_node.Length = 1
                curr_node = curr_node.Parent
            else:
                new_node = StructureNode()
                new_node.Start = index
                curr_node.Children.append(new_node)
                new_node.Parent = curr_node
                if symbol in start:
                    curr_node = new_node
        return root

class ViennaStructure(StructureString):
    """Contains a Vienna dot-bracket structure, possibly with energy."""
    Alphabet = dict.fromkeys('(.)')
    StartSymbols = {'(':None}      #dict of symbols that start base pairs
    EndSymbols =   {')':None}      #dict of symbols that end base pairs
 
class WussStructure(StructureString):
    """Contains a Wuss Structure."""
    Alphabet = dict.fromkeys(
        '(<{[.~-,:_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)>}]')
    StartSymbols = dict.fromkeys('(<{[')#dict of symbols that start base pairs
    EndSymbols = dict.fromkeys(')>}]')   #dict of symbols that end base pairs
 
def Vienna(data,Energy=None):
    """Tries to extract structure and energy from string data.

    Returns (structure, energy) where energy might be None.

    structure is just anything before the first space: doesn't validate.
    """
    pieces = data.strip().split(None, 1)
    if not pieces:
        return ViennaStructure('', Energy)
    else:
        if not Energy:
            try:
                energy = float_from_string(pieces[1])
            except (TypeError, ValueError, IndexError):
                energy = Energy
        else: #energy given by user overrules the one in structure
            energy = Energy
    return ViennaStructure(pieces[0], energy)

class StructureNode(TreeNode):
    """Operations on StructureNodes, trees, and subtrees."""
    Types = ["Stem", "Loop", "Bulge", "Junction", "End", "Flexible"]

    def __init__(self, Data=None, Children=None, Parent=None):
        """Returns a new StructureNode object."""
        #coerce data into correct type
        if Data is None:
            Data = Stem()
        elif not isinstance(Data, Stem):
            Data = Stem(Data)
        #initalize as TreeNode: will forward attributes to Data
        super(StructureNode, self).__init__(None, Children, Parent)
        self.Data = Data

    def _get_stems(self):
        """Returns nodes corresponding to stems leaving self."""
        return [i for i in self if i.IsPaired]
    
    def _get_unpaired(self):
        """Returns nodes corresponding to unpaired bases leaving self."""
        return [i for i in self if not i.IsPaired]
    
    Stems = property(_get_stems)
    Unpaired = property(_get_unpaired)

    def _get_end(self):
        """Gets End from self's data."""
        return self.Data.End

    def _set_end(self, val):
        """Sets End in self's data."""
        self.Data.End = val

    def _get_start(self):
        """Gets Start from self's data."""
        return self.Data.Start
    
    def _set_start(self, val):
        """Sets Start in self's data."""
        self.Data.Start = val

    def _get_length(self):
        """Gets Length from self's data."""
        return self.Data.Length

    def _set_length(self, val):
        """Sets Length in self's data."""
        self.Data.Length = val

    def _get_index(self):
        """Accessor for index: returns position of self in parent's list."""
        if self.Parent is None:
            return None
        else:
            ids = map(id, self.Parent.Children)
            return ids.index(id(self))

    def _set_index(self, index):
        """Mutator for index: moves self to new location in parent's list.
        
        NOTE: index is relative to the new list after self is removed, not to
        the old list before self is removed.
        """
        if self.Parent is None:
            raise TreeError, "Can't set Index in node %s without parent." % self
        else:
            curr_parent = self.Parent
            curr_parent.removeNode(self)
            curr_parent.Children.insert(index, self)

    End = property(_get_end, _set_end)
    Start = property(_get_start, _set_start)
    Length = property(_get_length, _set_length)
    Index = property(_get_index, _set_index)
    

    def _is_paired(self):
        """Returns True if self is paired, false otherwise."""
        return self.End is not None

    IsPaired = property(_is_paired)

    def _get_type(self):
        """Returns type of the node, depending on stems in and out."""
        #easy cases first
        if self.Parent is None:
            return "Root"
        if self.IsPaired:
            return "Stem"
        #if grandparent is None (i.e. it is a child of the root node), either 
        #end or 'flexible' depending on whether the current node is outside
        #the first and last helices or between them.
        if self.Parent.Parent is None:
            stems = self.Parent.Stems
            if not stems:
                return "End"
            first, last = stems[0], stems[-1]
            index = self.Index
            if index < stems[0].Index or index > stems[-1].Index:
                return "End"
            else:
                return "Flexible"
        #otherwise, depends on number of stems coming out of parent: if none,
        #it's a loop, if one, it's a bulge, and otherwise it's a junction.
        else:
            out_stems = self.Parent.Stems
            if not out_stems:
                return "Loop"
            elif len(out_stems) == 1:
                return "Bulge"
            else:
                return "Junction"
        #should never get down here, since all cases are handled above
        raise ValueError, '_get_type failed on node with start %s, end %s' % \
            (self.Start, self.End)
    Type = property(_get_type)
        
    def renumber(self, start=0):
        """Renumbers self and all child nodes consecutively.
        
        Returns the next number to be used.
        """
        if self.Parent is None: #no number for root node
            curr = start
        else:
            self.Start = start
            curr = start + max(1, self.Length)  #still add 1 if it's unpaired
        for i in self:
            curr = i.renumber(curr)
        if self.IsPaired:
            curr += self.Length
            self.End = curr - 1
        return curr
            
    def classify(self, terminate=True):
        """Returns string containing site classification"""
        #check whether we're at the original node or in an internal node
        if not terminate:
            #if it's paired, need to add 'S' for self, plus handle children
            if self.IsPaired:
                result = ['S']
                for i in self:
                    result.extend(i.classify(False))
                result.append('S')
                return result
            #otherwise, it's unpaired and we need to figure out what it is
            else:
                return self.Type[0] #just want first letter
        else:   #if it's the root, just need to handle children
            result = []
            for i in self:
                result.extend(i.classify(False))
            return ''.join(result)

    def __str__(self):
        """Returns string representation of tree, in Vienna format."""
        if self.IsPaired:
            prefix = '(' * self.Length
            suffix = ')' * self.Length
            return prefix + ''.join(map(str, self)) + suffix
        elif self.Parent is None:   #root node
            return ''.join(map(str, self))
        else:                       #unpaired base
            return '.'

    def unpair(self):
        """Breaks the first pair represented by the current node, if any.

        Returns True if the node is changed, False if it wasn't.
        """
        if self.IsPaired:
            curr_idx = self.Index
            first = StructureNode(Data=Stem(self.Start))
            last = StructureNode(Data=Stem(self.End))
            
            if self.Length > 1: #not melting the whole helix
                self.Start += 1
                self.End -= 1
                self.Length -= 1
                result = [first, self, last]
            else:   #melting the whole helix
                result = [first] + self.Children + [last]
            #replace current record in parent with the result
            #note use of a slice assignment instead of an index! This is to
            #replace with the elements, not with a list of the elements.
            self.Parent[curr_idx:curr_idx+1] = result
            return True
        else:
            return False

    def _pair_before_indices(self):
        """Detects the pair of positions before the current node. 
        
        Returns a tuple of (upstream, downstream) if possible; None
        otherwise.
        """
        curr_idx = self.Index
        curr_parent = self.Parent
        #bail out if it's the first or last base
        if (curr_idx == 0) or (curr_idx == len(curr_parent) - 1):
            return None
        #bail out if the base before and after self aren't unpaired
        before = curr_parent[curr_idx - 1]
        after = curr_parent[curr_idx + 1]
        if before.IsPaired or after.IsPaired:
            return None
        return (before.Start, after.Start)

    def pairBefore(self):
        """Forms a pair before the current node, if possible.
        
        Returns True if the pair was successfully created, False otherwise.
        """
        #get the indices of the pair, if possible
        indices = self._pair_before_indices()
        if not indices:
            return False
        #cache the current index and parent
        curr_idx = self.Index
        curr_parent = self.Parent
        #create a new pair, and add self to it
        new_pair = StructureNode(Data=Stem(indices[0], indices[1], 1))
        new_pair.Children.append(self)
        self.Parent = new_pair
        #add the new pair to parent, and delete its unpaired siblings
        curr_parent.Children.insert(curr_idx, new_pair)
        new_pair.Parent = curr_parent
        del curr_parent.Children[curr_idx + 1]
        del curr_parent.Children[curr_idx - 1]
        return True

    def _pair_after_indices(self):
        """Returns indices of the pair after the current node, if possible.

        Returns tuple containing the indices if possible, None otherwise.
        """
        #can't work if one or fewer children
        if len(self) < 2:
            return None
        before = self[0]
        after = self[-1]
        if before.IsPaired or after.IsPaired:
            return None
        return (before.Start, after.Start)

    def pairAfter(self):
        """Forms a pair after the current node, if possible.

        Returns True if the pair was successfully created, False otherwise.
        Note that all children of self will become children of the newly-
        created pair.
        """
        indices = self._pair_after_indices()
        if not indices:
            return False
        #make the new pair and devolve children to it 
        new_pair = StructureNode(Data=Stem(indices[0], indices[1], 1))
        new_pair.Children[:] = self.Children[1:-1]    #all except start and end
        for c in new_pair.Children:
            c.Parent = new_pair
        self.Children[:] = [new_pair]
        new_pair.Parent = self
        return True

    def pairChildren(self, first, second):
        """Forms a pair using two of the children of self.

        first and second can be the node objects or the indices.

        Returns True if the pair was created, False otherwise.
        """
        #check that first and second are really instances of self, or
        #convert from indices
        if isinstance(first, int):
            if first >= 0:
                first_index = first
            else:
                first_index = len(self) + first
            first_node = self[first]
        else:
            first_index = first.Index
            first_node = first
        if isinstance(second, int):
            if second >= 0:
                second_index = second
            else:
                second_index = len(self) + second
            second_node = self[second]
        else:
            second_index = second.Index
            second_node = second
        #check that the nodes are really children of self, and that they're
        #not the same, and that they're not already paired
        if self is not first_node.Parent or self is not second_node.Parent:
            return False
        if first_node is second_node:
            return False
        if first_node.IsPaired or second_node.IsPaired:
            return False
        #create the new node and append the appropriate children
        new_node = StructureNode(Data=  \
            Stem(first_node.Start, second_node.Start, 1))
        del self.Children[second_index]
        new_node.Children[:] = self.Children[first_index + 1 : second_index]
        for c in new_node.Children:
            c.Parent = new_node
        self.Children[first_index] = new_node
        new_node.Parent = self
        return True

    def expand(self):
        """If self is a stem, expands self into n separate pair nodes.

        Returns True if the node was expanded, False otherwise.
        """
        if self.Length <= 1:
            return False
    
        children = self.Children[:]
        self.Children[:] = []
        curr_pair = self
        start = self.Start
        end = self.End
        for i in range(1, self.Length):
            new_pair = StructureNode(Data=Stem(start+i, end-i, 1))
            curr_pair.Children.append(new_pair)
            new_pair.Parent = curr_pair
            curr_pair = new_pair
        new_pair.Children[:] = children
        for c in new_pair.Children:
            c.Parent = new_pair
        self.Length = 1
        return True
            
    def expandAll(self):
        """Expands self and all children of self."""
        for i in self:
            i.expandAll()
        self.expand()

    def collapse(self):
        """If self is a stem, extends self with as many base pairs as possible.

        Returns True if the stem was extended, False otherwise.
        """
        #no effect if we're at the root, or if the position isn't paired
        if not self.IsPaired or self.Parent is None:
            return False
        extended = False
        while len(self) == 1 and self[0].IsPaired:
            self.Children[:] = self[0].Children[:]
            for c in self.Children:
                c.Parent = self
            self.Length += 1
            extended = True
        return extended

    def collapseAll(self):
        """Collapse self and all children of self."""
        self.collapse()
        for i in self:
            i.collapseAll()

    def breakBadPairs(self, seq):
        """Breaks all pairs in self that can't form in seq."""
        pairs = seq.Alphabet.Pairs
        self.expandAll()
        for i in self.traverse_recursive():
            if i.IsPaired:
                if not (seq[i.Start], seq[i.End]) in pairs:
                    i.unpair()

    def extendHelix(self, seq):
        """Extends helix represented by self as far as possible with seq.
        
        Assumes that helix has already been collapsed.
        """
        pairs = seq.Alphabet.Pairs
        #form as many contiguous pairs before as possible
        curr = self
        while 1:
            indices = curr._pair_before_indices()
            if indices and ((seq[indices[0]], seq[indices[1]]) in pairs):
                curr.pairBefore()
                #see if we can extend the newly-created pair
                curr = curr.Parent
            else:
                break
        #form as many contiguous pairs after as possible
        curr = self
        while 1:
            indices = curr._pair_after_indices()
            if indices and ((seq[indices[0]], seq[indices[1]]) in pairs) \
                and (curr.Stems or (len(curr.Unpaired) >= 5)):
                curr.pairAfter()
                #see if we can extend the newly-created pair
                curr = curr[0]
            else:
                break
    
    def extendHelices(self, seq):
        """Extends all helices in self and its children as far as possible."""
        for i in self.traverse_recursive():
            if i.IsPaired:
                i.extendHelix(seq)
    
    def fitSeq(self, seq):
        """Corrects the structure in self according to the specified sequence.

        seq must be a Sequence object with .Alphabet, etc.

        Note that fitSeq does not renumber the structure; it may be necessary
        to call renumber(), especially if the structure does not start at the
        beginning of the sequence.

        Assumes that it will be called starting with the root node; does not
        check backwards in the tree.
        """
        self.breakBadPairs(seq)
        self.extendHelices(seq)


def classify(struct, verbose=False):
    """Classifies a Vienna-format string into structural categories.
    
    struct may be a string or a real Vienna structure. It is tested on
    validity, because classifying invalid structures is very unreliable.
    If the number of closing brackets is larger than the number of opening
    brackets, an error would be raised, but if it's the other way around, 
    weird characters could be added to the string. For instance, classifying
    '.((.)' would give "\x00SSLS". This happens because the ends are not
    handled correctly if the stack is not back to its one-level state.
    """
    
    #Test whether structure is valid. Classifying invalid structures
    #is very unreliable.
    try:
        Vienna(struct)
    except IndexError:
        raise IndexError, "Trying to classify an invalid Vienna structure: %s"\
        %(struct)
    
    MAX_STEMS=1000

    #implement stack as three-item list
    PARENT = 0
    ITEMS = 1
    DEGREE = 2

    STEM, LOOP, BULGE, JUNCTION, END, FLEXIBLE = map(ord, 'SLBJEF')
    #WARNING: won't work if more than max_stems come off a junction
    LEVELS = [FLEXIBLE, LOOP, BULGE] + [JUNCTION]*MAX_STEMS

    length = len(struct)
    result = zeros((length,), 'B')
    stack = [None,[],0]
    curr_level = stack
    if verbose:
        print 'curr_level:', curr_level
        print 'result:', result

    for i, c in enumerate(struct):
        if verbose:
            print 'pos, char:',i,c
        #open parens add new level to stack
        if c == '(':
            curr_level = [curr_level,[],1]
            result[i] = STEM
        #unpaired base gets appended to current level
        elif c == '.':
            curr_level[ITEMS].append(i)
        #closed parens subtract level from stack and assign state
        elif c == ')':   #note: will handle end separately
            result[i] = STEM
            put(result, curr_level[ITEMS], LEVELS[curr_level[DEGREE]])
            curr_level = curr_level[PARENT]
            curr_level[DEGREE] += 1
        if verbose:
            print 'curr_level:', curr_level
            print 'result', result
            
    #handle ends and flexible bases
    end_items = curr_level[ITEMS]
    if end_items:
        first_start = struct.find('(')
        if first_start == -1:
            first_start = length+1
        last_end = struct.rfind(')')
        put(result, [i for i in end_items if first_start<i<last_end], FLEXIBLE)
        put(result, [i for i in end_items if not first_start<i<last_end], END)
    return result.tostring()