/var/lib/mobyle/programs/clustalw-multialign.xml

<?xml version='1.0' encoding='UTF-8'?>
<!-- XML Authors: Corinne Maufrais, Nicolas Joly and Bertrand Neron,             -->
<!-- 'Biological Software and Databases' Group, Institut Pasteur, Paris.         -->
<!-- Distributed under LGPLv2 License. Please refer to the COPYING.LIB document. -->
<program>
  <head>
    <name>clustalw-multialign</name>
    <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Entities/ClustalW_package.xml"/>
    <doc>
      <title>Clustalw: Multiple alignment</title>
      <description>
        <text lang="en">Do full multiple alignment</text>
      </description>
    </doc>
    <category>alignment:multiple</category>
    <command>clustalw -align </command>
  </head>
  <parameters>
    <paragraph>
      <name>input</name>
      <prompt lang="en">Data Input</prompt>
      <parameters>
    
    
    <!-- sequences_input: mandatory main input holding the unaligned sequences
         (cardinality 2,n), handed to the program as " -infile=<value>" at argpos 1.
         It is the counterpart of alignment_input: the precond holds while
         alignment_input is unset, or while both inputs are set, and the ctrl
         then requires alignment_input to be unset (message below otherwise). -->
    <parameter ismandatory="1" issimple="1" ismaininput="1">
      <name>sequences_input</name>
      <prompt lang="en">Sequences File ( a file containing several sequences ) (-infile)</prompt>
      <precond>
        <code proglang="perl">not $alignment_input or ($sequences_input and $alignment_input)</code>
        <code proglang="python">not alignment_input or (sequences_input and alignment_input)</code>
      </precond>
      <type>
        <datatype>
          <class>Sequence</class>
        </datatype>
        <dataFormat>FASTA</dataFormat>
        <dataFormat>NBRF</dataFormat>
        <dataFormat>EMBL</dataFormat>
        <dataFormat>GCG</dataFormat>
        <dataFormat>GDE</dataFormat>
        <dataFormat>SWISSPROT</dataFormat>
        <card>2,n</card>
      </type>
      <format>
        <code proglang="perl">" -infile=$value"</code>
        <code proglang="python">" -infile=" + str( value )</code>
      </format>
      <!-- Both input kinds map to the same -infile option, hence the exclusivity check. -->
      <ctrl>
        <message>
          <text lang="en">Can not handle both Sequence and Alignment at the same time</text>
        </message>
        <code proglang="perl">not $alignment_input</code>
        <code proglang="python">not alignment_input</code>
      </ctrl>
      <argpos>1</argpos>
    </parameter>
    
            <!-- alignment_input: mandatory main input holding already aligned sequences
                 (Protein or DNA, CLUSTAL or FASTA format, cardinality 1), handed to the
                 program as " -infile=<value>". It is the counterpart of sequences_input:
                 the precond holds while sequences_input is unset, or while both inputs
                 are set, and the ctrl then requires sequences_input to be unset
                 (message below otherwise). -->
            <parameter ismandatory="1" issimple="1" ismaininput="1">
              <name>alignment_input</name>
              <prompt lang="en">Aligned sequences</prompt>
              <precond>
                <code proglang="perl">not $sequences_input or ($sequences_input and $alignment_input)</code>
                <code proglang="python">not sequences_input or (sequences_input and alignment_input)</code>
              </precond>
              <type>
                <biotype>Protein</biotype>
                <biotype>DNA</biotype>
                <datatype>
                  <class>Alignment</class>
                </datatype>
                <dataFormat>CLUSTAL</dataFormat>
                <dataFormat>FASTA</dataFormat>
                <card>1</card>
              </type>
              <format>
                <code proglang="perl">" -infile=$value"</code>
                <code proglang="python">" -infile=" + str( value )</code>
              </format>
              <!-- Both input kinds map to the same -infile option, hence the exclusivity check. -->
              <ctrl>
                <message>
                  <text lang="en">Can not handle both Sequence and Alignment at the same time</text>
                </message>
                <code proglang="perl">not $sequences_input</code>
                <code proglang="python">not sequences_input</code>
              </ctrl>
              <comment>
                <text lang="en"> When the sequences are aligned (all sequences
              have the same length and at least one sequence has at least one
              gap)</text>
              </comment>
            </parameter>
    
     </parameters>
    </paragraph>
    
    <paragraph>
      <name>general</name>
      <prompt lang="en">General settings</prompt>
      <argpos>2</argpos>
      <parameters>
        <!-- quicktree: mandatory Choice between "slow" (default) and "fast".
             Only "fast" adds " -quicktree" to the command line; "slow" adds nothing.
             The fastpw and slowpw paragraphs test this value in their preconds. -->
        <parameter ismandatory="1" issimple="1">
          <name>quicktree</name>
          <prompt lang="en">Toggle Slow/Fast pairwise alignments (-quicktree)</prompt>
          <type>
            <datatype>
              <class>Choice</class>
            </datatype>
          </type>
          <vdef>
            <value>slow</value>
          </vdef>
          <vlist>
            <velem>
              <value>slow</value>
              <label>Slow</label>
            </velem>
            <velem>
              <value>fast</value>
              <label>Fast</label>
            </velem>
          </vlist>
          <format>
            <code proglang="perl">($value eq "fast") ? " -quicktree" : ""</code>
            <code proglang="python">( "" , " -quicktree")[ value == "fast"]</code>
          </format>
          <comment>
            <text lang="en">slow: by dynamic programming (slow but accurate)</text>
            <text lang="en">fast: method of Wilbur and Lipman (extremely fast but approximate)</text>
          </comment>
        </parameter>
        <!-- typeseq: Choice of auto (default), protein or dna.
             The format code adds " -type=<value>" whenever the value is defined.
             The "auto" entry is flagged undef="1"; presumably that makes the framework
             hand an undefined value to the format code for it (to be confirmed).
             The *_prot and *_dna sub-paragraphs test this value in their preconds. -->
        <parameter>
          <name>typeseq</name>
          <prompt lang="en">Protein or DNA (-type)</prompt>
          <type>
            <datatype>
              <class>Choice</class>
            </datatype>
          </type>
          <vdef>
            <value>auto</value>
          </vdef>
          <vlist>
            <velem undef="1">
              <value>auto</value>
              <label>Automatic</label>
            </velem>
            <velem>
              <value>protein</value>
              <label>Protein</label>
            </velem>
            <velem>
              <value>dna</value>
              <label>DNA</label>
            </velem>
          </vlist>
          <format>
            <code proglang="perl">(defined $value) ? " -type=$value" : ""</code>
            <code proglang="python">("", " -type="+str(value))[value is not None]</code>
          </format>
        </parameter>
      </parameters>
    </paragraph>
    <paragraph>
      <name>multalign</name>
      <prompt lang="en">Multiple Alignments parameters</prompt>
      <argpos>3</argpos>
      <comment>
        <text lang="en">Multiple alignments are carried out in 3 stages :</text>
        <text lang="en">1) all sequences are compared to each other (pairwise alignments);</text>
        <text lang="en">2) a dendrogram (like a phylogenetic tree) is constructed, describing the approximate groupings of the sequences by similarity (stored in a file).</text>
        <text lang="en">3) the final multiple alignment is carried out, using the dendrogram as a guide.</text>
        <text lang="en">Pairwise alignment parameters control the speed/sensitivity of the initial alignments.</text>
        <text lang="en">Multiple alignment parameters control the gaps in the final multiple alignments.</text>
      </comment>
      <parameters>
        <!-- gapopen: Float, default 10.00. Adds " -gapopen=<value>" only when the
             value is defined and differs from the default. -->
        <parameter>
          <name>gapopen</name>
          <prompt lang="en">Gap opening penalty (-gapopen)</prompt>
          <type>
            <datatype>
              <class>Float</class>
            </datatype>
          </type>
          <vdef>
            <value>10.00</value>
          </vdef>
          <format>
            <code proglang="perl">(defined $value and $value != $vdef) ? " -gapopen=$value" : ""</code>
            <code proglang="python">( "" , " -gapopen=" + str( value ))[ value is not None and value != vdef ]</code>
          </format>
        </parameter>
        <!-- gapext: Float, default 0.20. Adds " -gapext=<value>" only when the
             value is defined and differs from the default. -->
        <parameter>
          <name>gapext</name>
          <prompt lang="en">Gap extension penalty (-gapext)</prompt>
          <type>
            <datatype>
              <class>Float</class>
            </datatype>
          </type>
          <vdef>
            <value>0.20</value>
          </vdef>
          <format>
            <code proglang="perl">(defined $value and $value != $vdef) ? " -gapext=$value" : ""</code>
            <code proglang="python">( "" , " -gapext=" + str( value ))[ value is not None and value != vdef ]</code>
          </format>
        </parameter>
        <!-- endgaps: Boolean, default 0. A true value adds the bare flag " -endgaps";
             a false value adds nothing. The Python form indexes a two-item tuple with
             the value itself, so it relies on the value being a boolean/0/1. -->
        <parameter>
          <name>endgaps</name>
          <prompt lang="en">No end gap separation penalty (-endgaps)</prompt>
          <type>
            <datatype>
              <class>Boolean</class>
            </datatype>
          </type>
          <vdef>
            <value>0</value>
          </vdef>
          <format>
            <code proglang="perl">($value) ? " -endgaps" : ""</code>
            <code proglang="python">( "" ," -endgaps" )[ value ]</code>
          </format>
          <comment>
            <text lang="en">End gap separation treats end gaps just
		like internal gaps for the purposes of avoiding gaps that
		are too close (set by GAP SEPARATION DISTANCE above). If
		you turn this off, end gaps will be ignored for this
		purpose. This is useful when you wish to align fragments
		where the end gaps are not biologically meaningful.</text>
          </comment>
        </parameter>
        <!-- gapdist: Integer, default 8. Adds " -gapdist=<value>" only when the
             value is defined and differs from the default. -->
        <parameter>
          <name>gapdist</name>
          <prompt lang="en">Gap separation penalty range (-gapdist)</prompt>
          <type>
            <datatype>
              <class>Integer</class>
            </datatype>
          </type>
          <vdef>
            <value>8</value>
          </vdef>
          <format>
            <code proglang="perl">(defined $value and $value != $vdef) ? " -gapdist=$value" : ""</code>
            <code proglang="python">( "" , " -gapdist=" + str( value ))[ value is not None and value != vdef]</code>
          </format>
          <comment>
            <text lang="en">Gap separation distance tries to decrease
		the chances of gaps being too close to each other. Gaps
		that are less than this distance apart are penalised more
		than other gaps. This does not prevent close gaps; it makes
		them less frequent, promoting a block-like appearance of
		the alignment.</text>
          </comment>
        </parameter>
        <!-- maxdiv: Integer percentage, default 30. Adds " -maxdiv=<value>" only when
             the value is defined and differs from the default. -->
        <parameter>
          <name>maxdiv</name>
          <prompt lang="en">Delay divergent sequences : % ident. for delay (-maxdiv)</prompt>
          <type>
            <datatype>
              <class>Integer</class>
            </datatype>
          </type>
          <vdef>
            <value>30</value>
          </vdef>
          <format>
            <code proglang="perl">(defined $value and $value != $vdef) ? " -maxdiv=$value" : ""</code>
            <code proglang="python">( "" , " -maxdiv=" + str( value ))[ value is not None and value != vdef ]</code>
          </format>
          <comment>
            <text lang="en">Delays the alignment of the most distantly
		related sequences until after the most closely related
		sequences have been aligned. The setting shows the percent
		identity level required to delay the addition of a
		sequence; sequences that are less identical than this level
		to any other sequences will be aligned later.</text>
          </comment>
        </parameter>
        <!-- newtree: optional Filename with no default. When defined it adds
             " -newtree=<value>". The newtreefile output parameter reuses this value
             both as its precond and as the file name to collect. -->
        <parameter>
          <name>newtree</name>
          <prompt lang="en">File for new guide tree (-newtree)</prompt>
          <type>
            <datatype>
              <class>Filename</class>
            </datatype>
          </type>
          <format>
            <code proglang="perl">(defined $value) ? " -newtree=$value" : ""</code>
            <code proglang="python">( "" , " -newtree=" + str( value ))[value is not None]</code>
          </format>
        </parameter>
        <!-- newtreefile: output parameter (isout="1") describing the NEWICK tree file.
             It only applies when newtree is defined, and the file to collect is the
             one named by newtree. It has no format element, so it contributes nothing
             to the command line itself. -->
        <parameter isout="1">
          <name>newtreefile</name>
          <!-- lang="en" added so this prompt is tagged like every other prompt in the file. -->
          <prompt lang="en">Output tree</prompt>
          <type>
            <datatype>
              <class>Tree</class>
            </datatype>
            <dataFormat>NEWICK</dataFormat>
          </type>
          <precond>
            <code proglang="perl">defined $newtree</code>
            <code proglang="python">newtree is not None</code>
          </precond>
          <filenames>
            <code proglang="perl">$newtree</code>
            <code proglang="python">newtree</code>
          </filenames>
        </parameter>
        <!-- usetree: optional Tree input in NEWICK format with no default. When
             defined it adds " -usetree=<value>". -->
        <parameter>
          <name>usetree</name>
          <prompt lang="en">File for old guide tree (-usetree)</prompt>
          <type>
            <datatype>
              <class>Tree</class>
            </datatype>
            <dataFormat>NEWICK</dataFormat>
          </type>
          <format>
            <code proglang="perl">(defined $value) ? " -usetree=$value" : ""</code>
            <code proglang="python">( "" ," -usetree=" + str( value ))[value is not None]</code>
          </format>
          <comment>
            <text lang="en">You can give a previously computed tree (.dnd file) - on the same data</text>
          </comment>
        </parameter>
        <paragraph>
          <name>multalign_prot</name>
          <prompt lang="en">Protein parameters</prompt>
          <precond>
            <code proglang="perl">$typeseq eq "protein"</code>
            <code proglang="python">typeseq == "protein"</code>
          </precond>
          <parameters>
            <!-- matrix: Choice of protein weight matrix series (gonnet, blosum, pam, id),
                 default gonnet. Adds " -matrix=<value>" only when the value is defined
                 and differs from the default. Lives in the multalign_prot paragraph,
                 whose precond requires typeseq to be "protein". -->
            <parameter>
              <name>matrix</name>
              <prompt lang="en">Protein weight matrix (-matrix)</prompt>
              <type>
                <datatype>
                  <class>Choice</class>
                </datatype>
              </type>
              <vdef>
                <value>gonnet</value>
              </vdef>
              <vlist>
                <velem>
                  <value>gonnet</value>
                  <label>Gonnet series</label>
                </velem>
                <velem>
                  <value>blosum</value>
                  <label>BLOSUM series</label>
                </velem>
                <velem>
                  <value>pam</value>
                  <label>PAM series</label>
                </velem>
                <velem>
                  <value>id</value>
                  <label>Identity matrix</label>
                </velem>
              </vlist>
              <format>
                <code proglang="perl">(defined $value and $value ne $vdef) ? " -matrix=$value" : ""</code>
                <code proglang="python">("", " -matrix="+str(value))[value is not None and value!=vdef]</code>
              </format>
              <comment>
                <text lang="en">There are three 'in-built' series of weight
		matrices offered. Each consists of several matrices which
		work differently at different evolutionary distances. To
		see the exact details, read the documentation. Crudely, we
		store several matrices in memory, spanning the full range
		of amino acid distance (from almost identical sequences to
		highly divergent ones). For very similar sequences, it is
		best to use a strict weight matrix which only gives a high
		score to identities and the most favoured conservative
		substitutions. For more divergent sequences, it is
		appropriate to use 'softer' matrices which give a high
		score to many other frequent substitutions.</text>
                <text lang="en">BLOSUM (Henikoff). These matrices appear to
		be the best available for carrying out data base similarity
		(homology searches). The matrices used are: Blosum80, 62,
		40 and 30.</text>
                <text lang="en">The Gonnet Pam 250 matrix has been reported
		as the best single matrix for alignment, if you only choose
		one matrix. Our experience with profile database searches
		is that the Gonnet series is unambiguously superior to the
		Blosum series at high divergence. However, we did not get
		the series to perform systematically better than the Blosum
		series in Clustal W (communication of the authors).</text>
                <text lang="en">PAM (Dayhoff). These have been extremely
		widely used since the late '70s. We use the PAM 120, 160,
		250 and 350 matrices.</text>
              </comment>
            </parameter>
            <!-- negative: Boolean, default 0. A true value adds the bare flag
                 " -negative"; a false value adds nothing. -->
            <parameter>
              <name>negative</name>
              <prompt lang="en">Negative values in matrix ? (-negative)</prompt>
              <type>
                <datatype>
                  <class>Boolean</class>
                </datatype>
              </type>
              <vdef>
                <value>0</value>
              </vdef>
              <format>
                <code proglang="perl">($value) ? " -negative" : ""</code>
                <code proglang="python">( "" , " -negative" )[ value ]</code>
              </format>
            </parameter>
            <!-- pgap: Boolean. A true value adds the bare flag " -nopgap"; a false value
                 adds nothing.
                 NOTE(review): the default is 1, so with this format code " -nopgap" is
                 emitted unless the user unticks the box, i.e. residue specific gap
                 penalties are switched off by default whenever this paragraph applies.
                 Confirm that this default is intended (the sibling Boolean flags
                 endgaps and negative default to 0). -->
            <parameter>
              <name>pgap</name>
              <prompt lang="en">Residue specific gaps off (-nopgap)</prompt>
              <type>
                <datatype>
                  <class>Boolean</class>
                </datatype>
              </type>
              <vdef>
                <value>1</value>
              </vdef>
              <format>
                <code proglang="perl">($value) ? " -nopgap" : ""</code>
                <code proglang="python">( "" , " -nopgap" )[ value ]</code>
              </format>
              <comment>
                <text lang="en">Residue specific penalties are amino acid
		specific gap penalties that reduce or increase the gap
		opening penalties at each position in the alignment or
		sequence. As an example, positions that are rich in glycine
		are more likely to have an adjacent gap than positions that
		are rich in valine.</text>
                <text lang="en">Table of residue specific gap modification
		factors:</text>
                <text lang="en">A 1.13 M 1.29</text>
                <text lang="en">C 1.13 N 0.63</text>
                <text lang="en">D 0.96 P 0.74</text>
                <text lang="en">E 1.31 Q 1.07</text>
                <text lang="en">F 1.20 R 0.72</text>
                <text lang="en">G 0.61 S 0.76</text>
                <text lang="en">H 1.00 T 0.89</text>
                <text lang="en">I 1.32 V 1.25</text>
                <text lang="en">K 0.96 Y 1.00</text>
                <text lang="en">L 1.21 W 1.23</text>
                <text lang="en">The values are normalised around a mean value of 1.0 for H. The lower the value, the greater the chance of having an adjacent gap. These are derived from the original table of relative frequencies of gaps adjacent to each residue (12) by subtraction from 2.0.</text>
              </comment>
            </parameter>
            <!-- hgap: Boolean. A true value adds the bare flag " -nohgap"; a false value
                 adds nothing.
                 NOTE(review): the default is 1, so with this format code " -nohgap" is
                 emitted unless the user unticks the box, i.e. hydrophilic gap penalties
                 are switched off by default whenever this paragraph applies. Confirm
                 that this default is intended (the sibling Boolean flags endgaps and
                 negative default to 0). -->
            <parameter>
              <name>hgap</name>
              <prompt lang="en">Hydrophilic gaps off (-nohgap)</prompt>
              <type>
                <datatype>
                  <class>Boolean</class>
                </datatype>
              </type>
              <vdef>
                <value>1</value>
              </vdef>
              <format>
                <code proglang="perl">($value) ? " -nohgap" : ""</code>
                <code proglang="python">( "" , " -nohgap" )[ value ]</code>
              </format>
              <comment>
                <text lang="en">Hydrophilic gap penalties are used to
		increase the chances of a gap within a run (5 or more
		residues) of hydrophilic amino acids; these are likely to
		be loop or random coil regions where gaps are more
		common. The residues that are 'considered' to be
		hydrophilic are set by menu item 3.</text>
              </comment>
            </parameter>
            <!-- hgapresidues: MultipleChoice over the 20 amino acid one-letter codes;
                 the default selection is R N D Q E G K P S. The empty separator
                 element is presumably what makes the selected letters get joined
                 without any delimiter (to be confirmed against the framework).
                 When the selection is non-empty and differs from the default, the
                 format adds  -hgapresidues="<value>"  with the value in double quotes. -->
            <parameter>
              <name>hgapresidues</name>
              <prompt lang="en">Hydrophilic residues list (-hgapresidues)</prompt>
              <type>
                <datatype>
                  <class>MultipleChoice</class>
                </datatype>
              </type>
              <vdef>
                <value>R</value>
                <value>N</value>
                <value>D</value>
                <value>Q</value>
                <value>E</value>
                <value>G</value>
                <value>K</value>
                <value>P</value>
                <value>S</value>
              </vdef>
              <vlist>
                <velem>
                  <value>A</value>
                  <label>A</label>
                </velem>
                <velem>
                  <value>R</value>
                  <label>R</label>
                </velem>
                <velem>
                  <value>N</value>
                  <label>N</label>
                </velem>
                <velem>
                  <value>D</value>
                  <label>D</label>
                </velem>
                <velem>
                  <value>C</value>
                  <label>C</label>
                </velem>
                <velem>
                  <value>Q</value>
                  <label>Q</label>
                </velem>
                <velem>
                  <value>E</value>
                  <label>E</label>
                </velem>
                <velem>
                  <value>G</value>
                  <label>G</label>
                </velem>
                <velem>
                  <value>H</value>
                  <label>H</label>
                </velem>
                <velem>
                  <value>I</value>
                  <label>I</label>
                </velem>
                <velem>
                  <value>L</value>
                  <label>L</label>
                </velem>
                <velem>
                  <value>K</value>
                  <label>K</label>
                </velem>
                <velem>
                  <value>M</value>
                  <label>M</label>
                </velem>
                <velem>
                  <value>F</value>
                  <label>F</label>
                </velem>
                <velem>
                  <value>P</value>
                  <label>P</label>
                </velem>
                <velem>
                  <value>S</value>
                  <label>S</label>
                </velem>
                <velem>
                  <value>T</value>
                  <label>T</label>
                </velem>
                <velem>
                  <value>W</value>
                  <label>W</label>
                </velem>
                <velem>
                  <value>Y</value>
                  <label>Y</label>
                </velem>
                <velem>
                  <value>V</value>
                  <label>V</label>
                </velem>
              </vlist>
              <separator/>
              <format>
                <!-- Perl: the inner double quotes are escaped with a single backslash;
                     a doubled backslash would end the string literal early. -->
                <code proglang="perl">($value and $value ne $vdef) ? " -hgapresidues=\"$value\"" : ""</code>
                <!-- Python: the selector is wrapped in bool() because "value and ..."
                     yields None or "" for an empty selection, and neither is a valid
                     tuple index (TypeError). -->
                <code proglang="python">( '' , ' -hgapresidues="%s"' % str(value) )[ bool( value and value != vdef ) ]</code>
              </format>
            </parameter>
          </parameters>
        </paragraph>
        <paragraph>
          <name>multalign_dna</name>
          <prompt lang="en">DNA parameters</prompt>
          <precond>
            <code proglang="perl">$typeseq eq "dna"</code>
            <code proglang="python">typeseq == "dna"</code>
          </precond>
          <parameters>
            <!-- dnamatrix: Choice between iub (default) and clustalw. Adds
                 " -dnamatrix=<value>" only when the value is defined and differs from
                 the default. Lives in the multalign_dna paragraph, whose precond
                 requires typeseq to be "dna". -->
            <parameter>
              <name>dnamatrix</name>
              <prompt lang="en">DNA weight matrix (-dnamatrix)</prompt>
              <type>
                <datatype>
                  <class>Choice</class>
                </datatype>
              </type>
              <vdef>
                <value>iub</value>
              </vdef>
              <vlist>
                <velem>
                  <value>iub</value>
                  <label>IUB</label>
                </velem>
                <velem>
                  <value>clustalw</value>
                  <label>CLUSTALW</label>
                </velem>
              </vlist>
              <format>
                <code proglang="perl">(defined $value and $value ne $vdef) ? " -dnamatrix=$value" : ""</code>
                <code proglang="python">("", " -dnamatrix=" + str(value))[value is not None and value!=vdef]</code>
              </format>
              <comment>
                <text lang="en">1) IUB. This is the default scoring matrix
		used by BESTFIT for the comparison of nucleic acid
		sequences. X's and N's are treated as matches to any IUB
		ambiguity symbol. All matches score 1.9; all mismatches for
		IUB symbols score 0.</text>
                <text lang="en">2) CLUSTALW(1.6). The previous system used
		by ClustalW, in which matches score 1.0 and mismatches
		score 0. All matches for IUB symbols also score 0.</text>
              </comment>
            </parameter>
            <!-- transweight: Float, default 0.5. Adds " -transweight=<value>" only when
                 the value is defined and differs from the default. The prompt states a
                 0 to 1 range, but no ctrl element in this parameter enforces it. -->
            <parameter>
              <name>transweight</name>
              <prompt lang="en">Transitions weight (between 0 and 1) (-transweight)</prompt>
              <type>
                <datatype>
                  <class>Float</class>
                </datatype>
              </type>
              <vdef>
                <value>0.5</value>
              </vdef>
              <format>
                <code proglang="perl">(defined $value and $value != $vdef) ? " -transweight=$value" : ""</code>
                <code proglang="python">( "" , " -transweight=" + str( value ) )[ value is not None and value != vdef ]</code>
              </format>
              <comment>
                <text lang="en">A weight of
		zero means that the transitions are scored as mismatches; a
		weight of 1 gives transitions the full match score. For
		distantly related DNA sequences, the weight should be near
		to zero; for closely related sequences it can be useful to
		assign a higher score.</text>
              </comment>
            </parameter>
          </parameters>
        </paragraph>
      </parameters>
    </paragraph>
    <paragraph>
      <name>fastpw</name>
      <prompt lang="en">Fast Pairwise Alignments parameters</prompt>
      <precond>
        <code proglang="perl">$quicktree eq "fast"</code>
        <code proglang="python">quicktree == "fast"</code>
      </precond>
      <argpos>4</argpos>
      <comment>
        <text lang="en">These similarity scores are calculated from fast,
	  approximate, global alignments, which are controlled by 4
	  parameters. 2 techniques are used to make these alignments very
	  fast: 1) only exactly matching fragments (k-tuples) are
	  considered; 2) only the 'best' diagonals (the ones with most
	  k-tuple matches) are used.</text>
      </comment>
      <parameters>
        <!-- ktuple: Integer, default 1. Adds " -ktuple=<value>" only when the value is
             defined and differs from the default.
             NOTE(review): this parameter carries its own argpos 2 although the
             enclosing fastpw paragraph declares argpos 4 and its siblings declare
             none; confirm that the override is intended. -->
        <parameter>
          <name>ktuple</name>
          <prompt lang="en">Word size (-ktuple)</prompt>
          <type>
            <datatype>
              <class>Integer</class>
            </datatype>
          </type>
          <vdef>
            <value>1</value>
          </vdef>
          <format>
            <code proglang="perl">(defined $value and $value != $vdef) ? " -ktuple=$value" : ""</code>
            <code proglang="python">( "" , " -ktuple=" + str( value ) )[value is not None and value != vdef ]</code>
          </format>
          <argpos>2</argpos>
          <comment>
            <text lang="en">K-TUPLE SIZE: This is the size of exactly matching fragment that is used. INCREASE for speed (max= 2 for proteins; 4 for DNA), DECREASE for sensitivity. For longer sequences (e.g. &gt;1000 residues) you may need to increase the default.</text>
          </comment>
        </parameter>
        <!-- topdiags: Integer, default 5. Adds " -topdiags=<value>" only when the value
             is defined and differs from the default. -->
        <parameter>
          <name>topdiags</name>
          <prompt lang="en">Number of best diagonals (-topdiags)</prompt>
          <type>
            <datatype>
              <class>Integer</class>
            </datatype>
          </type>
          <vdef>
            <value>5</value>
          </vdef>
          <format>
            <code proglang="perl">(defined $value and $value != $vdef) ? " -topdiags=$value" : ""</code>
            <code proglang="python">( "" , " -topdiags=" + str( value ))[value is not None and value != vdef ]</code>
          </format>
          <comment>
            <text lang="en">The number of k-tuple matches on each
		diagonal (in an imaginary dot-matrix plot) is
		calculated. Only the best ones (with most matches) are used
		in the alignment. This parameter specifies how
		many. Decrease for speed; increase for sensitivity.</text>
          </comment>
        </parameter>
        <!-- window: Integer, default 5. Adds " -window=<value>" only when the value is
             defined and differs from the default. -->
        <parameter>
          <name>window</name>
          <prompt lang="en">Window around best diags (-window)</prompt>
          <type>
            <datatype>
              <class>Integer</class>
            </datatype>
          </type>
          <vdef>
            <value>5</value>
          </vdef>
          <format>
            <code proglang="perl">(defined $value and $value != $vdef) ? " -window=$value" : ""</code>
            <code proglang="python">( "" , " -window=" + str( value ) )[ value is not None and value != vdef ]</code>
          </format>
          <comment>
            <text lang="en">WINDOW SIZE: This is the number of
		diagonals around each of the 'best' diagonals that will be
		used. Decrease for speed; increase for sensitivity</text>
          </comment>
        </parameter>
        <!-- pairgap: Float, default 3. Adds " -pairgap=<value>" only when the value is
             defined and differs from the default. -->
        <parameter>
          <name>pairgap</name>
          <prompt lang="en">Gap penalty (-pairgap)</prompt>
          <type>
            <datatype>
              <class>Float</class>
            </datatype>
          </type>
          <vdef>
            <value>3</value>
          </vdef>
          <format>
            <code proglang="perl">(defined $value and $value != $vdef) ? " -pairgap=$value" : ""</code>
            <code proglang="python">( "" , " -pairgap=" + str( value ))[ value is not None and value != vdef ]</code>
          </format>
          <comment>
            <text lang="en">This is a penalty for each gap in the fast
		alignments. It has little effect on the speed or
		sensitivity except for extreme values.</text>
          </comment>
        </parameter>
        <!-- score: Choice between percent (default) and absolute. Adds
             " -score=<value>" only when the value is defined and differs from the
             default, in both the Perl and the Python form. -->
        <parameter>
          <name>score</name>
          <prompt lang="en">Percent or absolute score ? (-score)</prompt>
          <type>
            <datatype>
              <class>Choice</class>
            </datatype>
          </type>
          <vdef>
            <value>percent</value>
          </vdef>
          <vlist>
            <velem>
              <value>percent</value>
              <label>Percent</label>
            </velem>
            <velem>
              <value>absolute</value>
              <label>Absolute</label>
            </velem>
          </vlist>
          <format>
            <code proglang="perl">(defined $value and $value ne $vdef) ? " -score=$value" : ""</code>
            <!-- The two tests must be joined with "and": with "or" the selector is
                 always true, so the option was emitted even for the default value
                 or for an undefined one. -->
            <code proglang="python">( "" , " -score=" +str( value ) )[value is not None and value != vdef]</code>
          </format>
        </parameter>
      </parameters>
    </paragraph>
    <paragraph>
      <name>slowpw</name>
      <prompt lang="en">Slow Pairwise Alignments parameters</prompt>
      <precond>
        <code proglang="perl">$quicktree eq "slow"</code>
        <code proglang="python">quicktree == "slow"</code>
      </precond>
      <argpos>4</argpos>
      <comment>
        <text lang="en">These parameters do not have any effect on the
	  speed of the alignments. They are used to give initial alignments
	  which are then rescored to give percent identity scores. These %
	  scores are the ones which are displayed on the screen. The scores
	  are converted to distances for the trees.</text>
      </comment>
      <parameters>
        <!-- pwgapopen: Float, default 10.00. Adds " -pwgapopen=<value>" only when the
             value is defined and differs from the default. -->
        <parameter>
          <name>pwgapopen</name>
          <prompt lang="en">Gap opening penalty (-pwgapopen)</prompt>
          <type>
            <datatype>
              <class>Float</class>
            </datatype>
          </type>
          <vdef>
            <value>10.00</value>
          </vdef>
          <format>
            <code proglang="perl">(defined $value and $value != $vdef) ? " -pwgapopen=$value" : ""</code>
            <code proglang="python">( "" , " -pwgapopen=" + str( value ) )[ value is not None and value != vdef ]</code>
          </format>
        </parameter>
        <!-- pwgapext: Float, default 0.10. Adds " -pwgapext=<value>" only when the
             value is defined and differs from the default. -->
        <parameter>
          <name>pwgapext</name>
          <prompt lang="en">Gap extension penalty (-pwgapext)</prompt>
          <type>
            <datatype>
              <class>Float</class>
            </datatype>
          </type>
          <vdef>
            <value>0.10</value>
          </vdef>
          <format>
            <code proglang="perl">(defined $value and $value != $vdef) ? " -pwgapext=$value" : ""</code>
            <code proglang="python">( "" , " -pwgapext=" + str( value ) )[ value is not None and value != vdef ]</code>
          </format>
        </parameter>
        <!-- slowpw_prot: protein-only sub-section; its precond requires typeseq to equal "protein".
             It holds a single parameter, pwmatrix: a Choice among blosum, gonnet, pam and id
             (default gonnet). " -pwmatrix=VALUE" is emitted only when a value is set and it is
             not the default. -->
        <paragraph>
          <name>slowpw_prot</name>
          <prompt lang="en">Protein parameters</prompt>
          <precond>
            <code proglang="perl">$typeseq eq "protein"</code>
            <code proglang="python">typeseq == "protein"</code>
          </precond>
          <parameters>
            <parameter>
              <name>pwmatrix</name>
              <prompt lang="en">Protein weight matrix (-pwmatrix)</prompt>
              <type>
                <datatype>
                  <class>Choice</class>
                </datatype>
              </type>
              <vdef>
                <value>gonnet</value>
              </vdef>
              <vlist>
                <velem>
                  <value>blosum</value>
                  <label>BLOSUM30 (Henikoff)</label>
                </velem>
                <velem>
                  <value>gonnet</value>
                  <label>Gonnet 250</label>
                </velem>
                <velem>
                  <value>pam</value>
                  <label>PAM350 (Dayhoff)</label>
                </velem>
                <velem>
                  <value>id</value>
                  <label>Identity matrix</label>
                </velem>
              </vlist>
              <format>
                <code proglang="perl">(defined $value and $value ne $vdef) ? " -pwmatrix=$value" : ""</code>
                <code proglang="python">( "" , " -pwmatrix=" + str(value) )[value is not None and value != vdef ]</code>
              </format>
              <comment>
                <text lang="en">The scoring table which describes the
		similarity of each amino acid to each other. For DNA, an
		identity matrix is used.</text>
                <text lang="en">BLOSUM (Henikoff). These matrices appear to
		be the best available for carrying out data base similarity
		(homology searches). The matrices used are: Blosum80, 62,
		40 and 30.</text>
                <text lang="en">The Gonnet Pam 250 matrix has been reported
		as the best single matrix for alignment, if you only choose
		one matrix. Our experience with profile database searches
		is that the Gonnet series is unambiguously superior to the
		Blosum series at high divergence. However, we did not get
		the series to perform systematically better than the Blosum
		series in Clustal W (communication of the authors).</text>
                <text lang="en">PAM (Dayhoff). These have been extremely
		widely used since the late '70s. We use the PAM 120, 160,
		250 and 350 matrices.</text>
              </comment>
            </parameter>
          </parameters>
        </paragraph>
        <!-- slowpw_dna: DNA-only sub-section; its precond requires typeseq to equal "dna".
             It holds a single parameter, pwdnamatrix: a Choice between iub and clustalw
             (default iub). " -pwdnamatrix=VALUE" is emitted only when a value is set and it is
             not the default. -->
        <paragraph>
          <name>slowpw_dna</name>
          <prompt lang="en">DNA parameters</prompt>
          <precond>
            <code proglang="perl">$typeseq eq "dna"</code>
            <code proglang="python">typeseq == "dna"</code>
          </precond>
          <parameters>
            <parameter>
              <name>pwdnamatrix</name>
              <prompt lang="en">DNA weight matrix (-pwdnamatrix)</prompt>
              <type>
                <datatype>
                  <class>Choice</class>
                </datatype>
              </type>
              <vdef>
                <value>iub</value>
              </vdef>
              <vlist>
                <velem>
                  <value>iub</value>
                  <label>IUB</label>
                </velem>
                <velem>
                  <value>clustalw</value>
                  <label>CLUSTALW</label>
                </velem>
              </vlist>
              <format>
                <code proglang="perl">(defined $value and $value ne $vdef) ? " -pwdnamatrix=$value" : ""</code>
                <code proglang="python">( "" , " -pwdnamatrix=" + str(value) )[ value is not None and value != vdef ]</code>
              </format>
              <comment>
                <text lang="en">For DNA, a single matrix (not a series) is
		used. Two hard-coded matrices are available:</text>
                <text lang="en">1) IUB. This is the default scoring matrix
		used by BESTFIT for the comparison of nucleic acid
		sequences. X's and N's are treated as matches to any IUB
		ambiguity symbol. All matches score 1.9; all mismatches for
		IUB symbols score 0.</text>
                <text lang="en">2) CLUSTALW(1.6). The previous system used
		by ClustalW, in which matches score 1.0 and mismatches
		score 0. All matches for IUB symbols also score 0.</text>
              </comment>
            </parameter>
          </parameters>
        </paragraph>
      </parameters>
    </paragraph>
    <paragraph>
      <name>outputparam</name>
      <prompt lang="en">Output parameters</prompt>
      <argpos>5</argpos>
      <parameters>
        <!-- outputformat: Choice of alignment output format; " -output=VALUE" is emitted whenever
             the value is defined. The first entry (value "null", flagged undef="1") is labelled
             CLUSTAL and is also the declared default.
             NOTE(review): sibling parameters test outputformat against undefined/None for the
             CLUSTAL case, so this entry presumably resolves to an undefined value and no -output
             flag is produced; confirm against the Mobyle schema. -->
        <parameter>
          <name>outputformat</name>
          <prompt lang="en">Output format (-output)</prompt>
          <type>
            <datatype>
              <class>Choice</class>
            </datatype>
          </type>
          <vdef>
            <value>null</value>
          </vdef>
          <vlist>
            <velem undef="1">
              <value>null</value>
              <label>CLUSTAL</label>
            </velem>
            <velem>
              <value>FASTA</value>
              <label>FASTA</label>
            </velem>
            <velem>
              <value>GCG</value>
              <label>GCG</label>
            </velem>
            <velem>
              <value>GDE</value>
              <label>GDE</label>
            </velem>
            <velem>
              <value>PHYLIPI</value>
              <label>PHYLIP</label>
            </velem>
            <velem>
              <value>PIR</value>
              <label>PIR/NBRF</label>
            </velem>
            <velem>
              <value>NEXUS</value>
              <label>NEXUS</label>
            </velem>
          </vlist>
          <format>
            <code proglang="perl">(defined $value ) ? " -output=$value" : ""</code>
            <code proglang="python">( "" , " -output=" + str( value) )[ value is not None ]</code>
          </format>
        </parameter>
        <!-- seqnos: Boolean switch (default 0) for sequence numbers in the output file.
             Its precond restricts it to the case where outputformat is undefined.
             " -seqnos=on" is emitted only when the value is set and differs from the default. -->
        <parameter>
          <name>seqnos</name>
          <prompt lang="en">Output sequence numbers in the output file (for clustalw output only) (-seqnos)</prompt>
          <type>
            <datatype>
              <class>Boolean</class>
            </datatype>
          </type>
          <precond>
            <code proglang="perl">not defined $outputformat</code>
            <code proglang="python">outputformat is None</code>
          </precond>
          <vdef>
            <value>0</value>
          </vdef>
          <format>
            <code proglang="perl">(defined $value and $value != $vdef) ? " -seqnos=on" : ""</code>
            <code proglang="python">( "" , " -seqnos=on")[ value is not None and value != vdef]</code>
          </format>
        </parameter>
        <!-- outorder: Choice between "input" and "aligned" (default aligned) for the order of the
             sequences in the result. " -outorder=VALUE" is emitted only when the value is set and
             differs from the default. -->
        <parameter>
          <name>outorder</name>
          <prompt lang="en">Result order (-outorder)</prompt>
          <type>
            <datatype>
              <class>Choice</class>
            </datatype>
          </type>
          <vdef>
            <value>aligned</value>
          </vdef>
          <vlist>
            <velem>
              <value>input</value>
              <label>Input</label>
            </velem>
            <velem>
              <value>aligned</value>
              <label>Aligned</label>
            </velem>
          </vlist>
          <format>
            <code proglang="perl">(defined $value and $value ne $vdef) ? " -outorder=$value" : ""</code>
            <code proglang="python">( "" , " -outorder=" + str(value))[ value is not None and value != vdef ]</code>
          </format>
        </parameter>
        <!-- outfile: optional name for the alignment file (Filename, no default).
             " -outfile=VALUE" is emitted whenever a value is set. The filenames expressions of
             clustalaligfile, aligfile and seqfile use this value, when defined, in place of
             their wildcard masks. -->
        <parameter>
          <name>outfile</name>
          <prompt lang="en">Sequence alignment file name (-outfile)</prompt>
          <type>
            <datatype>
              <class>Filename</class>
            </datatype>
          </type>
          <format>
            <code proglang="perl">(defined $value) ? " -outfile=$value" : ""</code>
            <code proglang="python">( "" , " -outfile=" + str( value))[ value is not None ]</code>
          </format>
        </parameter>
        <!-- clustalaligfile: output parameter (isout) for the alignment in CLUSTAL format.
             Its precond restricts it to the case where outputformat is undefined.
             The result file is outfile when that is set, otherwise anything matching "*.aln".
             The prompt carries lang="en" like the other prompts in this file. -->
        <parameter isout="1">
          <name>clustalaligfile</name>
          <prompt lang="en">Alignment file</prompt>
          <type>
            <datatype>
              <class>Alignment</class>
            </datatype>
            <dataFormat>CLUSTAL</dataFormat>
          </type>
          <precond>
            <code proglang="perl">not defined $outputformat</code>
            <code proglang="python">outputformat is None</code>
          </precond>
          <filenames>
            <code proglang="perl">(defined $outfile)? "$outfile":"*.aln"</code>
            <code proglang="python">("*.aln", str(outfile))[outfile is not None]</code>
          </filenames>
          <comment>
            <text lang="en">In the conservation line output in the clustal format alignment file, three characters are used:</text>
            <text lang="en">'*' indicates positions which have a single, fully conserved residue.</text>
            <text lang="en">':' indicates that one of the following 'strong' groups is fully conserved (STA,NEQK,NHQK,NDEQ,QHRK,MILV,MILF,HY,FYW).</text>
            <text lang="en">'.' indicates that one of the following 'weaker' groups is fully conserved (CSA,ATV,SAG,STNK,STPA,SGND,SNDEQK,NDEQHK,NEQHRK,FVLIM,HFY).</text>
            <text lang="en">These are all the positively scoring groups that occur in the Gonnet Pam250
matrix. The strong and weak groups are defined as strong score &gt;0.5 and weak
score =&lt;0.5 respectively.</text>
          </comment>
        </parameter>
        <!-- aligfile: output parameter (isout) for alignments written as NEXUS, GCG, PHYLIPI or
             FASTA (see precond); its dataFormat is taken from the outputformat parameter.
             Result file: outfile when that is set. Otherwise the perl code lists all four
             wildcard masks, while the python code picks the mask matching outputformat.
             The prompt carries lang="en" like the other prompts in this file. -->
        <parameter isout="1">
          <name>aligfile</name>
          <prompt lang="en">Alignment file</prompt>
          <type>
            <datatype>
              <class>Alignment</class>
            </datatype>
            <dataFormat>
              <ref param="outputformat"/>
            </dataFormat>
          </type>
          <precond>
            <code proglang="perl">$outputformat =~ /^(NEXUS|GCG|PHYLIPI|FASTA)$/</code>
            <code proglang="python">outputformat in [ "NEXUS", "GCG", "PHYLIPI","FASTA"]</code>
          </precond>
          <filenames>
            <code proglang="perl">(defined $outfile)? "$outfile":"*.fasta *.nxs *.phy *.msf"</code>
            <code proglang="python">{ "OUTFILE":outfile, "FASTA":"*.fasta", "NEXUS": "*.nxs", "PHYLIPI": "*.phy" , 'GCG': '*.msf' }[( "OUTFILE", outputformat)[outfile is None]]</code>
          </filenames>
        </parameter>
        
        <!-- seqfile: output parameter (isout) for results written as GDE or PIR (see precond).
             The dataFormat is NBRF when outputformat is PIR and GDE when it is GDE.
             Result file: outfile when that is set. Otherwise the perl code lists both wildcard
             masks, while the python code picks the mask matching outputformat.
             Fix: the python expression previously had an unterminated string literal
             ('*.pir without its closing quote), which made it a syntax error. -->
        <parameter isout="1">
          <name>seqfile</name>
          <prompt lang="en">Sequences file</prompt>
          <type>
            <datatype>
              <class>Sequence</class>
            </datatype>
            <dataFormat>
              <test param="outputformat" eq="PIR">NBRF</test>
              <test param="outputformat" eq="GDE">GDE</test>
            </dataFormat>
          </type>
          <precond>
            <code proglang="perl">$outputformat =~ /^(GDE|PIR)$/</code>
            <code proglang="python">outputformat in [ 'GDE', 'PIR' ]</code>
          </precond>
          <filenames>
            <code proglang="perl">(defined $outfile)? "$outfile":"*.gde *.pir"</code>
            <code proglang="python">{ "OUTFILE":outfile,  'GDE':'*.gde', 'PIR':'*.pir'}[( "OUTFILE", outputformat)[outfile is None]]</code>
          </filenames>
        </parameter>
        <!-- dndfile: output parameter (isout) for the tree file, declared as a Tree in NEWICK
             format and collected from files matching "*.dnd". Its precond restricts it to the
             case where the newtree parameter is undefined.
             The prompt carries lang="en" like the other prompts in this file. -->
        <parameter isout="1">
          <name>dndfile</name>
          <prompt lang="en">Tree file</prompt>
          <type>
            <datatype>
              <class>Tree</class>
            </datatype>
            <dataFormat>NEWICK</dataFormat>
          </type>
          <precond>
            <code proglang="perl">not defined $newtree</code>
            <code proglang="python">newtree is None</code>
          </precond>
          <filenames>
            <code proglang="perl">"*.dnd"</code>
            <code proglang="python">"*.dnd"</code>
          </filenames>
        </parameter>
        <!-- gde_lower: Boolean switch (default 0), available only when outputformat is "GDE".
             When true, " -case=upper" is added to the command line; otherwise nothing is emitted.
             NOTE(review): the parameter is named gde_lower although the prompt and the emitted
             flag both refer to upper case; the name is kept as is.
             This parameter declares its own argpos (2), distinct from the enclosing paragraph's
             argpos (5). -->
        <parameter>
          <name>gde_lower</name>
          <prompt lang="en">Upper case (for GDE output only) (-case)</prompt>
          <type>
            <datatype>
              <class>Boolean</class>
            </datatype>
          </type>
          <precond>
            <code proglang="perl">$outputformat eq "GDE"</code>
            <code proglang="python">outputformat == "GDE"</code>
          </precond>
          <vdef>
            <value>0</value>
          </vdef>
          <format>
            <code proglang="perl">($value) ? " -case=upper" : ""</code>
            <code proglang="python">( "" , " -case=upper" )[ value ]</code>
          </format>
          <argpos>2</argpos>
        </parameter>
      </parameters>
    </paragraph>
  </parameters>
</program>
<!-- mobyle-programs 5.1.2-1 / var / lib / mobyle / programs / clustalw-multialign.xml -->