/var/lib/mobyle/programs/signalp.xml

<?xml version='1.0' encoding='UTF-8'?>
<!-- XML Authors: Corinne Maufrais, Nicolas Joly and Bertrand Neron,             -->
<!-- 'Biological Software and Databases' Group, Institut Pasteur, Paris.         -->
<!-- Distributed under LGPLv2 License. Please refer to the COPYING.LIB document. -->
<program>
  <!-- Descriptive metadata: program name, version, documentation, references and categories. -->
  <head>
    <name>signalp</name>
    <version>4.0</version>
    <!-- XInclude of the file Entities/cbs_package.xml. -->
    <xi:include href="Entities/cbs_package.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
    <doc>
      <title>signalp</title>
      <description>
        <text lang="en"> predict signal peptides in proteins</text>
      </description>
      <sourcelink>http://www.cbs.dtu.dk/cgi-bin/nph-sw_request?signalp</sourcelink>
      <!-- Bibliography, newest publication first. -->
      <reference doi="10.1038/nmeth.1701">SignalP 4.0: discriminating signal peptides from transmembrane regions
       Thomas Nordahl Petersen, Søren Brunak, Gunnar von Heijne &amp; Henrik Nielsen
       Nature Methods, 8:785-786, 2011
      </reference>
      <reference>Improved prediction of signal peptides: SignalP 3.0.
        Jannick Dyrløv Bendtsen, Henrik Nielsen, Gunnar von Heijne and Søren Brunak.
        J. Mol. Biol., 340:783-795, 2004.
      </reference>
      <reference>Identification of prokaryotic and eukaryotic signal peptides and prediction of their cleavage sites.
        Henrik Nielsen, Jacob Engelbrecht, Søren Brunak and Gunnar von Heijne.
        Protein Engineering, 10:1-6, 1997.
      </reference>
      <reference>Prediction of signal peptides and signal anchors by a hidden Markov model.
        Henrik Nielsen and Anders Krogh.
        Proceedings of the Sixth International Conference on Intelligent Systems for Molecular Biology (ISMB 6),
        AAAI Press, Menlo Park, California, pp. 122-130, 1998.  
      </reference>
      <doclink>http://www.cbs.dtu.dk/services/SignalP/</doclink>
      <comment>
        <text lang="en">signalp  predicts  the  presence and location of signal peptide cleavage sites in amino acid sequences from
       different organisms: Gram-positive prokaryotes, Gram-negative prokaryotes, and eukaryotes.</text>
        <text lang="en">The  method  incorporates a prediction of cleavage sites and a signal peptide/non-signal peptide prediction based on a 
       combination of several artificial neural networks.</text>
      </comment>
    </doc>
    <!-- Classification categories for this program. -->
    <category>sequence:protein:motifs</category>
    <category>sequence:protein:pattern</category>
  </head>
  <parameters>
  
    <!-- Hidden command parameter: contributes the executable name " signalp " to the command line. -->
    <parameter iscommand="1" ishidden="1">
      <name>signalp</name>
      <type>
        <datatype>
          <class>String</class>
        </datatype>
      </type>
      <format>
        <code proglang="perl">" signalp "</code>
        <code proglang="python">" signalp "</code>
      </format>
    </parameter>
   
    <!-- Mandatory main input: a FASTA sequence file, appended to the command line at position 100.
         The Perl snippet previously ended with a stray double quote (" $value "") and was not a
         valid expression; it now mirrors the Python form (a space followed by the value). -->
    <parameter ismandatory="1" issimple="1" ismaininput="1">
      <name>sequence</name>
      <prompt lang="en">Input Sequence</prompt>
      <type>
        <datatype>
          <class>Sequence</class>
        </datatype>
        <dataFormat>FASTA</dataFormat>
      </type>
      <format>
        <code proglang="perl">" $value"</code>
        <code proglang="python">" " + str( value )</code>
      </format>
      <argpos>100</argpos>
      <!-- Example input: two protein sequences in FASTA format ('>' escaped as &gt;). -->
      <example>
&gt;IPI:IPI00000001.2 SWISS-PROT:O95793-1 TREMBL:A8K622;Q59F99  Isoform L ong of Double-stranded RNA-binding protein Staufen homolog 1
MSQVQVQVQNPSAALSGSQILNKNQSLLSQPLMSIPSTTSSLPSENAGRPIQNSALPSAS
ITSTSAAAESITPTVELNALCMKLGKKPMYKPVDPYSRMQSTYNYNMRGGAYPPRYFYPF
PVPPLLYQVELSVGGQQFNGKGKTRQAAKHDAAAKALRILQNEPLPERLEVNGRESEEEN
LNKSEISQVFEIALKRNLPVNFEVARESGPPHMKNFVTKVSVGEFVGEGEGKSKKISKKN
AAIAVLEELKKLPPLPAVERVKPRIKKKTKPIVKPQTSPEYGQGINPISRLAQIQQAKKE
KEPEYTLLTERGLPRRREFVMQVKVGNHTAEGTGTNKKVAKRNAAENMLEILGFKVPQAQ
PTKPALKSEEKTPIKKPGDGRKVTFFEPGSGDENGTSNKEDEFRMPYLSHQQLPAGILPM
VPEVAQAVGVSQGHHTKDFTRAAPNPAKATVTAMIARELLYGGTSPTAETILKNNISSGH
VPHGPLTRPSEQLDYLSRVQGFQVEYKDFPKNNKNEFVSLINCSSQPPLISHGIGKDVES
CHDMAALNILKLLSELDQQSTEMPRTGNGPMSVCGRC
&gt;IPI:IPI00000023.4 SWISS-PROT:P18507 TREMBL:B4DSA1 Gamma-aminobutyric acid receptor subunit gamma-2
MSSPNIWSTGSSVYSTPVFSQKMTVWILLLLSLYPGFTSQKSDDDYEDYASNKTWVLTPK
VPEGDVTVILNNLLEGYDNKLRPDIGVKPTLIHTDMYVNSIGPVNAINMEYTIDIFFAQT
WYDRRLKFNSTIKVLRLNSNMVGKIWIPDTFFRNSKKADAHWITTPNRMLRIWNDGRVLY
TLRLTIDAECQLQLHNFPMDEHSCPLEFSSYGYPREEIVYQWKRSSVEVGDTRSWRLYQF
SFVGLRNTTEVVKTTSGDYVVMSVYFDLSRRMGYFTIQTYIPCTLIVVLSWVSFWINKDA
VPARTSLGITTVLTMTTLSTIARKSLPKVSYVTAMDLFVSVCFIFVFSALVEYGTLHYFV
SNRKPSKDKDKKKKNPAPTIDIRPRSATIQMNNATHLQERDEEYGYECLDGKDCASFFCC
FEDCRTGAWRHGRIHIRIAKMDSYARIFFPTAFCLFNLVYWVSYLYL
      </example>
    </parameter>
    
    <!-- Mandatory organism type, passed as "-t <value>" at position 10.
         The "null" entry is the undefined placeholder shown before the user picks a type.
         The Perl snippet previously emitted " -t " with no value; it now appends $value so that
         it builds the same option as the Python snippet. -->
    <parameter ismandatory="1" issimple="1">
      <name>type</name>
      <prompt lang="en">Use networks and models trained on sequences from the specified type of organisms</prompt>
      <type>
        <datatype>
          <class>Choice</class>
        </datatype>
      </type>
      <vdef>
        <value>null</value>
      </vdef>
      <vlist>
        <velem undef="1">
          <value>null</value>
          <label>Choose a type of organism</label>
        </velem>
        <velem>
          <value>gram-</value>
          <label>Gram-negative bacteria</label>
        </velem>
        <velem>
          <value>gram+</value>
          <label>Gram-positive bacteria</label>
        </velem>
        <velem>
          <value>euk</value>
          <label>eukaryotes</label>
        </velem>
      </vlist>
      <format>
        <code proglang="perl">(defined $value)? " -t $value" : ""</code>
        <code proglang="python">" -t " + value</code>
      </format>
      <argpos>10</argpos>
    </parameter>

    <!-- Output format, passed as "-f <value>" only when it differs from the default ("short").
         Help text fixes: "sequnce" typo, man-page hyphenation artefact in "essentially",
         missing space before the emphasised default note, and "Signalp-TM" capitalisation. -->
    <parameter ismandatory="1">
      <name>format</name>
      <prompt lang="en">Produce output in the specified format.</prompt>
      <type>
        <datatype>
          <class>Choice</class>
        </datatype>
      </type>
      <vdef>
        <value>short</value>
      </vdef>
      <vlist>
        <velem>
          <value>short</value>
          <label>short</label>
        </velem>
        <velem>
          <value>long</value>
          <label>long</label>
        </velem>
        <velem>
          <value>all</value>
          <label>all</label>
        </velem>
        <velem>
          <value>summary</value>
          <label>summary</label>
        </velem>
      </vlist>
      <format>
        <code proglang="perl">(defined $value and $value ne $vdef)? " -f $value" : ""</code>
        <code proglang="python">( "" , " -f " + value)[ value is not None and value != vdef ]</code>
      </format>
      <comment>
        <div xmlns="http://www.w3.org/1999/xhtml">
          <p>The valid formats are:</p>
          <ul>
            <li><strong>short :</strong> Write only one line of concluding scores per sequence. Intended for
                analysis of large datasets where machine-readable output is required. <em>This is the default</em>.</li>
            <li><strong>long :</strong> Write the scores for each position in each sequence.</li>
            <li><strong>all :</strong> Write predictions for both SignalP-TM and SignalP-noTM networks. Five
                columns with cleavage site (CS) and Signal Peptide (SP) predictions
                for both SigP-noTM and SigP-TM methods and TM prediction for each
                position.</li>
            <li><strong>summary :</strong> Write only the concluding scores for each sequence. This is
                essentially the same information as the 'short' format.</li>
          </ul>
        </div>
      </comment>
      <argpos>10</argpos>
    </parameter>
    
    <!-- Optional plot generation, passed as "-g <value>" at position 20.
         The "null" entry is the undefined choice and produces no option. -->
    <parameter>
      <name>graphics</name>
      <prompt lang="en">generate graphics (-g).</prompt>
      <type>
        <datatype>
          <class>Choice</class>
        </datatype>
      </type>
      <vdef>
        <value>null</value>
      </vdef>
      <vlist>
        <velem undef="1">
          <value>null</value>
          <label>no graphics</label>
        </velem>
        <velem>
          <value>gif</value>
          <label>GIF</label>
        </velem>
        <velem>
          <value>gif+eps</value>
          <label>GIF and EPS</label>
        </velem>
      </vlist>
      <format>
        <code proglang="perl">( defined $value and $value ne $vdef) ? " -g $value" : ""</code>
        <code proglang="python">( "" , " -g "+str( value ) )[ bool( value ) ]</code>
      </format>
      <comment>
        <div xmlns="http://www.w3.org/1999/xhtml">
          <ul>
            <li><strong>gif :</strong>   Save  plots  in  Graphics Interchange Format (GIF) under the names 'plot.method.#.gif', where
                     method is nn or hmm, and # is the number of the input sequence.</li>
            <li><strong>gif+eps :</strong>  Save plots in both GIF and EPS formats as described above.</li>
          </ul>
        </div>
      </comment>
      <argpos>20</argpos>
    </parameter>

      <!-- Prediction method, passed as "-s <value>" at position 30 only when it differs from the default ("best"). -->
    <parameter>
      <name>Method</name>
      <prompt lang="en">Use the specified prediction method.</prompt>
      <type>
        <datatype>
          <class>Choice</class>
        </datatype>
      </type>
      <vdef>
        <value>best</value>
      </vdef>
      <vlist>
        <velem>
          <value>best</value>
          <label>best</label>
        </velem>
        <velem>
          <value>notm</value>
          <label>notm</label>
        </velem>
      </vlist>
      <format>
        <code proglang="perl">(defined $value and $value ne $vdef) ? " -s $value" : ""</code>
        <code proglang="python">( "" , " -s " + value)[ value is not None and value != vdef ]</code>
      </format>
      <comment>
        <div xmlns="http://www.w3.org/1999/xhtml">
          <p>Input sequences may include or not TM regions.</p>
          <ul>
            <li><strong>best :</strong>
                     The method decides which neural networks predictions  give  the  best
                     result  choosing  predictions  from either SignalP-TM or SignalP-noTM
                     networks. For 'gram+' organisms it is always SignalP-TM networks.<em>(default)</em></li>
            <li><strong>notm :</strong> The SignalP-noTM neural networks are specifically chosen.</li>
          </ul>
        </div>
      </comment>
      <argpos>30</argpos>
    </parameter>
     
     <!-- Optional D-cutoff for the noTM networks, passed as "-u <value>" at position 50.
          The Perl snippet previously emitted " -u" without the value and compared against a
          $vdef that this parameter does not define; it now mirrors the Python snippet.
          The prompt also gains the lang attribute used by the other prompts in this file. -->
    <parameter>
      <name>noTM_cutoff</name>
      <prompt lang="en">cutoff for noTM networks</prompt>
      <type>
        <datatype>
          <class>Float</class>
        </datatype>
      </type>
      <format>
        <code proglang="perl">(defined $value) ? " -u $value" : ""</code>
        <code proglang="python">( "" , " -u " + str( value ) )[ value is not None]</code>
      </format>
      <!-- Accept only values in the closed interval [0, 1]. -->
      <ctrl>
        <message>
          <text lang="en">the cutoff must be &gt;= 0 and &lt;= 1</text>
        </message>
        <code proglang="python">value &gt;= 0 and value &lt;= 1</code>
      </ctrl>
      <comment>
        <div xmlns="http://www.w3.org/1999/xhtml">
          <p>user defined D-cutoff for noTM networks. A score above the specified
            cutoff will result in a positive prediction of a signal peptide. The cutoff
            determines the yes/no answer only, the prediction process is not affected.
            The default cutoffs are:</p>
          <ul>
            <li><strong>euk</strong> : 0.45</li>
            <li><strong>gram+</strong> : 0.57</li>
            <li><strong>gram-</strong> : 0.57</li>
          </ul>
        </div>
      </comment>
      <argpos>50</argpos>
    </parameter>
    
    <!-- Optional D-cutoff for the TM networks, passed as "-U <value>" at position 50.
         The Perl snippet previously emitted " -c" (the flag the truncate parameter uses) with no
         value, and compared against a $vdef that this parameter does not define; it now builds
         the same "-U <value>" option as the Python snippet.
         The prompt also gains the lang attribute used by the other prompts in this file. -->
    <parameter>
      <name>TM_cutoff</name>
      <prompt lang="en">cutoff for TM networks</prompt>
      <type>
        <datatype>
          <class>Float</class>
        </datatype>
      </type>
      <format>
        <code proglang="perl">(defined $value) ? " -U $value" : ""</code>
        <code proglang="python">( "" , " -U " + str( value ) )[ value is not None]</code>
      </format>
      <!-- Accept only values in the closed interval [0, 1]. -->
      <ctrl>
        <message>
          <text lang="en">the cutoff must be &gt;= 0 and &lt;= 1</text>
        </message>
        <code proglang="python">value &gt;= 0 and value &lt;= 1</code>
      </ctrl>
      <comment>
        <div xmlns="http://www.w3.org/1999/xhtml">
          <p>user defined D-cutoff for TM networks. A score above the specified
            cutoff will result in a positive prediction of a signal peptide. The cutoff
            determines the yes/no answer only, the prediction process is not affected.
            The default cutoffs are:</p>
          <ul>
            <li><strong>euk</strong> : 0.50</li>
            <li><strong>gram+</strong> : 0.45</li>
            <li><strong>gram-</strong> : 0.51</li>
          </ul>
        </div>
      </comment>
      <argpos>50</argpos>
    </parameter>
     
         <!-- Optional N-terminal truncation length, passed as "-c <value>" at position 60 only
              when it differs from the default (70).
              The Perl snippet previously emitted " -c" without the value; it now mirrors Python.
              The control message now says "non-negative" because 0 is accepted (value >= 0) and
              documented below as disabling truncation. The prompt gains the lang attribute and
              the hyphenation artefact in "N-terminal" is removed. -->
    <parameter>
      <name>truncate</name>
      <prompt lang="en">Truncate  each  sequence  to  maximally n N-terminal residues</prompt>
      <type>
        <datatype>
          <class>Integer</class>
        </datatype>
      </type>
      <vdef>
        <value>70</value>
      </vdef>
      <format>
        <code proglang="perl">(defined $value and $value ne $vdef) ? " -c $value" : ""</code>
        <code proglang="python">( "" , " -c " + str( value ) )[ value is not None and value != vdef ]</code>
      </format>
      <ctrl>
        <message>
          <text lang="en">enter a non-negative value</text>
        </message>
        <code proglang="python">value &gt;= 0 </code>
      </ctrl>
      <comment>
        <text lang="en">truncate the input sequences to the specified length from the N-terminal.
              The default is 70 residues. The value of "0" disables truncation.
        </text>
      </comment>
      <argpos>60</argpos>
    </parameter>
    
    <!-- Optional switch: when set, adds "-m <sequence>_mature.fasta" at position 70.
         A Perl snippet is added for consistency with the other parameters of this file; it uses
         the same ${sequence} file-name form as the Perl filenames code of the mature_result output. -->
    <parameter>
      <name>mature</name>
      <prompt lang="en">generate a FASTA file with mature sequences based on  the  predictions.</prompt>
      <type>
        <datatype>
          <class>Boolean</class>
        </datatype>
      </type>
      <vdef>
        <value>0</value>
      </vdef>
      <format>
        <code proglang="perl">($value) ? " -m ${sequence}_mature.fasta" : ""</code>
        <code proglang="python">( "" , " -m %s_mature.fasta"%sequence)[value]</code>
      </format>
      <argpos>70</argpos>
    </parameter>
    
    <!-- Optional switch: when set, adds "-n <sequence>.gff" at position 70.
         A Perl snippet is added for consistency with the other parameters of this file; it uses
         the same ${sequence} file-name form as the Perl filenames code of the n_s_e_result output. -->
    <parameter>
      <name>n_s_e</name>
      <prompt lang="en"> generate a GFF (name-start-end) file with the predicted signal peptides.</prompt>
      <type>
        <datatype>
          <class>Boolean</class>
        </datatype>
      </type>
      <vdef>
        <value>0</value>
      </vdef>
      <format>
        <code proglang="perl">($value) ? " -n ${sequence}.gff" : ""</code>
        <code proglang="python">( "" , " -n %s.gff"%sequence)[value]</code>
      </format>
      <argpos>70</argpos>
    </parameter>
    
    <!-- Standard output of the program, stored as "signalp.out".
         Help text fix: words split by man-page hyphenation ("posi- tions", "cleav- age")
         are rejoined so they display correctly. -->
    <parameter isstdout="1">
      <name>results</name>
      <prompt lang="en">signalp report</prompt>
      <type>
        <datatype>
          <superclass>Report</superclass>
          <class>signalp</class>
        </datatype>
      </type>
      <filenames>
        <code proglang="perl">"signalp.out"</code>
        <code proglang="python">"signalp.out"</code>
      </filenames>
      <comment>
        <div xmlns="http://www.w3.org/1999/xhtml">
          <p><strong>Neural network output</strong></p>

          <p>For each input sequence the neural network (nn) module of signalp will first return three scores between 0
          and 1 for each sequence position:</p>
          <ul>
            <li><strong>C-score (raw cleavage site score)</strong>
              The output score from networks trained to recognize cleavage sites vs. other sequence positions.
              Trained to be high at position +1 (immediately after the cleavage site), and low at all other
              positions.</li>

            <li><strong>S-score (signal peptide score)</strong>
              The output score from networks trained to recognize signal peptide vs. non-signal-peptide positions.
              Trained to be high at all positions before the cleavage site, and low at positions after the
              cleavage site and in the N-terminals of non-secretory proteins.</li>

            <li><strong>Y-score (combined cleavage site score)</strong>
              The prediction of cleavage site location is optimized by observing where the C-score is high and the
              S-score changes from a high to a low value. The Y-score formalizes this by combining the height of
              the C-score with the slope of the S-score.<br />

              Specifically, the Y-score is a geometric average between the C-score and a smoothed derivative of
              the S-score (i.e. the difference between the mean S-score over d positions before and d positions
              after the current position, where d varies with the chosen network ensemble).</li>
          </ul>
          <p>signalp will then report the maximal C-, S-, and Y-scores, the mean S-score in the interval between the
          N-terminal and the site with the maximal Y-score and, finally, the D-score, the average of the S-mean and
          Y-max score.</p>
          <p>The high detail level of the output is intended to allow for interpretation of borderline cases by the
          user.</p>
          <p>If the sequence is predicted to have a signal peptide, the predicted cleavage site
          is located immediately before the position with the maximal Y-score.</p>
        </div>
      </comment>
    </parameter>
    
    <!-- GIF plot output: collected from "*.gif" when graphics is "gif" or "gif+eps". -->
    <parameter isout="1">
      <name>gif</name>
      <prompt lang="en">graphic in GIF</prompt>
      <type>
        <datatype>
          <superclass>Binary</superclass>
          <class>signalp_graphic</class>
        </datatype>
        <dataFormat>GIF</dataFormat>
      </type>
      <precond>
        <code proglang="perl">$graphics eq "gif" or $graphics eq "gif+eps"</code>
        <code proglang="python">graphics in ( "gif" , "gif+eps" )</code>
      </precond>
      <filenames>
        <code proglang="perl">"*.gif"</code>
        <code proglang="python">"*.gif"</code>
      </filenames>
    </parameter>
    
   <!-- EPS plot output: collected when graphics is "gif+eps".
        The filename mask was "*.gif" (copied from the gif parameter), so EPS files were never
        picked up and GIF files were reported as EPS; it is now "*.eps". -->
    <parameter isout="1">
      <name>eps</name>
      <prompt lang="en">graphic in eps</prompt>
      <type>
        <datatype>
          <superclass>Binary</superclass>
          <class>signalp_graphic</class>
        </datatype>
        <dataFormat>EPS</dataFormat>
      </type>
      <precond>
        <code proglang="perl">$graphics eq "gif+eps"</code>
        <code proglang="python">graphics == "gif+eps"</code>
      </precond>
      <filenames>
        <code proglang="perl">"*.eps"</code>
        <code proglang="python">"*.eps"</code>
      </filenames>
    </parameter>
    
    <!-- FASTA output named "<sequence>_mature.fasta", collected only when the mature switch is set. -->
    <parameter isout="1">
      <name>mature_result</name>
      <prompt lang="en">a FASTA file with mature sequences based on  the  predictions</prompt>
      <type>
        <datatype>
          <class>Sequence</class>
        </datatype>
        <dataFormat>FASTA</dataFormat>
      </type>
      <precond>
        <code proglang="perl">$mature</code>
        <code proglang="python">mature</code>
      </precond>
      <filenames>
        <code proglang="perl">"${sequence}_mature.fasta"</code>
        <code proglang="python">"%s_mature.fasta"%sequence</code>
      </filenames>
    </parameter>
      
      <!-- GFF output named "<sequence>.gff", collected only when the n_s_e switch is set. -->
    <parameter isout="1">
      <name>n_s_e_result</name>
      <prompt lang="en">a GFF (name-start-end) file with the predicted signal peptides</prompt>
      <type>
        <datatype>
          <class>Feature</class>
          <superclass>AbstractText</superclass>
        </datatype>
        <dataFormat>GFF</dataFormat>
      </type>
      <precond>
        <code proglang="perl">$n_s_e</code>
        <code proglang="python">n_s_e</code>
      </precond>
      <filenames>
        <code proglang="perl">"${sequence}.gff"</code>
        <code proglang="python">"%s.gff"%sequence</code>
      </filenames>
    </parameter>
    
  </parameters>
</program>
mobyle-programs 5.1.2-1 / var / lib / mobyle / programs / signalp.xml