/usr/include/OpenMS/METADATA/ProteinIdentification.h is in libopenms-dev 1.11.1-5.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
// --------------------------------------------------------------------------
// OpenMS -- Open-Source Mass Spectrometry
// --------------------------------------------------------------------------
// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
// ETH Zurich, and Freie Universitaet Berlin 2002-2013.
//
// This software is released under a three-clause BSD license:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of any author or any participating institution
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
// For a full list of authors, refer to the file AUTHORS.
// --------------------------------------------------------------------------
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// --------------------------------------------------------------------------
// $Maintainer: Chris Bielow $
// $Authors: Nico Pfeifer, Chris Bielow $
// --------------------------------------------------------------------------
#ifndef OPENMS_METADATA_PROTEINIDENTIFICATION_H
#define OPENMS_METADATA_PROTEINIDENTIFICATION_H
#include <OpenMS/METADATA/ProteinHit.h>
#include <OpenMS/METADATA/MetaInfoInterface.h>
#include <OpenMS/DATASTRUCTURES/DateTime.h>
#include <set>
namespace OpenMS
{
class PeptideIdentification;
/**
@brief Representation of a protein identification run
This class stores the general information and the protein hits of a protein identification run.
The actual peptide hits are stored in PeptideIdentification instances that are part of spectra or features.
In order to be able to connect the ProteinIdentification and the corresponding peptide identifications, both classes have a string identifier. We recommend using the search engine name and the date as identifier.
Setting this identifier is especially important when there are several protein identification runs for a map, i.e. several ProteinIdentification instances.
@todo Add MetaInfoInterface to modifications => update IdXMLFile and ProteinIdentificationVisualizer (Andreas)
@ingroup Metadata
*/
class OPENMS_DLLAPI ProteinIdentification :
public MetaInfoInterface
{
public:
/// Hit type definition: alias for ProteinHit, the element type of the hit list returned by getHits()
typedef ProteinHit HitType;
/**
@brief Bundles multiple (e.g. indistinguishable) proteins in a group

A group is a plain data holder consisting of a probability and the accessions of its member proteins.
*/
struct ProteinGroup
{
  /// Probability of this group
  DoubleReal probability;
  /// Accessions of (indistinguishable) proteins that belong to the same group
  StringList accessions;

  /// Default constructor: probability 0.0 and an empty accession list
  ProteinGroup() :
    probability(0.0), accessions()
  {}

  /**
  @brief Equality operator

  @p rhs is taken by const reference so that a comparison does not copy the group (and its accession list).

  @param rhs Group to compare with
  @return true if both the probability and the accessions compare equal (exact comparison, no tolerance)
  */
  bool operator==(const ProteinGroup & rhs) const
  {
    return probability == rhs.probability &&
           accessions == rhs.accessions;
  }
};
/// Peak mass type
enum PeakMassType
{
  MONOISOTOPIC = 0,     ///< Monoisotopic peak masses
  AVERAGE = 1,          ///< Average peak masses
  SIZE_OF_PEAKMASSTYPE  ///< Number of peak mass types; dimension of NamesOfPeakMassType
};
/// Names corresponding to peak mass types (array has SIZE_OF_PEAKMASSTYPE entries)
static const std::string NamesOfPeakMassType[SIZE_OF_PEAKMASSTYPE];
/// Digestion enzyme
enum DigestionEnzyme
{
  TRYPSIN = 0,             ///< Trypsin
  PEPSIN_A = 1,            ///< Pepsin A
  PROTEASE_K = 2,          ///< Protease K
  CHYMOTRYPSIN = 3,        ///< Chymotrypsin
  NO_ENZYME = 4,           ///< No enzyme
  UNKNOWN_ENZYME = 5,      ///< Enzyme not known (initial value of SearchParameters::enzyme)
  SIZE_OF_DIGESTIONENZYME  ///< Number of digestion enzymes; dimension of NamesOfDigestionEnzyme
};
/// Names corresponding to digestion enzymes (array has SIZE_OF_DIGESTIONENZYME entries)
static const std::string NamesOfDigestionEnzyme[SIZE_OF_DIGESTIONENZYME];
/**
@brief Search parameters of the DB search

Plain data holder for the settings of a database search.

@note The comparison operators only look at the members declared in this struct;
the meta information inherited from MetaInfoInterface is not compared.
*/
struct SearchParameters :
  public MetaInfoInterface
{
  String db; ///< The used database
  String db_version; ///< The database version
  String taxonomy; ///< The taxonomy restriction
  String charges; ///< The allowed charges for the search
  PeakMassType mass_type; ///< Mass type of the peaks
  std::vector<String> fixed_modifications; ///< Used fixed modifications
  std::vector<String> variable_modifications; ///< Allowed variable modifications
  DigestionEnzyme enzyme; ///< The enzyme used for cleavage
  UInt missed_cleavages; ///< The number of allowed missed cleavages
  DoubleReal peak_mass_tolerance; ///< Mass tolerance of fragment ions (Dalton)
  DoubleReal precursor_tolerance; ///< Mass tolerance of precursor ions (Dalton)

  /// Default constructor: empty strings and modification lists, monoisotopic masses,
  /// unknown enzyme, no missed cleavages and zero tolerances
  SearchParameters() :
    db(), db_version(), taxonomy(), charges(),
    mass_type(MONOISOTOPIC),
    fixed_modifications(), variable_modifications(),
    enzyme(UNKNOWN_ENZYME),
    missed_cleavages(0),
    peak_mass_tolerance(0.0),
    precursor_tolerance(0.0)
  {
  }

  /**
  @brief Equality operator

  Compares the members one after another (in declaration order) and stops at the first mismatch.
  The tolerances are compared exactly.

  @param rhs Search parameters to compare with
  @return true if all members declared in this struct are equal
  */
  bool operator==(const SearchParameters & rhs) const
  {
    // database and search space
    if (!(db == rhs.db)) return false;
    if (!(db_version == rhs.db_version)) return false;
    if (!(taxonomy == rhs.taxonomy)) return false;
    if (!(charges == rhs.charges)) return false;
    if (!(mass_type == rhs.mass_type)) return false;

    // modifications
    if (!(fixed_modifications == rhs.fixed_modifications)) return false;
    if (!(variable_modifications == rhs.variable_modifications)) return false;

    // digestion
    if (!(enzyme == rhs.enzyme)) return false;
    if (!(missed_cleavages == rhs.missed_cleavages)) return false;

    // mass tolerances
    if (!(peak_mass_tolerance == rhs.peak_mass_tolerance)) return false;
    return precursor_tolerance == rhs.precursor_tolerance;
  }

  /// Inequality operator: the negation of operator==
  bool operator!=(const SearchParameters & rhs) const
  {
    return !operator==(rhs);
  }
};
/** @name Constructors, destructor, assignment and comparison operators <br> */
//@{
/// Default constructor
ProteinIdentification();
/// Destructor (declared virtual)
virtual ~ProteinIdentification();
/// Copy constructor
/// @param source Instance to copy from
ProteinIdentification(const ProteinIdentification & source);
/// Assignment operator
/// @param source Instance to assign from
ProteinIdentification & operator=(const ProteinIdentification & source);
/// Equality operator
/// @param rhs Instance to compare with
bool operator==(const ProteinIdentification & rhs) const;
/// Inequality operator
/// @param rhs Instance to compare with
bool operator!=(const ProteinIdentification & rhs) const;
//@}
///@name Protein hit information (public members)
//@{
/// Returns the protein hits (read-only)
const std::vector<ProteinHit> & getHits() const;
/// Returns the protein hits (mutable)
std::vector<ProteinHit> & getHits();
/// Appends a protein hit
/// @param input Hit to append
void insertHit(const ProteinHit & input);
/// Sets the protein hits
/// @param hits New list of protein hits
void setHits(const std::vector<ProteinHit> & hits);
/// Finds a protein hit by accession (returns past-the-end iterator if not found)
/// @param accession Accession of the protein hit to look for
std::vector<ProteinHit>::iterator findHit(const String & accession);
/// Returns the protein groups (read-only)
const std::vector<ProteinGroup> & getProteinGroups() const;
/// Returns the protein groups (mutable)
std::vector<ProteinGroup> & getProteinGroups();
/// Appends a new protein group
/// @param group Group to append
void insertProteinGroup(const ProteinGroup & group);
/// Returns the indistinguishable proteins (read-only)
const std::vector<ProteinGroup> & getIndistinguishableProteins() const;
/// Returns the indistinguishable proteins (mutable)
std::vector<ProteinGroup> & getIndistinguishableProteins();
/// Appends new indistinguishable proteins
/// @param group Group of indistinguishable proteins to append
void insertIndistinguishableProteins(const ProteinGroup & group);
/// Returns the protein significance threshold value
DoubleReal getSignificanceThreshold() const;
/// Sets the protein significance threshold value
/// @param value New significance threshold
void setSignificanceThreshold(DoubleReal value);
/// Returns the protein score type
const String & getScoreType() const;
/// Sets the protein score type
/// @param type New score type
void setScoreType(const String & type);
/// Returns true if a higher score represents a better score
bool isHigherScoreBetter() const;
/// Sets the orientation of the score (is higher better?)
/// @param higher_is_better true if a higher score represents a better score
void setHigherScoreBetter(bool higher_is_better);
/// Sorts the protein hits according to their score
void sort();
/// Sorts the protein hits by score and assigns ranks (best score has rank 1)
void assignRanks();
/**
@brief Compute the coverage (in percent) of all ProteinHits given PeptideHits
@param pep_ids Peptide identifications whose peptide hits are used for the computation
@throws Exception::MissingInformation if ProteinHits do not have sequence information
@return The number of proteins referenced by the @p pep_ids that are not contained in this ProteinIdentification set (should be 0)
*/
Size computeCoverage(const std::vector<PeptideIdentification> & pep_ids);
//@}
///@name General information
//@{
/// Returns the date of the protein identification run
const DateTime & getDateTime() const;
/// Sets the date of the protein identification run
/// @param date New date of the run
void setDateTime(const DateTime & date);
/// Sets the search engine type
/// @param search_engine Type of the search engine used
void setSearchEngine(const String & search_engine);
/// Returns the type of search engine used
const String & getSearchEngine() const;
/// Sets the search engine version
/// @param search_engine_version Version of the search engine used
void setSearchEngineVersion(const String & search_engine_version);
/// Returns the search engine version
const String & getSearchEngineVersion() const;
/// Sets the search parameters
/// @param search_parameters New search parameters
void setSearchParameters(const SearchParameters & search_parameters);
/// Returns the search parameters
const SearchParameters & getSearchParameters() const;
/// Returns the identifier (the string that connects this run with its peptide identifications)
const String & getIdentifier() const;
/// Sets the identifier (recommended: search engine name and date)
/// @param id New identifier
void setIdentifier(const String & id);
//@}
protected:
// NOTE(review): the member descriptions below are derived from the member names and the
// documentation of the public accessors; the accessor implementations are not part of this
// header, so confirm the mapping against the source file.
///@name General information (search engine, parameters and database)
//@{
/// Identifier of this run (presumably accessed via getIdentifier() / setIdentifier())
String id_;
/// Search engine type
String search_engine_;
/// Search engine version
String search_engine_version_;
/// Search parameters of the DB search
SearchParameters search_parameters_;
/// Date of the protein identification run
DateTime date_;
//@}
///@name Protein hit information (protected members)
//@{
/// Protein score type
String protein_score_type_;
/// Orientation of the score (true if a higher score is better)
bool higher_score_better_;
/// Protein hits
std::vector<ProteinHit> protein_hits_;
/// Protein groups
std::vector<ProteinGroup> protein_groups_;
/// Indistinguishable proteins: @p accessions[0] is "group leader", @p probability is meaningless
std::vector<ProteinGroup> indistinguishable_proteins_;
/// Protein significance threshold value
DoubleReal protein_significance_threshold_;
//@}
};
} //namespace OpenMS
#endif // OPENMS_METADATA_PROTEINIDENTIFICATION_H
|