/usr/include/rostlab/readFasta.h is in librostlab3-dev 1.0.20-7.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*
Copyright (C) 2011 Laszlo Kajan, Technical University of Munich, Germany
This file is part of librostlab.
librostlab is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ROSTLAB_READFASTA
#define ROSTLAB_READFASTA 1
#include <boost/regex.hpp>
#include <iostream>
#include <fstream>
#include "rostlab/rostlab_stdexcept.h"
namespace bo = boost;
namespace rostlab {
namespace bio {
namespace fmt {
  /// Empty tag type that names the FASTA format. It carries no data and is
  /// only used as the format argument of bio::seq (bio::seq<bio::fmt::fasta>).
  class fasta {};
} // namespace fmt
/// Sequence record tagged with the format (@p _FmtT) it was read from.
///
/// Stores three strings: a description, a display identifier and the
/// sequence itself. Only the sequence is exposed, through seqstr(); the
/// description and identifier are stored but have no accessor in this class.
template<typename _FmtT>
class seq {
  private:
    std::string _desc;        // description text of the record
    std::string _display_id;  // identifier of the record
    std::string _seqstr;      // the sequence string
  public:
    /// Creates a record whose description, identifier and sequence are empty.
    seq() {}

    /// Creates a record from copies of the three given strings.
    /// @param __desc        description text
    /// @param __display_id  identifier
    /// @param __seqstr      sequence string
    seq( const std::string& __desc, const std::string& __display_id, const std::string& __seqstr )
      : _desc(__desc), _display_id(__display_id), _seqstr(__seqstr) {}

    virtual ~seq() {}

    /// @return mutable reference to the stored sequence string.
    std::string& seqstr() { return _seqstr; }

    /// @return read-only reference to the stored sequence string; lets the
    ///         sequence be read from a const record.
    const std::string& seqstr() const { return _seqstr; }
};
/*template<> // could specialize it...
class seq<bio::fmt::fasta>
{
private:
public:
};*/
/*template<typename _FmtT>
std::istream& operator>>( std::istream& __is, bio::seq<_FmtT>& __n )
{
return __is;
}*/
inline std::istream& operator>>( std::istream& __is, bio::seq<bio::fmt::fasta>& __seq )
{
// based on Bio/SeqIO/fasta.pm
std::string rec; rec.reserve(1024);
while( __is.peek() != std::istream::traits_type::eof() )
{
if(rec.capacity() == rec.size()) rec.reserve(rec.capacity() * 2);
if( rec.size() && __is.peek() == '>' && *rec.rbegin() == '\n' ) break;
else rec += __is.get();
}
if( !rec.size() || *rec.begin() != '>' ) throw runtime_error( std::string("FASTA syntax error in record '") + rec + "': no leading '>'" );
rec = bo::regex_replace( rec, bo::regex("^>"), "" ); // $entry =~ s/^>//;
bo::sregex_token_iterator i(rec.begin(), rec.end(), bo::regex("\n"), -1); // split(/\n/,$entry,2);
if( i == boost::sregex_token_iterator() ) throw runtime_error( std::string("FASTA syntax error in record '") + rec + "': only one line" );
std::string top = *i++;
std::string sequence( i->first, static_cast<std::string::const_iterator>( rec.end() ) );
sequence = bo::regex_replace( sequence, bo::regex(">"), "" ); // $sequence =~ s/>//g;
bo::match_results<std::string::const_iterator> what;
std::string id, fulldesc;
if( bo::regex_search( top, what, bo::regex("^[[:space:]]*([^[:space:]]+)[:space:]*(.*)") ) )
{ id = std::string( what[1].first, what[1].second ); fulldesc = std::string( what[2].first, what[2].second ); }
if( id.empty() ) id = fulldesc;
sequence = bo::regex_replace( sequence, bo::regex("[ \t\n\r]"), "" );
// alphabet? would be good to have this
__seq = bio::seq<bio::fmt::fasta>( fulldesc, id, sequence );
return __is;
}
}; // namespace bio
}; // namespace rostlab
#endif /* ROSTLAB_READFASTA */
// vim:et:ts=2:ai: