/usr/include/shark/Data/SparseData.h is in libshark-dev 3.1.4+ds1-1ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 | //===========================================================================
/*!
*
*
* \brief Support for importing data from and exporting data to files in the sparse data (libSVM) format
*
*
* \par
* The most important application of the methods provided in this
* file is the import of data from LIBSVM files to Shark Data containers.
*
*
*
*
* \author M. Tuma, T. Glasmachers, C. Igel
* \date 2010
*
*
* \par Copyright 1995-2015 Shark Development Team
*
* <BR><HR>
* This file is part of Shark.
* <http://image.diku.dk/shark/>
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
//===========================================================================
#ifndef SHARK_DATA_SPARSEDATA_H
#define SHARK_DATA_SPARSEDATA_H
#include <shark/Core/DLLSupport.h>
#include <fstream>
#include <shark/Data/Dataset.h>
namespace shark {
namespace detail {
/// \brief Pair of (class label, index of the data point) used when ordering data points by label.
typedef std::pair< unsigned int, size_t > LabelSortPair;

/// \brief Comparison predicate that ranks a pair with a larger label before one with a smaller label.
///
/// Only the label (first component) takes part in the comparison, so
/// pairs with equal labels compare as equivalent. Used as the ordering
/// for sorting in decreasing label order.
static inline bool cmpLabelSortPair(const LabelSortPair& left, const LabelSortPair& right) {
	return right.first < left.first;
}
} // namespace detail
/**
* \ingroup shark_globals
*
* @{
*/
/// \brief Import data from a sparse data (libSVM) file.
///
/// This overload reads from a caller-supplied input stream and fills a
/// dataset whose inputs are of type RealVector and whose labels are of
/// type unsigned int.
///
/// \param  dataset       container storing the loaded data
/// \param  stream        stream to be read from
/// \param  highestIndex  highest feature index, or 0 for auto-detection
/// \param  batchSize     size of batch; defaults to
///                       LabeledData<RealVector, unsigned int>::DefaultBatchSize
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData<RealVector, unsigned int>& dataset,
std::istream& stream,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData<RealVector, unsigned int>::DefaultBatchSize
);
/// \brief Import data from a sparse data (libSVM) file.
///
/// This overload reads from a caller-supplied input stream and fills a
/// dataset whose inputs are of type CompressedRealVector and whose labels
/// are of type unsigned int.
///
/// \param  dataset       container storing the loaded data
/// \param  stream        stream to be read from
/// \param  highestIndex  highest feature index, or 0 for auto-detection
/// \param  batchSize     size of batch; defaults to
///                       LabeledData<RealVector, unsigned int>::DefaultBatchSize
///
/// NOTE(review): the default batch size is taken from the RealVector
/// container type rather than from LabeledData<CompressedRealVector, unsigned int>
/// -- presumably both constants are equal; confirm this is intended.
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData<CompressedRealVector, unsigned int>& dataset,
std::istream& stream,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData<RealVector, unsigned int>::DefaultBatchSize
);
/// \brief Import data from a sparse data (libSVM) file.
///
/// This overload takes the name of the file to read and fills a dataset
/// whose inputs are of type RealVector and whose labels are of type
/// unsigned int.
///
/// \param  dataset       container storing the loaded data
/// \param  fn            the file to be read from (passed by value)
/// \param  highestIndex  highest feature index, or 0 for auto-detection
/// \param  batchSize     size of batch; defaults to
///                       LabeledData<RealVector, unsigned int>::DefaultBatchSize
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData<RealVector, unsigned int>& dataset,
std::string fn,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData<RealVector, unsigned int>::DefaultBatchSize
);
/// \brief Import data from a sparse data (libSVM) file.
///
/// This overload takes the name of the file to read and fills a dataset
/// whose inputs are of type CompressedRealVector and whose labels are of
/// type unsigned int.
///
/// \param  dataset       container storing the loaded data
/// \param  fn            the file to be read from (passed by value)
/// \param  highestIndex  highest feature index, or 0 for auto-detection
/// \param  batchSize     size of batch; defaults to
///                       LabeledData<RealVector, unsigned int>::DefaultBatchSize
///
/// NOTE(review): the default batch size is taken from the RealVector
/// container type rather than from LabeledData<CompressedRealVector, unsigned int>
/// -- presumably both constants are equal; confirm this is intended.
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData<CompressedRealVector, unsigned int>& dataset,
std::string fn,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData<RealVector, unsigned int>::DefaultBatchSize
);
/// \brief Export data to sparse data (libSVM) format.
///
/// Writes one line per data point: the (possibly transformed) label,
/// followed by "index:value" entries of the input, where feature indices
/// are 1-based. The file is closed when the function returns.
///
/// \param  dataset      Container storing the data
/// \param  fn           Output file
/// \param  dense        Flag for using dense output format (entries equal to zero are written as well)
/// \param  oneMinusOne  Flag for applying the transformation y<-2y-1 to binary labels;
///                      it is ignored unless the dataset has exactly two classes, in
///                      which case labels are written as y+1 instead
/// \param  sortLabels   Flag for sorting data points according to labels (decreasing label
///                      order; data points with equal labels keep their dataset order)
/// \param  append       Flag for appending to the output file instead of overwriting it
///
/// \throws an exception created by SHARKEXCEPTION if the file can not be
///         opened for writing or if writing the data fails
template<typename InputType>
void exportSparseData(LabeledData<InputType, unsigned int>& dataset, const std::string &fn, bool dense=false, bool oneMinusOne = true, bool sortLabels = false, bool append = false) {
	std::size_t elements = dataset.numberOfElements();

	// shall we append only or overwrite?
	std::ofstream ofs;
	if (append) {
		ofs.open (fn.c_str(), std::fstream::out | std::fstream::app );
	} else {
		ofs.open (fn.c_str());
	}
	if( !ofs ) {
		throw( SHARKEXCEPTION( "[exportSparseData] file can not be opened for writing" ) );
	}

	size_t dim = inputDimension(dataset);
	// the -1/+1 encoding only makes sense for binary problems
	if(numberOfClasses(dataset)!=2) oneMinusOne = false;

	// (label, original index) pairs, ordered by decreasing label
	std::vector<detail::LabelSortPair> L;
	if(sortLabels) {
		L.reserve(elements);
		for(std::size_t i = 0; i < elements; i++)
			L.push_back(detail::LabelSortPair(dataset.element(i).label, i));
		// stable sort keeps the dataset order among equal labels, so the
		// output is reproducible
		std::stable_sort (L.begin(), L.end(), detail::cmpLabelSortPair);
	}

	for(std::size_t ii = 0; ii < elements; ii++) {
		// apply mapping to sorted indices
		std::size_t i = sortLabels ? L[ii].second : ii;

		// apply transformation to label and write it to file
		if(oneMinusOne) ofs << 2*int(dataset.element(i).label)-1 << " ";
		//libsvm file format documentation is scarce, but by convention the first class seems to be 1..
		else ofs << dataset.element(i).label+1 << " ";

		// write input data to file; zero entries are skipped unless dense output is requested
		for(std::size_t j=0; j<dim; j++) {
			if(dense || dataset.element(i).input(j) != 0)
				ofs << " " << j+1 << ":" << dataset.element(i).input(j);
		}
		// plain newline: avoid flushing the stream after every data point
		ofs << '\n';
	}

	// flush once and report write errors (e.g. disk full) instead of
	// silently leaving a truncated file behind
	ofs.flush();
	if( !ofs ) {
		throw( SHARKEXCEPTION( "[exportSparseData] error while writing to file" ) );
	}
}
/** @}*/
}
#endif
|