/usr/include/libGIFTAcInvertedFile/include/CAcIFFileSystem.h is in libgnuift0-dev 0.1.14-12.1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/* -*- mode: c++ -*-
*/
/*
GIFT, a flexible content based image retrieval system.
Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva
Copyright (C) 2003, 2004 Bayreuth University
2005 Bamberg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
// -*- mode: c++ -*-
class CXMLElement;
/*
*
*
* This class manages the access to the inverted file as well
* as its generation
*
*
*
* modification history:
*
* WM 1099 changed documentation format
* completed documentation
* HM 090399 created the documentation
* WM 1098 created the file
*
*
*
* compiler defines used:
*
*
*/
#ifndef _CACIFFILESYSTEM
#define _CACIFFILESYSTEM
#include "libGIFTAcInvertedFile/include/uses-declarations.h"
#include <string>
#include "libMRML/include/TID.h"
#include "libMRML/include/CSelfDestroyPointer.h"
#include "libMRML/include/CArraySelfDestroyPointer.h"
#include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h"
#include "libMRML/include/CMutex.h" // multi threading
//#include "CCollectionFrequencyList.h"
#include "libGIFTAcInvertedFile/include/CADIHash.h"
#include "libGIFTAcURL2FTS/include/CAcURL2FTS.h"
#include "libGIFTAcInvertedFile/include/CAcInvertedFile.h"
#include <iostream>
#include <fstream>
#include <map>
#include <vector>
/* Select the associative container used for the ID-keyed lookup tables
   declared further down in this header: if the build defines HAS_HASH_MAP,
   the <hash_map> header is included and HASH_MAP expands to hash_map;
   otherwise HASH_MAP expands to map (from <map>, included above).
   NOTE(review): hash_map and map are used unqualified here; presumably
   uses-declarations.h brings them into scope -- confirm. */
#ifdef HAS_HASH_MAP
#include <hash_map>
#define HASH_MAP hash_map
#else
#define HASH_MAP map
#endif
#include <functional>
#include <algorithm>
#include "libMRML/include/CMagic.h"
/** Feature IDs share the underlying TID type; TFeatureID is an alias used
    in signatures to mark an argument as a feature ID. */
typedef TID TFeatureID ;
/**
An accessor to an inverted file. This access is done
"by hand".
For a long time we wanted to move to memory mapped files (like SWISH++)
but currently I think this is not the best idea.
*/
class CAcIFFileSystem:public CAcInvertedFile{
protected:
/** The mutex for multi threading.
NOTE(review): which members it guards is not visible in this header. */
CMutex mMutex;
/** The CAcURL2FTS accessor, held as a member rather than as a second
base class. Rationale given by the original author: in order to have
just one parent, the class is limited to single inheritance; virtual
base classes cannot be used, because then downcasting would not be
possible.
*/
CSelfDestroyPointer<CAcURL2FTS> mURL2FTS;
/** The maximum feature ID arising in this file */
TID mMaximumFeatureID;
/** A buffer, if the inverted file is to be held in RAM.
Its type depends on the V295 compiler define: a string when V295 is
not defined, a CArraySelfDestroyPointer<char> when it is. */
#ifndef V295
string mInvertedFileBuffer;
#else
CArraySelfDestroyPointer<char> mInvertedFileBuffer;
#endif
/** Some place for putting temporary indexing data
(judging by the name, a base for temporary file names -- confirm). */
string mTemporaryIndexingFileBase;
/** The inverted file, read through an istream. Declared mutable, so
that const member functions may operate on the stream. */
mutable CSelfDestroyPointer<istream> mInvertedFile;
/** The offset file: Feature -> Offset in inverted file.
Declared mutable, so that const member functions may operate on the
stream. */
mutable ifstream mOffsetFile;
/** File of feature descriptions */
ifstream mFeatureDescriptionFile;
/** Name of the inverted file */
string mInvertedFileName;
/** Name of the offset file */
string mOffsetFileName;
/** Name of the file with the feature descriptions */
string mFeatureDescriptionFileName;
/** Type of the map from feature ID to the offset for this feature.
HASH_MAP is the macro defined near the top of this header; it expands
to hash_map or map. */
typedef HASH_MAP<TID,streampos> CIDToOffset;//new hash
/** The map from feature ID to the offset for this feature */
CIDToOffset mIDToOffset;
/** Map from feature ID to the collection frequency.
Declared mutable, so const member functions may modify it
(presumably it is filled lazily as a cache -- confirm in the
implementation). */
mutable HASH_MAP<TID,double> mFeatureToCollectionFrequency;//new hash
/**@name for fast access...*/
//@{
/** Map from the feature ID to the feature description
(an unsigned int code; see getFeatureDescription()). */
HASH_MAP<TID,unsigned int> mFeatureDescription;//new hash_
/** Additional information about the document like, e.g.
the Euclidean length of the feature list.
*/
CADIHash mDocumentInformation;
//@}
/** Add a pair of (FeatureID, Offset) to the open offset file
(helper function for inverted file construction).
@param inFeatureID the feature ID to be written
@param inPosition the offset belonging to inFeatureID
@param inOpenOffsetFile the already opened offset file to write to
*/
void writeOffsetFileElement(TID inFeatureID,
streampos inPosition,
ostream& inOpenOffsetFile);
/** Loads a *.fts file and returns the feature list.
@param inFileName name of the *.fts file to load
@return the feature list read from the file.
NOTE(review): ownership of the returned pointer is not stated in this
header -- confirm whether the caller has to delete it.
*/
CDocumentFrequencyList* getFeatureFile(string inFileName)const;
public:
/** For testing if the inverted file is correctly constructed */
bool operator()()const;
/** This opens an existing inverted file, and then
initialises this structure. After that it is fully
usable.

As a parameter it takes an XMLElement which contains
a "collection" element and its content.

If the attribute cui-generate-inverted-file is true,
then a new inverted file will be generated using
the parameters given in inCollectionElement. You will
NOT be able to use *this afterwards.

Like every accessor, this accessor takes a <collection />
MRML element as input (@see CXMLElement for how to access
the attributes of this element). Currently this accessor
understands the following attributes:

cui-base-dir: the directory containing the following files
cui-inverted-file-location: the location of the inverted file
cui-offset-file-location: a file containing offsets into the
inverted file
cui-feature-file-location: the location of the "url2fts" file
which translates URLs to feature
file names.

@param inCollectionElement the "collection" element described above
*/
CAcIFFileSystem(const CXMLElement& inCollectionElement);
/** Called by constructors.
NOTE(review): the meaning of the bool argument and of the return
value is not documented in this header -- see the implementation. */
bool init(bool);
/** Destructor */
~CAcIFFileSystem();
/** Translate a DocumentID to a URL (for output)
@param inID the document ID to translate
*/
string IDToURL(TID inID)const;
/**@name The proper inverted file access
NOTE(review): all three functions return a raw pointer; ownership is
not stated in this header -- confirm whether the caller has to delete
the returned list. */
//@{
/** List of documents containing the feature */
CDocumentFrequencyList* FeatureToList(TFeatureID)const;
/** List of features contained by a document
@param inURL the URL of the document
*/
CDocumentFrequencyList* URLToFeatureList(string inURL)const;
/** List of features contained by a document with ID inDID
@param inDID the ID of the document
*/
CDocumentFrequencyList* DIDToFeatureList(TID inDID)const;
//@}
/**@name Accessing information about features*/
//@{
/** Collection frequency for a given feature */
double FeatureToCollectionFrequency(TFeatureID)const;
/** What kind of feature is the feature with ID inFeatureID?
@param inFeatureID the ID of the feature in question
@return the feature description as an unsigned int code
*/
unsigned int getFeatureDescription(TID inFeatureID)const;
//@}
/**@name Accessing additional document information*/
//@{
/** Returns the maximum document frequency for one document ID */
double DIDToMaxDocumentFrequency(TID)const;
/** Returns the document-frequency square sum for a given document ID */
double DIDToDFSquareSum(TID)const;
/** Returns the value named by this function for a given document ID.
NOTE(review): judging by the name, presumably a sum of squared
(document frequency * log inverse collection frequency) terms --
confirm against the implementation. */
double DIDToSquareDFLogICFSum(TID)const;
//@}
/**@name Inverted File Generation and Consistency Checking*/
//@{
/** Generating an inverted file, if there is none.
Fast but stupid in-memory method. This method is
very fast, if all the inverted file (and a bit more)
can be kept in memory at runtime. If this is not the
case, extensive swapping is the result, virtually halting
the inverted file creation.
*/
bool generateInvertedFile();
/** Generating an inverted file, if there is none.
Employing the two-way-merge method described
in "Managing Gigabytes", chapter 5.2, Sort-based
inversion (page 181).
*/
bool newGenerateInvertedFile();
/** Check the consistency of the inverted file system accessed
by this accessor. */
bool checkConsistency();
/** Is the document with inDocumentID contained in the
document frequency list of the feature inFeatureID and
is the associated document frequency the same?
@param inFeatureID the feature whose document frequency list is searched
@param inDocumentID the document looked for in that list
@param inDocumentFrequency the document frequency expected for
inDocumentID in that list
*/
bool findWithinStream(TID inFeatureID,
TID inDocumentID,
double inDocumentFrequency)const;
//@}
/**
*
* Translate a URL to its document ID
*
* @param inURL the URL to translate
* @return a (bool, document ID) pair; presumably the bool tells
* whether the URL was found -- confirm in the implementation.
*
*/
virtual pair<bool,TID> URLToID(const string& inURL)const;
/** List of the IDs of all documents present in the inverted file.
The list is handed back through the reference parameter. */
void getAllIDs(list<TID>&)const;
/** List of triplets (ID,imageURL,thumbnailURL) of all
the documents present in the inverted file.
The list is handed back through the reference parameter. */
void getAllAccessorElements(list<CAccessorElement>&)const;
/** Get a given number of random document IDs.
The parameters are unnamed in this declaration:
first parameter: the list which will contain the result;
second parameter: the desired size of that list.
*/
void getRandomIDs(list<TID>&,
list<TID>::size_type)const;
/** For drawing random sets. Why is this part of a CAccessorImplementation?
The way the accessor is organised might influence the way
random sets can be drawn. At present everything happens in
RAM, but we do not want to be fixed on that.
@param outResult the list which will contain the result
@param inSize the desired size of outResult
*/
void getRandomAccessorElements(list<CAccessorElement>& outResult,
list<CAccessorElement>::size_type inSize)const;
/** The number of images in this accessor */
int size()const;
// NOTE(review): the group-close marker below has no matching group-open
// marker after the previous group was closed.
//@}
/** This is interesting for browsing */
TID getMaximumFeatureID()const;
/** Getting a list of all features contained in this.
This function is necessary, because in the present
system only about 50 percent of the features are
really used.

A feature is considered used if it arises in mIDToOffset.

NOTE(review): returns a raw pointer; ownership is not stated in this
header -- confirm whether the caller has to delete the list.
*/
list<TID>* getAllFeatureIDs()const;
/**
*
* Translate a DocumentID to an accessor element
*
* @param inID the document ID to translate
* @return a (bool, CAccessorElement) pair; presumably the bool tells
* whether the ID was found -- confirm in the implementation.
*
*/
virtual pair<bool,CAccessorElement> IDToAccessorElement(TID inID)const;
/** Is this well constructed? */
operator bool()const;
};
#endif
|