This file is indexed.

/usr/include/libGIFTAcInvertedFile/include/CAcIFFileSystem.h is in libgnuift0-dev 0.1.14-12.1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
/* -*- mode: c++ -*- 
*/
/* 

    GIFT, a flexible content based image retrieval system.
    Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva

     Copyright (C) 2003, 2004 Bayreuth University
      2005 Bamberg University
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/
// -*- mode: c++ -*-


// Forward declaration: within this header CXMLElement is only used by
// reference (as the constructor parameter of CAcIFFileSystem), so the
// full class definition is not required here.
class CXMLElement;

/*
*
* 
*  This class manages the access to the inverted file as well 
*    as its generation
*
*
*
* modification history:
*
* WM   1099 changed documentation format
*           completed documentation
* HM 090399 created the documentation
* WM   1098 created the file
*
*
*
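* compiler defines used:
*
* HAS_HASH_MAP  if defined, HASH_MAP expands to hash_map, otherwise to map
* V295          if defined, the in-RAM inverted file buffer is a
*               CArraySelfDestroyPointer<char> instead of a string
*
*/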

#ifndef _CACIFFILESYSTEM
#define _CACIFFILESYSTEM
#include "libGIFTAcInvertedFile/include/uses-declarations.h"
#include <string>
#include "libMRML/include/TID.h"
#include "libMRML/include/CSelfDestroyPointer.h"
#include "libMRML/include/CArraySelfDestroyPointer.h"
#include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h"
#include "libMRML/include/CMutex.h" // multi threading
//#include "CCollectionFrequencyList.h"
#include "libGIFTAcInvertedFile/include/CADIHash.h"
#include "libGIFTAcURL2FTS/include/CAcURL2FTS.h"
#include "libGIFTAcInvertedFile/include/CAcInvertedFile.h"
#include <iostream>
#include <fstream>
#include <map>
#include <vector>
// HASH_MAP names the associative container template used for the
// ID-keyed tables declared below: hash_map when the build defines
// HAS_HASH_MAP, the ordered map otherwise.
// NOTE(review): both names are used unqualified; presumably
// uses-declarations.h brings them into scope -- confirm.
#ifdef HAS_HASH_MAP
#include <hash_map>
#define HASH_MAP hash_map
#else
#define HASH_MAP map
#endif
#include <functional>
#include <algorithm>

#include "libMRML/include/CMagic.h"


/** Feature IDs have the same representation as any other ID (TID);
    the alias only makes the intent visible in signatures. */
typedef TID TFeatureID ;

/**
   An accessor to an inverted file kept in the file system. The access
   is done "by hand", i.e. through stream objects and an explicit table
   of feature offsets (see mInvertedFile, mOffsetFile and mIDToOffset).

   The class both reads an existing inverted file and can generate one
   (see generateInvertedFile() and newGenerateInvertedFile()).

   For a long time the authors wanted to move to memory mapped files
   (like SWISH++), but at the time of writing this was not considered
   the best idea.

   NOTE(review): this header only declares the interface; everything
   said below about behaviour is taken from the original comments and
   the declarations, not from the implementation.
 */
class CAcIFFileSystem:public CAcInvertedFile{  

protected:
  /** The mutex for multi threading.
      NOTE(review): which members it guards is not visible in this
      header. */
  CMutex mMutex;
  /** The URL -> feature-file accessor, held as a member rather than
      as a second base class.

      Original rationale: the class is limited to single inheritance in
      order to have just one parent; virtual base classes were not an
      option because they would prevent downcasting.
  */
  CSelfDestroyPointer<CAcURL2FTS> mURL2FTS;
  /** The maximum feature ID arising in this file */
  TID mMaximumFeatureID;
  /** A buffer, used if the inverted file is to be held in RAM.
      Its type depends on the V295 compiler define. */
#ifndef V295
  string mInvertedFileBuffer;
#else
  CArraySelfDestroyPointer<char> mInvertedFileBuffer;
#endif

  /** Some place for putting temporary indexing data
      (presumably a base name for temporary files -- confirm) */
  string mTemporaryIndexingFileBase;
  /** The inverted file. Mutable, so that const accessor functions can
      use the stream. */
  mutable CSelfDestroyPointer<istream> mInvertedFile;

  /** Feature -> Offset in inverted file. Mutable, so that const
      accessor functions can use the stream. */
  mutable ifstream mOffsetFile;

  /** File of feature descriptions */
  ifstream mFeatureDescriptionFile;

  /** Name of the inverted file */
  string mInvertedFileName;

  /** Name of the Offset file */
  string mOffsetFileName;

  /** Name for the file with the feature description */
  string mFeatureDescriptionFileName;

  /** Type of the map from feature id to the offset for this feature */
  typedef HASH_MAP<TID,streampos> CIDToOffset;//new hash
  /** Map from feature id to the offset for this feature */
  CIDToOffset mIDToOffset;

  /** Map from feature to the collection frequency. Mutable, so that
      const accessor functions can modify it. */
  mutable HASH_MAP<TID,double> mFeatureToCollectionFrequency;//new hash

  /**@name for fast access...*/
  //@{
  /**  Map from the feature ID to the feature description */
  HASH_MAP<TID,unsigned int> mFeatureDescription;//new hash_

  /**  Additional information about the document like, e.g.
       the euclidean length of the feature list.
   */
  CADIHash mDocumentInformation;
  //@}
  /** Add a pair of (FeatureID, Offset) to the open offset file
      (helper function for inverted file construction).

      @param inFeatureID      the feature ID of the pair
      @param inPosition       the offset of the pair
      @param inOpenOffsetFile the already opened offset file to add to
   */
  void writeOffsetFileElement(TID inFeatureID,
			      streampos inPosition,
			      ostream& inOpenOffsetFile);
  /** Loads a *.fts file and returns the feature list.

      @param inFileName name of the *.fts file
      NOTE(review): a raw pointer is returned; presumably the caller
      owns the list -- confirm against the implementation.
  */
  CDocumentFrequencyList* getFeatureFile(string inFileName)const;
public:
  /** For testing if the inverted file is correctly constructed */
  bool operator()()const;

  /**  This opens an existing inverted file, and then
       inits this structure. After that it is fully
       usable.

       As a parameter it takes an XMLElement which contains
       a "collection" element and its content.

       If the attribute cui-generate-inverted-file is true,
       then a new inverted file will be generated using
       the parameters given in inCollectionElement. You will
       NOT be able to use *this afterwards.

       Like every accessor, this accessor takes a <collection />
       MRML element as input (@see CXMLElement for how to access 
       the attributes of this element). Currently this accessor
       understands the following attributes:

       cui-base-dir:      the directory containing the following files
       cui-inverted-file-location: the location of the inverted file
       cui-offset-file-location:   a file containing offsets into the
                                   inverted file
       cui-feature-file-location:  the location of the "url2fts" file
                                   which translates urls to feature
                                   file names.

       @param inCollectionElement the <collection /> MRML element
  */
  CAcIFFileSystem(const CXMLElement& inCollectionElement);
  /**  Called by constructors.
       NOTE(review): the meaning of the bool parameter and of the
       return value is not visible in this header -- confirm against
       the implementation. */
  bool init(bool);

  /** Destructor */
  ~CAcIFFileSystem();
  
  /** Translate a DocumentID to a URL (for output) */
  string IDToURL(TID inID)const;

  /**@name The proper inverted file access
     NOTE(review): the functions in this group return raw pointers;
     presumably the caller owns the returned list -- confirm. */
  //@{
  /** List of documents containing the feature */
  CDocumentFrequencyList* FeatureToList(TFeatureID)const;

  /** List of features contained by a document
      @param inURL the URL of the document */
  CDocumentFrequencyList* URLToFeatureList(string inURL)const;

  /** List of features contained by a document with ID inDID */
  CDocumentFrequencyList* DIDToFeatureList(TID inDID)const;

  //@}


  /**@name Accessing information about features*/
  //@{
  /** Collection frequency for a given feature */
  double FeatureToCollectionFrequency(TFeatureID)const;

  /** What kind of feature is the feature with ID inFeatureID? */
  unsigned int getFeatureDescription(TID inFeatureID)const;
  //@}

  /**@name Accessing additional document information*/
  //@{
  /**  Returns the maximum document frequency for one document ID */
  double DIDToMaxDocumentFrequency(TID)const;

  /**  Returns the document-frequency square sum for a given document ID */
  double DIDToDFSquareSum(TID)const;

  /**  Returns a per-document value for a given document ID.
       NOTE(review): judging by the name this is presumably the sum
       over the document's features of the squared product of document
       frequency and log inverse collection frequency -- confirm
       against the implementation. */
  double DIDToSquareDFLogICFSum(TID)const;
  //@}

  /**@name Inverted File Generation and Consistency Checking*/
  //@{

  /** Generating an inverted File, if there is none.
      Fast but stupid in-memory method. This method is 
      very fast, if all the inverted file (and a bit more)
      can be kept in memory at runtime. If this is not the 
      case, extensive swapping is the result, virtually halting
      the inverted file creation.
   */
  bool generateInvertedFile();

  /** Generating an inverted File, if there is none.
      
      Employing the two-way-merge method described
      in "managing gigabytes", chapter 5.2. Sort-based
      inversion. (Page 181)

   */
  bool newGenerateInvertedFile();

  /** Check the consistency of the inverted file system accessed
      by this accessor. */
  bool checkConsistency();

  /** Is the document with inDocumentID contained in the
      document frequency list of the feature inFeatureID and
      is the associated document frequency the same?

      @param inFeatureID         the feature whose document frequency
                                 list is examined
      @param inDocumentID        the document looked for in that list
      @param inDocumentFrequency the document frequency expected for
                                 this document
  */
  bool findWithinStream(TID inFeatureID,
			TID inDocumentID,
			double inDocumentFrequency)const;
  
  //@}

  /**
   *
   * Translate an URL to its document ID
   *
   * NOTE(review): the bool of the returned pair presumably tells
   * whether the URL was found -- confirm.
   *
   */
  virtual pair<bool,TID> URLToID(const string& inURL)const;
  
  /**@name Enumerating and sampling documents*/
  //@{
  /** List of the IDs of all documents present in the inverted file */
  void getAllIDs(list<TID>&)const;
  /** List of triplets (ID,imageURL,thumbnailURL) of all
      the documents present in the inverted file */
  void getAllAccessorElements(list<CAccessorElement>&)const;
  /** Get a given number of random document IDs.
      The first parameter is the list which will contain the result,
      the second parameter is the desired size of that list.
  */
  void getRandomIDs(list<TID>&,
		    list<TID>::size_type)const;
  /** For drawing random sets. Why is this part of an CAccessorImplementation?
      The way the accessor is organised might influence the way
      random sets can be drawn. At present everything happens in
      RAM, but we do not want to be fixed on that.

      @param outResult the list which will contain the result
      @param inSize    the desired size of outResult
   */
  void getRandomAccessorElements(list<CAccessorElement>& outResult,
				  list<CAccessorElement>::size_type inSize)const;
  /** The number of images in this accessor */
  int size()const;
  //@}
  /** The maximum feature ID (see mMaximumFeatureID).
      This is interesting for browsing. */
  TID getMaximumFeatureID()const;
  /** Getting a list of all features contained in this.
      This function is necessary, because in the present 
      system only about 50 percent of the features are 
      really used.

      A feature is considered used if it arises in mIDToOffset.

      NOTE(review): a raw pointer is returned; presumably the caller
      owns the list -- confirm.
   */
  list<TID>* getAllFeatureIDs()const;
  /**
   *
   * Translate a DocumentID to an accessor Element
   *
   * NOTE(review): the bool of the returned pair presumably tells
   * whether the ID was found -- confirm.
   *
   */
  virtual pair<bool,CAccessorElement> IDToAccessorElement(TID inID)const;
  /** Is this well constructed? */
  operator bool()const;

};

#endif