/usr/include/CLucene/index/DocumentWriter.h is in libclucene-dev 0.9.21b-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#ifndef _lucene_index_DocumentWriter_
#define _lucene_index_DocumentWriter_
#if defined(_LUCENE_PRAGMA_ONCE)
# pragma once
#endif
#include "CLucene/analysis/AnalysisHeader.h"
#include "CLucene/document/Document.h"
#include "CLucene/store/Directory.h"
#include "FieldInfos.h"
#include "IndexWriter.h"
#include "CLucene/util/VoidMap.h"
#include "CLucene/document/Field.h"
#include "TermInfo.h"
#include "CLucene/search/Similarity.h"
#include "TermInfosWriter.h"
#include "FieldsWriter.h"
#include "Term.h"
CL_NS_DEF(index)
/**
* Declaration of the writer that buffers one document as a table of Postings
* (keyed by Term) and writes it out under a segment name.
*
* Only declarations are visible in this header; notes marked NOTE(review) are
* assumptions drawn from names and signatures and should be confirmed against
* the implementation file.
*/
class DocumentWriter :LUCENE_BASE{
public:
/** Per-document record for a single Term: how often and where it occurs. */
class Posting :LUCENE_BASE{ // info about a Term in a doc
public:
Term* term; // the Term
int32_t freq; // its frequency in doc
Array<int32_t> positions; // positions it occurs at
// Offset records stored alongside the positions.
// NOTE(review): presumably one entry per occurrence, parallel to `positions` - confirm.
Array<TermVectorOffsetInfo> offsets;
/**
* @param t The Term this posting describes
* @param position A position of the term in the document
* @param offset Offset information for that occurrence
*
* NOTE(review): whether `offset` may be NULL, and who owns `t` and `offset`,
* is not visible from this header - confirm in the implementation.
*/
Posting(Term* t, const int32_t position, TermVectorOffsetInfo* offset);
~Posting();
};
private:
// Collaborators. NOTE(review): presumably the pointers handed to the
// constructors; ownership is not visible from this header.
CL_NS(analysis)::Analyzer* analyzer;
CL_NS(store)::Directory* directory;
FieldInfos* fieldInfos; //array
// Declared const, so every constructor has to set it in its initializer list.
const int32_t maxFieldLength;
CL_NS(search)::Similarity* similarity;
int32_t termIndexInterval;
// Keys are Terms, values are Postings.
// Used to buffer a document before it is written to the index.
typedef CL_NS(util)::CLHashtable<Term*,Posting*,Term::Compare, Term::Equals> PostingTableType;
PostingTableType postingTable;
// Per-field working arrays.
// NOTE(review): presumably indexed by field number and sized from fieldInfos - confirm.
int32_t* fieldLengths; //array
int32_t* fieldPositions; //array
int32_t* fieldOffsets; //array
float_t* fieldBoosts; //array
// NOTE(review): looks like a reusable Term for postingTable lookups - confirm.
Term* termBuffer;
public:
/** This ctor used by test code only.
*
* @param d The directory to write the document information to
* @param a The analyzer to use for the document
* @param similarity The Similarity function
* @param maxFieldLength The maximum number of tokens a field may have
*/
DocumentWriter(CL_NS(store)::Directory* d, CL_NS(analysis)::Analyzer* a, CL_NS(search)::Similarity* similarity, const int32_t maxFieldLength);
/**
* @param directory The directory to write the document information to
* @param analyzer The analyzer to use for the document
* @param writer The IndexWriter this document writer is created for.
* NOTE(review): presumably the source of the remaining settings
* (similarity, maxFieldLength, termIndexInterval) - confirm.
*/
DocumentWriter(CL_NS(store)::Directory* directory, CL_NS(analysis)::Analyzer* analyzer, IndexWriter* writer);
~DocumentWriter();
/**
* Adds a document under the given segment name.
*
* @param segment Name of the segment to write
* @param doc The document to add
*/
void addDocument(const char* segment, CL_NS(document)::Document* doc);
private:
// Tokenizes the fields of a document into Postings.
void invertDocument(const CL_NS(document)::Document* doc);
// Records one occurrence of the term (field, text) at `position`, with its offset info.
// NOTE(review): presumably creates or updates the matching entry in postingTable - confirm.
void addPosition(const TCHAR* field, const TCHAR* text, const int32_t position, TermVectorOffsetInfo* offset);
// Both arguments are references, so the callee can hand back the array pointer
// and its length. NOTE(review): who frees the returned array is not visible here.
void sortPostingTable(Posting**& array, int32_t& arraySize);
// Sorts a range of `postings` bounded by `lo` and `hi`.
// NOTE(review): presumably inclusive indices - confirm.
static void quickSort(Posting**& postings, const int32_t lo, const int32_t hi);
// Writes `postingsLength` postings for the given segment name.
void writePostings(Posting** postings, const int32_t postingsLength, const char* segment);
// Writes the norms for the given segment name.
void writeNorms(const char* segment);
// Empties postingTable.
// NOTE(review): whether the contained Terms/Postings are released is not visible here.
void clearPostingTable();
};
CL_NS_END
#endif