This file is indexed.

/usr/include/shogun/classifier/vw/VwParser.h is in libshogun-dev 1.1.0-4ubuntu2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
/*
 * Copyright (c) 2009 Yahoo! Inc.  All rights reserved.  The copyrights
 * embodied in the content of this file are licensed under the BSD
 * (revised) open source license.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Adaptation of Vowpal Wabbit v5.1.
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
 */

#ifndef _VW_PARSER_H__
#define _VW_PARSER_H__

#include <shogun/base/SGObject.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/Hash.h>
#include <shogun/classifier/vw/vw_common.h>
#include <shogun/classifier/vw/cache/VwCacheWriter.h>

namespace shogun
{
/// Input formats the parser can be asked to read.
/// The numeric values are fixed at 1, 2 and 3.
enum E_VW_PARSER_TYPE
{
	/// Vowpal Wabbit input (value 1)
	T_VW = 1,
	/// SVMLight input (value 2)
	T_SVMLIGHT,
	/// Dense vector input (value 3)
	T_DENSE
};

/** @brief CVwParser is the object which provides the
 * functions to parse examples from buffered input.
 *
 * An instance of this class can be created in
 * CStreamingVwFile and the appropriate read_*_features
 * function called to parse examples from different formats.
 *
 * It also encapsulates a CVwCacheWriter object which may
 * be used in case a cache file is to be generated simultaneously
 * with parsing.
 *
 * The parser holds a counted reference to its environment (see
 * get_env() and set_env()) and to its cache writer.
 */
class CVwParser: public CSGObject
{
public:
	/**
	 * Default constructor
	 */
	CVwParser();

	/**
	 * Constructor taking environment as parameter.
	 *
	 * @param env_to_use CVwEnvironment to use
	 */
	CVwParser(CVwEnvironment* env_to_use);

	/**
	 * Destructor
	 */
	virtual ~CVwParser();

	/**
	 * Get the environment.
	 *
	 * The reference count of the environment is incremented before it
	 * is returned.
	 *
	 * @return environment as CVwEnvironment*
	 */
	CVwEnvironment* get_env()
	{
		SG_REF(env);
		return env;
	}

	/**
	 * Set the environment.
	 *
	 * The parser takes a reference on the new environment and gives up
	 * the reference it held on the previous one, so replacing the
	 * environment no longer leaks the old object.
	 *
	 * NOTE(review): this relies on the constructors (defined outside
	 * this header) having initialised env - confirm.
	 *
	 * @param env_to_use environment as CVwEnvironment*
	 */
	void set_env(CVwEnvironment* env_to_use)
	{
		// Reference the new object before releasing the old one so that
		// passing the environment that is already in use stays safe.
		SG_REF(env_to_use);
		SG_UNREF(env);
		env = env_to_use;
	}

	/**
	 * Set the cache parameters.
	 *
	 * Forwards both arguments to init_cache().
	 *
	 * @param fname name of the cache file
	 * @param type type of cache as one in EVwCacheType
	 */
	void set_cache_parameters(char * fname, EVwCacheType type = C_NATIVE)
	{
		init_cache(fname, type);
	}

	/**
	 * Return the type of cache
	 *
	 * @return cache type as EVwCacheType
	 */
	EVwCacheType get_cache_type() const
	{
		return cache_type;
	}

	/**
	 * Set whether to write cache file or not.
	 *
	 * Enabling calls init_cache() with a NULL file name and the default
	 * cache type (presumably init_cache() then picks a default file
	 * name - confirm in the implementation). Disabling releases the
	 * parser's reference to the cache writer, if there is one.
	 *
	 * @param wr_cache write cache or not
	 */
	void set_write_cache(bool wr_cache)
	{
		write_cache = wr_cache;
		if (wr_cache)
		{
			init_cache(NULL);
		}
		else if (cache_writer)
		{
			SG_UNREF(cache_writer);
			// Our reference is gone; never keep a pointer to an object
			// we no longer own, otherwise it could be released twice.
			cache_writer = NULL;
		}
	}

	/**
	 * Return whether cache will be written or not
	 *
	 * @return will cache be written?
	 */
	bool get_write_cache() const
	{
		return write_cache;
	}

	/**
	 * Update min and max labels seen in the environment.
	 *
	 * min_label is lowered to the given label if that is smaller.
	 * max_label is raised to the given label if that is larger, except
	 * when the label equals FLT_MAX, which is left out of the maximum
	 * (presumably FLT_MAX marks an example without a label - confirm).
	 *
	 * @param label current label based on which to update
	 */
	void set_mm(float64_t label)
	{
		env->min_label = CMath::min(env->min_label, label);
		if (label != FLT_MAX)
			env->max_label = CMath::max(env->max_label, label);
	}

	/**
	 * A dummy function performing no operation in case training
	 * is not to be performed.
	 *
	 * @param label label (ignored)
	 */
	void noop_mm(float64_t label) { }

	/**
	 * Function which is actually called to update min and max labels.
	 *
	 * In this class it always forwards to set_mm(); noop_mm() is not
	 * selected from here.
	 *
	 * @param label label based on which to update
	 */
	void set_minmax(float64_t label)
	{
		set_mm(label);
	}

	/**
	 * Reads input from the buffer and parses it into a VwExample
	 *
	 * @param buf IOBuffer which contains input
	 * @param ex parsed example
	 *
	 * @return number of characters read for this example
	 */
	int32_t read_features(CIOBuffer* buf, VwExample*& ex);

	/**
	 * Read an example from an SVMLight file
	 *
	 * @param buf IOBuffer which contains input
	 * @param ae parsed example
	 *
	 * @return number of characters read for this example
	 */
	int32_t read_svmlight_features(CIOBuffer* buf, VwExample*& ae);

	/**
	 * Read an example from a file with dense vectors
	 *
	 * @param buf IOBuffer which contains input
	 * @param ae parsed example
	 *
	 * @return number of characters read for this example
	 */
	int32_t read_dense_features(CIOBuffer* buf, VwExample*& ae);

	/**
	 * Return the name of the object
	 *
	 * @return VwParser
	 */
	virtual const char* get_name() const { return "VwParser"; }

protected:
	/**
	 * Initialize the cache writer
	 *
	 * @param fname cache file name
	 * @param type cache type as EVwCacheType, default is C_NATIVE
	 */
	void init_cache(char * fname, EVwCacheType type = C_NATIVE);

	/**
	 * Get value of feature from a given substring.
	 * A default of 1 is assumed if no explicit value is specified.
	 *
	 * @param s substring, usually a feature:value string
	 * @param name returned array of substrings, split into name and value
	 * @param v value of feature, set by reference
	 */
	void feature_value(substring &s, v_array<substring>& name, float32_t &v);

	/**
	 * Split a given substring into an array of substrings
	 * based on a specified delimiter
	 *
	 * @param delim delimiter to use
	 * @param s substring to tokenize
	 * @param ret array of substrings, returned
	 */
	void tokenize(char delim, substring s, v_array<substring> &ret);

	/**
	 * Find the first occurrence of a character in the range
	 * [start, max), never reading at or beyond max.
	 *
	 * @param start start memory location, char*
	 * @param v character to search for
	 * @param max one past the last location to look in
	 *
	 * @return pointer to the first occurrence of v, or max if v does
	 * not occur before max
	 */
	inline char* safe_index(char *start, char v, char *max)
	{
		// The bound is tested before the dereference, so *max is never read.
		while (start != max && *start != v)
			start++;
		return start;
	}

public:
	/// Hash function to use, of type hash_func_t
	hash_func_t hasher;

protected:
	/// Environment of VW - used by parser
	CVwEnvironment* env;
	/// Object which will be used for writing cache
	CVwCacheWriter* cache_writer;
	/// Type of cache
	EVwCacheType cache_type;
	/// Whether to write cache or not
	bool write_cache;

private:
	/// Scratch arrays used during parsing
	v_array<substring> channels;
	v_array<substring> words;
	v_array<substring> name;
};

}
#endif // _VW_PARSER_H__