This file is indexed.

/usr/include/shogun/lib/NGramTokenizer.h is in libshogun-dev 3.2.0-7.3build4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Evangelos Anagnostopoulos
 * Copyright (C) 2013 Evangelos Anagnostopoulos
 */

#ifndef _NGRAMTOKENIZER__H__
#define	_NGRAMTOKENIZER__H__

#include <shogun/lib/Tokenizer.h>

namespace shogun
{
/* Forward declaration of the base class. CNGramTokenizer below derives from
 * CTokenizer, which requires the complete type at that point, so the full
 * definition is expected to come from <shogun/lib/Tokenizer.h> included
 * above; if so, this declaration is redundant but harmless.
 */
class CTokenizer;

/** @brief The class CNGramTokenizer is used to tokenize
 *  an SGVector<char> into n-grams
 *
 *  Publicly derives from CTokenizer. The virtual methods declared here are
 *  presumably overrides of the CTokenizer interface (Tokenizer.h is not
 *  visible from this header - confirm there). Only declarations live in
 *  this header; apart from the empty destructor, all definitions are
 *  elsewhere.
 */
class CNGramTokenizer: public CTokenizer
{
public:
    /** Constructor
	 *
	 * NOTE(review): this constructor is not declared explicit and its only
	 * parameter has a default, so it also serves as the default constructor
	 * and allows implicit conversion from int32_t to CNGramTokenizer.
	 *
	 * @param ns N-grams' size (defaults to 3 when omitted)
	 */
    CNGramTokenizer(int32_t ns=3);

    /** copy constructor
	 *
	 * @param orig the original NGramTokenizer
	 */
    CNGramTokenizer(const CNGramTokenizer& orig);

    /** destructor (virtual, empty body) */
    virtual ~CNGramTokenizer() {}

	/** Set the char array that requires tokenization
	 *
	 * The vector is taken by value.
	 *
	 * @param txt the text to tokenize
	 */
	virtual void set_text(SGVector<char> txt);

	/** Returns true or false based on whether
	 * there exists another token in the text
	 *
	 * @return if another token exists
	 */
	virtual bool has_next();

	/** Method that returns the indices, start and end, of
	 *  the next token in line.
	 *
	 * The starting index is reported through the reference parameter and
	 * the ending index through the return value.
	 *
	 * @param start token's starting index (non-const reference, used as
	 *              an output)
	 * @return token's ending index (exclusive)
	 */
	virtual index_t next_token_idx(index_t& start);

	/** Returns the name of the SGSerializable instance.  It MUST BE
	 *  the CLASS NAME without the prefixed `C'.
	 *
	 *  @return name of the SGSerializable
	 */
    virtual const char* get_name() const;

	/** Returns a pointer to a CNGramTokenizer.
	 *
	 * Presumably a newly created copy of this tokenizer; who owns the
	 * returned object (and any reference counting) is not visible from
	 * this header - TODO confirm against the implementation.
	 *
	 * @return pointer to a CNGramTokenizer
	 */
	virtual CNGramTokenizer* get_copy();

private:
	/** Private helper, defined outside this header.
	 *
	 * Presumably the common initialization shared by the constructors -
	 * TODO confirm against the implementation.
	 */
	void init();

protected:

	/** n-grams' size (presumably set from the constructor's ns argument) */
	int32_t n;

	/** last index returned (presumably by next_token_idx() - confirm) */
	index_t last_idx;
};
}
#endif	/* _NGRAMTOKENIZER__H__ */