This file is indexed.

/usr/include/sdsl/suffix_tree_algorithm.hpp is in libsdsl-dev 2.0.3-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
/* sdsl - succinct data structures library
    Copyright (C) 2010-2013 Simon Gog

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/ .
*/
/*! \file suffix_tree_algorithm.hpp
    \brief suffix_tree_algorithm.hpp contains algorithms on CSTs
    \author Simon Gog
*/
#ifndef INCLUDED_SDSL_SUFFIX_TREE_ALGORITHM
#define INCLUDED_SDSL_SUFFIX_TREE_ALGORITHM

#include <iterator>
#include "suffix_array_algorithm.hpp"

namespace sdsl
{

//! Forward search for a character c on the path on depth \f$d\f$ to node \f$v\f$.
/*!
 * \param cst      The CST object
 * \param v        The node at the endpoint of the current edge.
 * \param d        The current depth of the path starting with 0.
 * \param c        The character c which should be matched with pathlabel_{root()..v}[d]
 * \param char_pos T[char_pos-d+1..char_pos] matches with the already matched pattern P[0..d-1] or
 *                 d=0 => char_pos=0.
 * \return The number of the matching substrings in T.
 *
 *    \par Time complexity
 *        \f$ \Order{ t_{\Psi} } \f$ or \f$ \Order{t_{cst.child}} \f$
 */
template<class t_cst>
typename t_cst::size_type
forward_search(
    const t_cst& cst,
    typename t_cst::node_type& v,
    const typename t_cst::size_type d,
    const typename t_cst::char_type c,
    typename t_cst::size_type& char_pos,
    SDSL_UNUSED typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value, cst_tag>::type x = cst_tag()
)
{
    unsigned char cc = cst.csa.char2comp[c]; // check if c occurs in the text of the csa
    if (cc==0 and cc!=c)                     //   "    " "    "     "  "    "   "  "   "
        return 0;
    typename t_cst::size_type depth_node = cst.depth(v);
    if (d < depth_node) {         // in an edge, no  branching
        char_pos = cst.csa.psi[char_pos];
        if (char_pos < cst.csa.C[cc] or char_pos >= cst.csa.C[cc+1])
            return 0;
        return cst.size(v);
    } else if (d == depth_node) { // at a node,  branching
        v = cst.child(v, c, char_pos);
        if (v == cst.root())
            return 0;
        else
            return cst.size(v);
    } else {
        return 0;
    }
}

//! Forward search for a pattern pat on the path on depth \f$d\f$ to node \f$v\f$.
/*!
 *    \param cst      The compressed suffix tree.
 *    \param v        The node at the endpoint of the current edge.
 *    \param d        The current depth of the path. 0 = first character on each edge of the root node.
 *    \param pat      The character c which should be matched at the path on depth \f$d\f$ to node \f$v\f$.
 *    \param char_pos One position in the text, which corresponds to the text that is already matched. If v=cst.root() and d=0 => char_pos=0.
 *
 *    \par Time complexity
 *        \f$ \Order{ t_{\Psi} } \f$ or \f$ \Order{t_{cst.child}} \f$
 */
template<class t_cst, class t_pat_iter>
typename t_cst::size_type
forward_search(const t_cst& cst,
               typename t_cst::node_type& v,
               typename t_cst::size_type d,
               t_pat_iter begin,
               t_pat_iter end,
               typename t_cst::size_type& char_pos,
               SDSL_UNUSED typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value, cst_tag>::type x = cst_tag()
              )
{
    if (begin==end)
        return cst.size(v);
    typename t_cst::size_type size=0;
    t_pat_iter it = begin;
    while (it != end and (size=forward_search(cst, v, d, *it, char_pos))) {
        ++d;
        ++it;
    }
    return size;
}

//! Counts the number of occurrences of a pattern in a CST.
/*!
 * \tparam t_cst      CST type.
 * \tparam t_pat_iter Pattern iterator type.
 *
 * \param cst   The CST object.
 * \param begin Iterator to the begin of the pattern (inclusive).
 * \param end   Iterator to the end of the pattern (exclusive).
 * \return The number of occurrences of the pattern in the CSA.
 *
 * \par Time complexity
 *        \f$ \Order{ t_{backward\_search} } \f$
 */
template<class t_cst, class t_pat_iter>
typename t_cst::size_type count(
    const t_cst& cst,
    t_pat_iter begin,
    t_pat_iter end,
    cst_tag
)
{
    return count(cst.csa, begin, end);
}


//! Calculates all occurrences of a pattern pat in a CST.
/*!
 * \tparam t_cst      CST type.
 * \tparam t_pat_iter Pattern iterator type.
 * \tparam t_rac      Resizeable random access container.
 *
 * \param cst   The CST object.
 * \param begin Iterator to the begin of the pattern (inclusive).
 * \param end   Iterator to the end of the pattern (exclusive).
 * \return A vector containing the occurrences of the pattern  in the CST.
 *
 * \par Time complexity
 *        \f$ \Order{ t_{backward\_search} + z \cdot t_{SA} } \f$, where \f$z\f$ is the number of
 *         occurrences of pattern in the CST.
 */
template<class t_cst, class t_pat_iter, class t_rac=int_vector<64>>
t_rac locate(
    const t_cst& cst,
    t_pat_iter begin,
    t_pat_iter end,
    SDSL_UNUSED typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value, cst_tag>::type x = cst_tag()
)
{
    return locate(cst.csa, begin, end);
}

//! Calculate the concatenation of edge labels from the root to the node v of a CST.
/*!
 * \tparam t_cst       CST type.
 * \tparam t_text_iter Random access iterator type.
 *
 * \param cst   The CST object.
 * \param v     The node where the concatenation of the edge label ends.
 * \param text  Random access iterator pointing to the start of an container, which can hold at least (end-begin+1) character.
 * \returns The length of the extracted edge label.
 * \pre text has to be initialized with enough memory (\f$ cst.depth(v)+1\f$ bytes) to hold the extracted text.
 */
template<class t_cst, class t_text_iter>
typename t_cst::size_type extract(
    const t_cst& cst,
    const typename t_cst::node_type& v,
    t_text_iter text,
    SDSL_UNUSED typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value, cst_tag>::type x = cst_tag()
)
{
    if (v == cst.root()) {
        text[0] = 0;
        return 0;
    }
    // first get the suffix array entry of the leftmost leaf in the subtree rooted at v
    typename t_cst::size_type begin = cst.csa[cst.lb(v)];
    // then call the extract method on the compressed suffix array
    extract(cst.csa, begin, begin + cst.depth(v) - 1, text);
}

//! Calculate the concatenation of edge labels from the root to the node v of of c CST.
/*!
 * \tparam t_rac Random access container which should hold the result.
 * \tparam t_cst CSA type.

 * \param cst The CST object.
 * \return A t_rac object holding the extracted edge label.
 * \return The string of the concatenated edge labels from the root to the node v.
 */
template<class t_cst>
typename t_cst::csa_type::string_type
extract(
    const t_cst& cst,
    const typename t_cst::node_type& v,
    SDSL_UNUSED typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value, cst_tag>::type x = cst_tag()
)
{
    typedef typename t_cst::csa_type::string_type t_rac;
    if (v==cst.root()) {
        return t_rac(0);
    }
    // first get the suffix array entry of the leftmost leaf in the subtree rooted at v
    typename t_cst::size_type begin = cst.csa[cst.lb(v)];
    // then call the extract method on the compressed suffix array
    return extract(cst.csa, begin, begin + cst.depth(v) - 1);
}



//! Calculate the zeroth order entropy of the text that follows a certain substring s
/*!
 * \param v     A suffix tree node v. The label of the path from the root to v is s.
 * \param cst   The suffix tree of v.
 * \return      The zeroth order entropy of the concatenation of all characters that follow
                s in the original text.
 */
template<class t_cst>
double H0(const typename t_cst::node_type& v, const t_cst& cst)
{
    if (cst.is_leaf(v)) {
        return 0;
    } else {
        double h0=0;
        auto n = cst.size(v);
        for (const auto& child : cst.children(v)) {
            double p = ((double)cst.size(child))/n;
            h0 -= p*log2(p);
        }
        return h0;
    }
}

//! Calculate the k-th order entropy of a text
/*!
 * \param cst The suffix tree.
 * \param k   Parameter k for which H_k should be calculated.
 * \return    H_k and the number of contexts.
 */
template<class t_cst>
std::pair<double,size_t> Hk(const t_cst& cst, typename t_cst::size_type k)
{
    double hk = 0;
    size_t context = 0;
    std::set<typename t_cst::size_type> leafs_with_d_smaller_k;
    for (typename t_cst::size_type d = 1; d < k; ++d) {
        leafs_with_d_smaller_k.insert(cst.csa.isa[cst.csa.size()-d]);
    }
    for (typename t_cst::const_iterator it = cst.begin(), end=cst.end(); it != end; ++it) {
        if (it.visit() == 1) {
            if (!cst.is_leaf(*it)) {
                typename t_cst::size_type d = cst.depth(*it);
                if (d >= k) {
                    if (d == k) {
                        hk += cst.size(*it) * H0(*it, cst);
                    }
                    ++context;
                    it.skip_subtree();
                }
            } else {
                // if d of leaf is >= k, add context
                if (leafs_with_d_smaller_k.find(cst.lb(*it)) == leafs_with_d_smaller_k.end()) {
                    ++context;
                }
            }
        }
    }
    hk /= cst.size();
    return {hk,context};
}


} // end namespace
#endif