This file is indexed.

/usr/include/mlpack/core/tree/binary_space_tree/mean_split_impl.hpp is in libmlpack-dev 1.0.10-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
/**
 * @file mean_split_impl.hpp
 * @author Yash Vadalia
 * @author Ryan Curtin
 *
 * Implementation of class(MeanSplit) to split a binary space partition tree.
 *
 * This file is part of MLPACK 1.0.10.
 *
 * MLPACK is free software: you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation, either version 3 of the License, or (at your option) any
 * later version.
 *
 * MLPACK is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
 * details (LICENSE.txt).
 *
 * You should have received a copy of the GNU General Public License along with
 * MLPACK.  If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef __MLPACK_CORE_TREE_BINARY_SPACE_TREE_MEAN_SPLIT_IMPL_HPP
#define __MLPACK_CORE_TREE_BINARY_SPACE_TREE_MEAN_SPLIT_IMPL_HPP

#include "mean_split.hpp"

namespace mlpack {
namespace tree {

/**
 * Choose a split for the points in columns [begin, begin + count) of data and
 * reorder those columns accordingly.
 *
 * The split dimension is the one for which bound reports the largest width;
 * the split value is the midpoint of the bound in that dimension.
 *
 * @param bound Bound of the node being split; indexed per dimension.
 * @param data Dataset; the columns in the given range are reordered in place.
 * @param begin Index of the first column belonging to the node.
 * @param count Number of columns belonging to the node.
 * @param splitDimension Set to the chosen dimension, or to data.n_rows when no
 *     split is possible.
 * @param splitCol Set to the value returned by PerformSplit(); left untouched
 *     when no split is possible.
 * @return true if the data was partitioned, false if no split is possible.
 */
template<typename BoundType, typename MatType>
bool MeanSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
                                              MatType& data,
                                              const size_t begin,
                                              const size_t count,
                                              size_t& splitDimension,
                                              size_t& splitCol)
{
  splitDimension = data.n_rows; // Sentinel: no valid dimension chosen yet.
  double maxWidth = -1;

  // Pick the dimension in which the bound is widest.
  for (size_t d = 0; d < data.n_rows; d++)
  {
    const double width = bound[d].Width();

    if (width > maxWidth)
    {
      maxWidth = width;
      splitDimension = d;
    }
  }

  // A width of zero means all the points are the same, so we can't split.  A
  // negative maxWidth means no dimension was selected at all (for instance
  // when data.n_rows is 0); splitDimension then still holds the invalid
  // sentinel and must not be used to index the bound.
  if (maxWidth <= 0)
    return false;

  // Split in the middle of that dimension.
  const double splitVal = bound[splitDimension].Mid();

  // Perform the actual splitting.  This will order the dataset such that points
  // with value in dimension splitDimension strictly less than splitVal are on
  // the left of splitCol, and points with value in dimension splitDimension
  // greater than or equal to splitVal are at or to the right of splitCol.
  splitCol = PerformSplit(data, begin, count, splitDimension, splitVal);

  return true;
}

/**
 * Choose a split for the points in columns [begin, begin + count) of data and
 * reorder those columns accordingly, applying the same reordering to
 * oldFromNew.
 *
 * The split dimension is the one for which bound reports the largest width;
 * the split value is the midpoint of the bound in that dimension.
 *
 * @param bound Bound of the node being split; indexed per dimension.
 * @param data Dataset; the columns in the given range are reordered in place.
 * @param begin Index of the first column belonging to the node.
 * @param count Number of columns belonging to the node.
 * @param splitDimension Set to the chosen dimension, or to data.n_rows when no
 *     split is possible.
 * @param splitCol Set to the value returned by PerformSplit(); left untouched
 *     when no split is possible.
 * @param oldFromNew Index mapping that is permuted together with the columns
 *     of data.
 * @return true if the data was partitioned, false if no split is possible.
 */
template<typename BoundType, typename MatType>
bool MeanSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
                                              MatType& data,
                                              const size_t begin,
                                              const size_t count,
                                              size_t& splitDimension,
                                              size_t& splitCol,
                                              std::vector<size_t>& oldFromNew)
{
  splitDimension = data.n_rows; // Sentinel: no valid dimension chosen yet.
  double maxWidth = -1;

  // Pick the dimension in which the bound is widest.
  for (size_t d = 0; d < data.n_rows; d++)
  {
    const double width = bound[d].Width();

    if (width > maxWidth)
    {
      maxWidth = width;
      splitDimension = d;
    }
  }

  // A width of zero means all the points are the same, so we can't split.  A
  // negative maxWidth means no dimension was selected at all (for instance
  // when data.n_rows is 0); splitDimension then still holds the invalid
  // sentinel and must not be used to index the bound.
  if (maxWidth <= 0)
    return false;

  // Split in the middle of that dimension.
  const double splitVal = bound[splitDimension].Mid();

  // Perform the actual splitting.  This will order the dataset such that points
  // with value in dimension splitDimension strictly less than splitVal are on
  // the left of splitCol, and points with value in dimension splitDimension
  // greater than or equal to splitVal are at or to the right of splitCol.
  splitCol = PerformSplit(data, begin, count, splitDimension, splitVal,
      oldFromNew);

  return true;
}

/**
 * Partition the columns [begin, begin + count) of data in place so that every
 * column whose entry in row splitDimension is strictly less than splitVal
 * precedes every column whose entry is greater than or equal to splitVal.
 *
 * @param data Dataset whose columns are reordered.
 * @param begin Index of the first column of the range.
 * @param count Number of columns in the range; may be zero.
 * @param splitDimension Row whose values are compared against splitVal.
 * @param splitVal Threshold separating the two sides.
 * @return Index of the first column whose value is >= splitVal, or
 *     begin + count if there is no such column.
 */
template<typename BoundType, typename MatType>
size_t MeanSplit<BoundType, MatType>::
    PerformSplit(MatType& data,
                 const size_t begin,
                 const size_t count,
                 const size_t splitDimension,
                 const double splitVal)
{
  // Work on the half-open window [left, right) of columns that have not been
  // classified yet.  Columns in [begin, left) are known to be < splitVal and
  // columns in [right, begin + count) are known to be >= splitVal.  Using an
  // exclusive upper end keeps the unsigned index from wrapping below zero when
  // every point belongs on the right (or when count is 0).
  size_t left = begin;
  size_t right = begin + count;

  while (true)
  {
    // Skip points on the left that are already correct.  The window check
    // comes first so that data is never read outside [begin, begin + count).
    while ((left < right) && (data(splitDimension, left) < splitVal))
      left++;

    // Skip points on the right that are already correct.
    while ((left < right) && (data(splitDimension, right - 1) >= splitVal))
      right--;

    // Nothing left to classify.
    if (left == right)
      break;

    // Column left is >= splitVal and column right - 1 is < splitVal, so they
    // are distinct; exchanging them puts both on their correct sides.
    data.swap_cols(left, right - 1);
    left++;
    right--;
  }

  return left;
}

/**
 * Partition the columns [begin, begin + count) of data in place so that every
 * column whose entry in row splitDimension is strictly less than splitVal
 * precedes every column whose entry is greater than or equal to splitVal.
 * Every column swap is mirrored on the corresponding entries of oldFromNew.
 *
 * @param data Dataset whose columns are reordered.
 * @param begin Index of the first column of the range.
 * @param count Number of columns in the range; may be zero.
 * @param splitDimension Row whose values are compared against splitVal.
 * @param splitVal Threshold separating the two sides.
 * @param oldFromNew Index mapping permuted together with the columns; it is
 *     indexed with the same column indices as data.
 * @return Index of the first column whose value is >= splitVal, or
 *     begin + count if there is no such column.
 */
template<typename BoundType, typename MatType>
size_t MeanSplit<BoundType, MatType>::
    PerformSplit(MatType& data,
                 const size_t begin,
                 const size_t count,
                 const size_t splitDimension,
                 const double splitVal,
                 std::vector<size_t>& oldFromNew)
{
  // Work on the half-open window [left, right) of columns that have not been
  // classified yet.  Columns in [begin, left) are known to be < splitVal and
  // columns in [right, begin + count) are known to be >= splitVal.  Using an
  // exclusive upper end keeps the unsigned index from wrapping below zero when
  // every point belongs on the right (or when count is 0).
  size_t left = begin;
  size_t right = begin + count;

  while (true)
  {
    // Skip points on the left that are already correct.  The window check
    // comes first so that data is never read outside [begin, begin + count).
    while ((left < right) && (data(splitDimension, left) < splitVal))
      left++;

    // Skip points on the right that are already correct.
    while ((left < right) && (data(splitDimension, right - 1) >= splitVal))
      right--;

    // Nothing left to classify.
    if (left == right)
      break;

    // Column left is >= splitVal and column right - 1 is < splitVal, so they
    // are distinct; exchanging them puts both on their correct sides.
    const size_t last = right - 1;
    data.swap_cols(left, last);

    // Update the indices for what we changed.
    const size_t t = oldFromNew[left];
    oldFromNew[left] = oldFromNew[last];
    oldFromNew[last] = t;

    left++;
    right--;
  }

  return left;
}

}; // namespace tree
}; // namespace mlpack

#endif