/usr/include/mlpack/core/tree/binary_space_tree/mean_split_impl.hpp is in libmlpack-dev 1.0.10-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/**
* @file mean_split_impl.hpp
* @author Yash Vadalia
* @author Ryan Curtin
*
* Implementation of class(MeanSplit) to split a binary space partition tree.
*
* This file is part of MLPACK 1.0.10.
*
* MLPACK is free software: you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option) any
* later version.
*
* MLPACK is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details (LICENSE.txt).
*
* You should have received a copy of the GNU General Public License along with
* MLPACK. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __MLPACK_CORE_TREE_BINARY_SPACE_TREE_MEAN_SPLIT_IMPL_HPP
#define __MLPACK_CORE_TREE_BINARY_SPACE_TREE_MEAN_SPLIT_IMPL_HPP
#include "mean_split.hpp"
namespace mlpack {
namespace tree {
/**
 * Pick the dimension in which the bound is widest and partition the points of
 * the node about the midpoint of the bound in that dimension.
 *
 * @param bound Bound of the node; bound[d] must provide Width() and Mid().
 * @param data Dataset, one point per column.  Columns in
 *     [begin, begin + count) may be reordered in place.
 * @param begin Index of the first column belonging to the node.
 * @param count Number of columns belonging to the node.
 * @param splitDimension Receives the chosen dimension when true is returned.
 *     When false is returned it does not identify a usable dimension.
 * @param splitCol Receives the value returned by PerformSplit(): the index of
 *     the first column of the right-hand part.  Untouched when false is
 *     returned.
 * @return true if the points were partitioned; false if no dimension with a
 *     positive width was found, in which case data is left unmodified.
 */
template<typename BoundType, typename MatType>
bool MeanSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
                                              MatType& data,
                                              const size_t begin,
                                              const size_t count,
                                              size_t& splitDimension,
                                              size_t& splitCol)
{
  splitDimension = data.n_rows; // Indicate invalid.
  double maxWidth = -1;

  // Find the dimension with the largest width (first one wins on ties).
  for (size_t d = 0; d < data.n_rows; d++)
  {
    const double width = bound[d].Width();
    if (width > maxWidth)
    {
      maxWidth = width;
      splitDimension = d;
    }
  }

  // A width of zero means all points coincide, so there is nothing to split.
  // maxWidth still being negative means that no dimension was selected at all
  // (there are no rows, or no width compared greater than -1); in that case
  // splitDimension is still the invalid marker and must not be used to index
  // the bound.
  if (maxWidth <= 0)
    return false;

  // Split in the middle of that dimension.
  const double splitVal = bound[splitDimension].Mid();

  // Perform the actual splitting.  This orders the dataset such that points
  // with value in dimension splitDimension strictly less than splitVal are on
  // the left of splitCol, and points with value greater than or equal to
  // splitVal are at splitCol or to its right.
  splitCol = PerformSplit(data, begin, count, splitDimension, splitVal);

  return true;
}
/**
 * Pick the dimension in which the bound is widest and partition the points of
 * the node about the midpoint of the bound in that dimension, applying the
 * same reordering to the entries of oldFromNew.
 *
 * @param bound Bound of the node; bound[d] must provide Width() and Mid().
 * @param data Dataset, one point per column.  Columns in
 *     [begin, begin + count) may be reordered in place.
 * @param begin Index of the first column belonging to the node.
 * @param count Number of columns belonging to the node.
 * @param splitDimension Receives the chosen dimension when true is returned.
 *     When false is returned it does not identify a usable dimension.
 * @param splitCol Receives the value returned by PerformSplit(): the index of
 *     the first column of the right-hand part.  Untouched when false is
 *     returned.
 * @param oldFromNew Per-column index vector; whenever two columns of data are
 *     swapped, the entries at the same two positions are swapped as well.
 * @return true if the points were partitioned; false if no dimension with a
 *     positive width was found, in which case data and oldFromNew are left
 *     unmodified.
 */
template<typename BoundType, typename MatType>
bool MeanSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
                                              MatType& data,
                                              const size_t begin,
                                              const size_t count,
                                              size_t& splitDimension,
                                              size_t& splitCol,
                                              std::vector<size_t>& oldFromNew)
{
  splitDimension = data.n_rows; // Indicate invalid.
  double maxWidth = -1;

  // Find the dimension with the largest width (first one wins on ties).
  for (size_t d = 0; d < data.n_rows; d++)
  {
    const double width = bound[d].Width();
    if (width > maxWidth)
    {
      maxWidth = width;
      splitDimension = d;
    }
  }

  // A width of zero means all points coincide, so there is nothing to split.
  // maxWidth still being negative means that no dimension was selected at all
  // (there are no rows, or no width compared greater than -1); in that case
  // splitDimension is still the invalid marker and must not be used to index
  // the bound.
  if (maxWidth <= 0)
    return false;

  // Split in the middle of that dimension.
  const double splitVal = bound[splitDimension].Mid();

  // Perform the actual splitting.  This orders the dataset such that points
  // with value in dimension splitDimension strictly less than splitVal are on
  // the left of splitCol, and points with value greater than or equal to
  // splitVal are at splitCol or to its right.
  splitCol = PerformSplit(data, begin, count, splitDimension, splitVal,
      oldFromNew);

  return true;
}
/**
 * Partition columns [begin, begin + count) of the dataset in place so that
 * every column whose value in row splitDimension is strictly less than
 * splitVal precedes every column whose value is not.
 *
 * @param data Dataset, one point per column; columns are swapped in place.
 * @param begin Index of the first column to consider.
 * @param count Number of columns to consider.
 * @param splitDimension Row whose values are compared against splitVal.
 * @param splitVal Value to partition about.
 * @return Index of the first column of the right-hand part (the columns that
 *     are not less than splitVal); begin + count if there is no such column.
 */
template<typename BoundType, typename MatType>
size_t MeanSplit<BoundType, MatType>::
PerformSplit(MatType& data,
             const size_t begin,
             const size_t count,
             const size_t splitDimension,
             const double splitVal)
{
  // The columns still to be classified form the half-open range [left, end).
  // Using an exclusive upper end (instead of an inclusive 'right' index)
  // means the unsigned index can never be decremented below zero, and an
  // empty range (count == 0) needs no special handling.
  size_t left = begin;
  size_t end = begin + count;

  // The range test comes first in every condition so that the matrix is never
  // read at an index outside [begin, begin + count).  The right-hand test is
  // written as the exact negation of the left-hand one, so every column
  // satisfies exactly one of them (this also holds if a comparison involves
  // NaN, where both '<' and '>=' would be false), which guarantees progress.

  // First half-iteration of the loop is out here because the termination
  // condition is in the middle.
  while ((left < end) && (data(splitDimension, left) < splitVal))
    ++left;
  while ((left < end) && !(data(splitDimension, end - 1) < splitVal))
    --end;

  while (left < end)
  {
    // Column 'left' belongs on the right and column 'end - 1' belongs on the
    // left (and so they are distinct columns): exchange them.
    data.swap_cols(left, end - 1);

    // Skip the columns on the left that are already in the correct part.
    while ((left < end) && (data(splitDimension, left) < splitVal))
      ++left;

    // Skip the columns on the right that are already in the correct part.
    while ((left < end) && !(data(splitDimension, end - 1) < splitVal))
      --end;
  }

  // Both ends must have met exactly at the boundary between the two parts.
  Log::Assert(left == end);

  return left;
}
/**
 * Partition columns [begin, begin + count) of the dataset in place so that
 * every column whose value in row splitDimension is strictly less than
 * splitVal precedes every column whose value is not, and apply the same
 * swaps to the entries of oldFromNew.
 *
 * @param data Dataset, one point per column; columns are swapped in place.
 * @param begin Index of the first column to consider.
 * @param count Number of columns to consider.
 * @param splitDimension Row whose values are compared against splitVal.
 * @param splitVal Value to partition about.
 * @param oldFromNew Per-column index vector; it is indexed with the same
 *     column indices as data, and each column swap is mirrored in it.
 * @return Index of the first column of the right-hand part (the columns that
 *     are not less than splitVal); begin + count if there is no such column.
 */
template<typename BoundType, typename MatType>
size_t MeanSplit<BoundType, MatType>::
PerformSplit(MatType& data,
             const size_t begin,
             const size_t count,
             const size_t splitDimension,
             const double splitVal,
             std::vector<size_t>& oldFromNew)
{
  // The columns still to be classified form the half-open range [left, end).
  // Using an exclusive upper end (instead of an inclusive 'right' index)
  // means the unsigned index can never be decremented below zero, and an
  // empty range (count == 0) needs no special handling.
  size_t left = begin;
  size_t end = begin + count;

  // The range test comes first in every condition so that the matrix is never
  // read at an index outside [begin, begin + count).  The right-hand test is
  // written as the exact negation of the left-hand one, so every column
  // satisfies exactly one of them (this also holds if a comparison involves
  // NaN, where both '<' and '>=' would be false), which guarantees progress.

  // First half-iteration of the loop is out here because the termination
  // condition is in the middle.
  while ((left < end) && (data(splitDimension, left) < splitVal))
    ++left;
  while ((left < end) && !(data(splitDimension, end - 1) < splitVal))
    --end;

  while (left < end)
  {
    // Column 'left' belongs on the right and column 'end - 1' belongs on the
    // left (and so they are distinct columns): exchange them.
    const size_t last = end - 1;
    data.swap_cols(left, last);

    // Keep the index mapping in step with the columns that were just moved.
    const size_t tmp = oldFromNew[left];
    oldFromNew[left] = oldFromNew[last];
    oldFromNew[last] = tmp;

    // Skip the columns on the left that are already in the correct part.
    while ((left < end) && (data(splitDimension, left) < splitVal))
      ++left;

    // Skip the columns on the right that are already in the correct part.
    while ((left < end) && !(data(splitDimension, end - 1) < splitVal))
      --end;
  }

  // Both ends must have met exactly at the boundary between the two parts.
  Log::Assert(left == end);

  return left;
}
}; // namespace tree
}; // namespace mlpack
#endif
|