This file is indexed.

/usr/include/TiledArray/tensor/permute.h is in libtiledarray-dev 0.6.0-5.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
/*
 *  This file is a part of TiledArray.
 *  Copyright (C) 2015  Virginia Tech
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 *  Justus Calvin
 *  Department of Chemistry, Virginia Tech
 *
 *  permute.h
 *  May 31, 2015
 *
 */

#ifndef TILEDARRAY_TENSOR_PERMUTE_H__INCLUDED
#define TILEDARRAY_TENSOR_PERMUTE_H__INCLUDED

#include <TiledArray/perm_index.h>
#include <TiledArray/math/transpose.h>

namespace TiledArray {
  namespace detail {


    /// Compute the fused dimensions for permutation

    /// This function will compute the fused dimensions of a tensor for use in
    /// permutation algorithms. The idea is to partition the stride 1 dimensions
    /// in both the input and output tensor, which yields a fourth-order tensor
    /// (second- and third-order tensors have size of 1 and stride of 0 in the
    /// unused dimensions).
    /// The four output slots are, from outermost to innermost:
    /// \li slot 0: all leading dimensions not captured by the other slots
    /// \li slot 1: the dimension mapped by \c perm to the output's last
    ///     (stride-1) dimension, fused with any run of dimensions that stays
    ///     consecutive under the permutation
    /// \li slot 2: the dimensions between slot 1 and slot 3 (empty when slot 1
    ///     immediately precedes slot 3)
    /// \li slot 3: the trailing run of dimensions that remains consecutive
    ///     under the permutation, i.e. the input's stride-1 dimension
    /// \tparam SizeType An unsigned integral type
    /// \param[out] fused_size An array (of at least 4 elements) for the fused
    /// size output
    /// \param[out] fused_weight An array (of at least 4 elements) for the fused
    /// weight output; each weight is the input-tensor stride of that slot
    /// \param[in] size An array that holds the unfused size information of the
    /// argument tensor
    /// \param[in] perm The permutation that will be applied to the argument
    /// tensor(s).
    /// \note NOTE(review): if the trailing consecutive run covers every
    /// dimension (i.e. \c perm is the identity), \c i reaches -1 and the
    /// else branch below would read \c size[-1]. The only caller in this file
    /// (\c permute) invokes this function exclusively when
    /// <tt>perm[ndim-1] != ndim-1</tt>, which rules that case out — confirm
    /// before calling from elsewhere.
    template <typename SizeType>
    inline void fuse_dimensions(SizeType * restrict const fused_size,
        SizeType * restrict const fused_weight,
        const SizeType * restrict const size, const Permutation& perm)
    {
      // Index of the last (fastest-varying) dimension.
      const unsigned int ndim1 = perm.dim() - 1u;

      // Scan dimensions from last (innermost) to first (outermost).
      int i = ndim1;

      // Slot 3: fuse the trailing run of dimensions that stay consecutive
      // under the permutation. This is the input's stride-1 block, hence its
      // weight is 1.
      fused_size[3] = size[i--];
      while((i >= 0) && (perm[i + 1u] == (perm[i] + 1u)))
        fused_size[3] *= size[i--];
      fused_weight[3] = 1u;


      if((i >= 0) && (perm[i] != ndim1)) {
        // General case: there are dimensions between the input's inner run
        // and the dimension that perm sends to the output's last position.
        // Slot 2: fuse everything up to (but not including) the dimension
        // with perm[i] == ndim1. These are adjacent input dimensions, so
        // their combined input stride is the size of slot 3.
        fused_size[2] = size[i--];
        while((i >= 0) && (perm[i] != ndim1))
          fused_size[2] *= size[i--];

        fused_weight[2] = fused_size[3];

        // Slot 1: the dimension mapped to the output's last (stride-1)
        // position, fused with any preceding run that stays consecutive
        // under the permutation.
        fused_size[1] = size[i--];
        while((i >= 0) && (perm[i + 1] == (perm[i] + 1u)))
          fused_size[1] *= size[i--];

        // Input stride of slot 1 = extent of everything inside it.
        fused_weight[1] = fused_size[2] * fused_weight[2];
      } else {
        // Degenerate case: the dimension mapped to the output's last
        // position immediately precedes the input's inner run, so slot 2 is
        // empty (size 1, stride 0).
        fused_size[2] = 1ul;
        fused_weight[2] = 0ul;

        // Slot 1 starts directly after slot 3 (see the docstring note about
        // the i >= 0 precondition here).
        fused_size[1] = size[i--];
        while((i >= 0) && (perm[i + 1] == (perm[i] + 1u)))
          fused_size[1] *= size[i--];

        fused_weight[1] = fused_size[3];
      }

      // Slot 0: any remaining outer dimensions; empty (size 1, stride 0)
      // when the slots above consumed the whole index space.
      if(i >= 0) {
        fused_size[0] = size[i--];
        while(i >= 0)
          fused_size[0] *= size[i--];

        fused_weight[0] = fused_size[1] * fused_weight[1];
      } else {
        fused_size[0] = 1ul;
        fused_weight[0] = 0ul;
      }
    }

    /// Construct a permuted tensor copy

    /// The expected signature of the input operations is:
    /// \code
    /// Result::value_type input_op(const Arg0::value_type, const Args::value_type...)
    /// \endcode
    /// The expected signature of the output operations is:
    /// \code
    /// void output_op(Result::value_type*, const Result::value_type)
    /// \endcode
    /// \tparam InputOp The input operation type
    /// \tparam OutputOp The output operation type
    /// \tparam Result The result tensor type
    /// \tparam Arg0 The first tensor argument type
    /// \tparam Args The the remaining tensor argument types
    /// \param input_op The operation that is used to generate the output value
    /// from the input arguments
    /// \param output_op The operation that is used to set the value of the
    /// result tensor given the element pointer and the result value
    /// \param result The tensor that receives the permuted data; assumed to be
    /// allocated with the permuted range of \c arg0 — TODO confirm with callers
    /// \param perm The permutation that will be applied to the copy
    /// \param arg0 The first argument tensor; its range drives the iteration
    /// \param args The remaining argument tensors to be permuted (each is
    /// expected to have the same volume as \c arg0)
    template <typename InputOp, typename OutputOp, typename Result,
        typename Arg0, typename... Args>
    inline void permute(InputOp&& input_op, OutputOp&& output_op, Result& result,
        const Permutation& perm, const Arg0& arg0, const Args&... args)
    {
      // Maps an ordinal index of arg0's range to the corresponding ordinal
      // index in the permuted (result) range.
      detail::PermIndex perm_index_op(arg0.range(), perm);

      // Cache constants
      const unsigned int ndim = arg0.range().rank();
      const unsigned int ndim1 = ndim - 1;
      const typename Result::size_type volume = arg0.range().volume();

      // Get pointer to arg extent
      const auto* restrict const arg0_extent = arg0.range().extent_data();

      if(perm[ndim1] == ndim1) {
        // This is the simple case where the last dimension is not permuted.
        // Therefore, it can be shuffled in chunks.

        // Determine which dimensions can be permuted with the least significant
        // dimension: extend the chunk over the trailing dimensions that the
        // permutation leaves in place.
        typename Result::size_type block_size = arg0_extent[ndim1];
        for(int i = int(ndim1) - 1 ; i >= 0; --i) {
          if(int(perm[i]) != i)
            break;
          block_size *= arg0_extent[i];
        }

        // Combine the input and output operations into a single elementwise
        // functor: result <- output_op(result, input_op(a0, as...)).
        auto op = [=] (typename Result::pointer result,
            typename Arg0::const_reference a0, typename Args::const_reference... as)
        { output_op(result, input_op(a0, as...)); };

        // Permute the data one contiguous block at a time; only the block's
        // starting ordinal needs to be mapped through the permutation.
        for(typename Result::size_type index = 0ul; index < volume; index += block_size) {
          const typename Result::size_type perm_index = perm_index_op(index);

          // Copy the block
          math::vector_ptr_op(op, block_size, result.data() + perm_index,
              arg0.data() + index, (args.data() + index)...);
        }

      } else {
        // This is the more complicated case. Here we permute in terms of matrix
        // transposes. The data layout of the input and output matrices are
        // chosen such that they both contain stride one dimensions.

        // Here we partition the n dimensional index space, I, of the permute
        // tensor with up to four parts
        // {I_1, ..., I_i, I_i+1, ..., I_j, I_j+1, ..., I_k, I_k+1, ..., I_n}
        // where the subrange {I_k+1, ..., I_n} is the (fused) inner dimension
        // in the input tensor, and the subrange {I_i+1, ..., I_j} is the
        // (fused) inner dimension in the output tensor that has been mapped to
        // the input tensor. These ranges are used to form a set of matrices in
        // the input tensor that are transposed and copied to the output tensor.
        // The remaining (fused) index ranges {I_1, ..., I_i} and
        // {I_j+1, ..., I_k} are used to form the outer loop around the matrix
        // transpose operations. These outer ranges may or may not be zero size.
        typename Result::size_type other_fused_size[4];
        typename Result::size_type other_fused_weight[4];
        fuse_dimensions(other_fused_size, other_fused_weight,
            arg0.range().extent_data(), perm);

        // Compute the fused stride for the result matrix transpose: the
        // product of the result extents that follow the image of the input's
        // innermost dimension, i.e. the result-side row stride.
        const auto* restrict const result_extent = result.range().extent_data();
        typename Result::size_type  result_outer_stride = 1ul;
        for(unsigned int i = perm[ndim1] + 1u; i < ndim; ++i)
          result_outer_stride *= result_extent[i];

        // Copy data from the input to the output matrix via a series of matrix
        // transposes. Slots 0 and 2 (sizes/weights from fuse_dimensions) form
        // the outer loops; slots 1 and 3 are the matrix rows and columns.
        for(typename Result::size_type i = 0ul; i < other_fused_size[0]; ++i) {
          typename Result::size_type index = i * other_fused_weight[0];
          for(typename Result::size_type j = 0ul; j < other_fused_size[2]; ++j, index += other_fused_weight[2]) {
            // Compute the ordinal index of the input and output matrices.
            typename Result::size_type perm_index = perm_index_op(index);

            math::transpose(input_op, output_op,
                other_fused_size[1], other_fused_size[3],
                result_outer_stride, result.data() + perm_index,
                other_fused_weight[1], arg0.data() + index, (args.data() + index)...);
          }
        }
      }
    }


  }  // namespace detail
} // namespace TiledArray

#endif // TILEDARRAY_TENSOR_PERMUTE_H__INCLUDED