/usr/lib/x86_64-linux-gnu/beignet/include/ocl_vload.h is in beignet-opencl-icd 1.1.1-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*
* Copyright © 2012 - 2014 Intel Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
*/
#ifndef __OCL_VLOAD_H__
#define __OCL_VLOAD_H__
#include "ocl_types.h"
/////////////////////////////////////////////////////////////////////////////
// Vector loads and stores
/////////////////////////////////////////////////////////////////////////////
// These loads and stores are implemented with untyped reads and writes, so
// they can simply be cast to vector loads / stores. This is not C99 compliant
// because of the aliasing rules, but that does not matter here: TBAA is not
// enabled in the compiler.
// Declares the vload<DIM> prototype returning TYPE<DIM> for a const pointer
// to TYPE qualified with address space SPACE.
#define DECL_UNTYPED_RD_SPACE_N(TYPE, DIM, SPACE) \
  OVERLOADABLE TYPE##DIM vload##DIM(size_t offset, const SPACE TYPE *p);

// Declares the vload<DIM> / vstore<DIM> prototype pair for TYPE in address
// space SPACE. The load prototype is the one from DECL_UNTYPED_RD_SPACE_N.
#define DECL_UNTYPED_RW_SPACE_N(TYPE, DIM, SPACE) \
  DECL_UNTYPED_RD_SPACE_N(TYPE, DIM, SPACE) \
  OVERLOADABLE void vstore##DIM(TYPE##DIM v, size_t offset, SPACE TYPE *p);
// Declares only the vload3 prototype for TYPE in address space SPACE.
#define DECL_UNTYPED_RDV3_SPACE(TYPE, SPACE) \
  OVERLOADABLE TYPE##3 vload3(size_t offset, const SPACE TYPE *p);

// Declares the vstore3 prototype followed by the vload3 prototype for TYPE
// in address space SPACE.
#define DECL_UNTYPED_V3_SPACE(TYPE, SPACE) \
  OVERLOADABLE void vstore3(TYPE##3 v, size_t offset, SPACE TYPE *p); \
  DECL_UNTYPED_RDV3_SPACE(TYPE, SPACE)
// Load and store prototypes for every vector width (2, 3, 4, 8, 16) of TYPE
// in address space SPACE. Width 3 goes through its dedicated macro.
#define DECL_UNTYPED_RW_ALL_SPACE(TYPE, SPACE) \
  DECL_UNTYPED_RW_SPACE_N(TYPE,  2, SPACE)     \
  DECL_UNTYPED_V3_SPACE(TYPE, SPACE)           \
  DECL_UNTYPED_RW_SPACE_N(TYPE,  4, SPACE)     \
  DECL_UNTYPED_RW_SPACE_N(TYPE,  8, SPACE)     \
  DECL_UNTYPED_RW_SPACE_N(TYPE, 16, SPACE)

// Load-only counterpart of DECL_UNTYPED_RW_ALL_SPACE: the same widths, but
// no store prototypes.
#define DECL_UNTYPED_RD_ALL_SPACE(TYPE, SPACE) \
  DECL_UNTYPED_RD_SPACE_N(TYPE,  2, SPACE)     \
  DECL_UNTYPED_RDV3_SPACE(TYPE, SPACE)         \
  DECL_UNTYPED_RD_SPACE_N(TYPE,  4, SPACE)     \
  DECL_UNTYPED_RD_SPACE_N(TYPE,  8, SPACE)     \
  DECL_UNTYPED_RD_SPACE_N(TYPE, 16, SPACE)
// Instantiates the vector load/store prototypes of TYPE for each address
// space. __constant only gets the load prototypes; the other three spaces
// get loads and stores.
#define DECL_UNTYPED_RW_ALL(TYPE)              \
  DECL_UNTYPED_RW_ALL_SPACE(TYPE, __global)    \
  DECL_UNTYPED_RW_ALL_SPACE(TYPE, __local)     \
  DECL_UNTYPED_RD_ALL_SPACE(TYPE, __constant)  \
  DECL_UNTYPED_RW_ALL_SPACE(TYPE, __private)
// Load prototypes (widths 2, 3, 4, 8, 16) for TYPE in address space SPACE.
// Instantiated in this header for char, uchar, short and ushort.
#define DECL_BYTE_RD_SPACE(TYPE, SPACE)                                  \
  OVERLOADABLE TYPE##2  vload2(size_t offset, const SPACE TYPE *p);      \
  OVERLOADABLE TYPE##3  vload3(size_t offset, const SPACE TYPE *p);      \
  OVERLOADABLE TYPE##4  vload4(size_t offset, const SPACE TYPE *p);      \
  OVERLOADABLE TYPE##8  vload8(size_t offset, const SPACE TYPE *p);      \
  OVERLOADABLE TYPE##16 vload16(size_t offset, const SPACE TYPE *p);

// Store prototypes (widths 2, 3, 4, 8, 16) matching DECL_BYTE_RD_SPACE.
#define DECL_BYTE_WR_SPACE(TYPE, SPACE)                                  \
  OVERLOADABLE void vstore2(TYPE##2 v, size_t offset, SPACE TYPE *p);    \
  OVERLOADABLE void vstore3(TYPE##3 v, size_t offset, SPACE TYPE *p);    \
  OVERLOADABLE void vstore4(TYPE##4 v, size_t offset, SPACE TYPE *p);    \
  OVERLOADABLE void vstore8(TYPE##8 v, size_t offset, SPACE TYPE *p);    \
  OVERLOADABLE void vstore16(TYPE##16 v, size_t offset, SPACE TYPE *p);
// Instantiates the DECL_BYTE_* prototypes of TYPE for each address space:
// loads for all four spaces, stores for every space except __constant.
#define DECL_BYTE_RW_ALL(TYPE)           \
  DECL_BYTE_RD_SPACE(TYPE, __global)     \
  DECL_BYTE_RD_SPACE(TYPE, __local)      \
  DECL_BYTE_RD_SPACE(TYPE, __private)    \
  DECL_BYTE_RD_SPACE(TYPE, __constant)   \
  DECL_BYTE_WR_SPACE(TYPE, __global)     \
  DECL_BYTE_WR_SPACE(TYPE, __local)      \
  DECL_BYTE_WR_SPACE(TYPE, __private)
// char and short families: prototypes generated by the DECL_BYTE_* macros.
DECL_BYTE_RW_ALL(char)
DECL_BYTE_RW_ALL(uchar)
DECL_BYTE_RW_ALL(short)
DECL_BYTE_RW_ALL(ushort)

// int, long and floating-point families: prototypes generated by the
// DECL_UNTYPED_* macros.
DECL_UNTYPED_RW_ALL(int)
DECL_UNTYPED_RW_ALL(uint)
DECL_UNTYPED_RW_ALL(long)
DECL_UNTYPED_RW_ALL(ulong)
DECL_UNTYPED_RW_ALL(float)
DECL_UNTYPED_RW_ALL(double)
// The generator macros are private to this header: remove them once every
// prototype has been emitted so they do not leak into including code.

// Untyped (int / long / float / double) helpers.
#undef DECL_UNTYPED_RD_SPACE_N
#undef DECL_UNTYPED_RW_SPACE_N
#undef DECL_UNTYPED_RDV3_SPACE
#undef DECL_UNTYPED_V3_SPACE
#undef DECL_UNTYPED_RD_ALL_SPACE
#undef DECL_UNTYPED_RW_ALL_SPACE
#undef DECL_UNTYPED_RW_ALL

// char / short helpers.
#undef DECL_BYTE_RD_SPACE
#undef DECL_BYTE_WR_SPACE
#undef DECL_BYTE_RW_ALL
// Prototypes of the half -> float loaders for address space SPACE: the scalar
// vload_half, the vector forms vload_half{2,3,4,8,16}, and vloada_half3,
// which is the only vloada_* variant that gets its own prototype here.
#define DECL_HALF_LD_SPACE(SPACE)                                        \
  OVERLOADABLE float   vload_half(size_t offset, const SPACE half *p);   \
  OVERLOADABLE float2  vload_half2(size_t offset, const SPACE half *p);  \
  OVERLOADABLE float3  vload_half3(size_t offset, const SPACE half *p);  \
  OVERLOADABLE float3  vloada_half3(size_t offset, const SPACE half *p); \
  OVERLOADABLE float4  vload_half4(size_t offset, const SPACE half *p);  \
  OVERLOADABLE float8  vload_half8(size_t offset, const SPACE half *p);  \
  OVERLOADABLE float16 vload_half16(size_t offset, const SPACE half *p);
// Prototypes of the float -> half stores for address space SPACE. ROUND is
// pasted onto each function name as a suffix and may be empty. For every
// width (scalar, 2, 3, 4, 8, 16) both vstore_half* and vstorea_half* are
// declared. FUNC is not used by the expansion.
#define DECL_HALF_ST_SPACE_ROUND(SPACE, ROUND, FUNC)                                    \
  OVERLOADABLE void vstore_half##ROUND(float data, size_t offset, SPACE half *p);       \
  OVERLOADABLE void vstorea_half##ROUND(float data, size_t offset, SPACE half *p);      \
  OVERLOADABLE void vstore_half2##ROUND(float2 data, size_t offset, SPACE half *p);     \
  OVERLOADABLE void vstorea_half2##ROUND(float2 data, size_t offset, SPACE half *p);    \
  OVERLOADABLE void vstore_half3##ROUND(float3 data, size_t offset, SPACE half *p);     \
  OVERLOADABLE void vstorea_half3##ROUND(float3 data, size_t offset, SPACE half *p);    \
  OVERLOADABLE void vstore_half4##ROUND(float4 data, size_t offset, SPACE half *p);     \
  OVERLOADABLE void vstorea_half4##ROUND(float4 data, size_t offset, SPACE half *p);    \
  OVERLOADABLE void vstore_half8##ROUND(float8 data, size_t offset, SPACE half *p);     \
  OVERLOADABLE void vstorea_half8##ROUND(float8 data, size_t offset, SPACE half *p);    \
  OVERLOADABLE void vstore_half16##ROUND(float16 data, size_t offset, SPACE half *p);   \
  OVERLOADABLE void vstorea_half16##ROUND(float16 data, size_t offset, SPACE half *p);
// Declares the half store prototypes of address space SPACE for every
// rounding suffix: none, _rte, _rtz, _rtp and _rtn.
//
// The last line of this macro must NOT end with a backslash. A trailing
// backslash splices the following source line into the macro body, which
// previously made the __global half-load instantiation below part of every
// DECL_HALF_ST_SPACE expansion instead of a declaration of its own.
#define DECL_HALF_ST_SPACE(SPACE)                 \
  DECL_HALF_ST_SPACE_ROUND(SPACE,     , dummy)    \
  DECL_HALF_ST_SPACE_ROUND(SPACE, _rte, dummy)    \
  DECL_HALF_ST_SPACE_ROUND(SPACE, _rtz, dummy)    \
  DECL_HALF_ST_SPACE_ROUND(SPACE, _rtp, dummy)    \
  DECL_HALF_ST_SPACE_ROUND(SPACE, _rtn, dummy)

// Half loads are declared for all four address spaces.
DECL_HALF_LD_SPACE(__global)
DECL_HALF_LD_SPACE(__local)
DECL_HALF_LD_SPACE(__constant)
DECL_HALF_LD_SPACE(__private)

// Half stores are declared for every address space except __constant.
DECL_HALF_ST_SPACE(__global)
DECL_HALF_ST_SPACE(__local)
DECL_HALF_ST_SPACE(__private)
// The half generator macros are private to this header: remove them now that
// all half load/store prototypes have been emitted.
#undef DECL_HALF_ST_SPACE_ROUND
#undef DECL_HALF_ST_SPACE
#undef DECL_HALF_LD_SPACE
// The vloada_half* loaders are plain aliases of their vload_half*
// counterparts for the scalar and the 2/4/8/16-wide forms. vloada_half3 is
// intentionally absent from this list: it has its own prototype in this
// header.
#define vloada_half    vload_half
#define vloada_half2   vload_half2
#define vloada_half4   vload_half4
#define vloada_half8   vload_half8
#define vloada_half16  vload_half16
#endif /* __OCL_VLOAD_H__ */
|