/usr/lib/x86_64-linux-gnu/beignet/include/ocl_simd.h is in beignet-opencl-icd 1.3.2-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/*
* Copyright © 2015 Intel Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
*/
#ifndef __OCL_SIMD_H__
#define __OCL_SIMD_H__
#include "ocl_types.h"
/////////////////////////////////////////////////////////////////////////////
// SIMD level functions
/////////////////////////////////////////////////////////////////////////////
/* Sub-group predicates: each takes one int and returns an int.
 * NOTE(review): presumably the any/all vote across the sub-group; the
 * definitions are not in this header - confirm against the implementation. */
int sub_group_any(int);
int sub_group_all(int);
/* Sub-group size and index queries: none take arguments, all return uint. */
uint get_simd_size(void);
uint get_sub_group_size(void);
uint get_max_sub_group_size(void);
uint get_num_sub_groups(void);
uint get_sub_group_id(void);
uint get_sub_group_local_id(void);
/* broadcast
 *
 * One overload per scalar type: each takes a value `a` plus a uint
 * `local_id` and returns the same type as `a`.
 * NOTE(review): presumably returns the `a` held by the work-item whose
 * sub-group local id equals `local_id` - confirm against the implementation.
 */
OVERLOADABLE int sub_group_broadcast(int a,uint local_id);
OVERLOADABLE uint sub_group_broadcast(uint a, uint local_id);
OVERLOADABLE long sub_group_broadcast(long a, uint local_id);
OVERLOADABLE ulong sub_group_broadcast(ulong a, uint local_id);
OVERLOADABLE half sub_group_broadcast(half a, uint local_id);
OVERLOADABLE float sub_group_broadcast(float a, uint local_id);
OVERLOADABLE double sub_group_broadcast(double a, uint local_id);
OVERLOADABLE short sub_group_broadcast(short a,uint local_id);
OVERLOADABLE ushort sub_group_broadcast(ushort a, uint local_id);
/* Intel-prefixed variants, declared for short and ushort only. */
OVERLOADABLE short intel_sub_group_broadcast(short a, uint local_id);
OVERLOADABLE ushort intel_sub_group_broadcast(ushort a, uint local_id);
/* reduce add */
OVERLOADABLE int sub_group_reduce_add(int x);
OVERLOADABLE uint sub_group_reduce_add(uint x);
OVERLOADABLE long sub_group_reduce_add(long x);
OVERLOADABLE ulong sub_group_reduce_add(ulong x);
OVERLOADABLE half sub_group_reduce_add(half x);
OVERLOADABLE float sub_group_reduce_add(float x);
OVERLOADABLE double sub_group_reduce_add(double x);
OVERLOADABLE short sub_group_reduce_add(short x);
OVERLOADABLE ushort sub_group_reduce_add(ushort x);
OVERLOADABLE short intel_sug_group_reduce_add(short x);
OVERLOADABLE ushort intel_sug_group_reduce_add(ushort x);
/* reduce min */
OVERLOADABLE int sub_group_reduce_min(int x);
OVERLOADABLE uint sub_group_reduce_min(uint x);
OVERLOADABLE long sub_group_reduce_min(long x);
OVERLOADABLE ulong sub_group_reduce_min(ulong x);
OVERLOADABLE half sub_group_reduce_min(half x);
OVERLOADABLE float sub_group_reduce_min(float x);
OVERLOADABLE double sub_group_reduce_min(double x);
OVERLOADABLE short sub_group_reduce_min(short x);
OVERLOADABLE ushort sub_group_reduce_min(ushort x);
OVERLOADABLE short intel_sug_group_reduce_min(short x);
OVERLOADABLE ushort intel_sug_group_reduce_min(ushort x);
/* reduce max */
OVERLOADABLE int sub_group_reduce_max(int x);
OVERLOADABLE uint sub_group_reduce_max(uint x);
OVERLOADABLE long sub_group_reduce_max(long x);
OVERLOADABLE ulong sub_group_reduce_max(ulong x);
OVERLOADABLE half sub_group_reduce_max(half x);
OVERLOADABLE float sub_group_reduce_max(float x);
OVERLOADABLE double sub_group_reduce_max(double x);
OVERLOADABLE short sub_group_reduce_max(short x);
OVERLOADABLE ushort sub_group_reduce_max(ushort x);
OVERLOADABLE short intel_sug_group_reduce_max(short x);
OVERLOADABLE ushort intel_sug_group_reduce_max(ushort x);
/* scan_inclusive add */
OVERLOADABLE int sub_group_scan_inclusive_add(int x);
OVERLOADABLE uint sub_group_scan_inclusive_add(uint x);
OVERLOADABLE long sub_group_scan_inclusive_add(long x);
OVERLOADABLE ulong sub_group_scan_inclusive_add(ulong x);
OVERLOADABLE half sub_group_scan_inclusive_add(half x);
OVERLOADABLE float sub_group_scan_inclusive_add(float x);
OVERLOADABLE double sub_group_scan_inclusive_add(double x);
OVERLOADABLE short sub_group_scan_inclusive_add(short x);
OVERLOADABLE ushort sub_group_scan_inclusive_add(ushort x);
OVERLOADABLE short intel_sug_group_scan_inclusive_add(short x);
OVERLOADABLE ushort intel_sug_group_scan_inclusive_add(ushort x);
/* scan_inclusive min */
OVERLOADABLE int sub_group_scan_inclusive_min(int x);
OVERLOADABLE uint sub_group_scan_inclusive_min(uint x);
OVERLOADABLE long sub_group_scan_inclusive_min(long x);
OVERLOADABLE ulong sub_group_scan_inclusive_min(ulong x);
OVERLOADABLE half sub_group_scan_inclusive_min(half x);
OVERLOADABLE float sub_group_scan_inclusive_min(float x);
OVERLOADABLE double sub_group_scan_inclusive_min(double x);
OVERLOADABLE short sub_group_scan_inclusive_min(short x);
OVERLOADABLE ushort sub_group_scan_inclusive_min(ushort x);
OVERLOADABLE short intel_sug_group_scan_inclusive_min(short x);
OVERLOADABLE ushort intel_sug_group_scan_inclusive_min(ushort x);
/* scan_inclusive max */
OVERLOADABLE int sub_group_scan_inclusive_max(int x);
OVERLOADABLE uint sub_group_scan_inclusive_max(uint x);
OVERLOADABLE long sub_group_scan_inclusive_max(long x);
OVERLOADABLE ulong sub_group_scan_inclusive_max(ulong x);
OVERLOADABLE half sub_group_scan_inclusive_max(half x);
OVERLOADABLE float sub_group_scan_inclusive_max(float x);
OVERLOADABLE double sub_group_scan_inclusive_max(double x);
OVERLOADABLE short sub_group_scan_inclusive_max(short x);
OVERLOADABLE ushort sub_group_scan_inclusive_max(ushort x);
OVERLOADABLE short intel_sug_group_scan_inclusive_max(short x);
OVERLOADABLE ushort intel_sug_group_scan_inclusive_max(ushort x);
/* scan_exclusive add
 *
 * One overload per scalar type; each takes a single value and returns the
 * same type.
 */
OVERLOADABLE int sub_group_scan_exclusive_add(int x);
OVERLOADABLE uint sub_group_scan_exclusive_add(uint x);
OVERLOADABLE long sub_group_scan_exclusive_add(long x);
OVERLOADABLE ulong sub_group_scan_exclusive_add(ulong x);
OVERLOADABLE half sub_group_scan_exclusive_add(half x);
OVERLOADABLE float sub_group_scan_exclusive_add(float x);
OVERLOADABLE double sub_group_scan_exclusive_add(double x);
OVERLOADABLE short sub_group_scan_exclusive_add(short x);
OVERLOADABLE ushort sub_group_scan_exclusive_add(ushort x);
/* Intel-prefixed variants, declared for short and ushort only. */
OVERLOADABLE short intel_sub_group_scan_exclusive_add(short x);
OVERLOADABLE ushort intel_sub_group_scan_exclusive_add(ushort x);
/* scan_exclusive min */
OVERLOADABLE int sub_group_scan_exclusive_min(int x);
OVERLOADABLE uint sub_group_scan_exclusive_min(uint x);
OVERLOADABLE long sub_group_scan_exclusive_min(long x);
OVERLOADABLE ulong sub_group_scan_exclusive_min(ulong x);
OVERLOADABLE half sub_group_scan_exclusive_min(half x);
OVERLOADABLE float sub_group_scan_exclusive_min(float x);
OVERLOADABLE double sub_group_scan_exclusive_min(double x);
OVERLOADABLE short sub_group_scan_exclusive_min(short x);
OVERLOADABLE ushort sub_group_scan_exclusive_min(ushort x);
OVERLOADABLE short intel_sug_group_scan_exclusive_min(short x);
OVERLOADABLE ushort intel_sug_group_scan_exclusive_min(ushort x);
/* scan_exclusive max */
OVERLOADABLE int sub_group_scan_exclusive_max(int x);
OVERLOADABLE uint sub_group_scan_exclusive_max(uint x);
OVERLOADABLE long sub_group_scan_exclusive_max(long x);
OVERLOADABLE ulong sub_group_scan_exclusive_max(ulong x);
OVERLOADABLE half sub_group_scan_exclusive_max(half x);
OVERLOADABLE float sub_group_scan_exclusive_max(float x);
OVERLOADABLE double sub_group_scan_exclusive_max(double x);
OVERLOADABLE short sub_group_scan_exclusive_max(short x);
OVERLOADABLE ushort sub_group_scan_exclusive_max(ushort x);
OVERLOADABLE short intel_sug_group_scan_exclusive_max(short x);
OVERLOADABLE ushort intel_sug_group_scan_exclusive_max(ushort x);
/* shuffle
 *
 * Scalar overloads of the four shuffle entry points. Every overload returns
 * the same type as its value argument(s) and takes a trailing uint `c`.
 */
/* intel_sub_group_shuffle(x, c): the only entry point here with a half
 * overload. */
OVERLOADABLE half intel_sub_group_shuffle(half x, uint c);
OVERLOADABLE float intel_sub_group_shuffle(float x, uint c);
OVERLOADABLE int intel_sub_group_shuffle(int x, uint c);
OVERLOADABLE uint intel_sub_group_shuffle(uint x, uint c);
OVERLOADABLE short intel_sub_group_shuffle(short x, uint c);
OVERLOADABLE ushort intel_sub_group_shuffle(ushort x, uint c);
/* intel_sub_group_shuffle_down(x, y, c): takes two values of the same type. */
OVERLOADABLE float intel_sub_group_shuffle_down(float x, float y, uint c);
OVERLOADABLE int intel_sub_group_shuffle_down(int x, int y, uint c);
OVERLOADABLE uint intel_sub_group_shuffle_down(uint x, uint y, uint c);
OVERLOADABLE short intel_sub_group_shuffle_down(short x, short y, uint c);
OVERLOADABLE ushort intel_sub_group_shuffle_down(ushort x, ushort y, uint c);
/* intel_sub_group_shuffle_up(x, y, c): takes two values of the same type. */
OVERLOADABLE float intel_sub_group_shuffle_up(float x, float y, uint c);
OVERLOADABLE int intel_sub_group_shuffle_up(int x, int y, uint c);
OVERLOADABLE uint intel_sub_group_shuffle_up(uint x, uint y, uint c);
OVERLOADABLE short intel_sub_group_shuffle_up(short x, short y, uint c);
OVERLOADABLE ushort intel_sub_group_shuffle_up(ushort x, ushort y, uint c);
/* intel_sub_group_shuffle_xor(x, c): single value plus uint `c`. */
OVERLOADABLE float intel_sub_group_shuffle_xor(float x, uint c);
OVERLOADABLE int intel_sub_group_shuffle_xor(int x, uint c);
OVERLOADABLE uint intel_sub_group_shuffle_xor(uint x, uint c);
OVERLOADABLE short intel_sub_group_shuffle_xor(short x, uint c);
OVERLOADABLE ushort intel_sub_group_shuffle_xor(ushort x, uint c);
/* block read/write
 *
 * Reads return a uint/ushort scalar or a 2/4/8-wide vector; writes take a
 * `data` argument of the same widths and return void. Each function is
 * declared both for a global pointer and for an image2d_t addressed by an
 * int2 `byte_coord`.
 */
/* Unsuffixed names, uint data: global pointer forms. */
OVERLOADABLE uint intel_sub_group_block_read(const global uint* p);
OVERLOADABLE uint2 intel_sub_group_block_read2(const global uint* p);
OVERLOADABLE uint4 intel_sub_group_block_read4(const global uint* p);
OVERLOADABLE uint8 intel_sub_group_block_read8(const global uint* p);
OVERLOADABLE void intel_sub_group_block_write(__global uint* p, uint data);
OVERLOADABLE void intel_sub_group_block_write2(__global uint* p, uint2 data);
OVERLOADABLE void intel_sub_group_block_write4(__global uint* p, uint4 data);
OVERLOADABLE void intel_sub_group_block_write8(__global uint* p, uint8 data);
/* Unsuffixed names, uint data: image2d_t forms. */
OVERLOADABLE uint intel_sub_group_block_read(image2d_t image, int2 byte_coord);
OVERLOADABLE uint2 intel_sub_group_block_read2(image2d_t image, int2 byte_coord);
OVERLOADABLE uint4 intel_sub_group_block_read4(image2d_t image, int2 byte_coord);
OVERLOADABLE uint8 intel_sub_group_block_read8(image2d_t image, int2 byte_coord);
OVERLOADABLE void intel_sub_group_block_write(image2d_t image, int2 byte_coord, uint data);
OVERLOADABLE void intel_sub_group_block_write2(image2d_t image, int2 byte_coord, uint2 data);
OVERLOADABLE void intel_sub_group_block_write4(image2d_t image, int2 byte_coord, uint4 data);
OVERLOADABLE void intel_sub_group_block_write8(image2d_t image, int2 byte_coord, uint8 data);
/* "_ui" names, uint data: global pointer forms. */
OVERLOADABLE uint intel_sub_group_block_read_ui(const global uint* p);
OVERLOADABLE uint2 intel_sub_group_block_read_ui2(const global uint* p);
OVERLOADABLE uint4 intel_sub_group_block_read_ui4(const global uint* p);
OVERLOADABLE uint8 intel_sub_group_block_read_ui8(const global uint* p);
OVERLOADABLE void intel_sub_group_block_write_ui(__global uint* p, uint data);
OVERLOADABLE void intel_sub_group_block_write_ui2(__global uint* p, uint2 data);
OVERLOADABLE void intel_sub_group_block_write_ui4(__global uint* p, uint4 data);
OVERLOADABLE void intel_sub_group_block_write_ui8(__global uint* p, uint8 data);
/* "_ui" names, uint data: image2d_t forms. */
OVERLOADABLE uint intel_sub_group_block_read_ui(image2d_t image, int2 byte_coord);
OVERLOADABLE uint2 intel_sub_group_block_read_ui2(image2d_t image, int2 byte_coord);
OVERLOADABLE uint4 intel_sub_group_block_read_ui4(image2d_t image, int2 byte_coord);
OVERLOADABLE uint8 intel_sub_group_block_read_ui8(image2d_t image, int2 byte_coord);
OVERLOADABLE void intel_sub_group_block_write_ui(image2d_t image, int2 byte_coord, uint data);
OVERLOADABLE void intel_sub_group_block_write_ui2(image2d_t image, int2 byte_coord, uint2 data);
OVERLOADABLE void intel_sub_group_block_write_ui4(image2d_t image, int2 byte_coord, uint4 data);
OVERLOADABLE void intel_sub_group_block_write_ui8(image2d_t image, int2 byte_coord, uint8 data);
/* "_us" names, ushort data: global pointer forms. */
OVERLOADABLE ushort intel_sub_group_block_read_us(const global ushort* p);
OVERLOADABLE ushort2 intel_sub_group_block_read_us2(const global ushort* p);
OVERLOADABLE ushort4 intel_sub_group_block_read_us4(const global ushort* p);
OVERLOADABLE ushort8 intel_sub_group_block_read_us8(const global ushort* p);
OVERLOADABLE void intel_sub_group_block_write_us(__global ushort* p, ushort data);
OVERLOADABLE void intel_sub_group_block_write_us2(__global ushort* p, ushort2 data);
OVERLOADABLE void intel_sub_group_block_write_us4(__global ushort* p, ushort4 data);
OVERLOADABLE void intel_sub_group_block_write_us8(__global ushort* p, ushort8 data);
/* "_us" names, ushort data: image2d_t forms. */
OVERLOADABLE ushort intel_sub_group_block_read_us(image2d_t image, int2 byte_coord);
OVERLOADABLE ushort2 intel_sub_group_block_read_us2(image2d_t image, int2 byte_coord);
OVERLOADABLE ushort4 intel_sub_group_block_read_us4(image2d_t image, int2 byte_coord);
OVERLOADABLE ushort8 intel_sub_group_block_read_us8(image2d_t image, int2 byte_coord);
OVERLOADABLE void intel_sub_group_block_write_us(image2d_t image, int2 byte_coord, ushort data);
OVERLOADABLE void intel_sub_group_block_write_us2(image2d_t image, int2 byte_coord, ushort2 data);
OVERLOADABLE void intel_sub_group_block_write_us4(image2d_t image, int2 byte_coord, ushort4 data);
OVERLOADABLE void intel_sub_group_block_write_us8(image2d_t image, int2 byte_coord, ushort8 data);
//Everything from this point on is autogenerated.
//Don't modify it manually.
//SIMD level builtin functions: 2/3/4/8/16-wide float, int and uint vector
//overloads of the intel_sub_group_shuffle* family.
//floatn intel_sub_group_shuffle(floatn x, uint c)
OVERLOADABLE float2 intel_sub_group_shuffle (float2 param0, uint param1);
OVERLOADABLE float3 intel_sub_group_shuffle (float3 param0, uint param1);
OVERLOADABLE float4 intel_sub_group_shuffle (float4 param0, uint param1);
OVERLOADABLE float8 intel_sub_group_shuffle (float8 param0, uint param1);
OVERLOADABLE float16 intel_sub_group_shuffle (float16 param0, uint param1);
//intn intel_sub_group_shuffle(intn x, uint c)
OVERLOADABLE int2 intel_sub_group_shuffle (int2 param0, uint param1);
OVERLOADABLE int3 intel_sub_group_shuffle (int3 param0, uint param1);
OVERLOADABLE int4 intel_sub_group_shuffle (int4 param0, uint param1);
OVERLOADABLE int8 intel_sub_group_shuffle (int8 param0, uint param1);
OVERLOADABLE int16 intel_sub_group_shuffle (int16 param0, uint param1);
//uintn intel_sub_group_shuffle(uintn x, uint c)
OVERLOADABLE uint2 intel_sub_group_shuffle (uint2 param0, uint param1);
OVERLOADABLE uint3 intel_sub_group_shuffle (uint3 param0, uint param1);
OVERLOADABLE uint4 intel_sub_group_shuffle (uint4 param0, uint param1);
OVERLOADABLE uint8 intel_sub_group_shuffle (uint8 param0, uint param1);
OVERLOADABLE uint16 intel_sub_group_shuffle (uint16 param0, uint param1);
//floatn intel_sub_group_shuffle_down(floatn x, floatn y, uint c)
OVERLOADABLE float2 intel_sub_group_shuffle_down (float2 param0, float2 param1, uint param2);
OVERLOADABLE float3 intel_sub_group_shuffle_down (float3 param0, float3 param1, uint param2);
OVERLOADABLE float4 intel_sub_group_shuffle_down (float4 param0, float4 param1, uint param2);
OVERLOADABLE float8 intel_sub_group_shuffle_down (float8 param0, float8 param1, uint param2);
OVERLOADABLE float16 intel_sub_group_shuffle_down (float16 param0, float16 param1, uint param2);
//intn intel_sub_group_shuffle_down(intn x, intn y, uint c)
OVERLOADABLE int2 intel_sub_group_shuffle_down (int2 param0, int2 param1, uint param2);
OVERLOADABLE int3 intel_sub_group_shuffle_down (int3 param0, int3 param1, uint param2);
OVERLOADABLE int4 intel_sub_group_shuffle_down (int4 param0, int4 param1, uint param2);
OVERLOADABLE int8 intel_sub_group_shuffle_down (int8 param0, int8 param1, uint param2);
OVERLOADABLE int16 intel_sub_group_shuffle_down (int16 param0, int16 param1, uint param2);
//uintn intel_sub_group_shuffle_down(uintn x, uintn y, uint c)
OVERLOADABLE uint2 intel_sub_group_shuffle_down (uint2 param0, uint2 param1, uint param2);
OVERLOADABLE uint3 intel_sub_group_shuffle_down (uint3 param0, uint3 param1, uint param2);
OVERLOADABLE uint4 intel_sub_group_shuffle_down (uint4 param0, uint4 param1, uint param2);
OVERLOADABLE uint8 intel_sub_group_shuffle_down (uint8 param0, uint8 param1, uint param2);
OVERLOADABLE uint16 intel_sub_group_shuffle_down (uint16 param0, uint16 param1, uint param2);
//floatn intel_sub_group_shuffle_up(floatn x, floatn y, uint c)
OVERLOADABLE float2 intel_sub_group_shuffle_up (float2 param0, float2 param1, uint param2);
OVERLOADABLE float3 intel_sub_group_shuffle_up (float3 param0, float3 param1, uint param2);
OVERLOADABLE float4 intel_sub_group_shuffle_up (float4 param0, float4 param1, uint param2);
OVERLOADABLE float8 intel_sub_group_shuffle_up (float8 param0, float8 param1, uint param2);
OVERLOADABLE float16 intel_sub_group_shuffle_up (float16 param0, float16 param1, uint param2);
//intn intel_sub_group_shuffle_up(intn x, intn y, uint c)
OVERLOADABLE int2 intel_sub_group_shuffle_up (int2 param0, int2 param1, uint param2);
OVERLOADABLE int3 intel_sub_group_shuffle_up (int3 param0, int3 param1, uint param2);
OVERLOADABLE int4 intel_sub_group_shuffle_up (int4 param0, int4 param1, uint param2);
OVERLOADABLE int8 intel_sub_group_shuffle_up (int8 param0, int8 param1, uint param2);
OVERLOADABLE int16 intel_sub_group_shuffle_up (int16 param0, int16 param1, uint param2);
//uintn intel_sub_group_shuffle_up(uintn x, uintn y, uint c)
OVERLOADABLE uint2 intel_sub_group_shuffle_up (uint2 param0, uint2 param1, uint param2);
OVERLOADABLE uint3 intel_sub_group_shuffle_up (uint3 param0, uint3 param1, uint param2);
OVERLOADABLE uint4 intel_sub_group_shuffle_up (uint4 param0, uint4 param1, uint param2);
OVERLOADABLE uint8 intel_sub_group_shuffle_up (uint8 param0, uint8 param1, uint param2);
OVERLOADABLE uint16 intel_sub_group_shuffle_up (uint16 param0, uint16 param1, uint param2);
//floatn intel_sub_group_shuffle_xor(floatn x, uint c)
OVERLOADABLE float2 intel_sub_group_shuffle_xor (float2 param0, uint param1);
OVERLOADABLE float3 intel_sub_group_shuffle_xor (float3 param0, uint param1);
OVERLOADABLE float4 intel_sub_group_shuffle_xor (float4 param0, uint param1);
OVERLOADABLE float8 intel_sub_group_shuffle_xor (float8 param0, uint param1);
OVERLOADABLE float16 intel_sub_group_shuffle_xor (float16 param0, uint param1);
//intn intel_sub_group_shuffle_xor(intn x, uint c)
OVERLOADABLE int2 intel_sub_group_shuffle_xor (int2 param0, uint param1);
OVERLOADABLE int3 intel_sub_group_shuffle_xor (int3 param0, uint param1);
OVERLOADABLE int4 intel_sub_group_shuffle_xor (int4 param0, uint param1);
OVERLOADABLE int8 intel_sub_group_shuffle_xor (int8 param0, uint param1);
OVERLOADABLE int16 intel_sub_group_shuffle_xor (int16 param0, uint param1);
//uintn intel_sub_group_shuffle_xor(uintn x, uint c)
OVERLOADABLE uint2 intel_sub_group_shuffle_xor (uint2 param0, uint param1);
OVERLOADABLE uint3 intel_sub_group_shuffle_xor (uint3 param0, uint param1);
OVERLOADABLE uint4 intel_sub_group_shuffle_xor (uint4 param0, uint param1);
OVERLOADABLE uint8 intel_sub_group_shuffle_xor (uint8 param0, uint param1);
OVERLOADABLE uint16 intel_sub_group_shuffle_xor (uint16 param0, uint param1);
#endif
/* __OCL_SIMD_H__ */