/usr/include/glm/simd/common.h is in libglm-dev 0.9.9~a2-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
/// @ref simd
/// @file glm/simd/common.h
#pragma once
#include "platform.h"
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_add(glm_vec4 a, glm_vec4 b)
{
return _mm_add_ps(a, b);
}
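// The glm_vec1_* variants below use the scalar _ss intrinsics: they operate
// on the lowest lane only and pass the upper three lanes of a through.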
GLM_FUNC_QUALIFIER glm_vec4 glm_vec1_add(glm_vec4 a, glm_vec4 b)
{
return _mm_add_ss(a, b);
}
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_sub(glm_vec4 a, glm_vec4 b)
{
return _mm_sub_ps(a, b);
}
GLM_FUNC_QUALIFIER glm_vec4 glm_vec1_sub(glm_vec4 a, glm_vec4 b)
{
return _mm_sub_ss(a, b);
}
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_mul(glm_vec4 a, glm_vec4 b)
{
return _mm_mul_ps(a, b);
}
GLM_FUNC_QUALIFIER glm_vec4 glm_vec1_mul(glm_vec4 a, glm_vec4 b)
{
return _mm_mul_ss(a, b);
}
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_div(glm_vec4 a, glm_vec4 b)
{
return _mm_div_ps(a, b);
}
GLM_FUNC_QUALIFIER glm_vec4 glm_vec1_div(glm_vec4 a, glm_vec4 b)
{
return _mm_div_ss(a, b);
}
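// "lowp" division multiplies by _mm_rcp_ps, an approximate reciprocal with
// roughly 12 bits of precision, trading accuracy for speed.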
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_div_lowp(glm_vec4 a, glm_vec4 b)
{
return glm_vec4_mul(a, _mm_rcp_ps(b));
}
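// Identity shuffle: _MM_SHUFFLE(3, 2, 1, 0) keeps every lane in place.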
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_swizzle_xyzw(glm_vec4 a)
{
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
return _mm_permute_ps(a, _MM_SHUFFLE(3, 2, 1, 0));
# else
return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 2, 1, 0));
# endif
}
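// Fused multiply-add on the lowest lane: a * b + c in a single rounding step
// when hardware FMA is available; note the FMA path is skipped for Clang.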
GLM_FUNC_QUALIFIER glm_vec4 glm_vec1_fma(glm_vec4 a, glm_vec4 b, glm_vec4 c)
{
# if (GLM_ARCH & GLM_ARCH_AVX2_BIT) && !(GLM_COMPILER & GLM_COMPILER_CLANG)
return _mm_fmadd_ss(a, b, c);
# else
return _mm_add_ss(_mm_mul_ss(a, b), c);
# endif
}
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_fma(glm_vec4 a, glm_vec4 b, glm_vec4 c)
{
# if (GLM_ARCH & GLM_ARCH_AVX2_BIT) && !(GLM_COMPILER & GLM_COMPILER_CLANG)
return _mm_fmadd_ps(a, b, c);
# else
return glm_vec4_add(glm_vec4_mul(a, b), c);
# endif
}
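// Absolute value by clearing the IEEE 754 sign bit with a 0x7FFFFFFF mask.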
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_abs(glm_vec4 x)
{
return _mm_and_ps(x, _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)));
}
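// Pre-SSSE3 integer abs: sgn0 = x >> 31 is all 1s for negative lanes, so
// (x ^ sgn0) - sgn0 negates those lanes and leaves the others unchanged.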
GLM_FUNC_QUALIFIER glm_ivec4 glm_ivec4_abs(glm_ivec4 x)
{
# if GLM_ARCH & GLM_ARCH_SSSE3_BIT
return _mm_sign_epi32(x, x);
# else
glm_ivec4 const sgn0 = _mm_srai_epi32(x, 31);
glm_ivec4 const inv0 = _mm_xor_si128(x, sgn0);
glm_ivec4 const sub0 = _mm_sub_epi32(inv0, sgn0);
return sub0;
# endif
}
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_sign(glm_vec4 x)
{
glm_vec4 const zro0 = _mm_setzero_ps();
glm_vec4 const cmp0 = _mm_cmplt_ps(x, zro0);
glm_vec4 const cmp1 = _mm_cmpgt_ps(x, zro0);
glm_vec4 const and0 = _mm_and_ps(cmp0, _mm_set1_ps(-1.0f));
glm_vec4 const and1 = _mm_and_ps(cmp1, _mm_set1_ps(1.0f));
glm_vec4 const or0 = _mm_or_ps(and0, and1);
return or0;
}
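// SSE2 rounding trick: adding and then subtracting 2^23 (8388608.0f) forces
// the fraction bits out, rounding in the current FPU mode (round-to-nearest-
// even by default). The sign of x is first copied onto the magic constant so
// the trick also works for negative inputs.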
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_round(glm_vec4 x)
{
# if GLM_ARCH & GLM_ARCH_SSE41_BIT
return _mm_round_ps(x, _MM_FROUND_TO_NEAREST_INT);
# else
glm_vec4 const sgn0 = _mm_castsi128_ps(_mm_set1_epi32(int(0x80000000)));
glm_vec4 const and0 = _mm_and_ps(sgn0, x);
glm_vec4 const or0 = _mm_or_ps(and0, _mm_set_ps1(8388608.0f));
glm_vec4 const add0 = glm_vec4_add(x, or0);
glm_vec4 const sub0 = glm_vec4_sub(add0, or0);
return sub0;
# endif
}
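// Floor fallback: round to nearest, then subtract 1 wherever rounding
// overshot x.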
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_floor(glm_vec4 x)
{
# if GLM_ARCH & GLM_ARCH_SSE41_BIT
return _mm_floor_ps(x);
# else
glm_vec4 const rnd0 = glm_vec4_round(x);
glm_vec4 const cmp0 = _mm_cmplt_ps(x, rnd0);
glm_vec4 const and0 = _mm_and_ps(cmp0, _mm_set1_ps(1.0f));
glm_vec4 const sub0 = glm_vec4_sub(rnd0, and0);
return sub0;
# endif
}
/* trunc TODO
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_trunc(glm_vec4 x)
{
return glm_vec4();
}
*/
// roundEven: reuses the 2^23 trick; in the default round-to-nearest-even
// mode, halfway cases land on the even integer, as GLSL roundEven() requires.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_roundEven(glm_vec4 x)
{
glm_vec4 const sgn0 = _mm_castsi128_ps(_mm_set1_epi32(int(0x80000000)));
glm_vec4 const and0 = _mm_and_ps(sgn0, x);
glm_vec4 const or0 = _mm_or_ps(and0, _mm_set_ps1(8388608.0f));
glm_vec4 const add0 = glm_vec4_add(x, or0);
glm_vec4 const sub0 = glm_vec4_sub(add0, or0);
return sub0;
}
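// Ceil fallback: round to nearest, then add 1 wherever rounding undershot x.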
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_ceil(glm_vec4 x)
{
# if GLM_ARCH & GLM_ARCH_SSE41_BIT
return _mm_ceil_ps(x);
# else
glm_vec4 const rnd0 = glm_vec4_round(x);
glm_vec4 const cmp0 = _mm_cmpgt_ps(x, rnd0);
glm_vec4 const and0 = _mm_and_ps(cmp0, _mm_set1_ps(1.0f));
glm_vec4 const add0 = glm_vec4_add(rnd0, and0);
return add0;
# endif
}
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_fract(glm_vec4 x)
{
glm_vec4 const flr0 = glm_vec4_floor(x);
glm_vec4 const sub0 = glm_vec4_sub(x, flr0);
return sub0;
}
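// GLSL-style mod: x - y * floor(x / y), so the result takes the sign of y.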
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_mod(glm_vec4 x, glm_vec4 y)
{
glm_vec4 const div0 = glm_vec4_div(x, y);
glm_vec4 const flr0 = glm_vec4_floor(div0);
glm_vec4 const mul0 = glm_vec4_mul(y, flr0);
glm_vec4 const sub0 = glm_vec4_sub(x, mul0);
return sub0;
}
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_clamp(glm_vec4 v, glm_vec4 minVal, glm_vec4 maxVal)
{
glm_vec4 const min0 = _mm_min_ps(v, maxVal);
glm_vec4 const max0 = _mm_max_ps(min0, minVal);
return max0;
}
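// Linear interpolation: v1 * (1 - a) + v2 * a, with the second term folded
// into a fused multiply-add where available.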
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_mix(glm_vec4 v1, glm_vec4 v2, glm_vec4 a)
{
glm_vec4 const sub0 = glm_vec4_sub(_mm_set1_ps(1.0f), a);
glm_vec4 const mul0 = glm_vec4_mul(v1, sub0);
glm_vec4 const mad0 = glm_vec4_fma(v2, a, mul0);
return mad0;
}
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_step(glm_vec4 edge, glm_vec4 x)
{
glm_vec4 const cmp = _mm_cmplt_ps(x, edge);
return _mm_andnot_ps(cmp, _mm_set1_ps(1.0f)); // per component: 0.0f where x < edge, 1.0f otherwise
}
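// smoothstep: t = clamp((x - edge0) / (edge1 - edge0), 0, 1), then the
// Hermite polynomial t * t * (3 - 2 * t).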
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_smoothstep(glm_vec4 edge0, glm_vec4 edge1, glm_vec4 x)
{
glm_vec4 const sub0 = glm_vec4_sub(x, edge0);
glm_vec4 const sub1 = glm_vec4_sub(edge1, edge0);
glm_vec4 const div0 = glm_vec4_div(sub0, sub1); // (x - edge0) / (edge1 - edge0)
glm_vec4 const clp0 = glm_vec4_clamp(div0, _mm_setzero_ps(), _mm_set1_ps(1.0f));
glm_vec4 const mul0 = glm_vec4_mul(_mm_set1_ps(2.0f), clp0);
glm_vec4 const sub2 = glm_vec4_sub(_mm_set1_ps(3.0f), mul0);
glm_vec4 const mul1 = glm_vec4_mul(clp0, clp0);
glm_vec4 const mul2 = glm_vec4_mul(mul1, sub2);
return mul2;
}
// Agner Fog method
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_nan(glm_vec4 x)
{
glm_ivec4 const t1 = _mm_castps_si128(x); // reinterpret as 32-bit integer
glm_ivec4 const t2 = _mm_sll_epi32(t1, _mm_cvtsi32_si128(1)); // shift out sign bit
glm_ivec4 const t3 = _mm_set1_epi32(int(0xFF000000)); // exponent mask
glm_ivec4 const t4 = _mm_and_si128(t2, t3); // exponent
glm_ivec4 const t5 = _mm_andnot_si128(t3, t2); // fraction
glm_ivec4 const Equal = _mm_cmpeq_epi32(t3, t4); // exponent == all 1s
glm_ivec4 const FracZero = _mm_cmpeq_epi32(t5, _mm_setzero_si128()); // fraction == 0
glm_ivec4 const And = _mm_andnot_si128(FracZero, Equal); // exponent all 1s AND fraction != 0
return _mm_castsi128_ps(And); // NaN has exponent = all 1s and fraction != 0
}
// Agner Fog method
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_inf(glm_vec4 x)
{
glm_ivec4 const t1 = _mm_castps_si128(x); // reinterpret as 32-bit integer
glm_ivec4 const t2 = _mm_sll_epi32(t1, _mm_cvtsi32_si128(1)); // shift out sign bit
return _mm_castsi128_ps(_mm_cmpeq_epi32(t2, _mm_set1_epi32(int(0xFF000000)))); // exponent is all 1s, fraction is 0
}
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
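For illustration, a minimal usage sketch (not part of the shipped header). It
assumes an x86-64 build, where SSE2 is enabled by default so the code path
above is selected, and that the header is includable on its own through its
platform.h include:

// Hypothetical standalone example; values are illustrative.
#include <glm/simd/common.h>
#include <cstdio>

int main()
{
    // _mm_set_ps takes lanes in reverse order: this vector is (1, 2, 3, 4).
    glm_vec4 const x = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
    glm_vec4 const edge = _mm_set1_ps(2.5f);

    glm_vec4 const s = glm_vec4_step(edge, x); // expected (0, 0, 1, 1)
    glm_vec4 const f = glm_vec4_fract(x);      // expected (0, 0, 0, 0)

    float out[4];
    _mm_storeu_ps(out, s);
    std::printf("step:  %g %g %g %g\n", out[0], out[1], out[2], out[3]);
    _mm_storeu_ps(out, f);
    std::printf("fract: %g %g %g %g\n", out[0], out[1], out[2], out[3]);
    return 0;
}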