From a8b3118e8305fd1c668ea25e07157b625c9747ff Mon Sep 17 00:00:00 2001 From: Joursoir Date: Sat, 10 Apr 2021 17:29:02 +0000 Subject: add glm headers --- src/include/glm/detail/func_common_simd.inl | 231 ++++++++++++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 src/include/glm/detail/func_common_simd.inl (limited to 'src/include/glm/detail/func_common_simd.inl') diff --git a/src/include/glm/detail/func_common_simd.inl b/src/include/glm/detail/func_common_simd.inl new file mode 100644 index 0000000..c04e6e5 --- /dev/null +++ b/src/include/glm/detail/func_common_simd.inl @@ -0,0 +1,231 @@ +/// @ref core +/// @file glm/detail/func_common_simd.inl + +#if GLM_ARCH & GLM_ARCH_SSE2_BIT + +#include "../simd/common.h" + +#include + +namespace glm{ +namespace detail +{ + template + struct compute_abs_vector<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v) + { + vec<4, float, Q> result; + result.data = glm_vec4_abs(v.data); + return result; + } + }; + + template + struct compute_abs_vector<4, int, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& v) + { + vec<4, int, Q> result; + result.data = glm_ivec4_abs(v.data); + return result; + } + }; + + template + struct compute_floor<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v) + { + vec<4, float, Q> result; + result.data = glm_vec4_floor(v.data); + return result; + } + }; + + template + struct compute_ceil<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v) + { + vec<4, float, Q> result; + result.data = glm_vec4_ceil(v.data); + return result; + } + }; + + template + struct compute_fract<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v) + { + vec<4, float, Q> result; + result.data = glm_vec4_fract(v.data); + return result; + } + }; + + template + struct compute_round<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v) + { + vec<4, float, Q> result; + result.data = glm_vec4_round(v.data); + return result; + } + }; + + template + struct compute_mod<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& x, vec<4, float, Q> const& y) + { + vec<4, float, Q> result; + result.data = glm_vec4_mod(x.data, y.data); + return result; + } + }; + + template + struct compute_min_vector<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2) + { + vec<4, float, Q> result; + result.data = _mm_min_ps(v1.data, v2.data); + return result; + } + }; + + template + struct compute_min_vector<4, int, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2) + { + vec<4, int, Q> result; + result.data = _mm_min_epi32(v1.data, v2.data); + return result; + } + }; + + template + struct compute_min_vector<4, uint, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& v1, vec<4, uint, Q> const& v2) + { + vec<4, uint, Q> result; + result.data = _mm_min_epu32(v1.data, v2.data); + return result; + } + }; + + template + struct compute_max_vector<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2) + { + vec<4, float, Q> result; + result.data = _mm_max_ps(v1.data, v2.data); + return result; + } + }; + + template + struct compute_max_vector<4, int, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2) + { + vec<4, int, Q> result; + result.data = _mm_max_epi32(v1.data, v2.data); + return result; + } + }; + + template + struct compute_max_vector<4, uint, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& v1, vec<4, uint, Q> const& v2) + { + vec<4, uint, Q> result; + result.data = _mm_max_epu32(v1.data, v2.data); + return result; + } + }; + + template + struct compute_clamp_vector<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& x, vec<4, float, Q> const& minVal, vec<4, float, Q> const& maxVal) + { + vec<4, float, Q> result; + result.data = _mm_min_ps(_mm_max_ps(x.data, minVal.data), maxVal.data); + return result; + } + }; + + template + struct compute_clamp_vector<4, int, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& x, vec<4, int, Q> const& minVal, vec<4, int, Q> const& maxVal) + { + vec<4, int, Q> result; + result.data = _mm_min_epi32(_mm_max_epi32(x.data, minVal.data), maxVal.data); + return result; + } + }; + + template + struct compute_clamp_vector<4, uint, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& x, vec<4, uint, Q> const& minVal, vec<4, uint, Q> const& maxVal) + { + vec<4, uint, Q> result; + result.data = _mm_min_epu32(_mm_max_epu32(x.data, minVal.data), maxVal.data); + return result; + } + }; + + template + struct compute_mix_vector<4, float, bool, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& x, vec<4, float, Q> const& y, vec<4, bool, Q> const& a) + { + __m128i const Load = _mm_set_epi32(-static_cast(a.w), -static_cast(a.z), -static_cast(a.y), -static_cast(a.x)); + __m128 const Mask = _mm_castsi128_ps(Load); + + vec<4, float, Q> Result; +# if 0 && GLM_ARCH & GLM_ARCH_AVX + Result.data = _mm_blendv_ps(x.data, y.data, Mask); +# else + Result.data = _mm_or_ps(_mm_and_ps(Mask, y.data), _mm_andnot_ps(Mask, x.data)); +# endif + return Result; + } + }; +/* FIXME + template + struct compute_step_vector + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& edge, vec<4, float, Q> const& x) + { + vec<4, float, Q> Result; + result.data = glm_vec4_step(edge.data, x.data); + return result; + } + }; +*/ + template + struct compute_smoothstep_vector<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& edge0, vec<4, float, Q> const& edge1, vec<4, float, Q> const& x) + { + vec<4, float, Q> Result; + Result.data = glm_vec4_smoothstep(edge0.data, edge1.data, x.data); + return Result; + } + }; +}//namespace detail +}//namespace glm + +#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT -- cgit v1.2.3-18-g5258