From a8b3118e8305fd1c668ea25e07157b625c9747ff Mon Sep 17 00:00:00 2001 From: Joursoir Date: Sat, 10 Apr 2021 17:29:02 +0000 Subject: add glm headers --- src/include/glm/detail/func_geometric_simd.inl | 163 +++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 src/include/glm/detail/func_geometric_simd.inl (limited to 'src/include/glm/detail/func_geometric_simd.inl') diff --git a/src/include/glm/detail/func_geometric_simd.inl b/src/include/glm/detail/func_geometric_simd.inl new file mode 100644 index 0000000..7a8e127 --- /dev/null +++ b/src/include/glm/detail/func_geometric_simd.inl @@ -0,0 +1,163 @@ +/// @ref core +/// @file glm/detail/func_geometric_simd.inl + +#include "../simd/geometric.h" + +#if GLM_ARCH & GLM_ARCH_SSE2_BIT + +namespace glm{ +namespace detail +{ + template + struct compute_length<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& v) + { + return _mm_cvtss_f32(glm_vec4_length(v.data)); + } + }; + + template + struct compute_distance<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& p0, vec<4, float, Q> const& p1) + { + return _mm_cvtss_f32(glm_vec4_distance(p0.data, p1.data)); + } + }; + + template + struct compute_dot, float, true> + { + GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& x, vec<4, float, Q> const& y) + { + return _mm_cvtss_f32(glm_vec1_dot(x.data, y.data)); + } + }; + + template + struct compute_cross + { + GLM_FUNC_QUALIFIER static vec<3, float, Q> call(vec<3, float, Q> const& a, vec<3, float, Q> const& b) + { + __m128 const set0 = _mm_set_ps(0.0f, a.z, a.y, a.x); + __m128 const set1 = _mm_set_ps(0.0f, b.z, b.y, b.x); + __m128 const xpd0 = glm_vec4_cross(set0, set1); + + vec<4, float, Q> Result; + Result.data = xpd0; + return vec<3, float, Q>(Result); + } + }; + + template + struct compute_normalize<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v) + { + vec<4, float, Q> Result; + Result.data = glm_vec4_normalize(v.data); + return Result; + } + }; + + template + struct compute_faceforward<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& N, vec<4, float, Q> const& I, vec<4, float, Q> const& Nref) + { + vec<4, float, Q> Result; + Result.data = glm_vec4_faceforward(N.data, I.data, Nref.data); + return Result; + } + }; + + template + struct compute_reflect<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& I, vec<4, float, Q> const& N) + { + vec<4, float, Q> Result; + Result.data = glm_vec4_reflect(I.data, N.data); + return Result; + } + }; + + template + struct compute_refract<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& I, vec<4, float, Q> const& N, float eta) + { + vec<4, float, Q> Result; + Result.data = glm_vec4_refract(I.data, N.data, _mm_set1_ps(eta)); + return Result; + } + }; +}//namespace detail +}//namespace glm + +#elif GLM_ARCH & GLM_ARCH_NEON_BIT +namespace glm{ +namespace detail +{ + template + struct compute_length<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& v) + { + return sqrt(compute_dot, float, true>::call(v, v)); + } + }; + + template + struct compute_distance<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& p0, vec<4, float, Q> const& p1) + { + return compute_length<4, float, Q, true>::call(p1 - p0); + } + }; + + + template + struct compute_dot, float, true> + { + GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& x, vec<4, float, Q> const& y) + { +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT + float32x4_t v = vmulq_f32(x.data, y.data); + return vaddvq_f32(v); +#else // Armv7a with Neon + float32x4_t p = vmulq_f32(x.data, y.data); + float32x2_t v = vpadd_f32(vget_low_f32(p), vget_high_f32(p)); + v = vpadd_f32(v, v); + return vget_lane_f32(v, 0); +#endif + } + }; + + template + struct compute_normalize<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v) + { + float32x4_t p = vmulq_f32(v.data, v.data); +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT + p = vpaddq_f32(p, p); + p = vpaddq_f32(p, p); +#else + float32x2_t t = vpadd_f32(vget_low_f32(p), vget_high_f32(p)); + t = vpadd_f32(t, t); + p = vcombine_f32(t, t); +#endif + + float32x4_t vd = vrsqrteq_f32(p); + vec<4, float, Q> Result; + Result.data = vmulq_f32(v.data, vd); + return Result; + } + }; +}//namespace detail +}//namespace glm + +#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT -- cgit v1.2.3-18-g5258