Version
menu_open
link
Wwise SDK 2019.2.15
AkSimdMath.h
Go to the documentation of this file.
1 /***********************************************************************
2  The content of this file includes source code for the sound engine
3  portion of the AUDIOKINETIC Wwise Technology and constitutes "Level
4  Two Source Code" as defined in the Source Code Addendum attached
5  with this file. Any use of the Level Two Source Code shall be
6  subject to the terms and conditions outlined in the Source Code
7  Addendum and the End User License Agreement for Wwise(R).
8 
9  Version: <VERSION> Build: <BUILDNUMBER>
10  Copyright (c) <COPYRIGHTYEAR> Audiokinetic Inc.
11  ***********************************************************************/
12 
13 //////////////////////////////////////////////////////////////////////
14 //
15 // AkSimdMath.h
16 //
17 // Library of static functions for math computations with SIMD in mind.
18 //
19 //////////////////////////////////////////////////////////////////////
20 #ifndef _AKSIMDMATH_H_
21 #define _AKSIMDMATH_H_
22 
25 #include <AkMath.h>
26 
27 namespace AkMath
28 {
29  //Take 4 vectors <x,y,z> and return <x,x,x,x>, <y,y,y,y> and <z,z,z,z>
30  AkForceInline void PermuteVectors3(const AKSIMD_V4F32& v0, const AKSIMD_V4F32& v1, const AKSIMD_V4F32& v2, const AKSIMD_V4F32& v3,
31  AKSIMD_V4F32& out_xxxx, AKSIMD_V4F32& out_yyyy, AKSIMD_V4F32& out_zzzz)
32  {
33  AKSIMD_V4F32 xyxy0 = AKSIMD_SHUFFLE_V4F32(v0, v1, AKSIMD_SHUFFLE(1, 0, 1, 0));
34  AKSIMD_V4F32 xyxy1 = AKSIMD_SHUFFLE_V4F32(v2, v3, AKSIMD_SHUFFLE(1, 0, 1, 0));
35  out_xxxx = AKSIMD_SHUFFLE_V4F32(xyxy0, xyxy1, AKSIMD_SHUFFLE(2, 0, 2, 0));
36  out_yyyy = AKSIMD_SHUFFLE_V4F32(xyxy0, xyxy1, AKSIMD_SHUFFLE(3, 1, 3, 1));
37 
38  AKSIMD_V4F32 zwzw0 = AKSIMD_SHUFFLE_V4F32(v0, v1, AKSIMD_SHUFFLE(3, 2, 3, 2));
39  AKSIMD_V4F32 zwzw1 = AKSIMD_SHUFFLE_V4F32(v2, v3, AKSIMD_SHUFFLE(3, 2, 3, 2));
40  out_zzzz = AKSIMD_SHUFFLE_V4F32(zwzw0, zwzw1, AKSIMD_SHUFFLE(2, 0, 2, 0));
41  }
42 
43  //Take 3 vectors <x3,x2,x1,x0>, <y,y,y,y> and <z,z,z,z> and return 4 vectors <x,y,z,z>
44  AkForceInline void UnpermuteVectors3(const AKSIMD_V4F32& xxxx, const AKSIMD_V4F32& yyyy, const AKSIMD_V4F32& zzzz,
45  AKSIMD_V4F32& out_v0, AKSIMD_V4F32& out_v1, AKSIMD_V4F32& out_v2, AKSIMD_V4F32& out_v3)
46  {
47  /*__m128 _mm_shuffle_ps(__m128 lo, __m128 hi, _MM_SHUFFLE(hi3, hi2, lo1, lo0))
48  Interleave inputs into low 2 floats and high 2 floats of output.Basically
49  out[0] = lo[lo0];
50  out[1] = lo[lo1];
51  out[2] = hi[hi2];
52  out[3] = hi[hi3];
53  */
54 
55  AKSIMD_V4F32 x0x1y0y1 = AKSIMD_SHUFFLE_V4F32(xxxx, yyyy, AKSIMD_SHUFFLE(1, 0, 1, 0));
56  AKSIMD_V4F32 z0z1z0z1 = AKSIMD_SHUFFLE_V4F32(zzzz, zzzz, AKSIMD_SHUFFLE(1, 0, 1, 0));
57 
58  out_v0 = AKSIMD_SHUFFLE_V4F32(x0x1y0y1, z0z1z0z1, AKSIMD_SHUFFLE(2, 0, 2, 0));
59  out_v1 = AKSIMD_SHUFFLE_V4F32(x0x1y0y1, z0z1z0z1, AKSIMD_SHUFFLE(3, 1, 3, 1));
60 
61  AKSIMD_V4F32 x2x3y2y3 = AKSIMD_SHUFFLE_V4F32(xxxx, yyyy, AKSIMD_SHUFFLE(3, 2, 3, 2));
62  AKSIMD_V4F32 z2z3z2z3 = AKSIMD_SHUFFLE_V4F32(zzzz, zzzz, AKSIMD_SHUFFLE(3, 2, 3, 2));
63 
64  out_v2 = AKSIMD_SHUFFLE_V4F32(x2x3y2y3, z2z3z2z3, AKSIMD_SHUFFLE(2, 0, 2, 0));
65  out_v3 = AKSIMD_SHUFFLE_V4F32(x2x3y2y3, z2z3z2z3, AKSIMD_SHUFFLE(3, 1, 3, 1));
66  }
67 
68  //Take 4 vectors <x,y,z,w> and return <x,x,x,x>, <y,y,y,y>, <z,z,z,z> and <w,w,w,w>
69  AkForceInline void PermuteVectors4(const AKSIMD_V4F32& v0, const AKSIMD_V4F32& v1, const AKSIMD_V4F32& v2, const AKSIMD_V4F32& v3,
70  AKSIMD_V4F32& out_xxxx, AKSIMD_V4F32& out_yyyy, AKSIMD_V4F32& out_zzzz, AKSIMD_V4F32& out_wwww)
71  {
72  AKSIMD_V4F32 xyxy0 = AKSIMD_SHUFFLE_V4F32(v0, v1, AKSIMD_SHUFFLE(1, 0, 1, 0));
73  AKSIMD_V4F32 xyxy1 = AKSIMD_SHUFFLE_V4F32(v2, v3, AKSIMD_SHUFFLE(1, 0, 1, 0));
74  out_xxxx = AKSIMD_SHUFFLE_V4F32(xyxy0, xyxy1, AKSIMD_SHUFFLE(2, 0, 2, 0));
75  out_yyyy = AKSIMD_SHUFFLE_V4F32(xyxy0, xyxy1, AKSIMD_SHUFFLE(3, 1, 3, 1));
76 
77  AKSIMD_V4F32 zwzw0 = AKSIMD_SHUFFLE_V4F32(v0, v1, AKSIMD_SHUFFLE(3, 2, 3, 2));
78  AKSIMD_V4F32 zwzw1 = AKSIMD_SHUFFLE_V4F32(v2, v3, AKSIMD_SHUFFLE(3, 2, 3, 2));
79  out_zzzz = AKSIMD_SHUFFLE_V4F32(zwzw0, zwzw1, AKSIMD_SHUFFLE(2, 0, 2, 0));
80  out_wwww = AKSIMD_SHUFFLE_V4F32(zwzw0, zwzw1, AKSIMD_SHUFFLE(3, 1, 3, 1));
81  }
82 
83  // 3-element dot product of 4 vectors.
85  const AKSIMD_V4F32& v1_x, const AKSIMD_V4F32& v1_y, const AKSIMD_V4F32& v1_z)
86  {
87  return AKSIMD_ADD_V4F32(AKSIMD_ADD_V4F32(AKSIMD_MUL_V4F32(v0_x, v1_x), AKSIMD_MUL_V4F32(v0_y, v1_y)), AKSIMD_MUL_V4F32(v0_z, v1_z));
88  }
89 
90  // 3-element dot product of 1 common vector with 4 vectors
91  AkForceInline AKSIMD_V4F32 DotPoduct3_1x4(const AKSIMD_V4F32& v0_xyz, const AKSIMD_V4F32& v1_x, const AKSIMD_V4F32& v1_y, const AKSIMD_V4F32& v1_z)
92  {
93  const AKSIMD_V4F32 v0_x = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(0, 0, 0, 0));
94  const AKSIMD_V4F32 v0_y = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(1, 1, 1, 1));
95  const AKSIMD_V4F32 v0_z = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(2, 2, 2, 2));
96  return DotPoduct3_4x4(v0_x, v0_y, v0_z, v1_x, v1_y, v1_z);
97  }
98 
99  // 4-element dot product of 4 vectors.
100  AkForceInline AKSIMD_V4F32 DotPoduct4_4x4(const AKSIMD_V4F32& v0_x, const AKSIMD_V4F32& v0_y, const AKSIMD_V4F32& v0_z, const AKSIMD_V4F32& v0_w,
101  const AKSIMD_V4F32& v1_x, const AKSIMD_V4F32& v1_y, const AKSIMD_V4F32& v1_z, const AKSIMD_V4F32& v1_w)
102  {
103  return AKSIMD_ADD_V4F32(
105  AKSIMD_MUL_V4F32(v0_x, v1_x),
106  AKSIMD_MUL_V4F32(v0_y, v1_y)),
108  AKSIMD_MUL_V4F32(v0_z, v1_z),
109  AKSIMD_MUL_V4F32(v0_w, v1_w)));
110  }
111 
112  // 4-element dot product of 1 common vector with 4 vectors
113  AkForceInline AKSIMD_V4F32 DotPoduct4_1x4(const AKSIMD_V4F32& v0_xyz, const AKSIMD_V4F32& v1_x, const AKSIMD_V4F32& v1_y, const AKSIMD_V4F32& v1_z, const AKSIMD_V4F32& v1_w)
114  {
115  const AKSIMD_V4F32 v0_x = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(0, 0, 0, 0));
116  const AKSIMD_V4F32 v0_y = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(1, 1, 1, 1));
117  const AKSIMD_V4F32 v0_z = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(2, 2, 2, 2));
118  const AKSIMD_V4F32 v0_w = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(2, 2, 2, 2));
119  return DotPoduct4_4x4(v0_x, v0_y, v0_z, v0_w, v1_x, v1_y, v1_z, v1_w);
120  }
121 
122  // Trig functions approximation (based on the Fast versions found in AkMath.h)
124  {
125  const AKSIMD_V4F32 B = AKSIMD_SET_V4F32(4 / PI);
126  const AKSIMD_V4F32 C = AKSIMD_SET_V4F32(-4 / (PI * PI));
127  const AKSIMD_V4F32 P = AKSIMD_SET_V4F32(0.225f);
128 
129  //float y = B * x + C * x * fabs(x); //float y = X*(B+C*fabs(x));
130 
132  y = AKSIMD_MADD_V4F32(y, C, B);
133  y = AKSIMD_MUL_V4F32(y, x);
134 
135  // return P * (y * fabs(y) - y) + y;
136  AKSIMD_V4F32 sine = AKSIMD_ABS_V4F32(y);
137  sine = AKSIMD_MSUB_V4F32(y, sine, y);
138  sine = AKSIMD_MADD_V4F32(sine, P, y);
139  return sine;
140  }
141 
143  {
144  //Compute the offset needed for the cosinus. If you compare with FastCos, the constants have been combined.
145  const AKSIMD_V4F32 offsetNoWrap = AKSIMD_SET_V4F32(PI / 2); // cos = sin(x+pi/2)
146  const AKSIMD_V4F32 offsetWrap = AKSIMD_SET_V4F32(PI / 2 - 2 * PI); // Wrap: cos(x) = cos(x - 2 pi)
147  const AKSIMD_V4F32 vHalfPI = AKSIMD_SET_V4F32(PI / 2);
148 
149  // (cond1 >= cond2) ? a : b
150  AKSIMD_V4F32 offset = AKSIMD_SEL_GTEZ_V4F32(AKSIMD_SUB_V4F32(x, vHalfPI), offsetWrap, offsetNoWrap);
151  return AKSIMD_SIN_V4F32(AKSIMD_ADD_V4F32(x, offset));
152  }
153 
155  {
156  const AKSIMD_V4F32 vNeg = AKSIMD_SET_V4F32(-1.0f);
157  const AKSIMD_V4F32 vOne = AKSIMD_SET_V4F32(1.0f);
158  const AKSIMD_V4F32 vZero = AKSIMD_SET_V4F32(0.0f);
159  const AKSIMD_V4F32 vK = AKSIMD_SET_V4F32(0.28f);
160  const AKSIMD_V4F32 vKRepro = AKSIMD_SET_V4F32(1.f / 0.28f);
161  const AKSIMD_V4F32 vHalfPI = AKSIMD_SET_V4F32(PI / 2);
162  const AKSIMD_V4F32 vPI = AKSIMD_SET_V4F32(PI);
163  const AKSIMD_V4F32 vEpsilon = AKSIMD_SET_V4F32(1e-20f);
164 
165  //Ensure x is not zero a == 0 ? b : c.
166  x = AKSIMD_VSEL_V4F32(x, vEpsilon, AKSIMD_EQ_V4F32(x, vZero));
167 
168  AKSIMD_V4F32 z = AKSIMD_DIV_V4F32(y, x);
169  AKSIMD_V4F32 absz = AKSIMD_ABS_V4F32(z);
170  AKSIMD_V4COND zcond = AKSIMD_GTEQ_V4F32(vOne, absz);
171 
172  //The approximation is done in 2 segments of the form: offset + z/a*(z*z + b);
173 
174  //if ( fabsf( z ) < 1.0f ) then use .28 for the a coef
175  AKSIMD_V4F32 a = AKSIMD_VSEL_V4F32(vNeg, vK, zcond);
176 
177  //if ( fabsf( z ) < 1.0f ) then use 1 for the b factor, else use 0.28
178  AKSIMD_V4F32 b = AKSIMD_VSEL_V4F32(vK, vKRepro, zcond);
179 
180  AKSIMD_V4F32 atan = AKSIMD_MADD_V4F32(z, z, b);
181  atan = AKSIMD_MUL_V4F32(atan, a);
182  atan = AKSIMD_DIV_V4F32(z, atan);
183 
184  //Adjust for quadrant
185  // zcond x<0 y<0 offset
186  // 1 0 0 0
187  // 1 0 1 0
188  // 1 1 0 +PI
189  // 1 1 1 -PI
190  // 0 0 0 +PI/2
191  // 0 0 1 -PI/2
192  // 0 1 0 +PI/2
193  // 0 1 1 -PI/2
194 
195  AKSIMD_V4F32 offsetByX = AKSIMD_SEL_GTEZ_V4F32(x, vZero, vPI);
196  AKSIMD_V4F32 offset = AKSIMD_VSEL_V4F32(vHalfPI, offsetByX, zcond);
197  AKSIMD_V4F32 sign = AKSIMD_SEL_GTEZ_V4F32(y, vOne, vNeg);
198 
199  //Apply computed offset.
200  atan = AKSIMD_MADD_V4F32(offset, sign, atan);
201  return atan;
202  }
203 }
204 
205 #endif //_AKSIMDMATH_H_
AkForceInline AKSIMD_V4F32 AKSIMD_EQ_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:339
#define AKSIMD_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: AkSimd.h:489
AkForceInline AKSIMD_V4F32 DotPoduct3_1x4(const AKSIMD_V4F32 &v0_xyz, const AKSIMD_V4F32 &v1_x, const AKSIMD_V4F32 &v1_y, const AKSIMD_V4F32 &v1_z)
Definition: AkSimdMath.h:91
AkForceInline AKSIMD_V4F32 DotPoduct4_1x4(const AKSIMD_V4F32 &v0_xyz, const AKSIMD_V4F32 &v1_x, const AKSIMD_V4F32 &v1_y, const AKSIMD_V4F32 &v1_z, const AKSIMD_V4F32 &v1_w)
Definition: AkSimdMath.h:113
AkForceInline AKSIMD_V4F32 AKSIMD_GTEQ_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:291
AkForceInline AKSIMD_V4F32 DotPoduct4_4x4(const AKSIMD_V4F32 &v0_x, const AKSIMD_V4F32 &v0_y, const AKSIMD_V4F32 &v0_z, const AKSIMD_V4F32 &v0_w, const AKSIMD_V4F32 &v1_x, const AKSIMD_V4F32 &v1_y, const AKSIMD_V4F32 &v1_z, const AKSIMD_V4F32 &v1_w)
Definition: AkSimdMath.h:100
#define AKSIMD_VSEL_V4F32(__a__, __b__, __c__)
Return a when control mask is 0, return b when control mask is non zero, control mask is in c and usu...
Definition: AkSimd.h:876
Definition: AkSimdMath.h:28
AkForceInline AKSIMD_V4F32 AKSIMD_COS_V4F32(const AKSIMD_V4F32 x)
Definition: AkSimdMath.h:142
uint32x4_t AKSIMD_V4COND
Vector of 4 comparison results.
Definition: AkSimd.h:75
AkForceInline void UnpermuteVectors3(const AKSIMD_V4F32 &xxxx, const AKSIMD_V4F32 &yyyy, const AKSIMD_V4F32 &zzzz, AKSIMD_V4F32 &out_v0, AKSIMD_V4F32 &out_v1, AKSIMD_V4F32 &out_v2, AKSIMD_V4F32 &out_v3)
Definition: AkSimdMath.h:44
#define AKSIMD_MADD_V4F32(__a__, __b__, __c__)
Vector multiply-add operation.
Definition: AkSimd.h:631
AkForceInline AKSIMD_V4F32 AKSIMD_SUB_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:401
#define AKSIMD_DIV_V4F32(a, b)
Definition: AkSimd.h:209
AkForceInline AKSIMD_V4F32 DotPoduct3_4x4(const AKSIMD_V4F32 &v0_x, const AKSIMD_V4F32 &v0_y, const AKSIMD_V4F32 &v0_z, const AKSIMD_V4F32 &v1_x, const AKSIMD_V4F32 &v1_y, const AKSIMD_V4F32 &v1_z)
Definition: AkSimdMath.h:84
AkForceInline AKSIMD_V4F32 AKSIMD_ATAN2_V4F32(AKSIMD_V4F32 y, AKSIMD_V4F32 x)
Definition: AkSimdMath.h:154
#define AKSIMD_MSUB_V4F32(__a__, __b__, __c__)
Vector multiply-substract operation. Careful: vmlsq_f32 does c-(a*b) and not the expected (a*b)-c.
Definition: AkSimd.h:634
#define AKSIMD_SEL_GTEZ_V4F32(__a__, __b__, __c__)
Definition: AkSimd.h:882
AkForceInline void PermuteVectors4(const AKSIMD_V4F32 &v0, const AKSIMD_V4F32 &v1, const AKSIMD_V4F32 &v2, const AKSIMD_V4F32 &v3, AKSIMD_V4F32 &out_xxxx, AKSIMD_V4F32 &out_yyyy, AKSIMD_V4F32 &out_zzzz, AKSIMD_V4F32 &out_wwww)
Definition: AkSimdMath.h:69
AkForceInline AKSIMD_V4F32 AKSIMD_MUL_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:480
AkForceInline AKSIMD_V4F32 AKSIMD_SHUFFLE_V4F32(const AKSIMD_V4F32 &xyzw, const AKSIMD_V4F32 &abcd, int mask)
Definition: AkSimd.h:750
AkForceInline void PermuteVectors3(const AKSIMD_V4F32 &v0, const AKSIMD_V4F32 &v1, const AKSIMD_V4F32 &v2, const AKSIMD_V4F32 &v3, AKSIMD_V4F32 &out_xxxx, AKSIMD_V4F32 &out_yyyy, AKSIMD_V4F32 &out_zzzz)
Definition: AkSimdMath.h:30
AkForceInline AKSIMD_V4F32 AKSIMD_ADD_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:430
AkForceInline AKSIMD_V4F32 AKSIMD_SIN_V4F32(const AKSIMD_V4F32 x)
Definition: AkSimdMath.h:123
AkForceInline AKSIMD_V4F32 AKSIMD_ABS_V4F32(const AKSIMD_V4F32 &in_vec1)
Definition: AkSimd.h:571
#define AkForceInline
Definition: AkTypes.h:62
AkForceInline AKSIMD_V4F32 AKSIMD_SET_V4F32(AKSIMD_F32 in_value)
Definition: AkSimd.h:114

Was this page helpful?

Need Support?

Questions? Problems? Need more info? Contact us, and we can help!

Visit our Support page

Tell us about your project. We're here to help.

Register your project and we'll help you get started with no strings attached!

Get started with Wwise