目录

Wwise SDK 2019.2.0
AkSimd.h
浏览该文件的文档.
1 /*******************************************************************************
2 The content of this file includes portions of the AUDIOKINETIC Wwise Technology
3 released in source code form as part of the SDK installer package.
4 
5 Commercial License Usage
6 
7 Licensees holding valid commercial licenses to the AUDIOKINETIC Wwise Technology
8 may use this file in accordance with the end user license agreement provided
9 with the software or, alternatively, in accordance with the terms contained in a
10 written agreement between you and Audiokinetic Inc.
11 
12 Apache License Usage
13 
14 Alternatively, this file may be used under the Apache License, Version 2.0 (the
15 "Apache License"); you may not use this file except in compliance with the
16 Apache License. You may obtain a copy of the Apache License at
17 http://www.apache.org/licenses/LICENSE-2.0.
18 
19 Unless required by applicable law or agreed to in writing, software distributed
20 under the Apache License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
21 OR CONDITIONS OF ANY KIND, either express or implied. See the Apache License for
22 the specific language governing permissions and limitations under the License.
23 
24  Version: <VERSION> Build: <BUILDNUMBER>
25  Copyright (c) <COPYRIGHTYEAR> Audiokinetic Inc.
26 *******************************************************************************/
27 
28 // AkSimd.h
29 
30 /// \file
31 /// AKSIMD - Generic (no SIMD support) implementation
32 
33 #ifndef _AKSIMD_GENERIC_H_
34 #define _AKSIMD_GENERIC_H_
35 
36 #include <math.h>
37 #include <string.h>
40 
41 ////////////////////////////////////////////////////////////////////////
42 /// @name AKSIMD types
43 //@{
44 typedef AkInt32 AKSIMD_I32; ///< 32-bit signed integer
45 typedef struct { AkInt32 m_data[4]; } AKSIMD_V4I32; ///< Vector of 4 32-bit signed integers
46 typedef struct { AkUInt32 m_data[4]; } AKSIMD_V4UI32; ///< Vector of 4 32-bit unsigned integers
47 typedef AkReal32 AKSIMD_F32; ///< 32-bit float
48 typedef struct { AkReal32 m_data[2]; } AKSIMD_V2F32; ///< Vector of 2 32-bit floats
49 typedef struct { AkReal32 m_data[4]; } AKSIMD_V4F32; ///< Vector of 4 32-bit floats
50 typedef AKSIMD_V4UI32 AKSIMD_V4COND; ///< Vector of 4 comparison results
51 
52 #pragma pack(push,1)
53 typedef struct { AkInt32 m_data[4]; } AKSIMD_V4I32_UNALIGNED; ///< Unaligned Vector of 4 32-bit signed integers
54 typedef struct { AkUInt32 m_data[4]; } AKSIMD_V4UI32_UNALIGNED; ///< Unaligned Vector of 4 32-bit unsigned integers
55 typedef struct { AkReal32 m_data[2]; } AKSIMD_V2F32_UNALIGNED; ///< Unaligned Vector of 2 32-bit floats
56 typedef struct { AkReal32 m_data[4]; } AKSIMD_V4F32_UNALIGNED; ///< Unaligned Vector of 4 32-bit floats
57 #pragma pack(pop)
58 
59 //@}
60 ////////////////////////////////////////////////////////////////////////
61 
62 ////////////////////////////////////////////////////////////////////////
63 /// @name Platform specific defines for prefetching
64 //@{
65 
66 #define AKSIMD_ARCHCACHELINESIZE (32) ///< Assumed cache line width for architectures on this platform
67 #define AKSIMD_ARCHMAXPREFETCHSIZE (512) ///< Upper limit on how much prefetching is desirable (assuming 8-way cache)
68 /// Cross-platform memory prefetch of effective address assuming non-temporal data; in this generic implementation the macro expands to nothing
69 #define AKSIMD_PREFETCHMEMORY( __offset__, __add__ )
70 
71 //@}
72 ////////////////////////////////////////////////////////////////////////
73 
74 #ifndef AKSIMD_GETELEMENT_V4F32
75 #define AKSIMD_GETELEMENT_V4F32( __vName, __num__ ) (__vName).m_data[(__num__)] ///< Element __num__ of the m_data array of __vName (usable as an lvalue)
76 #endif
77 
78 #ifndef AKSIMD_GETELEMENT_V2F32
79 #define AKSIMD_GETELEMENT_V2F32( __vName, __num__ ) (__vName).m_data[(__num__)] ///< Element __num__ of the m_data array of __vName (usable as an lvalue)
80 #endif
81 
82 #ifndef AKSIMD_GETELEMENT_V4I32
83 #define AKSIMD_GETELEMENT_V4I32( __vName, __num__ ) (__vName).m_data[(__num__)] ///< Element __num__ of the m_data array of __vName (usable as an lvalue)
84 #endif
85 
86 ////////////////////////////////////////////////////////////////////////
87 /// @name Platform specific memory size alignment for allocation purposes
88 //@{
89 #define AKSIMD_ALIGNSIZE( __Size__ ) (((__Size__) + 15) & ~15) ///< Rounds __Size__ up to the next multiple of 16
90 //@}
91 ////////////////////////////////////////////////////////////////////////
92 
93 ////////////////////////////////////////////////////////////////////////
94 /// @name AKSIMD loading / setting
95 //@{
96 #define AKSIMD_LOADU_V4I32( in_pData ) (*(in_pData)) ///< Dereferences in_pData as-is; no cast is applied, so the result has the pointee's type
97 
98 #define AKSIMD_LOADU_V4F32( in_pValue ) (*(AKSIMD_V4F32*)(in_pValue)) ///< Reads an AKSIMD_V4F32 through in_pValue reinterpreted as AKSIMD_V4F32*
99 
100 #define AKSIMD_LOAD_V4F32( in_pValue ) (*(AKSIMD_V4F32*)(in_pValue)) ///< Same expansion as AKSIMD_LOADU_V4F32 in this generic implementation
101 
103 {
104  AKSIMD_V4F32 vector;
105  vector.m_data[0] = in_value;
106  vector.m_data[1] = in_value;
107  vector.m_data[2] = in_value;
108  vector.m_data[3] = in_value;
109 
110  return vector;
111 }
112 
113 // _mm_set_ps1
115 {
116  AKSIMD_V4F32 vector;
117  vector.m_data[0] = in_value;
118  vector.m_data[1] = in_value;
119  vector.m_data[2] = in_value;
120  vector.m_data[3] = in_value;
121 
122  return vector;
123 }
124 
125 
127 {
128  AKSIMD_V2F32 vector;
129  vector.m_data[0] = in_value;
130  vector.m_data[1] = in_value;
131 
132  return vector;
133 }
134 
135 // _mm_setzero_ps()
137 {
138  AKSIMD_V4F32 vector;
139  vector.m_data[0] = 0.f;
140  vector.m_data[1] = 0.f;
141  vector.m_data[2] = 0.f;
142  vector.m_data[3] = 0.f;
143 
144  return vector;
145 }
146 
148 {
149  AKSIMD_V2F32 vector;
150  vector.m_data[0] = 0.f;
151  vector.m_data[1] = 0.f;
152 
153  return vector;
154 }
155 // _mm_setzero_si128()
157 {
158  AKSIMD_V4I32 vector;
159  vector.m_data[0] = 0;
160  vector.m_data[1] = 0;
161  vector.m_data[2] = 0;
162  vector.m_data[3] = 0;
163 
164  return vector;
165 }
166 
167 
168 /// Loads a single-precision, floating-point value into the low word
169 /// and clears the upper three words.
170 /// r0 := *p; r1 := 0.0 ; r2 := 0.0 ; r3 := 0.0 (see _mm_load_ss)
172 {
173  AKSIMD_V4F32 vector;
174  vector.m_data[0] = *in_pData;
175  vector.m_data[1] = 0.f;
176  vector.m_data[2] = 0.f;
177  vector.m_data[3] = 0.f;
178 
179  return vector;
180 }
181 
182 //@}
183 ////////////////////////////////////////////////////////////////////////
184 
185 ////////////////////////////////////////////////////////////////////////
186 /// @name AKSIMD storing
187 //@{
188 
189 // _mm_storeu_ps -- The address does not need to be 16-byte aligned.
190 #define AKSIMD_STOREU_V4F32( in_pTo, in_vec ) (*(AKSIMD_V4F32*)(in_pTo)) = (in_vec)
191 
192 // _mm_store_ps -- The address must be 16-byte aligned.
193 // The generic implementation makes no distinction between the two: this macro simply forwards to AKSIMD_STOREU_V4F32.
194 #define AKSIMD_STORE_V4F32( __addr__, __vName__ ) AKSIMD_STOREU_V4F32(__addr__, __vName__)
195 
196 // _mm_storeu_si128 -- assigns in_vec through in_pTo reinterpreted as AKSIMD_V4I32*.
197 #define AKSIMD_STOREU_V4I32( in_pTo, in_vec ) (*(AKSIMD_V4I32*)(in_pTo)) = (in_vec)
198 
199 /// Stores the lower single-precision, floating-point value.
200 /// *p := a0 (see _mm_store_ss)
202 {
203  ((AKSIMD_V4F32*)in_pTo)->m_data[0] = in_vec.m_data[0];
204 }
205 
206 //@}
207 ////////////////////////////////////////////////////////////////////////
208 
209 ////////////////////////////////////////////////////////////////////////
210 /// @name AKSIMD conversion
211 //@{
212 
213 // _mm_cvtepi32_ps
215 {
216  AKSIMD_V4F32 vector;
217  vector.m_data[0] = (AkReal32)in_from.m_data[0];
218  vector.m_data[1] = (AkReal32)in_from.m_data[1];
219  vector.m_data[2] = (AkReal32)in_from.m_data[2];
220  vector.m_data[3] = (AkReal32)in_from.m_data[3];
221 
222  return vector;
223 }
224 // _mm_cvtps_epi32
226 {
227  AKSIMD_V4I32 vector;
228  vector.m_data[0] = (AkInt32)in_from.m_data[0];
229  vector.m_data[1] = (AkInt32)in_from.m_data[1];
230  vector.m_data[2] = (AkInt32)in_from.m_data[2];
231  vector.m_data[3] = (AkInt32)in_from.m_data[3];
232 
233  return vector;
234 }
235 
236 //@}
237 ////////////////////////////////////////////////////////////////////////
238 
239 ////////////////////////////////////////////////////////////////////////
240 /// @name AKSIMD logical operations
241 //@{
242 
243 // _mm_and_si128
245 {
246  AKSIMD_V4I32 vector;
247  vector.m_data[0] = in_vec1.m_data[0] & in_vec2.m_data[0];
248  vector.m_data[1] = in_vec1.m_data[1] & in_vec2.m_data[1];
249  vector.m_data[2] = in_vec1.m_data[2] & in_vec2.m_data[2];
250  vector.m_data[3] = in_vec1.m_data[3] & in_vec2.m_data[3];
251 
252  return vector;
253 }
254 
255 /// Compares the 8 signed 16-bit integers in a and the 8 signed
256 /// 16-bit integers in b for greater than (see _mm_cmpgt_epi16)
258 {
259  AKSIMD_V4I32 vector;
260 
261  AkInt16 *pVec1,*pVec2,*pVec3;
262  pVec1 = (AkInt16*)&in_vec1;
263  pVec2 = (AkInt16*)&in_vec2;
264  pVec3 = (AkInt16*)&vector;
265 
266  pVec3[0] = (pVec1[0] > pVec2[0]) ? 0xffff : 0x0;
267  pVec3[1] = (pVec1[1] > pVec2[1]) ? 0xffff : 0x0;
268  pVec3[2] = (pVec1[2] > pVec2[2]) ? 0xffff : 0x0;
269  pVec3[3] = (pVec1[3] > pVec2[3]) ? 0xffff : 0x0;
270  pVec3[4] = (pVec1[4] > pVec2[4]) ? 0xffff : 0x0;
271  pVec3[5] = (pVec1[5] > pVec2[5]) ? 0xffff : 0x0;
272  pVec3[6] = (pVec1[6] > pVec2[6]) ? 0xffff : 0x0;
273  pVec3[7] = (pVec1[7] > pVec2[7]) ? 0xffff : 0x0;
274 
275  return vector;
276 }
277 
278 /// Compares for less than or equal (see _mm_cmple_ps)
280 {
281  AKSIMD_V4UI32 vector;
282 
283  vector.m_data[0] = (in_vec1.m_data[0] <= in_vec2.m_data[0]) ? 0xffffffff : 0x0;
284  vector.m_data[1] = (in_vec1.m_data[1] <= in_vec2.m_data[1]) ? 0xffffffff : 0x0;
285  vector.m_data[2] = (in_vec1.m_data[2] <= in_vec2.m_data[2]) ? 0xffffffff : 0x0;
286  vector.m_data[3] = (in_vec1.m_data[3] <= in_vec2.m_data[3]) ? 0xffffffff : 0x0;
287 
288  return vector;
289 }
290 
292 {
293  AKSIMD_V4F32 vector;
294 
295  vector.m_data[0] = (AkReal32)((in_vec1.m_data[0] >= in_vec2.m_data[0]) ? 0xffffffff : 0x0);
296  vector.m_data[1] = (AkReal32)((in_vec1.m_data[1] >= in_vec2.m_data[1]) ? 0xffffffff : 0x0);
297  vector.m_data[2] = (AkReal32)((in_vec1.m_data[2] >= in_vec2.m_data[2]) ? 0xffffffff : 0x0);
298  vector.m_data[3] = (AkReal32)((in_vec1.m_data[3] >= in_vec2.m_data[3]) ? 0xffffffff : 0x0);
299 
300  return vector;
301 }
302 
304 {
305  AKSIMD_V4F32 vector;
306 
307  vector.m_data[0] = (AkReal32)((in_vec1.m_data[0] > in_vec2.m_data[0]) ? 0xffffffff : 0x0);
308  vector.m_data[1] = (AkReal32)((in_vec1.m_data[1] > in_vec2.m_data[1]) ? 0xffffffff : 0x0);
309  vector.m_data[2] = (AkReal32)((in_vec1.m_data[2] > in_vec2.m_data[2]) ? 0xffffffff : 0x0);
310  vector.m_data[3] = (AkReal32)((in_vec1.m_data[3] > in_vec2.m_data[3]) ? 0xffffffff : 0x0);
311 
312  return vector;
313 }
314 
316 {
317  AKSIMD_V4F32 vector;
318 
319  vector.m_data[0] = (AkReal32)((in_vec1.m_data[0] <= in_vec2.m_data[0]) ? 0xffffffff : 0x0);
320  vector.m_data[1] = (AkReal32)((in_vec1.m_data[1] <= in_vec2.m_data[1]) ? 0xffffffff : 0x0);
321  vector.m_data[2] = (AkReal32)((in_vec1.m_data[2] <= in_vec2.m_data[2]) ? 0xffffffff : 0x0);
322  vector.m_data[3] = (AkReal32)((in_vec1.m_data[3] <= in_vec2.m_data[3]) ? 0xffffffff : 0x0);
323 
324  return vector;
325 }
326 
328 {
329  AKSIMD_V4F32 vector;
330 
331  vector.m_data[0] = (AkReal32)((in_vec1.m_data[0] < in_vec2.m_data[0]) ? 0xffffffff : 0x0);
332  vector.m_data[1] = (AkReal32)((in_vec1.m_data[1] < in_vec2.m_data[1]) ? 0xffffffff : 0x0);
333  vector.m_data[2] = (AkReal32)((in_vec1.m_data[2] < in_vec2.m_data[2]) ? 0xffffffff : 0x0);
334  vector.m_data[3] = (AkReal32)((in_vec1.m_data[3] < in_vec2.m_data[3]) ? 0xffffffff : 0x0);
335 
336  return vector;
337 }
338 
340 {
341  AKSIMD_V4F32 vector;
342 
343  vector.m_data[0] = (AkReal32)((in_vec1.m_data[0] == in_vec2.m_data[0]) ? 0xffffffff : 0x0);
344  vector.m_data[1] = (AkReal32)((in_vec1.m_data[1] == in_vec2.m_data[1]) ? 0xffffffff : 0x0);
345  vector.m_data[2] = (AkReal32)((in_vec1.m_data[2] == in_vec2.m_data[2]) ? 0xffffffff : 0x0);
346  vector.m_data[3] = (AkReal32)((in_vec1.m_data[3] == in_vec2.m_data[3]) ? 0xffffffff : 0x0);
347 
348  return vector;
349 }
350 
352 {
353  AKSIMD_V4F32 vector;
354 
355  vector.m_data[0] = (AkReal32)(((AkUInt32)in_vec1.m_data[0]) ^ ((AkUInt32)in_vec2.m_data[0]));
356  vector.m_data[1] = (AkReal32)(((AkUInt32)in_vec1.m_data[1]) ^ ((AkUInt32)in_vec2.m_data[1]));
357  vector.m_data[2] = (AkReal32)(((AkUInt32)in_vec1.m_data[2]) ^ ((AkUInt32)in_vec2.m_data[2]));
358  vector.m_data[3] = (AkReal32)(((AkUInt32)in_vec1.m_data[3]) ^ ((AkUInt32)in_vec2.m_data[3]));
359 
360  return vector;
361 }
362 
364 {
365  in_vector.m_data[0] <<= in_shiftBy;
366  in_vector.m_data[1] <<= in_shiftBy;
367  in_vector.m_data[2] <<= in_shiftBy;
368  in_vector.m_data[3] <<= in_shiftBy;
369 
370  return in_vector;
371 }
372 
374 {
375  in_vector.m_data[0] >>= in_shiftBy;
376  in_vector.m_data[1] >>= in_shiftBy;
377  in_vector.m_data[2] >>= in_shiftBy;
378  in_vector.m_data[3] >>= in_shiftBy;
379 
380  return in_vector;
381 }
382 
383 //@}
384 ////////////////////////////////////////////////////////////////////////
385 
386 
387 ////////////////////////////////////////////////////////////////////////
388 /// @name AKSIMD arithmetic
389 //@{
390 // _mm_sub_ps
392 {
393  AKSIMD_V4F32 vector;
394 
395  vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0];
396  vector.m_data[1] = in_vec1.m_data[1] - in_vec2.m_data[1];
397  vector.m_data[2] = in_vec1.m_data[2] - in_vec2.m_data[2];
398  vector.m_data[3] = in_vec1.m_data[3] - in_vec2.m_data[3];
399 
400  return vector;
401 }
402 
403 /// Subtracts the lower single-precision, floating-point values of a and b.
404 /// The upper three single-precision, floating-point values are passed through from a.
405 /// r0 := a0 - b0 ; r1 := a1 ; r2 := a2 ; r3 := a3 (see _mm_sub_ss)
406 
408 {
409  AKSIMD_V4F32 vector;
410 
411  vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0];
412  vector.m_data[1] = in_vec1.m_data[1];
413  vector.m_data[2] = in_vec1.m_data[2];
414  vector.m_data[3] = in_vec1.m_data[3];
415 
416  return vector;
417 }
418 
419 // _mm_add_ps
421 {
422  AKSIMD_V4F32 vector;
423 
424  vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0];
425  vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1];
426  vector.m_data[2] = in_vec1.m_data[2] + in_vec2.m_data[2];
427  vector.m_data[3] = in_vec1.m_data[3] + in_vec2.m_data[3];
428 
429  return vector;
430 }
431 
433 {
434  AKSIMD_V4F32 vector;
435 
436  vector.m_data[0] = in_vec1.m_data[0] / in_vec2.m_data[0];
437  vector.m_data[1] = in_vec1.m_data[1] / in_vec2.m_data[1];
438  vector.m_data[2] = in_vec1.m_data[2] / in_vec2.m_data[2];
439  vector.m_data[3] = in_vec1.m_data[3] / in_vec2.m_data[3];
440 
441  return vector;
442 }
443 
445 {
446  AKSIMD_V2F32 vector;
447 
448  vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0];
449  vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1];
450 
451  return vector;
452 }
453 
454 /// Adds the lower single-precision, floating-point values of a and b; the
455 /// upper three single-precision, floating-point values are passed through from a.
456 /// r0 := a0 + b0; r1 := a1; r2 := a2; r3 := a3 (see _mm_add_ss)
458 {
459  AKSIMD_V4F32 vector;
460 
461  vector.m_data[0] = a.m_data[0] + b.m_data[0];
462  vector.m_data[1] = a.m_data[1];
463  vector.m_data[2] = a.m_data[2];
464  vector.m_data[3] = a.m_data[3];
465 
466  return vector;
467 }
468 
469 // _mm_mul_ps
471 {
472  AKSIMD_V4F32 vector;
473 
474  vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0];
475  vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1];
476  vector.m_data[2] = in_vec1.m_data[2] * in_vec2.m_data[2];
477  vector.m_data[3] = in_vec1.m_data[3] * in_vec2.m_data[3];
478 
479  return vector;
480 }
481 
483 {
484  AKSIMD_V2F32 vector;
485 
486  vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0];
487  vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1];
488 
489  return vector;
490 }
491 
492 /// Multiplies the lower single-precision, floating-point values of
493 /// a and b; the upper three single-precision, floating-point values
494 /// are passed through from a.
495 /// r0 := a0 * b0; r1 := a1; r2 := a2; r3 := a3 (see _mm_mul_ss)
497 {
498  AKSIMD_V4F32 vector;
499 
500  vector.m_data[0] = a.m_data[0] * b.m_data[0];
501  vector.m_data[1] = a.m_data[1];
502  vector.m_data[2] = a.m_data[2];
503  vector.m_data[3] = a.m_data[3];
504 
505  return vector;
506 }
507 
508 /// Vector multiply-add operation: (a * b) + c, built from AKSIMD_MUL_V4F32 and AKSIMD_ADD_V4F32.
509 #define AKSIMD_MADD_V4F32( __a__, __b__, __c__ ) AKSIMD_ADD_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) )
510 #define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) AKSIMD_SUB_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) ///< Vector multiply-subtract operation: (a * b) - c
511 
512 /// Multiply-add built from the _SS variants: AKSIMD_MUL_SS_V4F32( a, b ) followed by AKSIMD_ADD_SS_V4F32( ..., c ).
513 #define AKSIMD_MADD_SS_V4F32( __a__, __b__, __c__ ) AKSIMD_ADD_SS_V4F32( AKSIMD_MUL_SS_V4F32( (__a__), (__b__) ), (__c__) )
514 
515 // _mm_min_ps
517 {
518  AKSIMD_V4F32 vector;
519 
520  vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]);
521  vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]);
522  vector.m_data[2] = AkMin(in_vec1.m_data[2], in_vec2.m_data[2]);
523  vector.m_data[3] = AkMin(in_vec1.m_data[3], in_vec2.m_data[3]);
524 
525  return vector;
526 }
527 
529 {
530  AKSIMD_V2F32 vector;
531 
532  vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]);
533  vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]);
534 
535  return vector;
536 }
537 
538 // _mm_max_ps
540 {
541  AKSIMD_V4F32 vector;
542 
543  vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]);
544  vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]);
545  vector.m_data[2] = AkMax(in_vec1.m_data[2], in_vec2.m_data[2]);
546  vector.m_data[3] = AkMax(in_vec1.m_data[3], in_vec2.m_data[3]);
547 
548  return vector;
549 }
550 
552 {
553  AKSIMD_V2F32 vector;
554 
555  vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]);
556  vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]);
557 
558  return vector;
559 }
560 
562 {
563  AKSIMD_V4F32 vector;
564  vector.m_data[0] = fabsf(in_vec1.m_data[0]);
565  vector.m_data[1] = fabsf(in_vec1.m_data[1]);
566  vector.m_data[2] = fabsf(in_vec1.m_data[2]);
567  vector.m_data[3] = fabsf(in_vec1.m_data[3]);
568  return vector;
569 }
570 
572 {
573  AKSIMD_V4F32 vector;
574  vector.m_data[0] = -in_vec1.m_data[0];
575  vector.m_data[1] = -in_vec1.m_data[1];
576  vector.m_data[2] = -in_vec1.m_data[2];
577  vector.m_data[3] = -in_vec1.m_data[3];
578  return vector;
579 }
580 
581 // _mm_sqrt_ps
583 {
584  AKSIMD_V4F32 vCompare;
585  AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,0) );
586  AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,1) );
587  AKSIMD_GETELEMENT_V4F32(vCompare,2) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,2) );
588  AKSIMD_GETELEMENT_V4F32(vCompare,3) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,3) );
589 
590  //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) );
591 
592  return vCompare /*res*/;
593 }
594 
595 /// Vector reciprocal square root approximation 1/sqrt(a), or equivalently, sqrt(1/a)
597 {
598  AKSIMD_V4F32 vCompare;
599  AKSIMD_GETELEMENT_V4F32(vCompare, 0) = 1.f / sqrtf(AKSIMD_GETELEMENT_V4F32(in_vec, 0));
600  AKSIMD_GETELEMENT_V4F32(vCompare, 1) = 1.f / sqrtf(AKSIMD_GETELEMENT_V4F32(in_vec, 1));
601  AKSIMD_GETELEMENT_V4F32(vCompare, 2) = 1.f / sqrtf(AKSIMD_GETELEMENT_V4F32(in_vec, 2));
602  AKSIMD_GETELEMENT_V4F32(vCompare, 3) = 1.f / sqrtf(AKSIMD_GETELEMENT_V4F32(in_vec, 3));
603 
604  return vCompare;
605 }
606 
608 {
609  AKSIMD_V2F32 vCompare;
610  AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,0) );
611  AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,1) );
612 
613  //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) );
614 
615  return vCompare /*res*/;
616 }
617 
618 //@}
619 ////////////////////////////////////////////////////////////////////////
620 
621 
622 ////////////////////////////////////////////////////////////////////////
623 /// @name AKSIMD packing / unpacking
624 //@{
625 
626 //
627 // _mm_unpacklo_epi16
628 // r0 := a0
629 // r1 := b0
630 // r2 := a1
631 // r3 := b1
632 // r4 := a2
633 // r5 := b2
634 // r6 := a3
635 // r7 := b3
637 {
638  AKSIMD_V4I32 vector;
639  AkInt16 *pVec1,*pVec2,*pDest;
640  pVec1 = (AkInt16*)&in_vec1;
641  pVec2 = (AkInt16*)&in_vec2;
642  pDest = (AkInt16*)&vector;
643 
644  pDest[0] = pVec1[0];
645  pDest[1] = pVec2[0];
646  pDest[2] = pVec1[1];
647  pDest[3] = pVec2[1];
648  pDest[4] = pVec1[2];
649  pDest[5] = pVec2[2];
650  pDest[6] = pVec1[3];
651  pDest[7] = pVec2[3];
652 
653  return vector;
654 }
655 
656 // _mm_unpackhi_epi16
658 {
659  AKSIMD_V4I32 vector;
660  AkInt16 *pVec1,*pVec2,*pDest;
661  pVec1 = (AkInt16*)&in_vec1;
662  pVec2 = (AkInt16*)&in_vec2;
663  pDest = (AkInt16*)&vector;
664 
665  pDest[0] = pVec1[4];
666  pDest[1] = pVec2[4];
667  pDest[2] = pVec1[5];
668  pDest[3] = pVec2[5];
669  pDest[4] = pVec1[6];
670  pDest[5] = pVec2[6];
671  pDest[6] = pVec1[7];
672  pDest[7] = pVec2[7];
673 
674  return vector;
675 }
676 
677 // _mm_unpacklo_ps
679 {
680  AKSIMD_V4F32 vector;
681  vector.m_data[0] = in_vec1.m_data[0];
682  vector.m_data[1] = in_vec2.m_data[0];
683  vector.m_data[2] = in_vec1.m_data[1];
684  vector.m_data[3] = in_vec2.m_data[1];
685 
686  return vector;
687 }
688 
689 // _mm_unpackhi_ps
691 {
692  AKSIMD_V4F32 vector;
693  vector.m_data[0] = in_vec1.m_data[2];
694  vector.m_data[1] = in_vec2.m_data[2];
695  vector.m_data[2] = in_vec1.m_data[3];
696  vector.m_data[3] = in_vec2.m_data[3];
697 
698  return vector;
699 }
700 
701 // _mm_packs_epi32
703 {
704  AKSIMD_V4I32 vector;
705  AkInt16 *pDest = (AkInt16*)&vector;
706 
707  pDest[0] = (AkInt16)AkClamp((AkInt16)in_vec1.m_data[0], -32768, 32767);
708  pDest[1] = (AkInt16)AkClamp((AkInt16)in_vec1.m_data[1], -32768, 32767);
709  pDest[2] = (AkInt16)AkClamp((AkInt16)in_vec1.m_data[2], -32768, 32767);
710  pDest[3] = (AkInt16)AkClamp((AkInt16)in_vec1.m_data[3], -32768, 32767);
711  pDest[4] = (AkInt16)AkClamp((AkInt16)in_vec2.m_data[0], -32768, 32767);
712  pDest[5] = (AkInt16)AkClamp((AkInt16)in_vec2.m_data[1], -32768, 32767);
713  pDest[6] = (AkInt16)AkClamp((AkInt16)in_vec2.m_data[2], -32768, 32767);
714  pDest[7] = (AkInt16)AkClamp((AkInt16)in_vec2.m_data[3], -32768, 32767);
715 
716  return vector;
717 }
718 
719 //@}
720 ////////////////////////////////////////////////////////////////////////
721 
722 
723 //#define AKSIMD_GET_ITEM( vec, index ) vec[index]
724 
725 
726 
727 
728 ////////////////////////////////////////////////////////////////////////
729 /// @name AKSIMD shuffling
730 //@{
731 
732 // See _MM_SHUFFLE: combines four lane selectors into one mask (fp0 at bit 0, fp1 at bit 2, fp2 at bit 4, fp3 at bit 6)
733 #define AKSIMD_SHUFFLE( fp3, fp2, fp1, fp0 ) \
734  (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))
735 
736 // See _mm_shuffle_ps
737 // Usage: AKSIMD_SHUFFLE_V4F32( vec1, vec2, AKSIMD_SHUFFLE( z, y, x, w ) )
738 //#define AKSIMD_SHUFFLE_V4F32( a, b, zyxw )
739 
741 {
742  AKSIMD_V4F32 vector;
743  vector.m_data[0] = xyzw.m_data[(mask) & 0x3];
744  vector.m_data[1] = xyzw.m_data[(mask >> 2) & 0x3];
745  vector.m_data[2] = abcd.m_data[(mask >> 4) & 0x3];
746  vector.m_data[3] = abcd.m_data[(mask >> 6) & 0x3];
747 
748  return vector;
749 }
750 
751 
752 /// Moves the upper two single-precision, floating-point values of b to
753 /// the lower two single-precision, floating-point values of the result.
754 /// The upper two single-precision, floating-point values of a are passed
755 /// through to the result. Note that the shuffle is invoked with (b, a), in that order.
756 /// r3 := a3; r2 := a2; r1 := b3; r0 := b2 (see _mm_movehl_ps)
757 #define AKSIMD_MOVEHL_V4F32( a, b ) \
758  AKSIMD_SHUFFLE_V4F32( (b), (a), AKSIMD_SHUFFLE(3, 2, 3, 2) )
759 
760 /// Moves the lower two single-precision, floating-point values of b to
761 /// the upper two single-precision, floating-point values of the result.
762 /// The lower two single-precision, floating-point values of a are passed
763 /// through to the result. Here the shuffle is invoked with (a, b).
764 /// r3 := b1 ; r2 := b0 ; r1 := a1 ; r0 := a0 (see _mm_movelh_ps)
765 #define AKSIMD_MOVELH_V4F32( a, b ) \
766  AKSIMD_SHUFFLE_V4F32( (a), (b), AKSIMD_SHUFFLE(1, 0, 1, 0) )
767 
/// Swap the 2 lower floats together and the 2 higher floats together: (a, b, c, d) -> (b, a, d, c).
/// Expands to a plain expression (no trailing ';') so it can be nested inside larger expressions.
#define AKSIMD_SHUFFLE_BADC( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(2,3,0,1))

/// Swap the 2 lower floats with the 2 higher floats: (a, b, c, d) -> (c, d, a, b).
/// Expands to a plain expression (no trailing ';') so it can be nested inside larger expressions.
#define AKSIMD_SHUFFLE_CDAB( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(1,0,3,2))
773 
774 /// Barrel-shift all floats by one: (a, b, c, d) -> (b, c, d, a).
775 #define AKSIMD_SHUFFLE_BCDA( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(0,3,2,1))
776 
777  /// Duplicates the odd items into the even items (d c b a -> d d b b, listed from highest element to lowest)
778 #define AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1))
779 
780  /// Duplicates the even items into the odd items (d c b a -> c c a a, listed from highest element to lowest)
781 #define AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0))
782 
783 
784 //#include <AK/SoundEngine/Platforms/Generic/AkSimdShuffle.h>
785 
786 //@}
787 ////////////////////////////////////////////////////////////////////////
788 
789 // Old AKSIMD -- will search-and-replace later
790 #define AkReal32Vector AKSIMD_V4F32 ///< Legacy alias for AKSIMD_V4F32
791 #define AKSIMD_LOAD1( __scalar__ ) AKSIMD_LOAD1_V4F32( &__scalar__ ) ///< NOTE(review): forwards the ADDRESS of __scalar__ (argument not parenthesized) -- confirm AKSIMD_LOAD1_V4F32 accepts a pointer here
792 #define AKSIMD_LOADVEC(v) AKSIMD_LOAD_V4F32((const AKSIMD_F32*)((v))) ///< Legacy name: casts v to const AKSIMD_F32* and loads it with AKSIMD_LOAD_V4F32
793 #define AKSIMD_MUL AKSIMD_MUL_V4F32 ///< Legacy alias for AKSIMD_MUL_V4F32
794 #define AKSIMD_STOREVEC AKSIMD_STORE_V4F32 ///< Legacy alias for AKSIMD_STORE_V4F32
795 
796 /// Faked in-place vector horizontal add.
797 /// \akwarning
798 /// Don't expect this to be very efficient.
799 /// \endakwarning
801 {
802  AKSIMD_V4F32 vAb = AKSIMD_SHUFFLE_V4F32(vVec, vVec, 0xB1);
803  AKSIMD_V4F32 vHaddAb = AKSIMD_ADD_V4F32(vVec, vAb);
804  AKSIMD_V4F32 vHaddCd = AKSIMD_SHUFFLE_V4F32(vHaddAb, vHaddAb, 0x4E);
805  AKSIMD_V4F32 vHaddAbcd = AKSIMD_ADD_V4F32(vHaddAb, vHaddCd);
806  return vHaddAbcd;
807 }
808 
809 /// Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary parts
811 {
812  static const AKSIMD_V4F32 vSign = { 1.f, -1.f, 1.f, -1.f };
813 
814  AKSIMD_V4F32 vTmp1 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(2,2,0,0));
815  vTmp1 = AKSIMD_MUL_V4F32( vTmp1, vCIn2 );
816  AKSIMD_V4F32 vTmp2 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(3,3,1,1));
817  vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vSign );
818  vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vCIn2 );
819  vTmp2 = AKSIMD_SHUFFLE_BADC( vTmp2 );
820  vTmp2 = AKSIMD_ADD_V4F32( vTmp2, vTmp1 );
821  return vTmp2;
822 }
823 
824 #define AKSIMD_SPLAT_V4F32(var, idx) AKSIMD_SHUFFLE_V4F32(var,var, AKSIMD_SHUFFLE(idx,idx,idx,idx)) ///< Shuffles var with itself using the same selector idx for all four elements
825 
826 #define AK_SIGN_BIT( val ) (((AkUInt32)val) >> 31) ///< Casts val to AkUInt32 and returns bit 31 of the result. NOTE(review): for a float argument the cast converts the numeric value, it does not reinterpret the bits
827 
828 static AkForceInline int AKSIMD_MASK_V4F32( const AKSIMD_V4F32& in_vec )
829 {
830  return AK_SIGN_BIT(in_vec.m_data[0]) | AK_SIGN_BIT(in_vec.m_data[1]) << 1 | AK_SIGN_BIT(in_vec.m_data[2]) << 2 | AK_SIGN_BIT(in_vec.m_data[3]) << 3;
831 }
832 
834 {
835  AKSIMD_V4F32 r;
836  r.m_data[0] = 1.f / v.m_data[0];
837  r.m_data[1] = 1.f / v.m_data[1];
838  r.m_data[2] = 1.f / v.m_data[2];
839  r.m_data[3] = 1.f / v.m_data[3];
840  return r;
841 }
842 
844 {
845  AKSIMD_V4F32 r;
846  r.m_data[0] = ceil(x.m_data[0]);
847  r.m_data[1] = ceil(x.m_data[1]);
848  r.m_data[2] = ceil(x.m_data[2]);
849  r.m_data[3] = ceil(x.m_data[3]);
850  return r;
851 }
852 
853 #endif //_AKSIMD_GENERIC_H_
854 
AkForceInline AKSIMD_V2F32 AKSIMD_SET_V2F32(AKSIMD_F32 in_value)
Definition: AkSimd.h:126
float32_t AKSIMD_F32
32-bit float
Definition: AkSimd.h:71
AkForceInline AKSIMD_V4F32 AKSIMD_ADD_SS_V4F32(const AKSIMD_V4F32 &a, const AKSIMD_V4F32 &b)
Definition: AkSimd.h:457
static AkForceInline int AKSIMD_MASK_V4F32(const AKSIMD_V4F32 &in_vec)
Definition: AkSimd.h:828
AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTLEFT_V4I32(AKSIMD_V4I32 in_vector, int in_shiftBy)
Definition: AkSimd.h:363
AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKLO_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:683
AkForceInline AKSIMD_V4F32 AKSIMD_SHUFFLE_V4F32(const AKSIMD_V4F32 &xyzw, const AKSIMD_V4F32 &abcd, int mask)
Definition: AkSimd.h:740
AkForceInline AKSIMD_V4F32 AKSIMD_DIV_V4F32(AKSIMD_V4F32 a, AKSIMD_V4F32 b)
Rough estimation of division
Definition: AkSimd.h:553
uint32x4_t AKSIMD_V4COND
Vector of 4 comparison results
Definition: AkSimd.h:75
int16_t AkInt16
Signed 16-bit integer
Definition: AkTypes.h:98
AkForceInline AKSIMD_V4F32 AKSIMD_LTEQ_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:315
AkForceInline AKSIMD_V4F32 AKSIMD_SUB_SS_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:407
#define AKSIMD_GETELEMENT_V4F32(__vName, __num__)
Definition: AkSimd.h:75
AkForceInline AKSIMD_V4F32 AKSIMD_MIN_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:516
AkReal32 m_data[4]
Definition: AkSimd.h:49
AkForceInline AKSIMD_V4F32 AKSIMD_SUB_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:391
AkForceInline AKSIMD_V4F32 AKSIMD_MUL_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:470
AkForceInline AKSIMD_V4F32 AKSIMD_NEG_V4F32(const AKSIMD_V4F32 &in_vec1)
Definition: AkSimd.h:571
#define AkClamp(x, min, max)
Definition: AkPlatformFuncs.h:95
static AkForceInline AKSIMD_V4F32 AKSIMD_CEIL_V4F32(const AKSIMD_V4F32 &x)
Definition: AkSimd.h:843
AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKHI_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:695
AkReal32 m_data[2]
Definition: AkSimd.h:48
AkForceInline AKSIMD_V4F32 AKSIMD_CONVERT_V4I32_TO_V4F32(const AKSIMD_V4I32 &in_from)
Definition: AkSimd.h:214
AkForceInline AKSIMD_V4F32 AKSIMD_ABS_V4F32(const AKSIMD_V4F32 &in_vec1)
Definition: AkSimd.h:561
AkForceInline AKSIMD_V4F32 AKSIMD_GTEQ_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:291
AkForceInline AKSIMD_V4I32 AKSIMD_AND_V4I32(const AKSIMD_V4I32 &in_vec1, const AKSIMD_V4I32 &in_vec2)
Definition: AkSimd.h:244
AkInt32 AKSIMD_I32
32-bit signed integer
Definition: AkSimd.h:44
AkForceInline AKSIMD_V2F32 AKSIMD_MAX_V2F32(const AKSIMD_V2F32 &in_vec1, const AKSIMD_V2F32 &in_vec2)
Definition: AkSimd.h:551
float32x4_t AKSIMD_V4F32
Vector of 4 32-bit floats
Definition: AkSimd.h:73
AkForceInline AKSIMD_V4I32 AKSIMD_CMPGT_V8I16(const AKSIMD_V4I32 &in_vec1, const AKSIMD_V4I32 &in_vec2)
Definition: AkSimd.h:257
AkForceInline AKSIMD_V2F32 AKSIMD_MIN_V2F32(const AKSIMD_V2F32 &in_vec1, const AKSIMD_V2F32 &in_vec2)
Definition: AkSimd.h:528
AkForceInline AKSIMD_V4F32 AKSIMD_SETZERO_V4F32()
Definition: AkSimd.h:136
#define AkForceInline
Force inlining
Definition: AkTypes.h:66
AkForceInline AKSIMD_V4F32 AKSIMD_MUL_SS_V4F32(const AKSIMD_V4F32 &a, const AKSIMD_V4F32 &b)
Definition: AkSimd.h:496
AkForceInline AKSIMD_V4F32 AKSIMD_SQRT_V4F32(const AKSIMD_V4F32 &in_vec)
Definition: AkSimd.h:582
int32x4_t AKSIMD_V4I32
Vector of 4 32-bit signed integers
Definition: AkSimd.h:65
AkForceInline AKSIMD_V4I32 AKSIMD_PACKS_V4I32(const AKSIMD_V4I32 &in_vec1, const AKSIMD_V4I32 &in_vec2)
Definition: AkSimd.h:707
int32_t AkInt32
Signed 32-bit integer
Definition: AkTypes.h:99
AkForceInline AKSIMD_V4F32 AKSIMD_RSQRT_V4F32(const AKSIMD_V4F32 &in_vec)
Vector reciprocal square root approximation 1/sqrt(a), or equivalently, sqrt(1/a)
Definition: AkSimd.h:596
AkForceInline AKSIMD_V2F32 AKSIMD_SQRT_V2F32(const AKSIMD_V2F32 &in_vec)
Definition: AkSimd.h:607
AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKHI_VECTOR8I16(const AKSIMD_V4I32 &in_vec1, const AKSIMD_V4I32 &in_vec2)
Definition: AkSimd.h:657
AkForceInline void AKSIMD_STORE1_V4F32(AKSIMD_F32 *in_pTo, const AKSIMD_V4F32 &in_vec)
Definition: AkSimd.h:201
static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL_V4F32(const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2)
Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary par...
Definition: AkSimd.h:810
#define AkMin(x1, x2)
Definition: AkPlatformFuncs.h:94
#define AKSIMD_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: AkSimd.h:733
AkForceInline AKSIMD_V2F32 AKSIMD_MUL_V2F32(const AKSIMD_V2F32 &in_vec1, const AKSIMD_V2F32 &in_vec2)
Definition: AkSimd.h:482
AkForceInline AKSIMD_V4F32 AKSIMD_EQ_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:339
AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKLO_VECTOR8I16(const AKSIMD_V4I32 &in_vec1, const AKSIMD_V4I32 &in_vec2)
Definition: AkSimd.h:636
uint32x4_t AKSIMD_V4UI32
Vector of 4 32-bit unsigned integers
Definition: AkSimd.h:68
AkForceInline AKSIMD_V4F32 AKSIMD_MAX_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:539
AkForceInline AKSIMD_V4I32 AKSIMD_TRUNCATE_V4F32_TO_V4I32(const AKSIMD_V4F32 &in_from)
Definition: AkSimd.h:225
float32x2_t AKSIMD_V2F32
Vector of 2 32-bit floats
Definition: AkSimd.h:72
AkForceInline AKSIMD_V4F32 AKSIMD_SET_V4F32(AKSIMD_F32 in_value)
Definition: AkSimd.h:114
AkForceInline AKSIMD_V4UI32 AKSIMD_CMPLE_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Compares for less than or equal (see _mm_cmple_ps)
Definition: AkSimd.h:279
AkForceInline AKSIMD_V4F32 AKSIMD_XOR_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:351
AkForceInline AKSIMD_V4I32 AKSIMD_SETZERO_V4I32()
Definition: AkSimd.h:156
#define AKSIMD_SHUFFLE_BADC(__a__)
Swap the 2 lower floats together and the 2 higher floats together.
Definition: AkSimd.h:769
static AkForceInline AKSIMD_V4F32 AKSIMD_HORIZONTALADD_V4F32(AKSIMD_V4F32 vVec)
Definition: AkSimd.h:800
AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTRIGHTARITH_V4I32(AKSIMD_V4I32 in_vector, int in_shiftBy)
Definition: AkSimd.h:373
AkForceInline AKSIMD_V2F32 AKSIMD_ADD_V2F32(const AKSIMD_V2F32 &in_vec1, const AKSIMD_V2F32 &in_vec2)
Definition: AkSimd.h:444
AkForceInline AKSIMD_V4F32 AKSIMD_ADD_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:420
#define AkMax(x1, x2)
Definition: AkPlatformFuncs.h:93
uint32_t AkUInt32
Unsigned 32-bit integer
Definition: AkTypes.h:86
static AkForceInline AKSIMD_V4F32 AKSIMD_RECIP_V4F32(const AKSIMD_V4F32 &v)
Definition: AkSimd.h:833
#define AK_SIGN_BIT(val)
Definition: AkSimd.h:826
AkInt32 m_data[4]
Definition: AkSimd.h:45
AkForceInline AKSIMD_V4F32 AKSIMD_LOAD_SS_V4F32(const AKSIMD_F32 *in_pData)
Definition: AkSimd.h:171
AkForceInline AKSIMD_V4F32 AKSIMD_GT_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:303
AkForceInline AKSIMD_V4F32 AKSIMD_LT_V4F32(const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
Definition: AkSimd.h:327
float AkReal32
32-bit floating point
Definition: AkTypes.h:104
AkForceInline AKSIMD_V4F32 AKSIMD_LOAD1_V4F32(AKSIMD_F32 in_value)
Definition: AkSimd.h:102
AkUInt32 m_data[4]
Definition: AkSimd.h:46
AkForceInline AKSIMD_V2F32 AKSIMD_SETZERO_V2F32()
Definition: AkSimd.h:147