Wwise SDK 2017.1.9
Version
menu_open
link
include/AK/SoundEngine/Platforms/Generic/AkSimd.h
Go to the documentation of this file.00001 /******************************************************************************* 00002 The content of this file includes portions of the AUDIOKINETIC Wwise Technology 00003 released in source code form as part of the SDK installer package. 00004 00005 Commercial License Usage 00006 00007 Licensees holding valid commercial licenses to the AUDIOKINETIC Wwise Technology 00008 may use this file in accordance with the end user license agreement provided 00009 with the software or, alternatively, in accordance with the terms contained in a 00010 written agreement between you and Audiokinetic Inc. 00011 00012 Apache License Usage 00013 00014 Alternatively, this file may be used under the Apache License, Version 2.0 (the 00015 "Apache License"); you may not use this file except in compliance with the 00016 Apache License. You may obtain a copy of the Apache License at 00017 http://www.apache.org/licenses/LICENSE-2.0. 00018 00019 Unless required by applicable law or agreed to in writing, software distributed 00020 under the Apache License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES 00021 OR CONDITIONS OF ANY KIND, either express or implied. See the Apache License for 00022 the specific language governing permissions and limitations under the License. 00023 00024 Version: <VERSION> Build: <BUILDNUMBER> 00025 Copyright (c) <COPYRIGHTYEAR> Audiokinetic Inc. 
00026 *******************************************************************************/ 00027 00028 // AkSimd.h 00029 00030 /// \file 00031 /// AKSIMD - Generic (no SIMD support) implementation 00032 00033 #ifndef _AKSIMD_GENERIC_H_ 00034 #define _AKSIMD_GENERIC_H_ 00035 00036 #include <math.h> 00037 #include <string.h> 00038 #include <AK/SoundEngine/Common/AkTypes.h> 00039 #include <AK/Tools/Common/AkPlatformFuncs.h> 00040 00041 //////////////////////////////////////////////////////////////////////// 00042 /// @name AKSIMD types 00043 //@{ 00044 typedef AkInt32 AKSIMD_I32; ///< 32-bit signed integer 00045 typedef struct { AkInt32 m_data[4]; } AKSIMD_V4I32; ///< Vector of 4 32-bit signed integers 00046 typedef struct { AkUInt32 m_data[4]; } AKSIMD_V4UI32; ///< Vector of 4 32-bit signed integers 00047 typedef AkReal32 AKSIMD_F32; ///< 32-bit float 00048 typedef struct { AkReal32 m_data[2]; } AKSIMD_V2F32; ///< Vector of 2 32-bit floats 00049 typedef struct { AkReal32 m_data[4]; } AKSIMD_V4F32; ///< Vector of 4 32-bit floats 00050 typedef AKSIMD_V4UI32 AKSIMD_V4COND; ///< Vector of 4 comparison results 00051 00052 #pragma pack(push,1) 00053 typedef struct { AkInt32 m_data[4]; } AKSIMD_V4I32_UNALIGNED; ///< Unaligned Vector of 4 32-bit signed integers 00054 typedef struct { AkUInt32 m_data[4]; } AKSIMD_V4UI32_UNALIGNED; ///< Unaligned Vector of 4 32-bit signed integers 00055 typedef struct { AkReal32 m_data[2]; } AKSIMD_V2F32_UNALIGNED; ///< Unaligned Vector of 2 32-bit floats 00056 typedef struct { AkReal32 m_data[4]; } AKSIMD_V4F32_UNALIGNED; ///< Unaligned Vector of 4 32-bit floats 00057 #pragma pack(pop) 00058 00059 //@} 00060 //////////////////////////////////////////////////////////////////////// 00061 00062 #ifndef AKSIMD_GETELEMENT_V4F32 00063 #define AKSIMD_GETELEMENT_V4F32( __vName, __num__ ) (__vName).m_data[(__num__)] 00064 #endif 00065 00066 #ifndef AKSIMD_GETELEMENT_V2F32 00067 #define AKSIMD_GETELEMENT_V2F32( __vName, __num__ ) 
(__vName).m_data[(__num__)] 00068 #endif 00069 00070 #ifndef AKSIMD_GETELEMENT_V4I32 00071 #define AKSIMD_GETELEMENT_V4I32( __vName, __num__ ) (__vName).m_data[(__num__)] 00072 #endif 00073 00074 //////////////////////////////////////////////////////////////////////// 00075 /// @name Platform specific memory size alignment for allocation purposes 00076 //@{ 00077 #define AKSIMD_ALIGNSIZE( __Size__ ) (((__Size__) + 15) & ~15) 00078 //@} 00079 //////////////////////////////////////////////////////////////////////// 00080 00081 //////////////////////////////////////////////////////////////////////// 00082 /// @name AKSIMD loading / setting 00083 //@{ 00084 #define AKSIMD_LOADU_V4I32( in_pData ) (*(in_pData)) 00085 00086 #define AKSIMD_LOADU_V4F32( in_pValue ) (*(AKSIMD_V4F32*)(in_pValue)) 00087 00088 #define AKSIMD_LOAD_V4F32( in_pValue ) (*(AKSIMD_V4F32*)(in_pValue)) 00089 00090 AkForceInline AKSIMD_V4F32 AKSIMD_LOAD1_V4F32( AKSIMD_F32 in_value ) 00091 { 00092 AKSIMD_V4F32 vector; 00093 vector.m_data[0] = in_value; 00094 vector.m_data[1] = in_value; 00095 vector.m_data[2] = in_value; 00096 vector.m_data[3] = in_value; 00097 00098 return vector; 00099 } 00100 00101 // _mm_set_ps1 00102 AkForceInline AKSIMD_V4F32 AKSIMD_SET_V4F32( AKSIMD_F32 in_value ) 00103 { 00104 AKSIMD_V4F32 vector; 00105 vector.m_data[0] = in_value; 00106 vector.m_data[1] = in_value; 00107 vector.m_data[2] = in_value; 00108 vector.m_data[3] = in_value; 00109 00110 return vector; 00111 } 00112 00113 00114 AkForceInline AKSIMD_V2F32 AKSIMD_SET_V2F32( AKSIMD_F32 in_value ) 00115 { 00116 AKSIMD_V2F32 vector; 00117 vector.m_data[0] = in_value; 00118 vector.m_data[1] = in_value; 00119 00120 return vector; 00121 } 00122 00123 // _mm_setzero_ps() 00124 AkForceInline AKSIMD_V4F32 AKSIMD_SETZERO_V4F32() 00125 { 00126 AKSIMD_V4F32 vector; 00127 vector.m_data[0] = 0.f; 00128 vector.m_data[1] = 0.f; 00129 vector.m_data[2] = 0.f; 00130 vector.m_data[3] = 0.f; 00131 00132 return vector; 00133 } 00134 00135 
AkForceInline AKSIMD_V2F32 AKSIMD_SETZERO_V2F32() 00136 { 00137 AKSIMD_V2F32 vector; 00138 vector.m_data[0] = 0.f; 00139 vector.m_data[1] = 0.f; 00140 00141 return vector; 00142 } 00143 // _mm_setzero_si128() 00144 AkForceInline AKSIMD_V4I32 AKSIMD_SETZERO_V4I32() 00145 { 00146 AKSIMD_V4I32 vector; 00147 vector.m_data[0] = 0; 00148 vector.m_data[1] = 0; 00149 vector.m_data[2] = 0; 00150 vector.m_data[3] = 0; 00151 00152 return vector; 00153 } 00154 00155 00156 /// Loads a single-precision, floating-point value into the low word 00157 /// and clears the upper three words. 00158 /// r0 := *p; r1 := 0.0 ; r2 := 0.0 ; r3 := 0.0 (see _mm_load_ss) 00159 AkForceInline AKSIMD_V4F32 AKSIMD_LOAD_SS_V4F32( const AKSIMD_F32* in_pData ) 00160 { 00161 AKSIMD_V4F32 vector; 00162 vector.m_data[0] = *in_pData; 00163 vector.m_data[1] = 0.f; 00164 vector.m_data[2] = 0.f; 00165 vector.m_data[3] = 0.f; 00166 00167 return vector; 00168 } 00169 00170 //@} 00171 //////////////////////////////////////////////////////////////////////// 00172 00173 //////////////////////////////////////////////////////////////////////// 00174 /// @name AKSIMD storing 00175 //@{ 00176 00177 // _mm_storeu_ps -- The address does not need to be 16-byte aligned. 00178 #define AKSIMD_STOREU_V4F32( in_pTo, in_vec ) (*(AKSIMD_V4F32*)(in_pTo)) = (in_vec) 00179 00180 // _mm_store_ps -- The address must be 16-byte aligned. 00181 // ????? _mm_storeu_ps vs _mm_store_ps ????? 00182 #define AKSIMD_STORE_V4F32( __addr__, __vName__ ) AKSIMD_STOREU_V4F32(__addr__, __vName__) 00183 00184 // _mm_storeu_si128 00185 #define AKSIMD_STOREU_V4I32( in_pTo, in_vec ) (*(AKSIMD_V4I32*)(in_pTo)) = (in_vec) 00186 00187 /// Stores the lower single-precision, floating-point value. 
00188 /// *p := a0 (see _mm_store_ss) 00189 AkForceInline void AKSIMD_STORE1_V4F32( AKSIMD_F32* in_pTo, const AKSIMD_V4F32& in_vec ) 00190 { 00191 ((AKSIMD_V4F32*)in_pTo)->m_data[0] = in_vec.m_data[0]; 00192 } 00193 00194 //@} 00195 //////////////////////////////////////////////////////////////////////// 00196 00197 //////////////////////////////////////////////////////////////////////// 00198 /// @name AKSIMD conversion 00199 //@{ 00200 00201 // _mm_cvtepi32_ps 00202 AkForceInline AKSIMD_V4F32 AKSIMD_CONVERT_V4I32_TO_V4F32( const AKSIMD_V4I32& in_from ) 00203 { 00204 AKSIMD_V4F32 vector; 00205 vector.m_data[0] = (AkReal32)in_from.m_data[0]; 00206 vector.m_data[1] = (AkReal32)in_from.m_data[1]; 00207 vector.m_data[2] = (AkReal32)in_from.m_data[2]; 00208 vector.m_data[3] = (AkReal32)in_from.m_data[3]; 00209 00210 return vector; 00211 } 00212 // _mm_cvtps_epi32 00213 AkForceInline AKSIMD_V4I32 AKSIMD_CONVERT_V4F32_TO_V4I32( const AKSIMD_V4F32& in_from ) 00214 { 00215 AKSIMD_V4I32 vector; 00216 vector.m_data[0] = (AkInt32)in_from.m_data[0]; 00217 vector.m_data[1] = (AkInt32)in_from.m_data[1]; 00218 vector.m_data[2] = (AkInt32)in_from.m_data[2]; 00219 vector.m_data[3] = (AkInt32)in_from.m_data[3]; 00220 00221 return vector; 00222 } 00223 00224 //@} 00225 //////////////////////////////////////////////////////////////////////// 00226 00227 //////////////////////////////////////////////////////////////////////// 00228 /// @name AKSIMD logical operations 00229 //@{ 00230 00231 // _mm_and_si128 00232 AkForceInline AKSIMD_V4I32 AKSIMD_AND_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00233 { 00234 AKSIMD_V4I32 vector; 00235 vector.m_data[0] = in_vec1.m_data[0] & in_vec2.m_data[0]; 00236 vector.m_data[1] = in_vec1.m_data[1] & in_vec2.m_data[1]; 00237 vector.m_data[2] = in_vec1.m_data[2] & in_vec2.m_data[2]; 00238 vector.m_data[3] = in_vec1.m_data[3] & in_vec2.m_data[3]; 00239 00240 return vector; 00241 } 00242 00243 /// Compares the 8 signed 16-bit 
integers in a and the 8 signed 00244 /// 16-bit integers in b for greater than (see _mm_cmpgt_epi16) 00245 AkForceInline AKSIMD_V4I32 AKSIMD_CMPGT_V8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00246 { 00247 AKSIMD_V4I32 vector; 00248 00249 AkInt16 *pVec1,*pVec2,*pVec3; 00250 pVec1 = (AkInt16*)&in_vec1; 00251 pVec2 = (AkInt16*)&in_vec2; 00252 pVec3 = (AkInt16*)&vector; 00253 00254 pVec3[0] = (pVec1[0] > pVec2[0]) ? 0xffff : 0x0; 00255 pVec3[1] = (pVec1[1] > pVec2[1]) ? 0xffff : 0x0; 00256 pVec3[2] = (pVec1[2] > pVec2[2]) ? 0xffff : 0x0; 00257 pVec3[3] = (pVec1[3] > pVec2[3]) ? 0xffff : 0x0; 00258 pVec3[4] = (pVec1[4] > pVec2[4]) ? 0xffff : 0x0; 00259 pVec3[5] = (pVec1[5] > pVec2[5]) ? 0xffff : 0x0; 00260 pVec3[6] = (pVec1[6] > pVec2[6]) ? 0xffff : 0x0; 00261 pVec3[7] = (pVec1[7] > pVec2[7]) ? 0xffff : 0x0; 00262 00263 return vector; 00264 } 00265 00266 /// Compares for less than or equal (see _mm_cmple_ps) 00267 AkForceInline AKSIMD_V4UI32 AKSIMD_CMPLE_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00268 { 00269 AKSIMD_V4UI32 vector; 00270 00271 vector.m_data[0] = (in_vec1.m_data[0] <= in_vec2.m_data[0]) ? 0xffffffff : 0x0; 00272 vector.m_data[1] = (in_vec1.m_data[1] <= in_vec2.m_data[1]) ? 0xffffffff : 0x0; 00273 vector.m_data[2] = (in_vec1.m_data[2] <= in_vec2.m_data[2]) ? 0xffffffff : 0x0; 00274 vector.m_data[3] = (in_vec1.m_data[3] <= in_vec2.m_data[3]) ? 
0xffffffff : 0x0; 00275 00276 return vector; 00277 } 00278 00279 AkForceInline AKSIMD_V4F32 AKSIMD_XOR_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00280 { 00281 AKSIMD_V4F32 vector; 00282 00283 vector.m_data[0] = (AkReal32)(((AkUInt32)in_vec1.m_data[0]) ^ ((AkUInt32)in_vec2.m_data[0])); 00284 vector.m_data[1] = (AkReal32)(((AkUInt32)in_vec1.m_data[1]) ^ ((AkUInt32)in_vec2.m_data[1])); 00285 vector.m_data[2] = (AkReal32)(((AkUInt32)in_vec1.m_data[2]) ^ ((AkUInt32)in_vec2.m_data[2])); 00286 vector.m_data[3] = (AkReal32)(((AkUInt32)in_vec1.m_data[3]) ^ ((AkUInt32)in_vec2.m_data[3])); 00287 00288 return vector; 00289 } 00290 00291 AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTLEFT_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy) 00292 { 00293 in_vector.m_data[0] <<= in_shiftBy; 00294 in_vector.m_data[1] <<= in_shiftBy; 00295 in_vector.m_data[2] <<= in_shiftBy; 00296 in_vector.m_data[3] <<= in_shiftBy; 00297 00298 return in_vector; 00299 } 00300 00301 AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTRIGHTARITH_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy) 00302 { 00303 in_vector.m_data[0] >>= in_shiftBy; 00304 in_vector.m_data[1] >>= in_shiftBy; 00305 in_vector.m_data[2] >>= in_shiftBy; 00306 in_vector.m_data[3] >>= in_shiftBy; 00307 00308 return in_vector; 00309 } 00310 00311 //@} 00312 //////////////////////////////////////////////////////////////////////// 00313 00314 00315 //////////////////////////////////////////////////////////////////////// 00316 /// @name AKSIMD arithmetic 00317 //@{ 00318 // _mm_sub_ps 00319 AkForceInline AKSIMD_V4F32 AKSIMD_SUB_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00320 { 00321 AKSIMD_V4F32 vector; 00322 00323 vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0]; 00324 vector.m_data[1] = in_vec1.m_data[1] - in_vec2.m_data[1]; 00325 vector.m_data[2] = in_vec1.m_data[2] - in_vec2.m_data[2]; 00326 vector.m_data[3] = in_vec1.m_data[3] - in_vec2.m_data[3]; 00327 00328 return vector; 00329 } 00330 00331 /// 
Subtracts the lower single-precision, floating-point values of a and b. 00332 /// The upper three single-precision, floating-point values are passed through from a. 00333 /// r0 := a0 - b0 ; r1 := a1 ; r2 := a2 ; r3 := a3 (see _mm_sub_ss) 00334 00335 AkForceInline AKSIMD_V4F32 AKSIMD_SUB_SS_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00336 { 00337 AKSIMD_V4F32 vector; 00338 00339 vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0]; 00340 vector.m_data[1] = in_vec1.m_data[1]; 00341 vector.m_data[2] = in_vec1.m_data[2]; 00342 vector.m_data[3] = in_vec1.m_data[3]; 00343 00344 return vector; 00345 } 00346 00347 // _mm_add_ps 00348 AkForceInline AKSIMD_V4F32 AKSIMD_ADD_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00349 { 00350 AKSIMD_V4F32 vector; 00351 00352 vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0]; 00353 vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1]; 00354 vector.m_data[2] = in_vec1.m_data[2] + in_vec2.m_data[2]; 00355 vector.m_data[3] = in_vec1.m_data[3] + in_vec2.m_data[3]; 00356 00357 return vector; 00358 } 00359 00360 AkForceInline AKSIMD_V4F32 AKSIMD_DIV_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00361 { 00362 AKSIMD_V4F32 vector; 00363 00364 vector.m_data[0] = in_vec1.m_data[0] / in_vec2.m_data[0]; 00365 vector.m_data[1] = in_vec1.m_data[1] / in_vec2.m_data[1]; 00366 vector.m_data[2] = in_vec1.m_data[2] / in_vec2.m_data[2]; 00367 vector.m_data[3] = in_vec1.m_data[3] / in_vec2.m_data[3]; 00368 00369 return vector; 00370 } 00371 00372 AkForceInline AKSIMD_V2F32 AKSIMD_ADD_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00373 { 00374 AKSIMD_V2F32 vector; 00375 00376 vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0]; 00377 vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1]; 00378 00379 return vector; 00380 } 00381 00382 /// Adds the lower single-precision, floating-point values of a and b; the 00383 /// upper three single-precision, 
floating-point values are passed through from a. 00384 /// r0 := a0 + b0; r1 := a1; r2 := a2; r3 := a3 (see _mm_add_ss) 00385 AkForceInline AKSIMD_V4F32 AKSIMD_ADD_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b ) 00386 { 00387 AKSIMD_V4F32 vector; 00388 00389 vector.m_data[0] = a.m_data[0] + b.m_data[0]; 00390 vector.m_data[1] = a.m_data[1]; 00391 vector.m_data[2] = a.m_data[2]; 00392 vector.m_data[3] = a.m_data[3]; 00393 00394 return vector; 00395 } 00396 00397 // _mm_mul_ps 00398 AkForceInline AKSIMD_V4F32 AKSIMD_MUL_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00399 { 00400 AKSIMD_V4F32 vector; 00401 00402 vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0]; 00403 vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1]; 00404 vector.m_data[2] = in_vec1.m_data[2] * in_vec2.m_data[2]; 00405 vector.m_data[3] = in_vec1.m_data[3] * in_vec2.m_data[3]; 00406 00407 return vector; 00408 } 00409 00410 AkForceInline AKSIMD_V2F32 AKSIMD_MUL_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00411 { 00412 AKSIMD_V2F32 vector; 00413 00414 vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0]; 00415 vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1]; 00416 00417 return vector; 00418 } 00419 00420 /// Multiplies the lower single-precision, floating-point values of 00421 /// a and b; the upper three single-precision, floating-point values 00422 /// are passed through from a. 00423 /// r0 := a0 * b0; r1 := a1; r2 := a2; r3 := a3 (see _mm_add_ss) 00424 AkForceInline AKSIMD_V4F32 AKSIMD_MUL_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b ) 00425 { 00426 AKSIMD_V4F32 vector; 00427 00428 vector.m_data[0] = a.m_data[0] * b.m_data[0]; 00429 vector.m_data[1] = a.m_data[1]; 00430 vector.m_data[2] = a.m_data[2]; 00431 vector.m_data[3] = a.m_data[3]; 00432 00433 return vector; 00434 } 00435 00436 /// Vector multiply-add operation. 
00437 #define AKSIMD_MADD_V4F32( __a__, __b__, __c__ ) AKSIMD_ADD_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) 00438 #define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) AKSIMD_SUB_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) 00439 00440 /// Vector multiply-add operation. 00441 #define AKSIMD_MADD_SS_V4F32( __a__, __b__, __c__ ) AKSIMD_ADD_SS_V4F32( AKSIMD_MUL_SS_V4F32( (__a__), (__b__) ), (__c__) ) 00442 00443 // _mm_min_ps 00444 AkForceInline AKSIMD_V4F32 AKSIMD_MIN_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00445 { 00446 AKSIMD_V4F32 vector; 00447 00448 vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]); 00449 vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]); 00450 vector.m_data[2] = AkMin(in_vec1.m_data[2], in_vec2.m_data[2]); 00451 vector.m_data[3] = AkMin(in_vec1.m_data[3], in_vec2.m_data[3]); 00452 00453 return vector; 00454 } 00455 00456 AkForceInline AKSIMD_V2F32 AKSIMD_MIN_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00457 { 00458 AKSIMD_V2F32 vector; 00459 00460 vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]); 00461 vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]); 00462 00463 return vector; 00464 } 00465 00466 // _mm_max_ps 00467 AkForceInline AKSIMD_V4F32 AKSIMD_MAX_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00468 { 00469 AKSIMD_V4F32 vector; 00470 00471 vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]); 00472 vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]); 00473 vector.m_data[2] = AkMax(in_vec1.m_data[2], in_vec2.m_data[2]); 00474 vector.m_data[3] = AkMax(in_vec1.m_data[3], in_vec2.m_data[3]); 00475 00476 return vector; 00477 } 00478 00479 AkForceInline AKSIMD_V2F32 AKSIMD_MAX_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00480 { 00481 AKSIMD_V2F32 vector; 00482 00483 vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]); 00484 vector.m_data[1] = 
AkMax(in_vec1.m_data[1], in_vec2.m_data[1]); 00485 00486 return vector; 00487 } 00488 00489 AkForceInline AKSIMD_V4F32 AKSIMD_ABS_V4F32( const AKSIMD_V4F32& in_vec1 ) 00490 { 00491 AKSIMD_V4F32 vector; 00492 vector.m_data[0] = fabs(in_vec1.m_data[0]); 00493 vector.m_data[1] = fabs(in_vec1.m_data[1]); 00494 vector.m_data[2] = fabs(in_vec1.m_data[2]); 00495 vector.m_data[3] = fabs(in_vec1.m_data[3]); 00496 return vector; 00497 } 00498 00499 AkForceInline AKSIMD_V4F32 AKSIMD_NEG_V4F32( const AKSIMD_V4F32& in_vec1 ) 00500 { 00501 AKSIMD_V4F32 vector; 00502 vector.m_data[0] = -in_vec1.m_data[0]; 00503 vector.m_data[1] = -in_vec1.m_data[1]; 00504 vector.m_data[2] = -in_vec1.m_data[2]; 00505 vector.m_data[3] = -in_vec1.m_data[3]; 00506 return vector; 00507 } 00508 00509 // _mm_sqrt_ps 00510 AkForceInline AKSIMD_V4F32 AKSIMD_SQRT_V4F32( const AKSIMD_V4F32& in_vec ) 00511 { 00512 AKSIMD_V4F32 vCompare; 00513 AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,0) ); 00514 AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,1) ); 00515 AKSIMD_GETELEMENT_V4F32(vCompare,2) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,2) ); 00516 AKSIMD_GETELEMENT_V4F32(vCompare,3) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,3) ); 00517 00518 //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) ); 00519 00520 return vCompare /*res*/; 00521 } 00522 00523 AkForceInline AKSIMD_V2F32 AKSIMD_SQRT_V2F32( const AKSIMD_V2F32& in_vec ) 00524 { 00525 AKSIMD_V2F32 vCompare; 00526 AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,0) ); 00527 AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,1) ); 00528 00529 //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) ); 00530 00531 return vCompare /*res*/; 00532 } 00533 00534 //@} 00535 //////////////////////////////////////////////////////////////////////// 00536 00537 00538 //////////////////////////////////////////////////////////////////////// 00539 /// @name AKSIMD 
packing / unpacking 00540 //@{ 00541 00542 // 00543 // _mm_unpacklo_epi16 00544 // r0 := a0 00545 // r1 := b0 00546 // r2 := a1 00547 // r3 := b1 00548 // r4 := a2 00549 // r5 := b2 00550 // r6 := a3 00551 // r7 := b3 00552 AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKLO_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00553 { 00554 AKSIMD_V4I32 vector; 00555 AkInt16 *pVec1,*pVec2,*pDest; 00556 pVec1 = (AkInt16*)&in_vec1; 00557 pVec2 = (AkInt16*)&in_vec2; 00558 pDest = (AkInt16*)&vector; 00559 00560 pDest[0] = pVec1[0]; 00561 pDest[1] = pVec2[0]; 00562 pDest[2] = pVec1[1]; 00563 pDest[3] = pVec2[1]; 00564 pDest[4] = pVec1[2]; 00565 pDest[5] = pVec2[2]; 00566 pDest[6] = pVec1[3]; 00567 pDest[7] = pVec2[3]; 00568 00569 return vector; 00570 } 00571 00572 // _mm_unpackhi_epi16 00573 AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKHI_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00574 { 00575 AKSIMD_V4I32 vector; 00576 AkInt16 *pVec1,*pVec2,*pDest; 00577 pVec1 = (AkInt16*)&in_vec1; 00578 pVec2 = (AkInt16*)&in_vec2; 00579 pDest = (AkInt16*)&vector; 00580 00581 pDest[0] = pVec1[4]; 00582 pDest[1] = pVec2[4]; 00583 pDest[2] = pVec1[5]; 00584 pDest[3] = pVec2[5]; 00585 pDest[4] = pVec1[6]; 00586 pDest[5] = pVec2[6]; 00587 pDest[6] = pVec1[7]; 00588 pDest[7] = pVec2[7]; 00589 00590 return vector; 00591 } 00592 00593 // _mm_unpacklo_ps 00594 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKLO_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00595 { 00596 AKSIMD_V4F32 vector; 00597 vector.m_data[0] = in_vec1.m_data[0]; 00598 vector.m_data[1] = in_vec2.m_data[0]; 00599 vector.m_data[2] = in_vec1.m_data[1]; 00600 vector.m_data[3] = in_vec2.m_data[1]; 00601 00602 return vector; 00603 } 00604 00605 // _mm_unpackhi_ps 00606 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKHI_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00607 { 00608 AKSIMD_V4F32 vector; 00609 vector.m_data[0] = in_vec1.m_data[2]; 00610 vector.m_data[1] = 
in_vec2.m_data[2]; 00611 vector.m_data[2] = in_vec1.m_data[3]; 00612 vector.m_data[3] = in_vec2.m_data[3]; 00613 00614 return vector; 00615 } 00616 00617 // _mm_packs_epi32 00618 AkForceInline AKSIMD_V4I32 AKSIMD_PACKS_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00619 { 00620 AKSIMD_V4I32 vector; 00621 AkInt16 *pDest = (AkInt16*)&vector; 00622 00623 pDest[0] = AkClamp( in_vec1.m_data[0], -32768, 32767); 00624 pDest[1] = AkClamp( in_vec1.m_data[1], -32768, 32767); 00625 pDest[2] = AkClamp( in_vec1.m_data[2], -32768, 32767); 00626 pDest[3] = AkClamp( in_vec1.m_data[3], -32768, 32767); 00627 pDest[4] = AkClamp( in_vec2.m_data[0], -32768, 32767); 00628 pDest[5] = AkClamp( in_vec2.m_data[1], -32768, 32767); 00629 pDest[6] = AkClamp( in_vec2.m_data[2], -32768, 32767); 00630 pDest[7] = AkClamp( in_vec2.m_data[3], -32768, 32767); 00631 00632 return vector; 00633 } 00634 00635 //@} 00636 //////////////////////////////////////////////////////////////////////// 00637 00638 00639 //#define AKSIMD_GET_ITEM( vec, index ) vec[index] 00640 00641 00642 00643 00644 //////////////////////////////////////////////////////////////////////// 00645 /// @name AKSIMD shuffling 00646 //@{ 00647 00648 // See _MM_SHUFFLE 00649 #define AKSIMD_SHUFFLE( fp3, fp2, fp1, fp0 ) \ 00650 (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) 00651 00652 // See _mm_shuffle_ps 00653 // Usage: AKSIMD_SHUFFLE_V4F32( vec1, vec2, AKSIMD_SHUFFLE( z, y, x, w ) ) 00654 //#define AKSIMD_SHUFFLE_V4F32( a, b, zyxw ) 00655 00656 AkForceInline AKSIMD_V4F32 AKSIMD_SHUFFLE_V4F32( const AKSIMD_V4F32& xyzw, const AKSIMD_V4F32& abcd, int mask ) 00657 { 00658 AKSIMD_V4F32 vector; 00659 vector.m_data[0] = xyzw.m_data[(mask) & 0x3]; 00660 vector.m_data[1] = xyzw.m_data[(mask >> 2) & 0x3]; 00661 vector.m_data[2] = abcd.m_data[(mask >> 4) & 0x3]; 00662 vector.m_data[3] = abcd.m_data[(mask >> 6) & 0x3]; 00663 00664 return vector; 00665 } 00666 00667 00668 /// Moves the upper two single-precision, 
floating-point values of b to 00669 /// the lower two single-precision, floating-point values of the result. 00670 /// The upper two single-precision, floating-point values of a are passed 00671 /// through to the result. 00672 /// r3 := a3; r2 := a2; r1 := b3; r0 := b2 (see _mm_movehl_ps) 00673 #define AKSIMD_MOVEHL_V4F32( a, b ) \ 00674 AKSIMD_SHUFFLE_V4F32( (b), (a), AKSIMD_SHUFFLE(3, 2, 3, 2) ) 00675 00676 /// Moves the lower two single-precision, floating-point values of b to 00677 /// the upper two single-precision, floating-point values of the result. 00678 /// The lower two single-precision, floating-point values of a are passed 00679 /// through to the result. 00680 /// r3 := b1 ; r2 := b0 ; r1 := a1 ; r0 := a0 (see _mm_movelh_ps) 00681 #define AKSIMD_MOVELH_V4F32( a, b ) \ 00682 AKSIMD_SHUFFLE_V4F32( (a), (b), AKSIMD_SHUFFLE(1, 0, 1, 0) ) 00683 00684 /// Swap the 2 lower floats together and the 2 higher floats together. 00685 #define AKSIMD_SHUFFLE_BADC( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(2,3,0,1)); 00686 00687 /// Swap the 2 lower floats with the 2 higher floats. 00688 #define AKSIMD_SHUFFLE_CDAB( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(1,0,3,2)); 00689 00690 /// Barrel-shift all floats by one. 
00691 #define AKSIMD_SHUFFLE_BCDA( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(0,3,2,1)) 00692 00693 /// Duplicates the odd items into the even items (d c b a -> d d b b ) 00694 #define AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1)) 00695 00696 /// Duplicates the even items into the odd items (d c b a -> c c a a ) 00697 #define AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0)) 00698 00699 00700 //#include <AK/SoundEngine/Platforms/Generic/AkSimdShuffle.h> 00701 00702 //@} 00703 //////////////////////////////////////////////////////////////////////// 00704 00705 // Old AKSIMD -- will search-and-replace later 00706 #define AkReal32Vector AKSIMD_V4F32 00707 #define AKSIMD_LOAD1( __scalar__ ) AKSIMD_LOAD1_V4F32( &__scalar__ ) 00708 #define AKSIMD_LOADVEC(v) AKSIMD_LOAD_V4F32((const AKSIMD_F32*)((v))) 00709 #define AKSIMD_MUL AKSIMD_MUL_V4F32 00710 #define AKSIMD_STOREVEC AKSIMD_STORE_V4F32 00711 00712 /// Faked in-place vector horizontal add. 00713 /// \akwarning 00714 /// Don't expect this to be very efficient. 
00715 /// /endakwarning 00716 static AkForceInline void AKSIMD_HORIZONTALADD( AKSIMD_V4F32 & vVec ) 00717 { 00718 AKSIMD_V4F32 vHighLow = AKSIMD_MOVEHL_V4F32(vVec, vVec); 00719 vVec = AKSIMD_ADD_V4F32(vVec, vHighLow); 00720 vHighLow = AKSIMD_SHUFFLE_V4F32(vVec, vVec, 0x55); 00721 vVec = AKSIMD_ADD_V4F32(vVec, vHighLow); 00722 } 00723 00724 /// Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary parts 00725 static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL( const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2 ) 00726 { 00727 static const AKSIMD_V4F32 vSign = { 1.f, -1.f, 1.f, -1.f }; 00728 00729 AKSIMD_V4F32 vTmp1 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(2,2,0,0)); 00730 vTmp1 = AKSIMD_MUL_V4F32( vTmp1, vCIn2 ); 00731 AKSIMD_V4F32 vTmp2 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(3,3,1,1)); 00732 vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vSign ); 00733 vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vCIn2 ); 00734 vTmp2 = AKSIMD_SHUFFLE_BADC( vTmp2 ); 00735 vTmp2 = AKSIMD_ADD_V4F32( vTmp2, vTmp1 ); 00736 return vTmp2; 00737 } 00738 00739 #define AKSIMD_SPLAT_V4F32(var, idx) AKSIMD_SHUFFLE_V4F32(var,var, AKSIMD_SHUFFLE(idx,idx,idx,idx)) 00740 00741 #define AK_SIGN_BIT( val ) (((AkUInt32)val) >> 31) 00742 00743 static AkForceInline int AKSIMD_MASK_V4F32( const AKSIMD_V4F32& in_vec ) 00744 { 00745 return AK_SIGN_BIT(in_vec.m_data[0]) | AK_SIGN_BIT(in_vec.m_data[1]) << 1 | AK_SIGN_BIT(in_vec.m_data[2]) << 2 | AK_SIGN_BIT(in_vec.m_data[3]) << 3; 00746 } 00747 00748 #endif //_AKSIMD_GENERIC_H_ 00749
Cette page a-t-elle été utile ?
Besoin d'aide ?
Des questions ? Des problèmes ? Besoin de plus d'informations ? Contactez-nous, nous pouvons vous aider !
Visitez notre page d'Aide. Décrivez-nous votre projet. Nous sommes là pour vous aider.
Enregistrez votre projet et nous vous aiderons à démarrer sans aucune obligation !
Partir du bon pied avec Wwise