Wwise SDK 2017.2.10
_platforms_2_generic_2_ak_simd_8h_source
版本
menu_open
link
include/AK/SoundEngine/Platforms/Generic/AkSimd.h
浏览该文件的文档。
/*******************************************************************************
The content of this file includes portions of the AUDIOKINETIC Wwise Technology
released in source code form as part of the SDK installer package.

Commercial License Usage

Licensees holding valid commercial licenses to the AUDIOKINETIC Wwise Technology
may use this file in accordance with the end user license agreement provided
with the software or, alternatively, in accordance with the terms contained in a
written agreement between you and Audiokinetic Inc.

Apache License Usage

Alternatively, this file may be used under the Apache License, Version 2.0 (the
"Apache License"); you may not use this file except in compliance with the
Apache License. You may obtain a copy of the Apache License at
http://www.apache.org/licenses/LICENSE-2.0.

Unless required by applicable law or agreed to in writing, software distributed
under the Apache License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
OR CONDITIONS OF ANY KIND, either express or implied. See the Apache License for
the specific language governing permissions and limitations under the License.

  Version: <VERSION>  Build: <BUILDNUMBER>
  Copyright (c) <COPYRIGHTYEAR> Audiokinetic Inc.
00026 *******************************************************************************/ 00027 00028 // AkSimd.h 00029 00030 /// \file 00031 /// AKSIMD - Generic (no SIMD support) implementation 00032 00033 #ifndef _AKSIMD_GENERIC_H_ 00034 #define _AKSIMD_GENERIC_H_ 00035 00036 #include <math.h> 00037 #include <string.h> 00038 #include <AK/SoundEngine/Common/AkTypes.h> 00039 #include <AK/Tools/Common/AkPlatformFuncs.h> 00040 00041 //////////////////////////////////////////////////////////////////////// 00042 /// @name AKSIMD types 00043 //@{ 00044 typedef AkInt32 AKSIMD_I32; ///< 32-bit signed integer 00045 typedef struct { AkInt32 m_data[4]; } AKSIMD_V4I32; ///< Vector of 4 32-bit signed integers 00046 typedef struct { AkUInt32 m_data[4]; } AKSIMD_V4UI32; ///< Vector of 4 32-bit signed integers 00047 typedef AkReal32 AKSIMD_F32; ///< 32-bit float 00048 typedef struct { AkReal32 m_data[2]; } AKSIMD_V2F32; ///< Vector of 2 32-bit floats 00049 typedef struct { AkReal32 m_data[4]; } AKSIMD_V4F32; ///< Vector of 4 32-bit floats 00050 typedef AKSIMD_V4UI32 AKSIMD_V4COND; ///< Vector of 4 comparison results 00051 00052 #pragma pack(push,1) 00053 typedef struct { AkInt32 m_data[4]; } AKSIMD_V4I32_UNALIGNED; ///< Unaligned Vector of 4 32-bit signed integers 00054 typedef struct { AkUInt32 m_data[4]; } AKSIMD_V4UI32_UNALIGNED; ///< Unaligned Vector of 4 32-bit signed integers 00055 typedef struct { AkReal32 m_data[2]; } AKSIMD_V2F32_UNALIGNED; ///< Unaligned Vector of 2 32-bit floats 00056 typedef struct { AkReal32 m_data[4]; } AKSIMD_V4F32_UNALIGNED; ///< Unaligned Vector of 4 32-bit floats 00057 #pragma pack(pop) 00058 00059 //@} 00060 //////////////////////////////////////////////////////////////////////// 00061 00062 #ifndef AKSIMD_GETELEMENT_V4F32 00063 #define AKSIMD_GETELEMENT_V4F32( __vName, __num__ ) (__vName).m_data[(__num__)] 00064 #endif 00065 00066 #ifndef AKSIMD_GETELEMENT_V2F32 00067 #define AKSIMD_GETELEMENT_V2F32( __vName, __num__ ) 
(__vName).m_data[(__num__)] 00068 #endif 00069 00070 #ifndef AKSIMD_GETELEMENT_V4I32 00071 #define AKSIMD_GETELEMENT_V4I32( __vName, __num__ ) (__vName).m_data[(__num__)] 00072 #endif 00073 00074 //////////////////////////////////////////////////////////////////////// 00075 /// @name Platform specific memory size alignment for allocation purposes 00076 //@{ 00077 #define AKSIMD_ALIGNSIZE( __Size__ ) (((__Size__) + 15) & ~15) 00078 //@} 00079 //////////////////////////////////////////////////////////////////////// 00080 00081 //////////////////////////////////////////////////////////////////////// 00082 /// @name AKSIMD loading / setting 00083 //@{ 00084 #define AKSIMD_LOADU_V4I32( in_pData ) (*(in_pData)) 00085 00086 #define AKSIMD_LOADU_V4F32( in_pValue ) (*(AKSIMD_V4F32*)(in_pValue)) 00087 00088 #define AKSIMD_LOAD_V4F32( in_pValue ) (*(AKSIMD_V4F32*)(in_pValue)) 00089 00090 AkForceInline AKSIMD_V4F32 AKSIMD_LOAD1_V4F32( AKSIMD_F32 in_value ) 00091 { 00092 AKSIMD_V4F32 vector; 00093 vector.m_data[0] = in_value; 00094 vector.m_data[1] = in_value; 00095 vector.m_data[2] = in_value; 00096 vector.m_data[3] = in_value; 00097 00098 return vector; 00099 } 00100 00101 // _mm_set_ps1 00102 AkForceInline AKSIMD_V4F32 AKSIMD_SET_V4F32( AKSIMD_F32 in_value ) 00103 { 00104 AKSIMD_V4F32 vector; 00105 vector.m_data[0] = in_value; 00106 vector.m_data[1] = in_value; 00107 vector.m_data[2] = in_value; 00108 vector.m_data[3] = in_value; 00109 00110 return vector; 00111 } 00112 00113 00114 AkForceInline AKSIMD_V2F32 AKSIMD_SET_V2F32( AKSIMD_F32 in_value ) 00115 { 00116 AKSIMD_V2F32 vector; 00117 vector.m_data[0] = in_value; 00118 vector.m_data[1] = in_value; 00119 00120 return vector; 00121 } 00122 00123 // _mm_setzero_ps() 00124 AkForceInline AKSIMD_V4F32 AKSIMD_SETZERO_V4F32() 00125 { 00126 AKSIMD_V4F32 vector; 00127 vector.m_data[0] = 0.f; 00128 vector.m_data[1] = 0.f; 00129 vector.m_data[2] = 0.f; 00130 vector.m_data[3] = 0.f; 00131 00132 return vector; 00133 } 00134 00135 
/// Returns a 2-float vector whose two elements are both 0.f.
AkForceInline AKSIMD_V2F32 AKSIMD_SETZERO_V2F32()
{
	AKSIMD_V2F32 vector;
	vector.m_data[0] = 0.f;
	vector.m_data[1] = 0.f;

	return vector;
}

// _mm_setzero_si128()
/// Returns an integer vector whose four elements are all 0.
AkForceInline AKSIMD_V4I32 AKSIMD_SETZERO_V4I32()
{
	AKSIMD_V4I32 vector;
	vector.m_data[0] = 0;
	vector.m_data[1] = 0;
	vector.m_data[2] = 0;
	vector.m_data[3] = 0;

	return vector;
}


/// Loads a single-precision, floating-point value into the low word
/// and clears the upper three words.
/// r0 := *p; r1 := 0.0 ; r2 := 0.0 ; r3 := 0.0 (see _mm_load_ss)
AkForceInline AKSIMD_V4F32 AKSIMD_LOAD_SS_V4F32( const AKSIMD_F32* in_pData )
{
	AKSIMD_V4F32 vector;
	vector.m_data[0] = *in_pData;
	vector.m_data[1] = 0.f;
	vector.m_data[2] = 0.f;
	vector.m_data[3] = 0.f;

	return vector;
}

//@}
////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////
/// @name AKSIMD storing
//@{

// _mm_storeu_ps -- The address does not need to be 16-byte aligned.
// The whole expansion is parenthesized so the assignment cannot regroup
// with operators surrounding the macro invocation.
#define AKSIMD_STOREU_V4F32( in_pTo, in_vec ) ((*(AKSIMD_V4F32*)(in_pTo)) = (in_vec))

// _mm_store_ps -- The address must be 16-byte aligned.
// In this generic implementation the aligned store simply forwards to the
// unaligned one.
#define AKSIMD_STORE_V4F32( __addr__, __vName__ ) AKSIMD_STOREU_V4F32( (__addr__), (__vName__) )

// _mm_storeu_si128
#define AKSIMD_STOREU_V4I32( in_pTo, in_vec ) ((*(AKSIMD_V4I32*)(in_pTo)) = (in_vec))

/// Stores the lower single-precision, floating-point value.
00188 /// *p := a0 (see _mm_store_ss) 00189 AkForceInline void AKSIMD_STORE1_V4F32( AKSIMD_F32* in_pTo, const AKSIMD_V4F32& in_vec ) 00190 { 00191 ((AKSIMD_V4F32*)in_pTo)->m_data[0] = in_vec.m_data[0]; 00192 } 00193 00194 //@} 00195 //////////////////////////////////////////////////////////////////////// 00196 00197 //////////////////////////////////////////////////////////////////////// 00198 /// @name AKSIMD conversion 00199 //@{ 00200 00201 // _mm_cvtepi32_ps 00202 AkForceInline AKSIMD_V4F32 AKSIMD_CONVERT_V4I32_TO_V4F32( const AKSIMD_V4I32& in_from ) 00203 { 00204 AKSIMD_V4F32 vector; 00205 vector.m_data[0] = (AkReal32)in_from.m_data[0]; 00206 vector.m_data[1] = (AkReal32)in_from.m_data[1]; 00207 vector.m_data[2] = (AkReal32)in_from.m_data[2]; 00208 vector.m_data[3] = (AkReal32)in_from.m_data[3]; 00209 00210 return vector; 00211 } 00212 // _mm_cvtps_epi32 00213 AkForceInline AKSIMD_V4I32 AKSIMD_CONVERT_V4F32_TO_V4I32( const AKSIMD_V4F32& in_from ) 00214 { 00215 AKSIMD_V4I32 vector; 00216 vector.m_data[0] = (AkInt32)in_from.m_data[0]; 00217 vector.m_data[1] = (AkInt32)in_from.m_data[1]; 00218 vector.m_data[2] = (AkInt32)in_from.m_data[2]; 00219 vector.m_data[3] = (AkInt32)in_from.m_data[3]; 00220 00221 return vector; 00222 } 00223 00224 //@} 00225 //////////////////////////////////////////////////////////////////////// 00226 00227 //////////////////////////////////////////////////////////////////////// 00228 /// @name AKSIMD logical operations 00229 //@{ 00230 00231 // _mm_and_si128 00232 AkForceInline AKSIMD_V4I32 AKSIMD_AND_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00233 { 00234 AKSIMD_V4I32 vector; 00235 vector.m_data[0] = in_vec1.m_data[0] & in_vec2.m_data[0]; 00236 vector.m_data[1] = in_vec1.m_data[1] & in_vec2.m_data[1]; 00237 vector.m_data[2] = in_vec1.m_data[2] & in_vec2.m_data[2]; 00238 vector.m_data[3] = in_vec1.m_data[3] & in_vec2.m_data[3]; 00239 00240 return vector; 00241 } 00242 00243 /// Compares the 8 signed 16-bit 
integers in a and the 8 signed 00244 /// 16-bit integers in b for greater than (see _mm_cmpgt_epi16) 00245 AkForceInline AKSIMD_V4I32 AKSIMD_CMPGT_V8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00246 { 00247 AKSIMD_V4I32 vector; 00248 00249 AkInt16 *pVec1,*pVec2,*pVec3; 00250 pVec1 = (AkInt16*)&in_vec1; 00251 pVec2 = (AkInt16*)&in_vec2; 00252 pVec3 = (AkInt16*)&vector; 00253 00254 pVec3[0] = (pVec1[0] > pVec2[0]) ? 0xffff : 0x0; 00255 pVec3[1] = (pVec1[1] > pVec2[1]) ? 0xffff : 0x0; 00256 pVec3[2] = (pVec1[2] > pVec2[2]) ? 0xffff : 0x0; 00257 pVec3[3] = (pVec1[3] > pVec2[3]) ? 0xffff : 0x0; 00258 pVec3[4] = (pVec1[4] > pVec2[4]) ? 0xffff : 0x0; 00259 pVec3[5] = (pVec1[5] > pVec2[5]) ? 0xffff : 0x0; 00260 pVec3[6] = (pVec1[6] > pVec2[6]) ? 0xffff : 0x0; 00261 pVec3[7] = (pVec1[7] > pVec2[7]) ? 0xffff : 0x0; 00262 00263 return vector; 00264 } 00265 00266 /// Compares for less than or equal (see _mm_cmple_ps) 00267 AkForceInline AKSIMD_V4UI32 AKSIMD_CMPLE_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00268 { 00269 AKSIMD_V4UI32 vector; 00270 00271 vector.m_data[0] = (in_vec1.m_data[0] <= in_vec2.m_data[0]) ? 0xffffffff : 0x0; 00272 vector.m_data[1] = (in_vec1.m_data[1] <= in_vec2.m_data[1]) ? 0xffffffff : 0x0; 00273 vector.m_data[2] = (in_vec1.m_data[2] <= in_vec2.m_data[2]) ? 0xffffffff : 0x0; 00274 vector.m_data[3] = (in_vec1.m_data[3] <= in_vec2.m_data[3]) ? 0xffffffff : 0x0; 00275 00276 return vector; 00277 } 00278 00279 AkForceInline AKSIMD_V4F32 AKSIMD_GTEQ_V4F32(const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2) 00280 { 00281 AKSIMD_V4F32 vector; 00282 00283 vector.m_data[0] = (AkReal32)((in_vec1.m_data[0] >= in_vec2.m_data[0]) ? 0xffffffff : 0x0); 00284 vector.m_data[1] = (AkReal32)((in_vec1.m_data[1] >= in_vec2.m_data[1]) ? 0xffffffff : 0x0); 00285 vector.m_data[2] = (AkReal32)((in_vec1.m_data[2] >= in_vec2.m_data[2]) ? 
0xffffffff : 0x0); 00286 vector.m_data[3] = (AkReal32)((in_vec1.m_data[3] >= in_vec2.m_data[3]) ? 0xffffffff : 0x0); 00287 00288 return vector; 00289 } 00290 00291 AkForceInline AKSIMD_V4F32 AKSIMD_XOR_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00292 { 00293 AKSIMD_V4F32 vector; 00294 00295 vector.m_data[0] = (AkReal32)(((AkUInt32)in_vec1.m_data[0]) ^ ((AkUInt32)in_vec2.m_data[0])); 00296 vector.m_data[1] = (AkReal32)(((AkUInt32)in_vec1.m_data[1]) ^ ((AkUInt32)in_vec2.m_data[1])); 00297 vector.m_data[2] = (AkReal32)(((AkUInt32)in_vec1.m_data[2]) ^ ((AkUInt32)in_vec2.m_data[2])); 00298 vector.m_data[3] = (AkReal32)(((AkUInt32)in_vec1.m_data[3]) ^ ((AkUInt32)in_vec2.m_data[3])); 00299 00300 return vector; 00301 } 00302 00303 AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTLEFT_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy) 00304 { 00305 in_vector.m_data[0] <<= in_shiftBy; 00306 in_vector.m_data[1] <<= in_shiftBy; 00307 in_vector.m_data[2] <<= in_shiftBy; 00308 in_vector.m_data[3] <<= in_shiftBy; 00309 00310 return in_vector; 00311 } 00312 00313 AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTRIGHTARITH_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy) 00314 { 00315 in_vector.m_data[0] >>= in_shiftBy; 00316 in_vector.m_data[1] >>= in_shiftBy; 00317 in_vector.m_data[2] >>= in_shiftBy; 00318 in_vector.m_data[3] >>= in_shiftBy; 00319 00320 return in_vector; 00321 } 00322 00323 //@} 00324 //////////////////////////////////////////////////////////////////////// 00325 00326 00327 //////////////////////////////////////////////////////////////////////// 00328 /// @name AKSIMD arithmetic 00329 //@{ 00330 // _mm_sub_ps 00331 AkForceInline AKSIMD_V4F32 AKSIMD_SUB_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00332 { 00333 AKSIMD_V4F32 vector; 00334 00335 vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0]; 00336 vector.m_data[1] = in_vec1.m_data[1] - in_vec2.m_data[1]; 00337 vector.m_data[2] = in_vec1.m_data[2] - in_vec2.m_data[2]; 00338 
vector.m_data[3] = in_vec1.m_data[3] - in_vec2.m_data[3]; 00339 00340 return vector; 00341 } 00342 00343 /// Subtracts the lower single-precision, floating-point values of a and b. 00344 /// The upper three single-precision, floating-point values are passed through from a. 00345 /// r0 := a0 - b0 ; r1 := a1 ; r2 := a2 ; r3 := a3 (see _mm_sub_ss) 00346 00347 AkForceInline AKSIMD_V4F32 AKSIMD_SUB_SS_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00348 { 00349 AKSIMD_V4F32 vector; 00350 00351 vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0]; 00352 vector.m_data[1] = in_vec1.m_data[1]; 00353 vector.m_data[2] = in_vec1.m_data[2]; 00354 vector.m_data[3] = in_vec1.m_data[3]; 00355 00356 return vector; 00357 } 00358 00359 // _mm_add_ps 00360 AkForceInline AKSIMD_V4F32 AKSIMD_ADD_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00361 { 00362 AKSIMD_V4F32 vector; 00363 00364 vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0]; 00365 vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1]; 00366 vector.m_data[2] = in_vec1.m_data[2] + in_vec2.m_data[2]; 00367 vector.m_data[3] = in_vec1.m_data[3] + in_vec2.m_data[3]; 00368 00369 return vector; 00370 } 00371 00372 AkForceInline AKSIMD_V4F32 AKSIMD_DIV_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00373 { 00374 AKSIMD_V4F32 vector; 00375 00376 vector.m_data[0] = in_vec1.m_data[0] / in_vec2.m_data[0]; 00377 vector.m_data[1] = in_vec1.m_data[1] / in_vec2.m_data[1]; 00378 vector.m_data[2] = in_vec1.m_data[2] / in_vec2.m_data[2]; 00379 vector.m_data[3] = in_vec1.m_data[3] / in_vec2.m_data[3]; 00380 00381 return vector; 00382 } 00383 00384 AkForceInline AKSIMD_V2F32 AKSIMD_ADD_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00385 { 00386 AKSIMD_V2F32 vector; 00387 00388 vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0]; 00389 vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1]; 00390 00391 return vector; 00392 } 00393 00394 /// Adds the 
lower single-precision, floating-point values of a and b; the 00395 /// upper three single-precision, floating-point values are passed through from a. 00396 /// r0 := a0 + b0; r1 := a1; r2 := a2; r3 := a3 (see _mm_add_ss) 00397 AkForceInline AKSIMD_V4F32 AKSIMD_ADD_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b ) 00398 { 00399 AKSIMD_V4F32 vector; 00400 00401 vector.m_data[0] = a.m_data[0] + b.m_data[0]; 00402 vector.m_data[1] = a.m_data[1]; 00403 vector.m_data[2] = a.m_data[2]; 00404 vector.m_data[3] = a.m_data[3]; 00405 00406 return vector; 00407 } 00408 00409 // _mm_mul_ps 00410 AkForceInline AKSIMD_V4F32 AKSIMD_MUL_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00411 { 00412 AKSIMD_V4F32 vector; 00413 00414 vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0]; 00415 vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1]; 00416 vector.m_data[2] = in_vec1.m_data[2] * in_vec2.m_data[2]; 00417 vector.m_data[3] = in_vec1.m_data[3] * in_vec2.m_data[3]; 00418 00419 return vector; 00420 } 00421 00422 AkForceInline AKSIMD_V2F32 AKSIMD_MUL_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00423 { 00424 AKSIMD_V2F32 vector; 00425 00426 vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0]; 00427 vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1]; 00428 00429 return vector; 00430 } 00431 00432 /// Multiplies the lower single-precision, floating-point values of 00433 /// a and b; the upper three single-precision, floating-point values 00434 /// are passed through from a. 
00435 /// r0 := a0 * b0; r1 := a1; r2 := a2; r3 := a3 (see _mm_add_ss) 00436 AkForceInline AKSIMD_V4F32 AKSIMD_MUL_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b ) 00437 { 00438 AKSIMD_V4F32 vector; 00439 00440 vector.m_data[0] = a.m_data[0] * b.m_data[0]; 00441 vector.m_data[1] = a.m_data[1]; 00442 vector.m_data[2] = a.m_data[2]; 00443 vector.m_data[3] = a.m_data[3]; 00444 00445 return vector; 00446 } 00447 00448 /// Vector multiply-add operation. 00449 #define AKSIMD_MADD_V4F32( __a__, __b__, __c__ ) AKSIMD_ADD_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) 00450 #define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) AKSIMD_SUB_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) 00451 00452 /// Vector multiply-add operation. 00453 #define AKSIMD_MADD_SS_V4F32( __a__, __b__, __c__ ) AKSIMD_ADD_SS_V4F32( AKSIMD_MUL_SS_V4F32( (__a__), (__b__) ), (__c__) ) 00454 00455 // _mm_min_ps 00456 AkForceInline AKSIMD_V4F32 AKSIMD_MIN_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00457 { 00458 AKSIMD_V4F32 vector; 00459 00460 vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]); 00461 vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]); 00462 vector.m_data[2] = AkMin(in_vec1.m_data[2], in_vec2.m_data[2]); 00463 vector.m_data[3] = AkMin(in_vec1.m_data[3], in_vec2.m_data[3]); 00464 00465 return vector; 00466 } 00467 00468 AkForceInline AKSIMD_V2F32 AKSIMD_MIN_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00469 { 00470 AKSIMD_V2F32 vector; 00471 00472 vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]); 00473 vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]); 00474 00475 return vector; 00476 } 00477 00478 // _mm_max_ps 00479 AkForceInline AKSIMD_V4F32 AKSIMD_MAX_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00480 { 00481 AKSIMD_V4F32 vector; 00482 00483 vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]); 00484 vector.m_data[1] = 
AkMax(in_vec1.m_data[1], in_vec2.m_data[1]); 00485 vector.m_data[2] = AkMax(in_vec1.m_data[2], in_vec2.m_data[2]); 00486 vector.m_data[3] = AkMax(in_vec1.m_data[3], in_vec2.m_data[3]); 00487 00488 return vector; 00489 } 00490 00491 AkForceInline AKSIMD_V2F32 AKSIMD_MAX_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00492 { 00493 AKSIMD_V2F32 vector; 00494 00495 vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]); 00496 vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]); 00497 00498 return vector; 00499 } 00500 00501 AkForceInline AKSIMD_V4F32 AKSIMD_ABS_V4F32( const AKSIMD_V4F32& in_vec1 ) 00502 { 00503 AKSIMD_V4F32 vector; 00504 vector.m_data[0] = fabsf(in_vec1.m_data[0]); 00505 vector.m_data[1] = fabsf(in_vec1.m_data[1]); 00506 vector.m_data[2] = fabsf(in_vec1.m_data[2]); 00507 vector.m_data[3] = fabsf(in_vec1.m_data[3]); 00508 return vector; 00509 } 00510 00511 AkForceInline AKSIMD_V4F32 AKSIMD_NEG_V4F32( const AKSIMD_V4F32& in_vec1 ) 00512 { 00513 AKSIMD_V4F32 vector; 00514 vector.m_data[0] = -in_vec1.m_data[0]; 00515 vector.m_data[1] = -in_vec1.m_data[1]; 00516 vector.m_data[2] = -in_vec1.m_data[2]; 00517 vector.m_data[3] = -in_vec1.m_data[3]; 00518 return vector; 00519 } 00520 00521 // _mm_sqrt_ps 00522 AkForceInline AKSIMD_V4F32 AKSIMD_SQRT_V4F32( const AKSIMD_V4F32& in_vec ) 00523 { 00524 AKSIMD_V4F32 vCompare; 00525 AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,0) ); 00526 AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,1) ); 00527 AKSIMD_GETELEMENT_V4F32(vCompare,2) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,2) ); 00528 AKSIMD_GETELEMENT_V4F32(vCompare,3) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,3) ); 00529 00530 //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) ); 00531 00532 return vCompare /*res*/; 00533 } 00534 00535 AkForceInline AKSIMD_V2F32 AKSIMD_SQRT_V2F32( const AKSIMD_V2F32& in_vec ) 00536 { 00537 AKSIMD_V2F32 vCompare; 00538 
AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,0) ); 00539 AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrtf( AKSIMD_GETELEMENT_V4F32(in_vec,1) ); 00540 00541 //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) ); 00542 00543 return vCompare /*res*/; 00544 } 00545 00546 //@} 00547 //////////////////////////////////////////////////////////////////////// 00548 00549 00550 //////////////////////////////////////////////////////////////////////// 00551 /// @name AKSIMD packing / unpacking 00552 //@{ 00553 00554 // 00555 // _mm_unpacklo_epi16 00556 // r0 := a0 00557 // r1 := b0 00558 // r2 := a1 00559 // r3 := b1 00560 // r4 := a2 00561 // r5 := b2 00562 // r6 := a3 00563 // r7 := b3 00564 AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKLO_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00565 { 00566 AKSIMD_V4I32 vector; 00567 AkInt16 *pVec1,*pVec2,*pDest; 00568 pVec1 = (AkInt16*)&in_vec1; 00569 pVec2 = (AkInt16*)&in_vec2; 00570 pDest = (AkInt16*)&vector; 00571 00572 pDest[0] = pVec1[0]; 00573 pDest[1] = pVec2[0]; 00574 pDest[2] = pVec1[1]; 00575 pDest[3] = pVec2[1]; 00576 pDest[4] = pVec1[2]; 00577 pDest[5] = pVec2[2]; 00578 pDest[6] = pVec1[3]; 00579 pDest[7] = pVec2[3]; 00580 00581 return vector; 00582 } 00583 00584 // _mm_unpackhi_epi16 00585 AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKHI_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00586 { 00587 AKSIMD_V4I32 vector; 00588 AkInt16 *pVec1,*pVec2,*pDest; 00589 pVec1 = (AkInt16*)&in_vec1; 00590 pVec2 = (AkInt16*)&in_vec2; 00591 pDest = (AkInt16*)&vector; 00592 00593 pDest[0] = pVec1[4]; 00594 pDest[1] = pVec2[4]; 00595 pDest[2] = pVec1[5]; 00596 pDest[3] = pVec2[5]; 00597 pDest[4] = pVec1[6]; 00598 pDest[5] = pVec2[6]; 00599 pDest[6] = pVec1[7]; 00600 pDest[7] = pVec2[7]; 00601 00602 return vector; 00603 } 00604 00605 // _mm_unpacklo_ps 00606 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKLO_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00607 
{ 00608 AKSIMD_V4F32 vector; 00609 vector.m_data[0] = in_vec1.m_data[0]; 00610 vector.m_data[1] = in_vec2.m_data[0]; 00611 vector.m_data[2] = in_vec1.m_data[1]; 00612 vector.m_data[3] = in_vec2.m_data[1]; 00613 00614 return vector; 00615 } 00616 00617 // _mm_unpackhi_ps 00618 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKHI_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00619 { 00620 AKSIMD_V4F32 vector; 00621 vector.m_data[0] = in_vec1.m_data[2]; 00622 vector.m_data[1] = in_vec2.m_data[2]; 00623 vector.m_data[2] = in_vec1.m_data[3]; 00624 vector.m_data[3] = in_vec2.m_data[3]; 00625 00626 return vector; 00627 } 00628 00629 // _mm_packs_epi32 00630 AkForceInline AKSIMD_V4I32 AKSIMD_PACKS_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00631 { 00632 AKSIMD_V4I32 vector; 00633 AkInt16 *pDest = (AkInt16*)&vector; 00634 00635 pDest[0] = (AkInt16)AkClamp((AkInt16)in_vec1.m_data[0], -32768, 32767); 00636 pDest[1] = (AkInt16)AkClamp((AkInt16)in_vec1.m_data[1], -32768, 32767); 00637 pDest[2] = (AkInt16)AkClamp((AkInt16)in_vec1.m_data[2], -32768, 32767); 00638 pDest[3] = (AkInt16)AkClamp((AkInt16)in_vec1.m_data[3], -32768, 32767); 00639 pDest[4] = (AkInt16)AkClamp((AkInt16)in_vec2.m_data[0], -32768, 32767); 00640 pDest[5] = (AkInt16)AkClamp((AkInt16)in_vec2.m_data[1], -32768, 32767); 00641 pDest[6] = (AkInt16)AkClamp((AkInt16)in_vec2.m_data[2], -32768, 32767); 00642 pDest[7] = (AkInt16)AkClamp((AkInt16)in_vec2.m_data[3], -32768, 32767); 00643 00644 return vector; 00645 } 00646 00647 //@} 00648 //////////////////////////////////////////////////////////////////////// 00649 00650 00651 //#define AKSIMD_GET_ITEM( vec, index ) vec[index] 00652 00653 00654 00655 00656 //////////////////////////////////////////////////////////////////////// 00657 /// @name AKSIMD shuffling 00658 //@{ 00659 00660 // See _MM_SHUFFLE 00661 #define AKSIMD_SHUFFLE( fp3, fp2, fp1, fp0 ) \ 00662 (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) 00663 00664 // See 
_mm_shuffle_ps 00665 // Usage: AKSIMD_SHUFFLE_V4F32( vec1, vec2, AKSIMD_SHUFFLE( z, y, x, w ) ) 00666 //#define AKSIMD_SHUFFLE_V4F32( a, b, zyxw ) 00667 00668 AkForceInline AKSIMD_V4F32 AKSIMD_SHUFFLE_V4F32( const AKSIMD_V4F32& xyzw, const AKSIMD_V4F32& abcd, int mask ) 00669 { 00670 AKSIMD_V4F32 vector; 00671 vector.m_data[0] = xyzw.m_data[(mask) & 0x3]; 00672 vector.m_data[1] = xyzw.m_data[(mask >> 2) & 0x3]; 00673 vector.m_data[2] = abcd.m_data[(mask >> 4) & 0x3]; 00674 vector.m_data[3] = abcd.m_data[(mask >> 6) & 0x3]; 00675 00676 return vector; 00677 } 00678 00679 00680 /// Moves the upper two single-precision, floating-point values of b to 00681 /// the lower two single-precision, floating-point values of the result. 00682 /// The upper two single-precision, floating-point values of a are passed 00683 /// through to the result. 00684 /// r3 := a3; r2 := a2; r1 := b3; r0 := b2 (see _mm_movehl_ps) 00685 #define AKSIMD_MOVEHL_V4F32( a, b ) \ 00686 AKSIMD_SHUFFLE_V4F32( (b), (a), AKSIMD_SHUFFLE(3, 2, 3, 2) ) 00687 00688 /// Moves the lower two single-precision, floating-point values of b to 00689 /// the upper two single-precision, floating-point values of the result. 00690 /// The lower two single-precision, floating-point values of a are passed 00691 /// through to the result. 00692 /// r3 := b1 ; r2 := b0 ; r1 := a1 ; r0 := a0 (see _mm_movelh_ps) 00693 #define AKSIMD_MOVELH_V4F32( a, b ) \ 00694 AKSIMD_SHUFFLE_V4F32( (a), (b), AKSIMD_SHUFFLE(1, 0, 1, 0) ) 00695 00696 /// Swap the 2 lower floats together and the 2 higher floats together. 00697 #define AKSIMD_SHUFFLE_BADC( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(2,3,0,1)); 00698 00699 /// Swap the 2 lower floats with the 2 higher floats. 00700 #define AKSIMD_SHUFFLE_CDAB( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(1,0,3,2)); 00701 00702 /// Barrel-shift all floats by one. 
#define AKSIMD_SHUFFLE_BCDA( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(0,3,2,1))

/// Duplicates the odd items into the even items (d c b a -> d d b b )
#define AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32( (__vv), (__vv), AKSIMD_SHUFFLE(3,3,1,1))

/// Duplicates the even items into the odd items (d c b a -> c c a a )
#define AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32( (__vv), (__vv), AKSIMD_SHUFFLE(2,2,0,0))


//#include <AK/SoundEngine/Platforms/Generic/AkSimdShuffle.h>

//@}
////////////////////////////////////////////////////////////////////////

// Old AKSIMD -- will search-and-replace later
#define AkReal32Vector AKSIMD_V4F32
// AKSIMD_LOAD1_V4F32 takes its scalar by value in this generic
// implementation, so the scalar itself is forwarded (not its address).
#define AKSIMD_LOAD1( __scalar__ ) AKSIMD_LOAD1_V4F32( (__scalar__) )
#define AKSIMD_LOADVEC(v) AKSIMD_LOAD_V4F32((const AKSIMD_F32*)((v)))
#define AKSIMD_MUL AKSIMD_MUL_V4F32
#define AKSIMD_STOREVEC AKSIMD_STORE_V4F32

/// Faked in-place vector horizontal add.
/// \akwarning
/// Don't expect this to be very efficient.
00727 /// /endakwarning 00728 static AkForceInline void AKSIMD_HORIZONTALADD( AKSIMD_V4F32 & vVec ) 00729 { 00730 AKSIMD_V4F32 vHighLow = AKSIMD_MOVEHL_V4F32(vVec, vVec); 00731 vVec = AKSIMD_ADD_V4F32(vVec, vHighLow); 00732 vHighLow = AKSIMD_SHUFFLE_V4F32(vVec, vVec, 0x55); 00733 vVec = AKSIMD_ADD_V4F32(vVec, vHighLow); 00734 } 00735 00736 /// Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary parts 00737 static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL( const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2 ) 00738 { 00739 static const AKSIMD_V4F32 vSign = { 1.f, -1.f, 1.f, -1.f }; 00740 00741 AKSIMD_V4F32 vTmp1 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(2,2,0,0)); 00742 vTmp1 = AKSIMD_MUL_V4F32( vTmp1, vCIn2 ); 00743 AKSIMD_V4F32 vTmp2 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(3,3,1,1)); 00744 vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vSign ); 00745 vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vCIn2 ); 00746 vTmp2 = AKSIMD_SHUFFLE_BADC( vTmp2 ); 00747 vTmp2 = AKSIMD_ADD_V4F32( vTmp2, vTmp1 ); 00748 return vTmp2; 00749 } 00750 00751 #define AKSIMD_SPLAT_V4F32(var, idx) AKSIMD_SHUFFLE_V4F32(var,var, AKSIMD_SHUFFLE(idx,idx,idx,idx)) 00752 00753 #define AK_SIGN_BIT( val ) (((AkUInt32)val) >> 31) 00754 00755 static AkForceInline int AKSIMD_MASK_V4F32( const AKSIMD_V4F32& in_vec ) 00756 { 00757 return AK_SIGN_BIT(in_vec.m_data[0]) | AK_SIGN_BIT(in_vec.m_data[1]) << 1 | AK_SIGN_BIT(in_vec.m_data[2]) << 2 | AK_SIGN_BIT(in_vec.m_data[3]) << 3; 00758 } 00759 00760 #endif //_AKSIMD_GENERIC_H_ 00761