Wwise SDK 2015.1.9
Version
menu_open
link
include/AK/SoundEngine/Platforms/Generic/AkSimd.h
Go to the documentation of this file.00001 00002 // 00003 // Copyright (c) 2006 Audiokinetic Inc. / All Rights Reserved 00004 // 00006 00007 // AkSimd.h 00008 00011 00012 #ifndef _AKSIMD_GENERIC_H_ 00013 #define _AKSIMD_GENERIC_H_ 00014 00015 #include <math.h> 00016 #include <string.h> 00017 #include <AK/SoundEngine/Common/AkTypes.h> 00018 #include <AK/Tools/Common/AkPlatformFuncs.h> 00019 00022 00023 typedef AkInt32 AKSIMD_I32; 00024 typedef struct { AkInt32 m_data[4]; } AKSIMD_V4I32; 00025 typedef struct { AkUInt32 m_data[4]; } AKSIMD_V4UI32; 00026 typedef AkReal32 AKSIMD_F32; 00027 typedef struct { AkReal32 m_data[2]; } AKSIMD_V2F32; 00028 typedef struct { AkReal32 m_data[4]; } AKSIMD_V4F32; 00029 typedef AKSIMD_V4UI32 AKSIMD_V4COND; 00030 00031 00032 typedef struct { AkInt32 m_data[4]; } __attribute__((__packed__)) AKSIMD_V4I32_UNALIGNED; 00033 typedef struct { AkUInt32 m_data[4]; } __attribute__((__packed__)) AKSIMD_V4UI32_UNALIGNED; 00034 typedef struct { AkReal32 m_data[2]; } __attribute__((__packed__)) AKSIMD_V2F32_UNALIGNED; 00035 typedef struct { AkReal32 m_data[4]; } __attribute__((__packed__)) AKSIMD_V4F32_UNALIGNED; 00036 00038 00039 00040 #ifndef AKSIMD_GETELEMENT_V4F32 00041 #define AKSIMD_GETELEMENT_V4F32( __vName, __num__ ) (__vName).m_data[(__num__)] 00042 #endif 00043 00044 #ifndef AKSIMD_GETELEMENT_V2F32 00045 #define AKSIMD_GETELEMENT_V2F32( __vName, __num__ ) (__vName).m_data[(__num__)] 00046 #endif 00047 00048 #ifndef AKSIMD_GETELEMENT_V4I32 00049 #define AKSIMD_GETELEMENT_V4I32( __vName, __num__ ) (__vName).m_data[(__num__)] 00050 #endif 00051 00054 00055 #define AKSIMD_ALIGNSIZE( __Size__ ) (((__Size__) + 15) & ~15) 00056 00057 00058 00061 00062 #define AKSIMD_LOADU_V4I32( in_pData ) (*(in_pData)) 00063 00064 #define AKSIMD_LOADU_V4F32( in_pValue ) (*(AKSIMD_V4F32*)(in_pValue)) 00065 00066 #define AKSIMD_LOAD_V4F32( in_pValue ) (*(AKSIMD_V4F32*)(in_pValue)) 00067 00068 AkForceInline AKSIMD_V4F32 AKSIMD_LOAD1_V4F32( AKSIMD_F32 in_value ) 
00069 { 00070 AKSIMD_V4F32 vector; 00071 vector.m_data[0] = in_value; 00072 vector.m_data[1] = in_value; 00073 vector.m_data[2] = in_value; 00074 vector.m_data[3] = in_value; 00075 00076 return vector; 00077 } 00078 00079 // _mm_set_ps1 00080 AkForceInline AKSIMD_V4F32 AKSIMD_SET_V4F32( AKSIMD_F32 in_value ) 00081 { 00082 AKSIMD_V4F32 vector; 00083 vector.m_data[0] = in_value; 00084 vector.m_data[1] = in_value; 00085 vector.m_data[2] = in_value; 00086 vector.m_data[3] = in_value; 00087 00088 return vector; 00089 } 00090 00091 00092 AkForceInline AKSIMD_V2F32 AKSIMD_SET_V2F32( AKSIMD_F32 in_value ) 00093 { 00094 AKSIMD_V2F32 vector; 00095 vector.m_data[0] = in_value; 00096 vector.m_data[1] = in_value; 00097 00098 return vector; 00099 } 00100 00101 // _mm_setzero_ps() 00102 AkForceInline AKSIMD_V4F32 AKSIMD_SETZERO_V4F32() 00103 { 00104 AKSIMD_V4F32 vector; 00105 vector.m_data[0] = 0.f; 00106 vector.m_data[1] = 0.f; 00107 vector.m_data[2] = 0.f; 00108 vector.m_data[3] = 0.f; 00109 00110 return vector; 00111 } 00112 00113 AkForceInline AKSIMD_V2F32 AKSIMD_SETZERO_V2F32() 00114 { 00115 AKSIMD_V2F32 vector; 00116 vector.m_data[0] = 0.f; 00117 vector.m_data[1] = 0.f; 00118 00119 return vector; 00120 } 00121 // _mm_setzero_si128() 00122 AkForceInline AKSIMD_V4I32 AKSIMD_SETZERO_V4I32() 00123 { 00124 AKSIMD_V4I32 vector; 00125 vector.m_data[0] = 0; 00126 vector.m_data[1] = 0; 00127 vector.m_data[2] = 0; 00128 vector.m_data[3] = 0; 00129 00130 return vector; 00131 } 00132 00133 00137 AkForceInline AKSIMD_V4F32 AKSIMD_LOAD_SS_V4F32( const AKSIMD_F32* in_pData ) 00138 { 00139 AKSIMD_V4F32 vector; 00140 vector.m_data[0] = *in_pData; 00141 vector.m_data[1] = 0.f; 00142 vector.m_data[2] = 0.f; 00143 vector.m_data[3] = 0.f; 00144 00145 return vector; 00146 } 00147 00149 00150 00153 00154 00155 // _mm_storeu_ps -- The address does not need to be 16-byte aligned. 
00156 #define AKSIMD_STOREU_V4F32( in_pTo, in_vec ) (*(AKSIMD_V4F32*)(in_pTo)) = (in_vec) 00157 00158 // _mm_store_ps -- The address must be 16-byte aligned. 00159 // ????? _mm_storeu_ps vs _mm_store_ps ????? 00160 #define AKSIMD_STORE_V4F32( __addr__, __vName__ ) AKSIMD_STOREU_V4F32(__addr__, __vName__) 00161 00162 // _mm_storeu_si128 00163 #define AKSIMD_STOREU_V4I32( in_pTo, in_vec ) (*(AKSIMD_V4I32*)(in_pTo)) = (in_vec) 00164 00167 AkForceInline void AKSIMD_STORE1_V4F32( AKSIMD_F32* in_pTo, const AKSIMD_V4F32& in_vec ) 00168 { 00169 ((AKSIMD_V4F32*)in_pTo)->m_data[0] = in_vec.m_data[0]; 00170 } 00171 00173 00174 00177 00178 00179 // _mm_cvtepi32_ps 00180 AkForceInline AKSIMD_V4F32 AKSIMD_CONVERT_V4I32_TO_V4F32( const AKSIMD_V4I32& in_from ) 00181 { 00182 AKSIMD_V4F32 vector; 00183 vector.m_data[0] = (AkReal32)in_from.m_data[0]; 00184 vector.m_data[1] = (AkReal32)in_from.m_data[1]; 00185 vector.m_data[2] = (AkReal32)in_from.m_data[2]; 00186 vector.m_data[3] = (AkReal32)in_from.m_data[3]; 00187 00188 return vector; 00189 } 00190 // _mm_cvtps_epi32 00191 AkForceInline AKSIMD_V4I32 AKSIMD_CONVERT_V4F32_TO_V4I32( const AKSIMD_V4F32& in_from ) 00192 { 00193 AKSIMD_V4I32 vector; 00194 vector.m_data[0] = (AkInt32)in_from.m_data[0]; 00195 vector.m_data[1] = (AkInt32)in_from.m_data[1]; 00196 vector.m_data[2] = (AkInt32)in_from.m_data[2]; 00197 vector.m_data[3] = (AkInt32)in_from.m_data[3]; 00198 00199 return vector; 00200 } 00201 00203 00204 00207 00208 00209 // _mm_and_si128 00210 AkForceInline AKSIMD_V4I32 AKSIMD_AND_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00211 { 00212 AKSIMD_V4I32 vector; 00213 vector.m_data[0] = in_vec1.m_data[0] & in_vec2.m_data[0]; 00214 vector.m_data[1] = in_vec1.m_data[1] & in_vec2.m_data[1]; 00215 vector.m_data[2] = in_vec1.m_data[2] & in_vec2.m_data[2]; 00216 vector.m_data[3] = in_vec1.m_data[3] & in_vec2.m_data[3]; 00217 00218 return vector; 00219 } 00220 00223 AkForceInline AKSIMD_V4I32 AKSIMD_CMPGT_V8I16( const 
AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00224 { 00225 AKSIMD_V4I32 vector; 00226 00227 AkInt16 *pVec1,*pVec2,*pVec3; 00228 pVec1 = (AkInt16*)&in_vec1; 00229 pVec2 = (AkInt16*)&in_vec2; 00230 pVec3 = (AkInt16*)&vector; 00231 00232 pVec3[0] = (pVec1[0] > pVec2[0]) ? 0xffff : 0x0; 00233 pVec3[1] = (pVec1[1] > pVec2[1]) ? 0xffff : 0x0; 00234 pVec3[2] = (pVec1[2] > pVec2[2]) ? 0xffff : 0x0; 00235 pVec3[3] = (pVec1[3] > pVec2[3]) ? 0xffff : 0x0; 00236 pVec3[4] = (pVec1[4] > pVec2[4]) ? 0xffff : 0x0; 00237 pVec3[5] = (pVec1[5] > pVec2[5]) ? 0xffff : 0x0; 00238 pVec3[6] = (pVec1[6] > pVec2[6]) ? 0xffff : 0x0; 00239 pVec3[7] = (pVec1[7] > pVec2[7]) ? 0xffff : 0x0; 00240 00241 return vector; 00242 } 00243 00245 AkForceInline AKSIMD_V4UI32 AKSIMD_CMPLE_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00246 { 00247 AKSIMD_V4UI32 vector; 00248 00249 vector.m_data[0] = (in_vec1.m_data[0] <= in_vec2.m_data[0]) ? 0xffffffff : 0x0; 00250 vector.m_data[1] = (in_vec1.m_data[1] <= in_vec2.m_data[1]) ? 0xffffffff : 0x0; 00251 vector.m_data[2] = (in_vec1.m_data[2] <= in_vec2.m_data[2]) ? 0xffffffff : 0x0; 00252 vector.m_data[3] = (in_vec1.m_data[3] <= in_vec2.m_data[3]) ? 
0xffffffff : 0x0; 00253 00254 return vector; 00255 } 00256 00257 00258 AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTLEFT_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy) 00259 { 00260 in_vector.m_data[0] <<= in_shiftBy; 00261 in_vector.m_data[1] <<= in_shiftBy; 00262 in_vector.m_data[2] <<= in_shiftBy; 00263 in_vector.m_data[3] <<= in_shiftBy; 00264 00265 return in_vector; 00266 } 00267 00268 AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTRIGHTARITH_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy) 00269 { 00270 in_vector.m_data[0] >>= in_shiftBy; 00271 in_vector.m_data[1] >>= in_shiftBy; 00272 in_vector.m_data[2] >>= in_shiftBy; 00273 in_vector.m_data[3] >>= in_shiftBy; 00274 00275 return in_vector; 00276 } 00277 00279 00280 00281 00284 00285 // _mm_sub_ps 00286 AkForceInline AKSIMD_V4F32 AKSIMD_SUB_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00287 { 00288 AKSIMD_V4F32 vector; 00289 00290 vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0]; 00291 vector.m_data[1] = in_vec1.m_data[1] - in_vec2.m_data[1]; 00292 vector.m_data[2] = in_vec1.m_data[2] - in_vec2.m_data[2]; 00293 vector.m_data[3] = in_vec1.m_data[3] - in_vec2.m_data[3]; 00294 00295 return vector; 00296 } 00297 00301 00302 AkForceInline AKSIMD_V4F32 AKSIMD_SUB_SS_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00303 { 00304 AKSIMD_V4F32 vector; 00305 00306 vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0]; 00307 vector.m_data[1] = in_vec1.m_data[1]; 00308 vector.m_data[2] = in_vec1.m_data[2]; 00309 vector.m_data[3] = in_vec1.m_data[3]; 00310 00311 return vector; 00312 } 00313 00314 // _mm_add_ps 00315 AkForceInline AKSIMD_V4F32 AKSIMD_ADD_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00316 { 00317 AKSIMD_V4F32 vector; 00318 00319 vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0]; 00320 vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1]; 00321 vector.m_data[2] = in_vec1.m_data[2] + in_vec2.m_data[2]; 00322 vector.m_data[3] = in_vec1.m_data[3] 
+ in_vec2.m_data[3]; 00323 00324 return vector; 00325 } 00326 00327 AkForceInline AKSIMD_V2F32 AKSIMD_ADD_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00328 { 00329 AKSIMD_V2F32 vector; 00330 00331 vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0]; 00332 vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1]; 00333 00334 return vector; 00335 } 00336 00340 AkForceInline AKSIMD_V4F32 AKSIMD_ADD_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b ) 00341 { 00342 AKSIMD_V4F32 vector; 00343 00344 vector.m_data[0] = a.m_data[0] + b.m_data[0]; 00345 vector.m_data[1] = a.m_data[1]; 00346 vector.m_data[2] = a.m_data[2]; 00347 vector.m_data[3] = a.m_data[3]; 00348 00349 return vector; 00350 } 00351 00352 // _mm_mul_ps 00353 AkForceInline AKSIMD_V4F32 AKSIMD_MUL_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00354 { 00355 AKSIMD_V4F32 vector; 00356 00357 vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0]; 00358 vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1]; 00359 vector.m_data[2] = in_vec1.m_data[2] * in_vec2.m_data[2]; 00360 vector.m_data[3] = in_vec1.m_data[3] * in_vec2.m_data[3]; 00361 00362 return vector; 00363 } 00364 00365 AkForceInline AKSIMD_V2F32 AKSIMD_MUL_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00366 { 00367 AKSIMD_V2F32 vector; 00368 00369 vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0]; 00370 vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1]; 00371 00372 return vector; 00373 } 00374 00379 AkForceInline AKSIMD_V4F32 AKSIMD_MUL_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b ) 00380 { 00381 AKSIMD_V4F32 vector; 00382 00383 vector.m_data[0] = a.m_data[0] * b.m_data[0]; 00384 vector.m_data[1] = a.m_data[1]; 00385 vector.m_data[2] = a.m_data[2]; 00386 vector.m_data[3] = a.m_data[3]; 00387 00388 return vector; 00389 } 00390 00392 #define AKSIMD_MADD_V4F32( __a__, __b__, __c__ ) AKSIMD_ADD_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) 00393 
#define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) AKSIMD_SUB_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) 00394 00396 #define AKSIMD_MADD_SS_V4F32( __a__, __b__, __c__ ) AKSIMD_ADD_SS_V4F32( AKSIMD_MUL_SS_V4F32( (__a__), (__b__) ), (__c__) ) 00397 00398 // _mm_min_ps 00399 AkForceInline AKSIMD_V4F32 AKSIMD_MIN_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00400 { 00401 AKSIMD_V4F32 vector; 00402 00403 vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]); 00404 vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]); 00405 vector.m_data[2] = AkMin(in_vec1.m_data[2], in_vec2.m_data[2]); 00406 vector.m_data[3] = AkMin(in_vec1.m_data[3], in_vec2.m_data[3]); 00407 00408 return vector; 00409 } 00410 00411 AkForceInline AKSIMD_V2F32 AKSIMD_MIN_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00412 { 00413 AKSIMD_V2F32 vector; 00414 00415 vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]); 00416 vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]); 00417 00418 return vector; 00419 } 00420 00421 // _mm_max_ps 00422 AkForceInline AKSIMD_V4F32 AKSIMD_MAX_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00423 { 00424 AKSIMD_V4F32 vector; 00425 00426 vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]); 00427 vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]); 00428 vector.m_data[2] = AkMax(in_vec1.m_data[2], in_vec2.m_data[2]); 00429 vector.m_data[3] = AkMax(in_vec1.m_data[3], in_vec2.m_data[3]); 00430 00431 return vector; 00432 } 00433 00434 AkForceInline AKSIMD_V2F32 AKSIMD_MAX_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00435 { 00436 AKSIMD_V2F32 vector; 00437 00438 vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]); 00439 vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]); 00440 00441 return vector; 00442 } 00443 00444 AkForceInline AKSIMD_V4F32 AKSIMD_ABS_V4F32( const AKSIMD_V4F32& in_vec1 ) 00445 { 00446 
AKSIMD_V4F32 vector; 00447 vector.m_data[0] = fabs(in_vec1.m_data[0]); 00448 vector.m_data[1] = fabs(in_vec1.m_data[1]); 00449 vector.m_data[2] = fabs(in_vec1.m_data[2]); 00450 vector.m_data[3] = fabs(in_vec1.m_data[3]); 00451 return vector; 00452 } 00453 00454 AkForceInline AKSIMD_V4F32 AKSIMD_NEG_V4F32( const AKSIMD_V4F32& in_vec1 ) 00455 { 00456 AKSIMD_V4F32 vector; 00457 vector.m_data[0] = -in_vec1.m_data[0]; 00458 vector.m_data[1] = -in_vec1.m_data[1]; 00459 vector.m_data[2] = -in_vec1.m_data[2]; 00460 vector.m_data[3] = -in_vec1.m_data[3]; 00461 return vector; 00462 } 00463 00464 // _mm_sqrt_ps 00465 AkForceInline AKSIMD_V4F32 AKSIMD_SQRT_V4F32( const AKSIMD_V4F32& in_vec ) 00466 { 00467 AKSIMD_V4F32 vCompare; 00468 AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,0) ); 00469 AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,1) ); 00470 AKSIMD_GETELEMENT_V4F32(vCompare,2) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,2) ); 00471 AKSIMD_GETELEMENT_V4F32(vCompare,3) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,3) ); 00472 00473 //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) ); 00474 00475 return vCompare /*res*/; 00476 } 00477 00478 AkForceInline AKSIMD_V2F32 AKSIMD_SQRT_V2F32( const AKSIMD_V2F32& in_vec ) 00479 { 00480 AKSIMD_V2F32 vCompare; 00481 AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,0) ); 00482 AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,1) ); 00483 00484 //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) ); 00485 00486 return vCompare /*res*/; 00487 } 00488 00490 00491 00492 00495 00496 00497 // 00498 // _mm_unpacklo_epi16 00499 // r0 := a0 00500 // r1 := b0 00501 // r2 := a1 00502 // r3 := b1 00503 // r4 := a2 00504 // r5 := b2 00505 // r6 := a3 00506 // r7 := b3 00507 AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKLO_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00508 { 00509 AKSIMD_V4I32 vector; 00510 AkInt16 
*pVec1,*pVec2,*pDest; 00511 pVec1 = (AkInt16*)&in_vec1; 00512 pVec2 = (AkInt16*)&in_vec2; 00513 pDest = (AkInt16*)&vector; 00514 00515 pDest[0] = pVec1[0]; 00516 pDest[1] = pVec2[0]; 00517 pDest[2] = pVec1[1]; 00518 pDest[3] = pVec2[1]; 00519 pDest[4] = pVec1[2]; 00520 pDest[5] = pVec2[2]; 00521 pDest[6] = pVec1[3]; 00522 pDest[7] = pVec2[3]; 00523 00524 return vector; 00525 } 00526 00527 // _mm_unpackhi_epi16 00528 AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKHI_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00529 { 00530 AKSIMD_V4I32 vector; 00531 AkInt16 *pVec1,*pVec2,*pDest; 00532 pVec1 = (AkInt16*)&in_vec1; 00533 pVec2 = (AkInt16*)&in_vec2; 00534 pDest = (AkInt16*)&vector; 00535 00536 pDest[0] = pVec1[4]; 00537 pDest[1] = pVec2[4]; 00538 pDest[2] = pVec1[5]; 00539 pDest[3] = pVec2[5]; 00540 pDest[4] = pVec1[6]; 00541 pDest[5] = pVec2[6]; 00542 pDest[6] = pVec1[7]; 00543 pDest[7] = pVec2[7]; 00544 00545 return vector; 00546 } 00547 00548 // _mm_unpacklo_ps 00549 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKLO_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00550 { 00551 AKSIMD_V4F32 vector; 00552 vector.m_data[0] = in_vec1.m_data[0]; 00553 vector.m_data[1] = in_vec2.m_data[0]; 00554 vector.m_data[2] = in_vec1.m_data[1]; 00555 vector.m_data[3] = in_vec2.m_data[1]; 00556 00557 return vector; 00558 } 00559 00560 // _mm_unpackhi_ps 00561 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKHI_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00562 { 00563 AKSIMD_V4F32 vector; 00564 vector.m_data[0] = in_vec1.m_data[2]; 00565 vector.m_data[1] = in_vec2.m_data[2]; 00566 vector.m_data[2] = in_vec1.m_data[3]; 00567 vector.m_data[3] = in_vec2.m_data[3]; 00568 00569 return vector; 00570 } 00571 00572 // _mm_packs_epi32 00573 AkForceInline AKSIMD_V4I32 AKSIMD_PACKS_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00574 { 00575 AKSIMD_V4I32 vector; 00576 AkInt16 *pDest = (AkInt16*)&vector; 00577 00578 pDest[0] = AkClamp( 
in_vec1.m_data[0], -32768, 32767); 00579 pDest[1] = AkClamp( in_vec1.m_data[1], -32768, 32767); 00580 pDest[2] = AkClamp( in_vec1.m_data[2], -32768, 32767); 00581 pDest[3] = AkClamp( in_vec1.m_data[3], -32768, 32767); 00582 pDest[4] = AkClamp( in_vec2.m_data[0], -32768, 32767); 00583 pDest[5] = AkClamp( in_vec2.m_data[1], -32768, 32767); 00584 pDest[6] = AkClamp( in_vec2.m_data[2], -32768, 32767); 00585 pDest[7] = AkClamp( in_vec2.m_data[3], -32768, 32767); 00586 00587 return vector; 00588 } 00589 00591 00592 00593 00594 //#define AKSIMD_GET_ITEM( vec, index ) vec[index] 00595 00596 00597 00598 00601 00602 00603 // See _MM_SHUFFLE 00604 #define AKSIMD_SHUFFLE( fp3, fp2, fp1, fp0 ) \ 00605 (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) 00606 00607 // See _mm_shuffle_ps 00608 // Usage: AKSIMD_SHUFFLE_V4F32( vec1, vec2, AKSIMD_SHUFFLE( z, y, x, w ) ) 00609 //#define AKSIMD_SHUFFLE_V4F32( a, b, zyxw ) 00610 00611 AkForceInline AKSIMD_V4F32 AKSIMD_SHUFFLE_V4F32( const AKSIMD_V4F32& xyzw, const AKSIMD_V4F32& abcd, int mask ) 00612 { 00613 AKSIMD_V4F32 vector; 00614 vector.m_data[0] = xyzw.m_data[(mask) & 0x3]; 00615 vector.m_data[1] = xyzw.m_data[(mask >> 2) & 0x3]; 00616 vector.m_data[2] = abcd.m_data[(mask >> 4) & 0x3]; 00617 vector.m_data[3] = abcd.m_data[(mask >> 6) & 0x3]; 00618 00619 return vector; 00620 } 00621 00622 00628 #define AKSIMD_MOVEHL_V4F32( a, b ) \ 00629 AKSIMD_SHUFFLE_V4F32( (b), (a), AKSIMD_SHUFFLE(3, 2, 3, 2) ) 00630 00636 #define AKSIMD_MOVELH_V4F32( a, b ) \ 00637 AKSIMD_SHUFFLE_V4F32( (a), (b), AKSIMD_SHUFFLE(1, 0, 1, 0) ) 00638 00640 #define AKSIMD_SHUFFLE_BADC( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(2,3,0,1)); 00641 00643 #define AKSIMD_SHUFFLE_CDAB( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(1,0,3,2)); 00644 00646 #define AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1)) 00647 00649 #define AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, 
AKSIMD_SHUFFLE(2,2,0,0)) 00650 00651 00652 //#include <AK/SoundEngine/Platforms/Generic/AkSimdShuffle.h> 00653 00655 00656 00657 // Old AKSIMD -- will search-and-replace later 00658 #define AkReal32Vector AKSIMD_V4F32 00659 #define AKSIMD_LOAD1( __scalar__ ) AKSIMD_LOAD1_V4F32( &__scalar__ ) 00660 #define AKSIMD_LOADVEC(v) AKSIMD_LOAD_V4F32((const AKSIMD_F32*)((v))) 00661 #define AKSIMD_MUL AKSIMD_MUL_V4F32 00662 #define AKSIMD_STOREVEC AKSIMD_STORE_V4F32 00663 00668 static AkForceInline void AKSIMD_HORIZONTALADD( AKSIMD_V4F32 & vVec ) 00669 { 00670 AKSIMD_V4F32 vHighLow = AKSIMD_MOVEHL_V4F32(vVec, vVec); 00671 vVec = AKSIMD_ADD_V4F32(vVec, vHighLow); 00672 vHighLow = AKSIMD_SHUFFLE_V4F32(vVec, vVec, 0x55); 00673 vVec = AKSIMD_ADD_V4F32(vVec, vHighLow); 00674 } 00675 00677 static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL( const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2 ) 00678 { 00679 static const AKSIMD_V4F32 vSign = { 1.f, -1.f, 1.f, -1.f }; 00680 00681 AKSIMD_V4F32 vTmp1 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(2,2,0,0)); 00682 vTmp1 = AKSIMD_MUL_V4F32( vTmp1, vCIn2 ); 00683 AKSIMD_V4F32 vTmp2 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(3,3,1,1)); 00684 vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vSign ); 00685 vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vCIn2 ); 00686 vTmp2 = AKSIMD_SHUFFLE_BADC( vTmp2 ); 00687 vTmp2 = AKSIMD_ADD_V4F32( vTmp2, vTmp1 ); 00688 return vTmp2; 00689 } 00690 00691 #define AKSIMD_SPLAT_V4F32(var, idx) AKSIMD_SHUFFLE_V4F32(var,var, AKSIMD_SHUFFLE(idx,idx,idx,idx)) 00692 00693 #endif //_AKSIMD_GENERIC_H_ 00694