00001 00002 // 00003 // Copyright (c) 2006 Audiokinetic Inc. / All Rights Reserved 00004 // 00006 00007 // AkSimd.h 00008 00011 00012 #ifndef _AK_SIMD_SSE_H_ 00013 #define _AK_SIMD_SSE_H_ 00014 00015 #include <AK/SoundEngine/Common/AkTypes.h> 00016 #include <xmmintrin.h> 00017 00020 00021 00022 #define AKSIMD_ARCHCACHELINESIZE (64) ///< Assumed cache line width for architectures on this platform 00023 #define AKSIMD_ARCHMAXPREFETCHSIZE (512) ///< Use this to control how much prefetching maximum is desirable (assuming 8-way cache) 00024 00025 #define AKSIMD_PREFETCHMEMORY( __offset__, __add__ ) _mm_prefetch(((char *)(__add__))+(__offset__), _MM_HINT_NTA ) 00026 00028 00029 00032 00033 #define AKSIMD_ALIGNSIZE( __Size__ ) (((__Size__) + 15) & ~15) 00034 00035 00036 00039 00040 00041 typedef float AKSIMD_F32; 00042 typedef __m128 AKSIMD_V4F32; 00043 typedef AKSIMD_V4F32 AKSIMD_V4COND; 00044 typedef AKSIMD_V4F32 AKSIMD_V4FCOND; 00045 #define AKSIMD_V4F32_SUPPORTED 00046 00048 00049 00050 00053 00054 00056 #define AKSIMD_LOAD_V4F32( __addr__ ) _mm_load_ps( (AkReal32*)(__addr__) ) 00057 00060 #define AKSIMD_LOADU_V4F32( __addr__ ) _mm_loadu_ps( (__addr__) ) 00061 00064 #define AKSIMD_LOAD1_V4F32( __scalar__ ) _mm_load1_ps( &(__scalar__) ) 00065 00068 #define AKSIMD_SET_V4F32( __scalar__ ) _mm_set_ps1( (__scalar__) ) 00069 00072 #define AKSIMD_SETZERO_V4F32() _mm_setzero_ps() 00073 00077 #define AKSIMD_LOAD_SS_V4F32( __addr__ ) _mm_load_ss( (__addr__) ) 00078 00080 00081 00082 00085 00086 00089 #define AKSIMD_STORE_V4F32( __addr__, __vec__ ) _mm_store_ps( (AkReal32*)(__addr__), (__vec__) ) 00090 00093 #define AKSIMD_STOREU_V4F32( __addr__, __vec__ ) _mm_storeu_ps( (AkReal32*)(__addr__), (__vec__) ) 00094 00097 #define AKSIMD_STORE1_V4F32( __addr__, __vec__ ) _mm_store_ss( (AkReal32*)(__addr__), (__vec__) ) 00098 00100 00101 00104 00105 00106 // Macro for shuffle parameter for AKSIMD_SHUFFLE_V4F32() (see _MM_SHUFFLE) 00107 #define AKSIMD_SHUFFLE( fp3, fp2, fp1, fp0 ) 
_MM_SHUFFLE( (fp3), (fp2), (fp1), (fp0) ) 00108 00111 // Usage: AKSIMD_SHUFFLE_V4F32( vec1, vec2, AKSIMD_SHUFFLE( z, y, x, w ) ) 00112 #define AKSIMD_SHUFFLE_V4F32( a, b, i ) _mm_shuffle_ps( a, b, i ) 00113 00119 #define AKSIMD_MOVEHL_V4F32( a, b ) _mm_movehl_ps( a, b ) 00120 00126 #define AKSIMD_MOVELH_V4F32( a, b ) _mm_movelh_ps( a, b ) 00127 00129 #define AKSIMD_SHUFFLE_BADC( __a__ ) _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(2,3,0,1)) 00130 00132 #define AKSIMD_SHUFFLE_CDAB( __a__ ) _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(1,0,3,2)) 00133 00135 #define AKSIMD_SHUFFLE_BCDA( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), _MM_SHUFFLE(0,3,2,1)) 00136 00138 #define AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1)) 00139 00141 #define AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0)) 00142 00144 00145 00146 00149 00150 00153 #define AKSIMD_SUB_V4F32( a, b ) _mm_sub_ps( a, b ) 00154 00158 #define AKSIMD_SUB_SS_V4F32( a, b ) _mm_sub_ss( a, b ) 00159 00162 #define AKSIMD_ADD_V4F32( a, b ) _mm_add_ps( a, b ) 00163 00167 #define AKSIMD_ADD_SS_V4F32( a, b ) _mm_add_ss( a, b ) 00168 00171 #define AKSIMD_MUL_V4F32( a, b ) _mm_mul_ps( a, b ) 00172 00173 #define AKSIMD_DIV_V4F32( a, b ) _mm_div_ps( a, b ) 00174 00179 #define AKSIMD_MUL_SS_V4F32( a, b ) _mm_mul_ss( a, b ) 00180 00182 #define AKSIMD_MADD_V4F32( __a__, __b__, __c__ ) _mm_add_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) ) 00183 #define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) _mm_sub_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) ) 00184 00186 #define AKSIMD_MADD_SS_V4F32( __a__, __b__, __c__ ) _mm_add_ss( _mm_mul_ss( (__a__), (__b__) ), (__c__) ) 00187 00190 #define AKSIMD_MIN_V4F32( a, b ) _mm_min_ps( a, b ) 00191 00194 #define AKSIMD_MAX_V4F32( a, b ) _mm_max_ps( a, b ) 00195 00197 #define AKSIMD_ABS_V4F32( a ) _mm_andnot_ps(_mm_set1_ps(-0.f), a) 00198 00200 #define AKSIMD_NEG_V4F32( __a__ ) _mm_xor_ps(_mm_set1_ps(-0.f), __a__) 00201 00203 #define 
AKSIMD_SQRT_V4F32( __a__ ) _mm_sqrt_ps( (__a__) ) 00204 00209 static AkForceInline void AKSIMD_HORIZONTALADD(AKSIMD_V4F32 & vVec) 00210 { 00211 __m128 vHighLow = _mm_movehl_ps(vVec, vVec); 00212 vVec = _mm_add_ps(vVec, vHighLow); 00213 vHighLow = _mm_shuffle_ps(vVec, vVec, 0x55); 00214 vVec = _mm_add_ps(vVec, vHighLow); 00215 } 00216 00217 static AkForceInline AKSIMD_V4F32 AKSIMD_DOTPRODUCT( AKSIMD_V4F32 & vVec, const AKSIMD_V4F32 & vfSigns ) 00218 { 00219 AKSIMD_V4F32 vfDotProduct = AKSIMD_MUL_V4F32( vVec, vfSigns ); 00220 AKSIMD_HORIZONTALADD( vfDotProduct ); 00221 return AKSIMD_SHUFFLE_V4F32( vfDotProduct, vfDotProduct, AKSIMD_SHUFFLE(0,0,0,0) ); 00222 } 00223 00225 static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL( const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2 ) 00226 { 00227 static const AKSIMD_V4F32 vSign = { -1.f, 1.f, -1.f, 1.f }; 00228 00229 AKSIMD_V4F32 vTmp1 = _mm_shuffle_ps( vCIn1, vCIn1, _MM_SHUFFLE(2,2,0,0)); 00230 vTmp1 = AKSIMD_MUL_V4F32( vTmp1, vCIn2 ); 00231 AKSIMD_V4F32 vTmp2 = _mm_shuffle_ps( vCIn1, vCIn1, _MM_SHUFFLE(3,3,1,1)); 00232 vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vSign ); 00233 vTmp2 = AKSIMD_MADD_V4F32( vTmp2, AKSIMD_SHUFFLE_BADC( vCIn2 ), vTmp1 ); 00234 return vTmp2; 00235 } 00236 00237 #ifdef AK_SSE3 00238 00239 #include <pmmintrin.h> 00240 00242 static AKSIMD_V4F32 AKSIMD_COMPLEXMUL_SSE3( const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2 ) 00243 { 00244 AKSIMD_V4F32 vXMM0 = _mm_moveldup_ps(vCIn1); // multiplier real (a1, a1, a0, a0) 00245 vXMM0 = AKSIMD_MUL_V4F32(vXMM0, vCIn2); // temp1 (a1d1, a1c1, a0d0, a0c0) 00246 AKSIMD_V4F32 xMM1 = _mm_shuffle_ps(vCIn2, vCIn2, 0xB1); // shuf multiplicand(c1, d1, c0, d0) 00247 AKSIMD_V4F32 xMM2 = _mm_movehdup_ps(vCIn1); // multiplier imag (b1, b1, b0, b0) 00248 xMM2 = AKSIMD_MUL_V4F32( xMM2, xMM1); // temp2 (b1c1, b1d1, b0c0, b0d0) 00249 AKSIMD_V4F32 vCOut = _mm_addsub_ps(vXMM0, xMM2); // b1c1+a1d1, a1c1-b1d1, a0d0+b0d0, a0c0-b0c0 00250 return vCOut; 00251 } 00252 00253 #endif 00254 00255 #if 
defined _MSC_VER && ( _MSC_VER <= 1600 ) 00256 #define AKSIMD_ASSERTFLUSHZEROMODE AKASSERT( _MM_GET_FLUSH_ZERO_MODE(dummy) == _MM_FLUSH_ZERO_ON ) 00257 #else 00258 #define AKSIMD_ASSERTFLUSHZEROMODE AKASSERT( _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON ) 00259 #endif 00260 00262 00263 00264 00267 00268 00270 #define AKSIMD_ADD_V4I32( a, b ) _mm_add_epi32( a, b ) 00271 00273 00274 00275 00278 00279 00282 #define AKSIMD_UNPACKLO_V4F32( a, b ) _mm_unpacklo_ps( a, b ) 00283 00286 #define AKSIMD_UNPACKHI_V4F32( a, b ) _mm_unpackhi_ps( a, b ) 00287 00289 00290 00294 00295 00296 #define AKSIMD_CMP_CTRLMASK __m128 00297 00299 #define AKSIMD_LTEQ_V4F32( __a__, __b__ ) _mm_cmple_ps( (__a__), (__b__) ) 00300 00302 #define AKSIMD_GTEQ_V4F32( __a__, __b__ ) _mm_cmpge_ps( (__a__), (__b__) ) 00303 00305 #define AKSIMD_EQ_V4F32( __a__, __b__ ) _mm_cmpeq_ps( (__a__), (__b__) ) 00306 00308 static AkForceInline AKSIMD_V4F32 AKSIMD_VSEL_V4F32( AKSIMD_V4F32 vA, AKSIMD_V4F32 vB, AKSIMD_V4F32 vMask ) 00309 { 00310 vB = _mm_and_ps( vB, vMask ); 00311 vA= _mm_andnot_ps( vMask, vA ); 00312 return _mm_or_ps( vA, vB ); 00313 } 00314 00315 // (cond1 >= cond2) ? b : a. 00316 #define AKSIMD_SEL_GTEQ_V4F32( __a__, __b__, __cond1__, __cond2__ ) AKSIMD_VSEL_V4F32( __a__, __b__, AKSIMD_GTEQ_V4F32( __cond1__, __cond2__ ) ) 00317 00318 // a >= 0 ? b : c ... Written, like, you know, the normal C++ operator syntax. 
00319 #define AKSIMD_SEL_GTEZ_V4F32( __a__, __b__, __c__ ) AKSIMD_VSEL_V4F32( (__c__), (__b__), AKSIMD_GTEQ_V4F32( __a__, _mm_set1_ps(0) ) ) 00320 00321 #define AKSIMD_SPLAT_V4F32(var, idx) AKSIMD_SHUFFLE_V4F32(var,var, AKSIMD_SHUFFLE(idx,idx,idx,idx)) 00322 00324 00325 00326 #include <emmintrin.h> 00327 00328 typedef __m128i AKSIMD_V4I32; 00329 00330 typedef AKSIMD_V4I32 AKSIMD_V4ICOND; 00331 00333 #define AKSIMD_LOADU_V4I32( __addr__ ) _mm_loadu_si128( (__addr__) ) 00334 00336 #define AKSIMD_LOAD_V4I32( __addr__ ) _mm_load_si128( (__addr__) ) 00337 00339 #define AKSIMD_SETZERO_V4I32() _mm_setzero_si128() 00340 00341 #define AKSIMD_SET_V4I32( __scalar__ ) _mm_set1_epi32( (__scalar__) ) 00342 00343 #define AKSIMD_SETV_V4I32( _d, _c, _b, _a ) _mm_set_epi32( (_d), (_c), (_b), (_a) ) 00344 00346 #define AKSIMD_STORE_V4I32( __addr__, __vec__ ) _mm_store_si128( (__addr__), (__vec__) ) 00347 00350 #define AKSIMD_STOREU_V4I32( __addr__, __vec__ ) _mm_storeu_si128( (__addr__), (__vec__) ) 00351 00354 00355 00358 #define AKSIMD_CONVERT_V4I32_TO_V4F32( __vec__ ) _mm_cvtepi32_ps( (__vec__) ) 00359 00362 #define AKSIMD_CONVERT_V4F32_TO_V4I32( __vec__ ) _mm_cvtps_epi32( (__vec__) ) 00363 00366 #define AKSIMD_TRUNCATE_V4F32_TO_V4I32( __vec__ ) _mm_cvttps_epi32( (__vec__) ) 00367 00370 #define AKSIMD_AND_V4I32( __a__, __b__ ) _mm_and_si128( (__a__), (__b__) ) 00371 00374 #define AKSIMD_CMPGT_V8I16( __a__, __b__ ) _mm_cmpgt_epi16( (__a__), (__b__) ) 00375 00377 00378 00381 #define AKSIMD_UNPACKLO_VECTOR8I16( a, b ) _mm_unpacklo_epi16( a, b ) 00382 00385 #define AKSIMD_UNPACKHI_VECTOR8I16( a, b ) _mm_unpackhi_epi16( a, b ) 00386 00389 #define AKSIMD_PACKS_V4I32( a, b ) _mm_packs_epi32( a, b ) 00390 00393 00394 00397 #define AKSIMD_SHIFTLEFT_V4I32( __vec__, __shiftBy__ ) \ 00398 _mm_slli_epi32( (__vec__), (__shiftBy__) ) 00399 00402 #define AKSIMD_SHIFTRIGHTARITH_V4I32( __vec__, __shiftBy__ ) \ 00403 _mm_srai_epi32( (__vec__), (__shiftBy__) ) 00404 00406 00407 00408 #if defined( 
AK_CPU_X86 ) && !defined(AK_IOS) /// MMX 00409 00410 typedef __m64 AKSIMD_V2F32; 00411 00412 #endif 00413 00414 00415 #endif //_AK_SIMD_SSE_H_
Des questions ? Des problèmes ? Besoin de plus d'informations ? Contactez-nous, nous pouvons vous aider !
Visitez notre page d'Aide. Enregistrez votre projet et nous vous aiderons à démarrer sans aucune obligation !
Partir du bon pied avec Wwise