버전
menu_open
link

include/AK/SoundEngine/Platforms/SSE/AkSimd.h

Go to the documentation of this file.
00001 
00002 //
00003 // Copyright (c) 2006 Audiokinetic Inc. / All Rights Reserved
00004 //
00006 
00007 // AkSimd.h
00008 
00011 
00012 #ifndef _AK_SIMD_SSE_H_
00013 #define _AK_SIMD_SSE_H_
00014 
00015 #include <AK/SoundEngine/Common/AkTypes.h>
00016 #include <xmmintrin.h>
00017 
00020 
00021 
00022 #define AKSIMD_ARCHCACHELINESIZE    (64)                ///< Assumed cache line width for architectures on this platform
00023 #define AKSIMD_ARCHMAXPREFETCHSIZE  (512)               ///< Maximum amount of prefetching considered desirable (assuming 8-way cache)
00024 
00025 #define AKSIMD_PREFETCHMEMORY( __offset__, __add__ ) _mm_prefetch(((char *)(__add__))+(__offset__), _MM_HINT_NTA ) ///< Prefetch the byte address (__add__ + __offset__) using the non-temporal (NTA) hint
00026 
00028 
00029 
00032 
00033 #define AKSIMD_ALIGNSIZE( __Size__ ) (((__Size__) + 15) & ~15) ///< Round __Size__ up to the next multiple of 16
00034 
00035 
00036 
00039 
00040 
00041 typedef float   AKSIMD_F32;     ///< 32-bit floating-point scalar
00042 typedef __m128  AKSIMD_V4F32;   ///< Vector of four 32-bit floats (SSE __m128)
00043 typedef AKSIMD_V4F32 AKSIMD_V4COND;  ///< Same underlying type as AKSIMD_V4F32; presumably intended for comparison results (confirm against callers)
00044 typedef AKSIMD_V4F32 AKSIMD_V4FCOND;     ///< Same underlying type as AKSIMD_V4F32; presumably intended for float comparison results (confirm against callers)
00045 #define AKSIMD_V4F32_SUPPORTED ///< Defined (with no value) alongside the AKSIMD_V4F32 typedef
00046 
00048 
00049 
00050 
00053 
00054 
00056 #define AKSIMD_LOAD_V4F32( __addr__ ) _mm_load_ps( (AkReal32*)(__addr__) ) ///< Load four floats from __addr__ (cast to AkReal32*); _mm_load_ps requires a 16-byte aligned address
00057 
00060 #define AKSIMD_LOADU_V4F32( __addr__ ) _mm_loadu_ps( (__addr__) ) ///< Load four floats from __addr__ with no alignment requirement; no cast is applied, so __addr__ must already be a float pointer
00061 
00064 #define AKSIMD_LOAD1_V4F32( __scalar__ ) _mm_load1_ps( &(__scalar__) ) ///< Load one float and replicate it in all four lanes; __scalar__ must be an lvalue because its address is taken
00065 
00068 #define AKSIMD_SET_V4F32( __scalar__ ) _mm_set_ps1( (__scalar__) ) ///< Replicate the value __scalar__ in all four lanes
00069 
00072 #define AKSIMD_SETZERO_V4F32() _mm_setzero_ps() ///< Vector with all four lanes set to zero
00073 
00077 #define AKSIMD_LOAD_SS_V4F32( __addr__ ) _mm_load_ss( (__addr__) ) ///< Load one float from __addr__ into lane 0; the upper three lanes are zeroed
00078 
00080 
00081 
00082 
00085 
00086 
00089 #define AKSIMD_STORE_V4F32( __addr__, __vec__ ) _mm_store_ps( (AkReal32*)(__addr__), (__vec__) ) ///< Store all four lanes of __vec__ to __addr__; _mm_store_ps requires a 16-byte aligned address
00090 
00093 #define AKSIMD_STOREU_V4F32( __addr__, __vec__ ) _mm_storeu_ps( (AkReal32*)(__addr__), (__vec__) ) ///< Store all four lanes of __vec__ to __addr__ with no alignment requirement
00094 
00097 #define AKSIMD_STORE1_V4F32( __addr__, __vec__ ) _mm_store_ss( (AkReal32*)(__addr__), (__vec__) ) ///< Store only lane 0 of __vec__ (a single float) to __addr__
00098 
00100 
00101 
00104 
00105 
00106 // Macro for shuffle parameter for AKSIMD_SHUFFLE_V4F32() (see _MM_SHUFFLE)
00107 #define AKSIMD_SHUFFLE( fp3, fp2, fp1, fp0 ) _MM_SHUFFLE( (fp3), (fp2), (fp1), (fp0) ) ///< Build the shuffle immediate: fp0 picks the source lane for result lane 0, ..., fp3 for result lane 3
00108 
00111 // Usage: AKSIMD_SHUFFLE_V4F32( vec1, vec2, AKSIMD_SHUFFLE( z, y, x, w ) )
00112 #define AKSIMD_SHUFFLE_V4F32( a, b, i ) _mm_shuffle_ps( a, b, i ) ///< Result lanes 0-1 are picked from a and lanes 2-3 from b, as selected by the immediate i
00113 
00119 #define AKSIMD_MOVEHL_V4F32( a, b ) _mm_movehl_ps( a, b ) ///< Result lanes 0-1 = lanes 2-3 of b; result lanes 2-3 = lanes 2-3 of a
00120 
00126 #define AKSIMD_MOVELH_V4F32( a, b ) _mm_movelh_ps( a, b ) ///< Result lanes 0-1 = lanes 0-1 of a; result lanes 2-3 = lanes 0-1 of b
00127 
00129 #define AKSIMD_SHUFFLE_BADC( __a__ ) _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(2,3,0,1)) ///< Swap the two lanes of each adjacent pair: result lanes (0,1,2,3) = source lanes (1,0,3,2)
00130 
00132 #define AKSIMD_SHUFFLE_CDAB( __a__ ) _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(1,0,3,2)) ///< Swap the lower and upper halves: result lanes (0,1,2,3) = source lanes (2,3,0,1)
00133 
00135 #define AKSIMD_SHUFFLE_BCDA( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), _MM_SHUFFLE(0,3,2,1)) ///< Rotate by one lane: result lanes (0,1,2,3) = source lanes (1,2,3,0)
00136 
00138 #define AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1)) ///< Duplicate the odd lanes: result lanes (0,1,2,3) = source lanes (1,1,3,3)
00139 
00141 #define AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0)) ///< Duplicate the even lanes: result lanes (0,1,2,3) = source lanes (0,0,2,2)
00142 
00144 
00145 
00146 
00149 
00150 
00153 #define AKSIMD_SUB_V4F32( a, b ) _mm_sub_ps( a, b ) ///< Per-lane a - b on all four lanes
00154 
00158 #define AKSIMD_SUB_SS_V4F32( a, b ) _mm_sub_ss( a, b ) ///< Lane 0 = a - b; lanes 1-3 are copied from a
00159 
00162 #define AKSIMD_ADD_V4F32( a, b ) _mm_add_ps( a, b ) ///< Per-lane a + b on all four lanes
00163 
00167 #define AKSIMD_ADD_SS_V4F32( a, b ) _mm_add_ss( a, b ) ///< Lane 0 = a + b; lanes 1-3 are copied from a
00168 
00171 #define AKSIMD_MUL_V4F32( a, b ) _mm_mul_ps( a, b ) ///< Per-lane a * b on all four lanes
00172 
00173 #define AKSIMD_DIV_V4F32( a, b ) _mm_div_ps( a, b ) ///< Per-lane a / b on all four lanes
00174 
00179 #define AKSIMD_MUL_SS_V4F32( a, b ) _mm_mul_ss( a, b ) ///< Lane 0 = a * b; lanes 1-3 are copied from a
00180 
00182 #define AKSIMD_MADD_V4F32( __a__, __b__, __c__ ) _mm_add_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) ) ///< Per-lane (a * b) + c, done as a separate multiply then add
00183 #define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) _mm_sub_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) ) ///< Per-lane (a * b) - c, done as a separate multiply then subtract
00184 
00186 #define AKSIMD_MADD_SS_V4F32( __a__, __b__, __c__ ) _mm_add_ss( _mm_mul_ss( (__a__), (__b__) ), (__c__) ) ///< Lane 0 = (a * b) + c; lanes 1-3 are copied from a
00187 
00190 #define AKSIMD_MIN_V4F32( a, b ) _mm_min_ps( a, b ) ///< Per-lane minimum of a and b
00191 
00194 #define AKSIMD_MAX_V4F32( a, b ) _mm_max_ps( a, b ) ///< Per-lane maximum of a and b
00195 
00197 #define AKSIMD_ABS_V4F32( a ) _mm_andnot_ps(_mm_set1_ps(-0.f), a) ///< Per-lane absolute value: -0.f has only the sign bit set, and and-not clears that bit in a
00198 
00200 #define AKSIMD_NEG_V4F32( __a__ ) _mm_xor_ps(_mm_set1_ps(-0.f), __a__) ///< Per-lane negation: xor with -0.f flips the sign bit
00201 
00203 #define AKSIMD_SQRT_V4F32( __a__ ) _mm_sqrt_ps( (__a__) ) ///< Per-lane square root
00204 
/// Sums the four lanes of vVec in place.
/// On return lane 0 of vVec holds (v0 + v2) + (v1 + v3); the remaining lanes
/// hold intermediate sums and should not be relied upon.
/// \param vVec Vector to reduce; overwritten with the result.
00209 static AkForceInline void AKSIMD_HORIZONTALADD(AKSIMD_V4F32 & vVec)
00210 {
00211     const __m128 vUpperHalf = _mm_movehl_ps(vVec, vVec);                                 // lanes 2-3 copied down to lanes 0-1
00212     const __m128 vPairSums = _mm_add_ps(vVec, vUpperHalf);                              // lane 0 = v0+v2, lane 1 = v1+v3
00213     const __m128 vLane1 = _mm_shuffle_ps(vPairSums, vPairSums, _MM_SHUFFLE(1,1,1,1));   // broadcast lane 1 (immediate 0x55)
00214     vVec = _mm_add_ps(vPairSums, vLane1);                                                // lane 0 = full sum
00215 } 
00216 
/// Multiplies vVec and vfSigns lane by lane, sums the four products, and
/// returns that sum replicated in all four lanes.
/// \param vVec    First operand. Only read, so it is taken by const reference,
///                which also lets callers pass const vectors and temporaries.
/// \param vfSigns Second operand, multiplied element-wise with vVec.
/// \return Vector whose four lanes all hold the sum of the element-wise products.
00217 static AkForceInline AKSIMD_V4F32 AKSIMD_DOTPRODUCT( const AKSIMD_V4F32 & vVec, const AKSIMD_V4F32 & vfSigns )
00218 {
00219     AKSIMD_V4F32 vfDotProduct = AKSIMD_MUL_V4F32( vVec, vfSigns );
00220     AKSIMD_HORIZONTALADD( vfDotProduct ); // leaves the four-lane sum in lane 0
00221     return AKSIMD_SHUFFLE_V4F32( vfDotProduct, vfDotProduct, AKSIMD_SHUFFLE(0,0,0,0) ); // broadcast lane 0 to every lane
00222 }
00223 
/// Multiplies two pairs of packed complex numbers.
/// Each input holds two complex values stored as (real, imag) in lanes (0,1) and (2,3).
/// With vCIn1 = (a0 + i*b0, a1 + i*b1) and vCIn2 = (c0 + i*d0, c1 + i*d1), the result
/// lanes are (a0c0 - b0d0, a0d0 + b0c0, a1c1 - b1d1, a1d1 + b1c1).
00225 static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL( const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2 )
00226 {
00227     static const AKSIMD_V4F32 vAlternatingSign = { -1.f, 1.f, -1.f, 1.f }; // negative on the real lanes (0 and 2)
00228 
00229     const AKSIMD_V4F32 vRealParts = _mm_shuffle_ps( vCIn1, vCIn1, _MM_SHUFFLE(2,2,0,0)); // (a0, a0, a1, a1)
00230     const AKSIMD_V4F32 vImagParts = _mm_shuffle_ps( vCIn1, vCIn1, _MM_SHUFFLE(3,3,1,1)); // (b0, b0, b1, b1)
00231     const AKSIMD_V4F32 vRealProducts = AKSIMD_MUL_V4F32( vRealParts, vCIn2 );             // (a0c0, a0d0, a1c1, a1d1)
00232     const AKSIMD_V4F32 vSignedImag = AKSIMD_MUL_V4F32( vImagParts, vAlternatingSign );    // (-b0, b0, -b1, b1)
00233     const AKSIMD_V4F32 vSwappedIn2 = AKSIMD_SHUFFLE_BADC( vCIn2 );                        // (d0, c0, d1, c1)
00234     return AKSIMD_MADD_V4F32( vSignedImag, vSwappedIn2, vRealProducts );
00235 }
00236 
00237 #ifdef AK_SSE3
00238 
00239 #include <pmmintrin.h>
00240 
/// Multiplies two pairs of packed complex numbers using SSE3 instructions.
/// Each input holds two complex values stored as (real, imag) in lanes (0,1) and (2,3):
/// vCIn1 = (a0 + i*b0, a1 + i*b1), vCIn2 = (c0 + i*d0, c1 + i*d1).
/// Lane listings in the comments below run from lane 3 down to lane 0.
/// \return The two complex products, in the same (real, imag) layout.
00242 static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL_SSE3( const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2 )
00243 {
00244     AKSIMD_V4F32 vXMM0 = _mm_moveldup_ps(vCIn1);            // multiplier real  (a1,   a1,   a0,   a0)
00245     vXMM0 = AKSIMD_MUL_V4F32(vXMM0, vCIn2);                 // temp1            (a1d1, a1c1, a0d0, a0c0)
00246     AKSIMD_V4F32 vXMM1 = AKSIMD_SHUFFLE_BADC( vCIn2 );      // shuf multiplicand(c1,   d1,   c0,   d0); same immediate as 0xB1
00247     AKSIMD_V4F32 vXMM2 = _mm_movehdup_ps(vCIn1);            // multiplier imag  (b1,   b1,   b0,   b0)
00248     vXMM2 = AKSIMD_MUL_V4F32( vXMM2, vXMM1 );               // temp2            (b1c1, b1d1, b0c0, b0d0)
00249     AKSIMD_V4F32 vCOut = _mm_addsub_ps(vXMM0, vXMM2);       // a1d1+b1c1, a1c1-b1d1, a0d0+b0c0, a0c0-b0d0 (subtract on even lanes, add on odd lanes)
00250     return vCOut;
00251 }
00252 
00253 #endif
00254 
00255 #if defined _MSC_VER && ( _MSC_VER <= 1600 ) // Microsoft compilers up to version 1600 (Visual C++ 2010)
00256     #define AKSIMD_ASSERTFLUSHZEROMODE  AKASSERT( _MM_GET_FLUSH_ZERO_MODE(dummy) == _MM_FLUSH_ZERO_ON ) ///< Assert that flush-to-zero mode is on; a placeholder argument is passed here (presumably these compilers declare _MM_GET_FLUSH_ZERO_MODE with a parameter -- confirm)
00257 #else
00258     #define AKSIMD_ASSERTFLUSHZEROMODE  AKASSERT( _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON ) ///< Assert that flush-to-zero mode is on
00259 #endif
00260 
00262 
00263 
00264 
00267 
00268 
00270 #define AKSIMD_ADD_V4I32( a, b ) _mm_add_epi32( a, b ) ///< Per-lane a + b on four 32-bit integers; expands to an SSE2 intrinsic (<emmintrin.h> is only included further down in this header)
00271 
00273 
00274 
00275 
00278 
00279 
00282 #define AKSIMD_UNPACKLO_V4F32( a, b ) _mm_unpacklo_ps( a, b ) ///< Interleave the lower halves: result lanes (0,1,2,3) = (a0, b0, a1, b1)
00283 
00286 #define AKSIMD_UNPACKHI_V4F32( a, b ) _mm_unpackhi_ps( a, b ) ///< Interleave the upper halves: result lanes (0,1,2,3) = (a2, b2, a3, b3)
00287 
00289 
00290 
00294 
00295 
00296 #define AKSIMD_CMP_CTRLMASK __m128 ///< Type name for comparison masks; expands to __m128
00297 
00299 #define AKSIMD_LTEQ_V4F32( __a__, __b__ ) _mm_cmple_ps( (__a__), (__b__) ) ///< Per-lane a <= b: each lane is all ones where true, all zeros where false
00300 
00302 #define AKSIMD_GTEQ_V4F32( __a__, __b__ ) _mm_cmpge_ps( (__a__), (__b__) ) ///< Per-lane a >= b: each lane is all ones where true, all zeros where false
00303 
00305 #define AKSIMD_EQ_V4F32( __a__, __b__ ) _mm_cmpeq_ps( (__a__), (__b__) ) ///< Per-lane a == b: each lane is all ones where true, all zeros where false
00306 
/// Bitwise select between two vectors.
/// \param vA    Value used for every bit where vMask is 0.
/// \param vB    Value used for every bit where vMask is 1.
/// \param vMask Bit mask controlling the selection.
/// \return (vB & vMask) | (vA & ~vMask)
00308 static AkForceInline AKSIMD_V4F32 AKSIMD_VSEL_V4F32( AKSIMD_V4F32 vA, AKSIMD_V4F32 vB, AKSIMD_V4F32 vMask )
00309 {
00310     const AKSIMD_V4F32 vKeptFromB = _mm_and_ps( vB, vMask );       // bits of vB where the mask is set
00311     const AKSIMD_V4F32 vKeptFromA = _mm_andnot_ps( vMask, vA );    // bits of vA where the mask is clear
00312     return _mm_or_ps( vKeptFromA, vKeptFromB );
00313 }
00314 
00315 // Per lane: (cond1 >= cond2) ? b : a.
00316 #define AKSIMD_SEL_GTEQ_V4F32( __a__, __b__, __cond1__, __cond2__ ) AKSIMD_VSEL_V4F32( __a__, __b__, AKSIMD_GTEQ_V4F32( __cond1__, __cond2__ ) )
00317 
00318 // Per lane: a >= 0 ? b : c. Arguments are in the same order as the C++ conditional operator.
00319 #define AKSIMD_SEL_GTEZ_V4F32( __a__, __b__, __c__ ) AKSIMD_VSEL_V4F32( (__c__), (__b__), AKSIMD_GTEQ_V4F32( __a__, _mm_set1_ps(0) ) )
00320 
00321 #define AKSIMD_SPLAT_V4F32(var, idx) AKSIMD_SHUFFLE_V4F32(var,var, AKSIMD_SHUFFLE(idx,idx,idx,idx)) ///< Replicate lane idx of var in all four lanes; idx feeds the shuffle immediate, so it must be a compile-time constant
00322 
00324 
00325 
00326 #include <emmintrin.h>
00327 
00328 typedef __m128i AKSIMD_V4I32;   ///< Vector of four 32-bit integers (SSE2 __m128i)
00329 
00330 typedef AKSIMD_V4I32 AKSIMD_V4ICOND; ///< Same underlying type as AKSIMD_V4I32; presumably intended for integer comparison results (confirm against callers)
00331 
00333 #define AKSIMD_LOADU_V4I32( __addr__ ) _mm_loadu_si128( (__addr__) ) ///< Load 128 bits from __addr__ with no alignment requirement; no cast is applied, so __addr__ must already be an __m128i pointer
00334 
00336 #define AKSIMD_LOAD_V4I32( __addr__ ) _mm_load_si128( (__addr__) ) ///< Load 128 bits from __addr__; _mm_load_si128 requires a 16-byte aligned address; no cast is applied
00337 
00339 #define AKSIMD_SETZERO_V4I32() _mm_setzero_si128() ///< Vector with all bits set to zero
00340 
00341 #define AKSIMD_SET_V4I32( __scalar__ ) _mm_set1_epi32( (__scalar__) ) ///< Replicate the 32-bit integer __scalar__ in all four lanes
00342 
00343 #define AKSIMD_SETV_V4I32( _d, _c, _b, _a ) _mm_set_epi32( (_d), (_c), (_b), (_a) ) ///< Build a vector from four 32-bit integers; _a goes to lane 0 and _d to lane 3
00344 
00346 #define AKSIMD_STORE_V4I32( __addr__, __vec__ ) _mm_store_si128( (__addr__), (__vec__) ) ///< Store 128 bits to __addr__; _mm_store_si128 requires a 16-byte aligned address; no cast is applied
00347 
00350 #define AKSIMD_STOREU_V4I32( __addr__, __vec__ ) _mm_storeu_si128( (__addr__), (__vec__) ) ///< Store 128 bits to __addr__ with no alignment requirement; no cast is applied
00351 
00354 
00355 
00358 #define AKSIMD_CONVERT_V4I32_TO_V4F32( __vec__ ) _mm_cvtepi32_ps( (__vec__) ) ///< Convert four signed 32-bit integers to four floats
00359 
00362 #define AKSIMD_CONVERT_V4F32_TO_V4I32( __vec__ ) _mm_cvtps_epi32( (__vec__) ) ///< Convert four floats to signed 32-bit integers, rounding according to the current MXCSR rounding mode
00363 
00366 #define AKSIMD_TRUNCATE_V4F32_TO_V4I32( __vec__ ) _mm_cvttps_epi32( (__vec__) ) ///< Convert four floats to signed 32-bit integers, truncating toward zero
00367 
00370 #define AKSIMD_AND_V4I32( __a__, __b__ ) _mm_and_si128( (__a__), (__b__) ) ///< Bitwise AND of the full 128 bits
00371 
00374 #define AKSIMD_CMPGT_V8I16( __a__, __b__ ) _mm_cmpgt_epi16( (__a__), (__b__) ) ///< Signed a > b on eight 16-bit lanes: each lane is all ones where true, all zeros where false
00375 
00377 
00378 
00381 #define AKSIMD_UNPACKLO_VECTOR8I16( a, b ) _mm_unpacklo_epi16( a, b ) ///< Interleave the lower four 16-bit lanes of a and b: (a0, b0, a1, b1, a2, b2, a3, b3)
00382 
00385 #define AKSIMD_UNPACKHI_VECTOR8I16( a, b ) _mm_unpackhi_epi16( a, b ) ///< Interleave the upper four 16-bit lanes of a and b: (a4, b4, a5, b5, a6, b6, a7, b7)
00386 
00389 #define AKSIMD_PACKS_V4I32( a, b ) _mm_packs_epi32( a, b ) ///< Narrow the 32-bit lanes of a (result lower half) and b (result upper half) to 16 bits with signed saturation
00390 
00393 
00394 
00397 #define AKSIMD_SHIFTLEFT_V4I32( __vec__, __shiftBy__ ) \
00398     _mm_slli_epi32( (__vec__), (__shiftBy__) ) ///< Shift each 32-bit lane left by __shiftBy__ bits, filling with zeros
00399 
00402 #define AKSIMD_SHIFTRIGHTARITH_V4I32( __vec__, __shiftBy__ ) \
00403     _mm_srai_epi32( (__vec__), (__shiftBy__) ) ///< Shift each 32-bit lane right by __shiftBy__ bits, replicating the sign bit
00404 
00406 
00407 
00408 #if defined( AK_CPU_X86 ) && !defined(AK_IOS)   /// MMX
00409 
00410 typedef __m64   AKSIMD_V2F32;   ///< Alias of the 64-bit __m64 type; only declared when AK_CPU_X86 is defined and AK_IOS is not
00411 
00412 #endif
00413 
00414 
00415 #endif //_AK_SIMD_SSE_H_

이 페이지가 도움이 되었나요?

지원이 필요하신가요?

질문이 있으신가요? 문제를 겪고 계신가요? 더 많은 정보가 필요하신가요? 저희에게 문의해주시면 도와드리겠습니다!

지원 페이지를 방문해 주세요

작업하는 프로젝트에 대해 알려주세요. 언제든지 도와드릴 준비가 되어 있습니다.

프로젝트를 등록하세요. 아무런 조건이나 의무 사항 없이 빠른 시작을 도와드리겠습니다.

Wwise를 시작해 보세요