Wwise SDK
_ak_simd_avx_8h_source
버전
menu_open
link
Wwise SDK 2023.1.3
|
AkSimdAvx.h
이 파일의 문서화 페이지로 가기
80 #define AKSIMD_SETV_V8F32( _h, _g, _f, _e, _d, _c, _b, _a ) _mm256_set_ps( (_h), (_g), (_f), (_e), (_d), (_c), (_b), (_a) )
83 #define AKSIMD_SETV_V4F64( _d, _c, _b, _a ) _mm256_castpd_ps( _mm256_set_pd( (_d), (_c), (_b), (_a) ) )
95 /// Note that this should be utilized instead of, e.g. adding & utilizing a macro "AKSIMD_INSERT_V8I32(m, i, idx)"
96 /// Because there is no direct corresponding instruction for an insert into 256. You should load into 128s
97 /// and use that. Some compilers do not handle _mm256_insert_epi32 (etc) well, or even include them
118 #define AKSIMD_STORE_V8F32( __addr__, __vec__ ) _mm256_storeu_ps( (AkReal32*)(__addr__), (__vec__) )
122 #define AKSIMD_STORE1_V8F32( __addr__, __vec__ ) _mm_store_ss( (AkReal32*)(__addr__), _mm256_castps256_ps128( (__vec__) ) )
138 /// For each 128b lane, Swap the 2 lower floats together and the 2 higher floats together. ( h g f e d c b a -> g h e f c d a b )
139 #define AKSIMD_SHUFFLE_V8_BADC( __a__ ) AKSIMD_SHUFFLE_V8F32( (__a__), (__a__), AKSIMD_SHUFFLE(2,3,0,1))
141 /// For each 128b lane, Swap the 2 lower floats with the 2 higher floats. ( h g f e d c b a -> f e h g b a d c )
142 #define AKSIMD_SHUFFLE_V8_CDAB( __a__ ) AKSIMD_SHUFFLE_V8F32( (__a__), (__a__), AKSIMD_SHUFFLE(1,0,3,2))
145 #define AKSIMD_SHUFFLE_V8_BCDA( __a__ ) AKSIMD_SHUFFLE_V8F32( (__a__), (__a__), AKSIMD_SHUFFLE(0,3,2,1))
147 /// For each 128b lane, duplicates the odd items into the even items ( h g f e d c b a -> h h f f d d b b )
150 /// For each 128b lane, duplicates the even items into the odd items ( h g f e d c b a -> g g e e c c a a )
153 /// Shuffle 32-bit integers in a within 128-bit lanes using the control in i, and return the results
154 #define AKSIMD_SHUFFLE_V8I32( a, b, i ) _mm256_castps_si256(_mm256_shuffle_ps( _mm256_castsi256_ps(a), _mm256_castsi256_ps(b), i ))
156 /// single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
166 /// Selects the lower of each of the 128b lanes in a and b to be the result ( B A ), ( D C ) -> ( C A )
167 #define AKSIMD_DEINTERLEAVELANES_LO_V8F32( a, b ) AKSIMD_PERMUTE_2X128_V8F32(a, b, AKSIMD_PERMUTE128(2, 0))
169 /// Selects the higher of each of the 128b lanes in a and b to be the result ( B A ), ( D C) -> ( D B )
170 #define AKSIMD_DEINTERLEAVELANES_HI_V8F32( a, b ) AKSIMD_PERMUTE_2X128_V8F32(a, b, AKSIMD_PERMUTE128(3, 1))
180 AkForceInline void AKSIMD_TRANSPOSE8X4_V8F32(AKSIMD_V8F32& A, AKSIMD_V8F32& B, AKSIMD_V8F32& C, AKSIMD_V8F32& D)
209 #define AKSIMD_SUB_SS_V8F32( a, b ) _mm256_sub_ps( a, _mm256_and_ps(b, _mm256_setr_epi32( -1, 0, 0, 0, 0, 0, 0, 0 ) ) )
222 #define AKSIMD_ADD_SS_V8F32( a, b ) _mm256_add_ps( a, _mm256_and_ps(b, _mm256_setr_epi32( -1, 0, 0, 0, 0, 0, 0, 0 ) ) )
234 #define AKSIMD_MUL_SS_V8F32( a, b ) _mm256_mul_ps( a, _mm256_blend_ps(b, _mm256_set1_ps(1.0f), 0xfe ) )
268 /// horizontal add across the entire vector - vVec will be updated to contain the sum of every input element of vVec
283 /// Cross-platform SIMD multiplication of 8 complex data elements with interleaved real and imaginary parts
284 static AkForceInline AKSIMD_V8F32 AKSIMD_COMPLEXMUL_V8F32(const AKSIMD_V8F32 cIn1, const AKSIMD_V8F32 cIn2)
287 __m256 in2Shuf = _mm256_shuffle_ps(cIn2, cIn2, 0xB1); // shuf multiplicand (c3, d3, c2, d2, c1, d1, c0, d0)
289 __m256 temp = _mm256_mul_ps(imag1Ext, in2Shuf); // temp (b3c3, b3d3, b2c2, b2d2, b1c1, b1d1, b0c0, b0d0)
291 __m256 out = _mm256_addsub_ps(mul, temp); // final (a3d3+b3c3, a3c3-b3d3, a2d2+b2c2, a2c2-b2d2, a1d1+b1c1, a1c1-b1d1, a0d0+b0c0, a0c0-b0d0)
335 /// Return a when control mask is 0, return b when control mask is non zero, control mask is in c and usually provided by above comparison operations
336 static AkForceInline AKSIMD_V8F32 AKSIMD_VSEL_V8F32( AKSIMD_V8F32 vA, AKSIMD_V8F32 vB, AKSIMD_V8F32 vMask )
342 #define AKSIMD_SEL_GTEQ_V8F32( __a__, __b__, __cond1__, __cond2__ ) AKSIMD_VSEL_V8F32( __a__, __b__, AKSIMD_GTEQ_V8F32( __cond1__, __cond2__ ) )
345 #define AKSIMD_SEL_GTEZ_V8F32( __a__, __b__, __c__ ) AKSIMD_VSEL_V8F32( (__c__), (__b__), AKSIMD_GTEQ_V8F32( __a__, _mm256_set1_ps(0) ) )
347 #define AKSIMD_SPLAT_V8F32(var, idx) AKSIMD_SHUFFLE_V8F32(var,var, AKSIMD_SHUFFLE(idx,idx,idx,idx))
373 #define AKSIMD_SETV_V8I32( _h, _g, _f, _e, _d, _c, _b, _a ) _mm256_set_epi32( (_h), (_g), (_f), (_e), (_d), (_c), (_b), (_a) )
376 /// Note that this should be utilized instead of, e.g. adding & utilizing a macro "AKSIMD_INSERT_V8I32(m, i, idx)"
377 /// Because there is no direct corresponding instruction for an insert into 256. You should load into 128s
378 /// and use that. Some compilers do not handle _mm256_insert_epi32 (etc) well, or even include them
이 페이지가 도움이 되었나요?
작업하는 프로젝트에 대해 알려주세요. 언제든지 도와드릴 준비가 되어 있습니다.
프로젝트를 등록하세요. 아무런 조건이나 의무 사항 없이 빠른 시작을 도와드리겠습니다.
Wwise를 시작해 보세요