include/AK/SoundEngine/Platforms/SSE/AkSimd.h File Reference

Go to the source code of this file.

Defines
#define	AKSIMD_LOADU_V4I32(__addr__) _mm_loadu_si128( (__addr__) )
	Loads unaligned 128-bit value (see _mm_loadu_si128).
#define	AKSIMD_LOAD_V4I32(__addr__) _mm_load_si128( (__addr__) )
	Loads aligned 128-bit value (see _mm_load_si128).
#define	AKSIMD_SETZERO_V4I32() _mm_setzero_si128()
	Sets the four 32-bit integer values to zero (see _mm_setzero_si128).
#define	AKSIMD_SET_V4I32(__scalar__) _mm_set1_epi32( (__scalar__) )
#define	AKSIMD_SETV_V4I32(_d, _c, _b, _a) _mm_set_epi32( (_d), (_c), (_b), (_a) )
#define	AKSIMD_STORE_V4I32(__addr__, __vec__) _mm_store_si128( (__addr__), (__vec__) )
	Stores four 32-bit integer values.
#define	AKSIMD_STOREU_V4I32(__addr__, __vec__) _mm_storeu_si128( (__addr__), (__vec__) )
#define	AKSIMD_UNPACKLO_VECTOR8I16(a, b) _mm_unpacklo_epi16( a, b )
#define	AKSIMD_UNPACKHI_VECTOR8I16(a, b) _mm_unpackhi_epi16( a, b )
#define	AKSIMD_PACKS_V4I32(a, b) _mm_packs_epi32( a, b )
Platform specific defines for prefetching

#define	AKSIMD_ARCHCACHELINESIZE (64)
	Assumed cache line width for architectures on this platform.
#define	AKSIMD_ARCHMAXPREFETCHSIZE (512)
#define	AKSIMD_PREFETCHMEMORY(__offset__, __add__) _mm_prefetch(((char *)(__add__))+(__offset__), _MM_HINT_NTA )
	Cross-platform memory prefetch of effective address assuming non-temporal data.
Platform specific memory size alignment for allocation purposes

#define	AKSIMD_ALIGNSIZE(__Size__) (((__Size__) + 15) & ~15)
AKSIMD loading / setting

#define	AKSIMD_LOAD_V4F32(__addr__) _mm_load_ps( (AkReal32*)(__addr__) )
	Loads four single-precision, floating-point values (see _mm_load_ps).
#define	AKSIMD_LOADU_V4F32(__addr__) _mm_loadu_ps( (__addr__) )
#define	AKSIMD_LOAD1_V4F32(__scalar__) _mm_load1_ps( &(__scalar__) )
#define	AKSIMD_SET_V4F32(__scalar__) _mm_set_ps1( (__scalar__) )
#define	AKSIMD_SETZERO_V4F32() _mm_setzero_ps()
#define	AKSIMD_LOAD_SS_V4F32(__addr__) _mm_load_ss( (__addr__) )
AKSIMD storing

#define	AKSIMD_STORE_V4F32(__addr__, __vec__) _mm_store_ps( (AkReal32*)(__addr__), (__vec__) )
#define	AKSIMD_STOREU_V4F32(__addr__, __vec__) _mm_storeu_ps( (AkReal32*)(__addr__), (__vec__) )
#define	AKSIMD_STORE1_V4F32(__addr__, __vec__) _mm_store_ss( (AkReal32*)(__addr__), (__vec__) )
AKSIMD shuffling

#define	AKSIMD_SHUFFLE(fp3, fp2, fp1, fp0) _MM_SHUFFLE( (fp3), (fp2), (fp1), (fp0) )
#define	AKSIMD_SHUFFLE_V4F32(a, b, i) _mm_shuffle_ps( a, b, i )
#define	AKSIMD_MOVEHL_V4F32(a, b) _mm_movehl_ps( a, b )
#define	AKSIMD_MOVELH_V4F32(a, b) _mm_movelh_ps( a, b )
#define	AKSIMD_SHUFFLE_BADC(__a__) _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(2,3,0,1))
	Swap the 2 lower floats together and the 2 higher floats together.
#define	AKSIMD_SHUFFLE_CDAB(__a__) _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(1,0,3,2))
	Swap the 2 lower floats with the 2 higher floats.
#define	AKSIMD_SHUFFLE_BCDA(__a__) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), _MM_SHUFFLE(0,3,2,1))
	Barrel-shift all floats by one.
#define	AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1))
	Duplicates the odd items into the even items (d c b a -> d d b b ).
#define	AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0))
	Duplicates the even items into the odd items (d c b a -> c c a a ).
AKSIMD integer arithmetic

#define	AKSIMD_ADD_V4I32(a, b) _mm_add_epi32( a, b )
	Adds the four integer values of a and b.
#define	AKSIMD_CMPLT_V4I32(a, b) _mm_cmplt_epi32(a,b)
#define	AKSIMD_CMPGT_V4I32(a, b) _mm_cmpgt_epi32(a,b)
#define	AKSIMD_XOR_V4I32(a, b) _mm_xor_si128(a,b)
#define	AKSIMD_XOR_V4F32(a, b) _mm_xor_ps(a,b)
#define	AKSIMD_SUB_V4I32(a, b) _mm_sub_epi32(a,b)
#define	AKSIMD_MULLO16_V4I32(a, b) _mm_mullo_epi16(a, b)
	Multiplies the low 16 bits of a by b and stores the result in a V4I32 (no overflow).
AKSIMD packing / unpacking

#define	AKSIMD_UNPACKLO_V4F32(a, b) _mm_unpacklo_ps( a, b )
#define	AKSIMD_UNPACKHI_V4F32(a, b) _mm_unpackhi_ps( a, b )
AKSIMD conversion

#define	AKSIMD_CONVERT_V4I32_TO_V4F32(__vec__) _mm_cvtepi32_ps( (__vec__) )
#define	AKSIMD_CONVERT_V4F32_TO_V4I32(__vec__) _mm_cvtps_epi32( (__vec__) )
#define	AKSIMD_TRUNCATE_V4F32_TO_V4I32(__vec__) _mm_cvttps_epi32( (__vec__) )
#define	AKSIMD_AND_V4I32(__a__, __b__) _mm_and_si128( (__a__), (__b__) )
#define	AKSIMD_CMPGT_V8I16(__a__, __b__) _mm_cmpgt_epi16( (__a__), (__b__) )
AKSIMD shifting

#define	AKSIMD_SHIFTLEFT_V4I32(__vec__, __shiftBy__) _mm_slli_epi32( (__vec__), (__shiftBy__) )
#define	AKSIMD_SHIFTRIGHTARITH_V4I32(__vec__, __shiftBy__) _mm_srai_epi32( (__vec__), (__shiftBy__) )
Typedefs
typedef __m128i	AKSIMD_V4I32
	Vector of 4 32-bit signed integers.
typedef AKSIMD_V4I32	AKSIMD_V4ICOND
AKSIMD types

typedef float	AKSIMD_F32
	32-bit float
typedef __m128	AKSIMD_V4F32
	Vector of 4 32-bit floats.
typedef AKSIMD_V4F32	AKSIMD_V4COND
	Vector of 4 comparison results.
typedef AKSIMD_V4F32	AKSIMD_V4FCOND
	Vector of 4 comparison results.
AKSIMD arithmetic

#define	AKSIMD_SUB_V4F32(a, b) _mm_sub_ps( a, b )
#define	AKSIMD_SUB_SS_V4F32(a, b) _mm_sub_ss( a, b )
#define	AKSIMD_ADD_V4F32(a, b) _mm_add_ps( a, b )
#define	AKSIMD_ADD_SS_V4F32(a, b) _mm_add_ss( a, b )
#define	AKSIMD_MUL_V4F32(a, b) _mm_mul_ps( a, b )
#define	AKSIMD_DIV_V4F32(a, b) _mm_div_ps( a, b )
#define	AKSIMD_MUL_SS_V4F32(a, b) _mm_mul_ss( a, b )
#define	AKSIMD_MADD_V4F32(__a__, __b__, __c__) _mm_add_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) )
	Vector multiply-add operation.
#define	AKSIMD_MSUB_V4F32(__a__, __b__, __c__) _mm_sub_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) )
#define	AKSIMD_MADD_SS_V4F32(__a__, __b__, __c__) _mm_add_ss( _mm_mul_ss( (__a__), (__b__) ), (__c__) )
	Scalar multiply-add operation on the lowest element (see _mm_mul_ss and _mm_add_ss).
#define	AKSIMD_MIN_V4F32(a, b) _mm_min_ps( a, b )
#define	AKSIMD_MAX_V4F32(a, b) _mm_max_ps( a, b )
#define	AKSIMD_ABS_V4F32(a) _mm_andnot_ps(_mm_set1_ps(-0.f), a)
	Computes the absolute value.
#define	AKSIMD_NEG_V4F32(__a__) _mm_xor_ps(_mm_set1_ps(-0.f), __a__)
	Changes the sign.
#define	AKSIMD_SQRT_V4F32(__a__) _mm_sqrt_ps( (__a__) )
	Vector square root approximation (see _mm_sqrt_ps).
#define	AKSIMD_ASSERTFLUSHZEROMODE AKASSERT( _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON )
static AkForceInline void	AKSIMD_HORIZONTALADD (AKSIMD_V4F32 &vVec)
static AkForceInline AKSIMD_V4F32	AKSIMD_DOTPRODUCT (AKSIMD_V4F32 &vVec, const AKSIMD_V4F32 &vfSigns)
static AkForceInline AKSIMD_V4F32	AKSIMD_COMPLEXMUL (const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2)
	Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary parts.
AKSIMD vector comparison
Apart from AKSIMD_SEL_GTEQ_V4F32, these implementations are limited to a few platforms.
#define	AKSIMD_CMP_CTRLMASK __m128
#define	AKSIMD_LTEQ_V4F32(__a__, __b__) _mm_cmple_ps( (__a__), (__b__) )
	Vector "<=" operation (see _mm_cmple_ps).
#define	AKSIMD_GTEQ_V4F32(__a__, __b__) _mm_cmpge_ps( (__a__), (__b__) )
	Vector ">=" operation (see _mm_cmpge_ps).
#define	AKSIMD_EQ_V4F32(__a__, __b__) _mm_cmpeq_ps( (__a__), (__b__) )
	Vector "==" operation (see _mm_cmpeq_ps).
#define	AKSIMD_SEL_GTEQ_V4F32(__a__, __b__, __cond1__, __cond2__) AKSIMD_VSEL_V4F32( __a__, __b__, AKSIMD_GTEQ_V4F32( __cond1__, __cond2__ ) )
#define	AKSIMD_SEL_GTEZ_V4F32(__a__, __b__, __c__) AKSIMD_VSEL_V4F32( (__c__), (__b__), AKSIMD_GTEQ_V4F32( __a__, _mm_set1_ps(0) ) )
#define	AKSIMD_SPLAT_V4F32(var, idx) AKSIMD_SHUFFLE_V4F32(var,var, AKSIMD_SHUFFLE(idx,idx,idx,idx))
#define	AKSIMD_MASK_V4F32(__a__) _mm_movemask_ps( __a__ )
static AkForceInline AKSIMD_V4F32	AKSIMD_VSEL_V4F32 (AKSIMD_V4F32 vA, AKSIMD_V4F32 vB, AKSIMD_V4F32 vMask)
	Returns vA when the control mask is 0 and vB when the control mask is non-zero. The control mask is passed in vMask and is usually provided by the comparison operations above.

Detailed Description

AKSIMD - SSE implementation

Definition in file AkSimd.h.

Was this page helpful?

Need Support?

Questions? Problems? Need more info? Contact us, and we can help!

Visit our Support page

Tell us about your project. We're here to help.

Get started with Wwise

Wwise SDK 2017.1.9

include/AK/SoundEngine/Platforms/SSE/AkSimd.h

include/AK/SoundEngine/Platforms/SSE/AkSimd.h File Reference

Defines

Typedefs

AKSIMD arithmetic

AKSIMD vector comparison

Detailed Description

Was this page helpful?

Need Support?

Tell us about your project. We're here to help.