Version

menu_open

include/AK/SoundEngine/Platforms/SSE/AkSimd.h File Reference

Go to the source code of this file.

Defines

#define  AKSIMD_LOADU_V4I32(__addr__)   _mm_loadu_si128( (__addr__) )
  Loads unaligned 128-bit value (see _mm_loadu_si128).
#define  AKSIMD_LOAD_V4I32(__addr__)   _mm_load_si128( (__addr__) )
  Loads aligned 128-bit value (see _mm_load_si128).
#define  AKSIMD_SETZERO_V4I32()   _mm_setzero_si128()
  Sets the four 32-bit integer values to zero (see _mm_setzero_si128).
#define  AKSIMD_SET_V4I32(__scalar__)   _mm_set1_epi32( (__scalar__) )
#define  AKSIMD_SETV_V4I32(_d, _c, _b, _a)   _mm_set_epi32( (_d), (_c), (_b), (_a) )
#define  AKSIMD_STORE_V4I32(__addr__, __vec__)   _mm_store_si128( (__addr__), (__vec__) )
  Stores four 32-bit integer values.
#define  AKSIMD_STOREU_V4I32(__addr__, __vec__)   _mm_storeu_si128( (__addr__), (__vec__) )
#define  AKSIMD_UNPACKLO_VECTOR8I16(a, b)   _mm_unpacklo_epi16( a, b )
#define  AKSIMD_UNPACKHI_VECTOR8I16(a, b)   _mm_unpackhi_epi16( a, b )
#define  AKSIMD_PACKS_V4I32(a, b)   _mm_packs_epi32( a, b )
Platform specific defines for prefetching

#define  AKSIMD_ARCHCACHELINESIZE   (64)
  Assumed cache line width for architectures on this platform.
#define  AKSIMD_ARCHMAXPREFETCHSIZE   (512)
#define  AKSIMD_PREFETCHMEMORY(__offset__, __add__)   _mm_prefetch(((char *)(__add__))+(__offset__), _MM_HINT_NTA )
  Cross-platform memory prefetch of effective address assuming non-temporal data.
Platform specific memory size alignment for allocation purposes

#define  AKSIMD_ALIGNSIZE(__Size__)   (((__Size__) + 15) & ~15)
AKSIMD loading / setting

#define  AKSIMD_LOAD_V4F32(__addr__)   _mm_load_ps( (AkReal32*)(__addr__) )
  Loads four single-precision, floating-point values (see _mm_load_ps).
#define  AKSIMD_LOADU_V4F32(__addr__)   _mm_loadu_ps( (__addr__) )
#define  AKSIMD_LOAD1_V4F32(__scalar__)   _mm_load1_ps( &(__scalar__) )
#define  AKSIMD_SET_V4F32(__scalar__)   _mm_set_ps1( (__scalar__) )
#define  AKSIMD_SETZERO_V4F32()   _mm_setzero_ps()
#define  AKSIMD_LOAD_SS_V4F32(__addr__)   _mm_load_ss( (__addr__) )
AKSIMD storing

#define  AKSIMD_STORE_V4F32(__addr__, __vec__)   _mm_store_ps( (AkReal32*)(__addr__), (__vec__) )
#define  AKSIMD_STOREU_V4F32(__addr__, __vec__)   _mm_storeu_ps( (AkReal32*)(__addr__), (__vec__) )
#define  AKSIMD_STORE1_V4F32(__addr__, __vec__)   _mm_store_ss( (AkReal32*)(__addr__), (__vec__) )
AKSIMD shuffling

#define  AKSIMD_SHUFFLE(fp3, fp2, fp1, fp0)   _MM_SHUFFLE( (fp3), (fp2), (fp1), (fp0) )
#define  AKSIMD_SHUFFLE_V4F32(a, b, i)   _mm_shuffle_ps( a, b, i )
#define  AKSIMD_MOVEHL_V4F32(a, b)   _mm_movehl_ps( a, b )
#define  AKSIMD_MOVELH_V4F32(a, b)   _mm_movelh_ps( a, b )
#define  AKSIMD_SHUFFLE_BADC(__a__)   _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(2,3,0,1))
  Swap the 2 lower floats together and the 2 higher floats together.
#define  AKSIMD_SHUFFLE_CDAB(__a__)   _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(1,0,3,2))
  Swap the 2 lower floats with the 2 higher floats.
#define  AKSIMD_SHUFFLE_BCDA(__a__)   AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), _MM_SHUFFLE(0,3,2,1))
  Barrel-shift all floats by one.
#define  AKSIMD_DUP_ODD(__vv)   AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1))
  Duplicates the odd items into the even items (d c b a -> d d b b ).
#define  AKSIMD_DUP_EVEN(__vv)   AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0))
  Duplicates the even items into the odd items (d c b a -> c c a a ).
AKSIMD integer arithmetic

#define  AKSIMD_ADD_V4I32(a, b)   _mm_add_epi32( a, b )
  Adds the four integer values of a and b.
#define  AKSIMD_CMPLT_V4I32(a, b)   _mm_cmplt_epi32(a,b)
#define  AKSIMD_CMPGT_V4I32(a, b)   _mm_cmpgt_epi32(a,b)
#define  AKSIMD_XOR_V4I32(a, b)   _mm_xor_si128(a,b)
#define  AKSIMD_XOR_V4F32(a, b)   _mm_xor_ps(a,b)
#define  AKSIMD_SUB_V4I32(a, b)   _mm_sub_epi32(a,b)
#define  AKSIMD_MULLO16_V4I32(a, b)   _mm_mullo_epi16(a, b)
  Multiplies the low 16 bits of a by b and stores it in V4I32 (no overflow).
AKSIMD packing / unpacking

#define  AKSIMD_UNPACKLO_V4F32(a, b)   _mm_unpacklo_ps( a, b )
#define  AKSIMD_UNPACKHI_V4F32(a, b)   _mm_unpackhi_ps( a, b )
AKSIMD conversion

#define  AKSIMD_CONVERT_V4I32_TO_V4F32(__vec__)   _mm_cvtepi32_ps( (__vec__) )
#define  AKSIMD_CONVERT_V4F32_TO_V4I32(__vec__)   _mm_cvtps_epi32( (__vec__) )
#define  AKSIMD_TRUNCATE_V4F32_TO_V4I32(__vec__)   _mm_cvttps_epi32( (__vec__) )
#define  AKSIMD_AND_V4I32(__a__, __b__)   _mm_and_si128( (__a__), (__b__) )
#define  AKSIMD_CMPGT_V8I16(__a__, __b__)   _mm_cmpgt_epi16( (__a__), (__b__) )
AKSIMD shifting

#define  AKSIMD_SHIFTLEFT_V4I32(__vec__, __shiftBy__)   _mm_slli_epi32( (__vec__), (__shiftBy__) )
#define  AKSIMD_SHIFTRIGHTARITH_V4I32(__vec__, __shiftBy__)   _mm_srai_epi32( (__vec__), (__shiftBy__) )

Typedefs

typedef __m128i  AKSIMD_V4I32
  Vector of 4 32-bit signed integers.
typedef AKSIMD_V4I32  AKSIMD_V4ICOND
AKSIMD types

typedef float  AKSIMD_F32
  32-bit float
typedef __m128  AKSIMD_V4F32
  Vector of 4 32-bit floats.
typedef AKSIMD_V4F32  AKSIMD_V4COND
  Vector of 4 comparison results.
typedef AKSIMD_V4F32  AKSIMD_V4FCOND
  Vector of 4 comparison results.

AKSIMD arithmetic



#define  AKSIMD_SUB_V4F32(a, b)   _mm_sub_ps( a, b )
#define  AKSIMD_SUB_SS_V4F32(a, b)   _mm_sub_ss( a, b )
#define  AKSIMD_ADD_V4F32(a, b)   _mm_add_ps( a, b )
#define  AKSIMD_ADD_SS_V4F32(a, b)   _mm_add_ss( a, b )
#define  AKSIMD_MUL_V4F32(a, b)   _mm_mul_ps( a, b )
#define  AKSIMD_DIV_V4F32(a, b)   _mm_div_ps( a, b )
#define  AKSIMD_MUL_SS_V4F32(a, b)   _mm_mul_ss( a, b )
#define  AKSIMD_MADD_V4F32(__a__, __b__, __c__)   _mm_add_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) )
  Vector multiply-add operation.
#define  AKSIMD_MSUB_V4F32(__a__, __b__, __c__)   _mm_sub_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) )
#define  AKSIMD_MADD_SS_V4F32(__a__, __b__, __c__)   _mm_add_ss( _mm_mul_ss( (__a__), (__b__) ), (__c__) )
  Scalar multiply-add operation on the lowest element (see _mm_mul_ss and _mm_add_ss).
#define  AKSIMD_MIN_V4F32(a, b)   _mm_min_ps( a, b )
#define  AKSIMD_MAX_V4F32(a, b)   _mm_max_ps( a, b )
#define  AKSIMD_ABS_V4F32(a)   _mm_andnot_ps(_mm_set1_ps(-0.f), a)
  Computes the absolute value.
#define  AKSIMD_NEG_V4F32(__a__)   _mm_xor_ps(_mm_set1_ps(-0.f), __a__)
  Changes the sign.
#define  AKSIMD_SQRT_V4F32(__a__)   _mm_sqrt_ps( (__a__) )
  Vector square root approximation (see _mm_sqrt_ps).
#define  AKSIMD_ASSERTFLUSHZEROMODE   AKASSERT( _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON )
static AkForceInline void  AKSIMD_HORIZONTALADD (AKSIMD_V4F32 &vVec)
static AkForceInline AKSIMD_V4F32  AKSIMD_DOTPRODUCT (AKSIMD_V4F32 &vVec, const AKSIMD_V4F32 &vfSigns)
static AkForceInline AKSIMD_V4F32  AKSIMD_COMPLEXMUL (const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2)
  Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary parts.

AKSIMD vector comparison

Apart from AKSIMD_SEL_GTEQ_V4F32, these implementations are limited to a few platforms.



#define  AKSIMD_CMP_CTRLMASK   __m128
#define  AKSIMD_LTEQ_V4F32(__a__, __b__)   _mm_cmple_ps( (__a__), (__b__) )
  Vector "<=" operation (see _mm_cmple_ps).
#define  AKSIMD_GTEQ_V4F32(__a__, __b__)   _mm_cmpge_ps( (__a__), (__b__) )
  Vector ">=" operation (see _mm_cmpge_ps).
#define  AKSIMD_EQ_V4F32(__a__, __b__)   _mm_cmpeq_ps( (__a__), (__b__) )
  Vector "==" operation (see _mm_cmpeq_ps).
#define  AKSIMD_SEL_GTEQ_V4F32(__a__, __b__, __cond1__, __cond2__)   AKSIMD_VSEL_V4F32( __a__, __b__, AKSIMD_GTEQ_V4F32( __cond1__, __cond2__ ) )
#define  AKSIMD_SEL_GTEZ_V4F32(__a__, __b__, __c__)   AKSIMD_VSEL_V4F32( (__c__), (__b__), AKSIMD_GTEQ_V4F32( __a__, _mm_set1_ps(0) ) )
#define  AKSIMD_SPLAT_V4F32(var, idx)   AKSIMD_SHUFFLE_V4F32(var,var, AKSIMD_SHUFFLE(idx,idx,idx,idx))
#define  AKSIMD_MASK_V4F32(__a__)   _mm_movemask_ps( __a__ )
static AkForceInline AKSIMD_V4F32  AKSIMD_VSEL_V4F32 (AKSIMD_V4F32 vA, AKSIMD_V4F32 vB, AKSIMD_V4F32 vMask)
  Returns vA where the control mask is 0 and vB where the control mask is non-zero. The control mask is passed in vMask and is usually provided by the comparison operations above.

Detailed Description

AKSIMD - SSE implementation

Definition in file AkSimd.h.


Was this page helpful?

Need Support?

Questions? Problems? Need more info? Contact us, and we can help!

Visit our Support page

Tell us about your project. We're here to help.

Register your project and we'll help you get started with no strings attached!

Get started with Wwise