버전

menu_open

include/AK/SoundEngine/Platforms/SSE/AkSimd.h File Reference

Go to the source code of this file.

Defines

#define  AKSIMD_LOADU_V4I32(__addr__)   _mm_loadu_si128( (__addr__) )
  Loads unaligned 128-bit value (see _mm_loadu_si128).
#define  AKSIMD_LOAD_V4I32(__addr__)   _mm_load_si128( (__addr__) )
  Loads aligned 128-bit value (see _mm_load_si128).
#define  AKSIMD_SETZERO_V4I32()   _mm_setzero_si128()
  Sets the four 32-bit integer values to zero (see _mm_setzero_si128).
#define  AKSIMD_SET_V4I32(__scalar__)   _mm_set1_epi32( (__scalar__) )
#define  AKSIMD_SETV_V4I32(_d, _c, _b, _a)   _mm_set_epi32( (_d), (_c), (_b), (_a) )
#define  AKSIMD_STORE_V4I32(__addr__, __vec__)   _mm_store_si128( (__addr__), (__vec__) )
  Stores four 32-bit integer values.
#define  AKSIMD_STOREU_V4I32(__addr__, __vec__)   _mm_storeu_si128( (__addr__), (__vec__) )
#define  AKSIMD_UNPACKLO_VECTOR8I16(a, b)   _mm_unpacklo_epi16( a, b )
#define  AKSIMD_UNPACKHI_VECTOR8I16(a, b)   _mm_unpackhi_epi16( a, b )
#define  AKSIMD_PACKS_V4I32(a, b)   _mm_packs_epi32( a, b )
Platform specific defines for prefetching

#define  AKSIMD_ARCHCACHELINESIZE   (64)
  Assumed cache line width for architectures on this platform.
#define  AKSIMD_ARCHMAXPREFETCHSIZE   (512)
#define  AKSIMD_PREFETCHMEMORY(__offset__, __add__)   _mm_prefetch(((char *)(__add__))+(__offset__), _MM_HINT_NTA )
  Cross-platform memory prefetch of effective address assuming non-temporal data.
Platform specific memory size alignment for allocation purposes

#define  AKSIMD_ALIGNSIZE(__Size__)   (((__Size__) + 15) & ~15)
AKSIMD loading / setting

#define  AKSIMD_LOAD_V4F32(__addr__)   _mm_load_ps( (AkReal32*)(__addr__) )
  Loads four single-precision, floating-point values (see _mm_load_ps).
#define  AKSIMD_LOADU_V4F32(__addr__)   _mm_loadu_ps( (__addr__) )
#define  AKSIMD_LOAD1_V4F32(__scalar__)   _mm_load1_ps( &(__scalar__) )
#define  AKSIMD_SET_V4F32(__scalar__)   _mm_set_ps1( (__scalar__) )
#define  AKSIMD_SETZERO_V4F32()   _mm_setzero_ps()
#define  AKSIMD_LOAD_SS_V4F32(__addr__)   _mm_load_ss( (__addr__) )
AKSIMD storing

#define  AKSIMD_STORE_V4F32(__addr__, __vec__)   _mm_store_ps( (AkReal32*)(__addr__), (__vec__) )
#define  AKSIMD_STOREU_V4F32(__addr__, __vec__)   _mm_storeu_ps( (AkReal32*)(__addr__), (__vec__) )
#define  AKSIMD_STORE1_V4F32(__addr__, __vec__)   _mm_store_ss( (AkReal32*)(__addr__), (__vec__) )
AKSIMD shuffling

#define  AKSIMD_SHUFFLE(fp3, fp2, fp1, fp0)   _MM_SHUFFLE( (fp3), (fp2), (fp1), (fp0) )
#define  AKSIMD_SHUFFLE_V4F32(a, b, i)   _mm_shuffle_ps( a, b, i )
#define  AKSIMD_MOVEHL_V4F32(a, b)   _mm_movehl_ps( a, b )
#define  AKSIMD_MOVELH_V4F32(a, b)   _mm_movelh_ps( a, b )
#define  AKSIMD_SHUFFLE_BADC(__a__)   _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(2,3,0,1))
  Swaps the two lower floats with each other and the two higher floats with each other.
#define  AKSIMD_SHUFFLE_CDAB(__a__)   _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(1,0,3,2))
  Swap the 2 lower floats with the 2 higher floats.
#define  AKSIMD_SHUFFLE_BCDA(__a__)   AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), _MM_SHUFFLE(0,3,2,1))
  Barrel-shift all floats by one.
#define  AKSIMD_DUP_ODD(__vv)   AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1))
  Duplicates the odd items into the even items (d c b a -> d d b b ).
#define  AKSIMD_DUP_EVEN(__vv)   AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0))
  Duplicates the even items into the odd items (d c b a -> c c a a ).
AKSIMD integer arithmetic

#define  AKSIMD_ADD_V4I32(a, b)   _mm_add_epi32( a, b )
  Adds the four integer values of a and b.
AKSIMD packing / unpacking

#define  AKSIMD_UNPACKLO_V4F32(a, b)   _mm_unpacklo_ps( a, b )
#define  AKSIMD_UNPACKHI_V4F32(a, b)   _mm_unpackhi_ps( a, b )
AKSIMD conversion

#define  AKSIMD_CONVERT_V4I32_TO_V4F32(__vec__)   _mm_cvtepi32_ps( (__vec__) )
#define  AKSIMD_CONVERT_V4F32_TO_V4I32(__vec__)   _mm_cvtps_epi32( (__vec__) )
#define  AKSIMD_TRUNCATE_V4F32_TO_V4I32(__vec__)   _mm_cvttps_epi32( (__vec__) )
#define  AKSIMD_AND_V4I32(__a__, __b__)   _mm_and_si128( (__a__), (__b__) )
#define  AKSIMD_CMPGT_V8I16(__a__, __b__)   _mm_cmpgt_epi16( (__a__), (__b__) )
AKSIMD shifting

#define  AKSIMD_SHIFTLEFT_V4I32(__vec__, __shiftBy__)   _mm_slli_epi32( (__vec__), (__shiftBy__) )
#define  AKSIMD_SHIFTRIGHTARITH_V4I32(__vec__, __shiftBy__)   _mm_srai_epi32( (__vec__), (__shiftBy__) )

Typedefs

typedef __m128i  AKSIMD_V4I32
  Vector of 4 32-bit signed integers.
typedef AKSIMD_V4I32  AKSIMD_V4ICOND

AKSIMD types



#define  AKSIMD_V4F32_SUPPORTED
typedef float  AKSIMD_F32
  32-bit float
typedef __m128  AKSIMD_V4F32
  Vector of 4 32-bit floats.
typedef AKSIMD_V4F32  AKSIMD_V4COND
  Vector of 4 comparison results.
typedef AKSIMD_V4F32  AKSIMD_V4FCOND
  Vector of 4 comparison results.

AKSIMD arithmetic



#define  AKSIMD_SUB_V4F32(a, b)   _mm_sub_ps( a, b )
#define  AKSIMD_SUB_SS_V4F32(a, b)   _mm_sub_ss( a, b )
#define  AKSIMD_ADD_V4F32(a, b)   _mm_add_ps( a, b )
#define  AKSIMD_ADD_SS_V4F32(a, b)   _mm_add_ss( a, b )
#define  AKSIMD_MUL_V4F32(a, b)   _mm_mul_ps( a, b )
#define  AKSIMD_DIV_V4F32(a, b)   _mm_div_ps( a, b )
#define  AKSIMD_MUL_SS_V4F32(a, b)   _mm_mul_ss( a, b )
#define  AKSIMD_MADD_V4F32(__a__, __b__, __c__)   _mm_add_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) )
  Vector multiply-add operation.
#define  AKSIMD_MSUB_V4F32(__a__, __b__, __c__)   _mm_sub_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) )
#define  AKSIMD_MADD_SS_V4F32(__a__, __b__, __c__)   _mm_add_ss( _mm_mul_ss( (__a__), (__b__) ), (__c__) )
  Scalar multiply-add operation on the lowest element only (see _mm_mul_ss and _mm_add_ss).
#define  AKSIMD_MIN_V4F32(a, b)   _mm_min_ps( a, b )
#define  AKSIMD_MAX_V4F32(a, b)   _mm_max_ps( a, b )
#define  AKSIMD_ABS_V4F32(a)   _mm_andnot_ps(_mm_set1_ps(-0.f), a)
  Computes the absolute value.
#define  AKSIMD_NEG_V4F32(__a__)   _mm_xor_ps(_mm_set1_ps(-0.f), __a__)
  Changes the sign.
#define  AKSIMD_SQRT_V4F32(__a__)   _mm_sqrt_ps( (__a__) )
  Vector square root approximation (see _mm_sqrt_ps).
#define  AKSIMD_ASSERTFLUSHZEROMODE   AKASSERT( _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON )
static AkForceInline void  AKSIMD_HORIZONTALADD (AKSIMD_V4F32 &vVec)
static AkForceInline AKSIMD_V4F32  AKSIMD_DOTPRODUCT (AKSIMD_V4F32 &vVec, const AKSIMD_V4F32 &vfSigns)
static AkForceInline AKSIMD_V4F32  AKSIMD_COMPLEXMUL (const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2)
  Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary parts.

AKSIMD vector comparison

Apart from AKSIMD_SEL_GTEQ_V4F32, these implementations are limited to a few platforms.



#define  AKSIMD_CMP_CTRLMASK   __m128
#define  AKSIMD_LTEQ_V4F32(__a__, __b__)   _mm_cmple_ps( (__a__), (__b__) )
  Vector "<=" operation (see _mm_cmple_ps).
#define  AKSIMD_GTEQ_V4F32(__a__, __b__)   _mm_cmpge_ps( (__a__), (__b__) )
  Vector ">=" operation (see _mm_cmpge_ps).
#define  AKSIMD_EQ_V4F32(__a__, __b__)   _mm_cmpeq_ps( (__a__), (__b__) )
  Vector "==" operation (see _mm_cmpeq_ps).
#define  AKSIMD_SEL_GTEQ_V4F32(__a__, __b__, __cond1__, __cond2__)   AKSIMD_VSEL_V4F32( __a__, __b__, AKSIMD_GTEQ_V4F32( __cond1__, __cond2__ ) )
#define  AKSIMD_SEL_GTEZ_V4F32(__a__, __b__, __c__)   AKSIMD_VSEL_V4F32( (__c__), (__b__), AKSIMD_GTEQ_V4F32( __a__, _mm_set1_ps(0) ) )
#define  AKSIMD_SPLAT_V4F32(var, idx)   AKSIMD_SHUFFLE_V4F32(var,var, AKSIMD_SHUFFLE(idx,idx,idx,idx))
static AkForceInline AKSIMD_V4F32  AKSIMD_VSEL_V4F32 (AKSIMD_V4F32 vA, AKSIMD_V4F32 vB, AKSIMD_V4F32 vMask)
  Returns vA where the control mask is 0 and vB where the control mask is non-zero. The control mask is passed in vMask and is usually produced by the comparison operations above.

Detailed Description

AKSIMD - SSE implementation

Definition in file AkSimd.h.


이 페이지가 도움이 되었나요?

지원이 필요하신가요?

질문이 있으신가요? 문제를 겪고 계신가요? 더 많은 정보가 필요하신가요? 저희에게 문의해주시면 도와드리겠습니다!

지원 페이지를 방문해 주세요

작업하는 프로젝트에 대해 알려주세요. 언제든지 도와드릴 준비가 되어 있습니다.

프로젝트를 등록하세요. 아무런 조건이나 의무 사항 없이 빠른 시작을 도와드리겠습니다.

Wwise를 시작해 보세요