目次

include/AK/SoundEngine/Platforms/SSE/AkSimd.h

ソースコードを見る。

マクロ定義

#define  AKSIMD_LOADU_V4I32(__addr__)   _mm_loadu_si128( (__addr__) )
  Loads unaligned 128-bit value (see _mm_loadu_si128).
#define  AKSIMD_LOAD_V4I32(__addr__)   _mm_load_si128( (__addr__) )
  Loads aligned 128-bit value (see _mm_load_si128).
#define  AKSIMD_SETZERO_V4I32()   _mm_setzero_si128()
  Sets the four 32-bit integer values to zero (see _mm_setzero_si128).
#define  AKSIMD_SET_V4I32(__scalar__)   _mm_set1_epi32( (__scalar__) )
#define  AKSIMD_SETV_V4I32(_d, _c, _b, _a)   _mm_set_epi32( (_d), (_c), (_b), (_a) )
#define  AKSIMD_STORE_V4I32(__addr__, __vec__)   _mm_store_si128( (__addr__), (__vec__) )
  Stores four 32-bit integer values.
#define  AKSIMD_STOREU_V4I32(__addr__, __vec__)   _mm_storeu_si128( (__addr__), (__vec__) )
#define  AKSIMD_UNPACKLO_VECTOR8I16(a, b)   _mm_unpacklo_epi16( a, b )
#define  AKSIMD_UNPACKHI_VECTOR8I16(a, b)   _mm_unpackhi_epi16( a, b )
#define  AKSIMD_PACKS_V4I32(a, b)   _mm_packs_epi32( a, b )
Platform specific defines for prefetching

#define  AKSIMD_ARCHCACHELINESIZE   (64)
  Assumed cache line width for architectures on this platform.
#define  AKSIMD_ARCHMAXPREFETCHSIZE   (512)
#define  AKSIMD_PREFETCHMEMORY(__offset__, __add__)   _mm_prefetch(((char *)(__add__))+(__offset__), _MM_HINT_NTA )
  Cross-platform memory prefetch of effective address assuming non-temporal data.
Platform specific memory size alignment for allocation purposes

#define  AKSIMD_ALIGNSIZE(__Size__)   (((__Size__) + 15) & ~15)
AKSIMD loading / setting

#define  AKSIMD_LOAD_V4F32(__addr__)   _mm_load_ps( (AkReal32*)(__addr__) )
  Loads four single-precision, floating-point values (see _mm_load_ps).
#define  AKSIMD_LOADU_V4F32(__addr__)   _mm_loadu_ps( (__addr__) )
#define  AKSIMD_LOAD1_V4F32(__scalar__)   _mm_load1_ps( &(__scalar__) )
#define  AKSIMD_SET_V4F32(__scalar__)   _mm_set_ps1( (__scalar__) )
#define  AKSIMD_SETZERO_V4F32()   _mm_setzero_ps()
#define  AKSIMD_LOAD_SS_V4F32(__addr__)   _mm_load_ss( (__addr__) )
AKSIMD storing

#define  AKSIMD_STORE_V4F32(__addr__, __vec__)   _mm_store_ps( (AkReal32*)(__addr__), (__vec__) )
#define  AKSIMD_STOREU_V4F32(__addr__, __vec__)   _mm_storeu_ps( (AkReal32*)(__addr__), (__vec__) )
#define  AKSIMD_STORE1_V4F32(__addr__, __vec__)   _mm_store_ss( (AkReal32*)(__addr__), (__vec__) )
AKSIMD shuffling

#define  AKSIMD_SHUFFLE(fp3, fp2, fp1, fp0)   _MM_SHUFFLE( (fp3), (fp2), (fp1), (fp0) )
#define  AKSIMD_SHUFFLE_V4F32(a, b, i)   _mm_shuffle_ps( a, b, i )
#define  AKSIMD_MOVEHL_V4F32(a, b)   _mm_movehl_ps( a, b )
#define  AKSIMD_MOVELH_V4F32(a, b)   _mm_movelh_ps( a, b )
#define  AKSIMD_SHUFFLE_BADC(__a__)   _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(2,3,0,1))
  Swap the 2 lower floats together and the 2 higher floats together.
#define  AKSIMD_SHUFFLE_CDAB(__a__)   _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(1,0,3,2))
  Swap the 2 lower floats with the 2 higher floats.
#define  AKSIMD_SHUFFLE_BCDA(__a__)   AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), _MM_SHUFFLE(0,3,2,1))
  Barrel-shift all floats by one.
#define  AKSIMD_DUP_ODD(__vv)   AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1))
  Duplicates the odd items into the even items (d c b a -> d d b b ).
#define  AKSIMD_DUP_EVEN(__vv)   AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0))
  Duplicates the even items into the odd items (d c b a -> c c a a ).
AKSIMD integer arithmetic

#define  AKSIMD_ADD_V4I32(a, b)   _mm_add_epi32( a, b )
  Adds the four integer values of a and b.
#define  AKSIMD_CMPLT_V4I32(a, b)   _mm_cmplt_epi32(a,b)
#define  AKSIMD_CMPGT_V4I32(a, b)   _mm_cmpgt_epi32(a,b)
#define  AKSIMD_XOR_V4I32(a, b)   _mm_xor_si128(a,b)
#define  AKSIMD_XOR_V4F32(a, b)   _mm_xor_ps(a,b)
#define  AKSIMD_SUB_V4I32(a, b)   _mm_sub_epi32(a,b)
#define  AKSIMD_MULLO16_V4I32(a, b)   _mm_mullo_epi16(a, b)
  Multiplies the low 16 bits of a by b and stores the result in a V4I32 (no overflow).
AKSIMD packing / unpacking

#define  AKSIMD_UNPACKLO_V4F32(a, b)   _mm_unpacklo_ps( a, b )
#define  AKSIMD_UNPACKHI_V4F32(a, b)   _mm_unpackhi_ps( a, b )
AKSIMD conversion

#define  AKSIMD_CONVERT_V4I32_TO_V4F32(__vec__)   _mm_cvtepi32_ps( (__vec__) )
#define  AKSIMD_CONVERT_V4F32_TO_V4I32(__vec__)   _mm_cvtps_epi32( (__vec__) )
#define  AKSIMD_TRUNCATE_V4F32_TO_V4I32(__vec__)   _mm_cvttps_epi32( (__vec__) )
#define  AKSIMD_AND_V4I32(__a__, __b__)   _mm_and_si128( (__a__), (__b__) )
#define  AKSIMD_CMPGT_V8I16(__a__, __b__)   _mm_cmpgt_epi16( (__a__), (__b__) )
AKSIMD shifting

#define  AKSIMD_SHIFTLEFT_V4I32(__vec__, __shiftBy__)   _mm_slli_epi32( (__vec__), (__shiftBy__) )
#define  AKSIMD_SHIFTRIGHTARITH_V4I32(__vec__, __shiftBy__)   _mm_srai_epi32( (__vec__), (__shiftBy__) )

型定義

typedef __m128i  AKSIMD_V4I32
  Vector of 4 32-bit signed integers.
typedef AKSIMD_V4I32  AKSIMD_V4ICOND
AKSIMD types

typedef float  AKSIMD_F32
  32-bit float
typedef __m128  AKSIMD_V4F32
  Vector of 4 32-bit floats.
typedef AKSIMD_V4F32  AKSIMD_V4COND
  Vector of 4 comparison results.
typedef AKSIMD_V4F32  AKSIMD_V4FCOND
  Vector of 4 comparison results.

AKSIMD arithmetic



#define  AKSIMD_SUB_V4F32(a, b)   _mm_sub_ps( a, b )
#define  AKSIMD_SUB_SS_V4F32(a, b)   _mm_sub_ss( a, b )
#define  AKSIMD_ADD_V4F32(a, b)   _mm_add_ps( a, b )
#define  AKSIMD_ADD_SS_V4F32(a, b)   _mm_add_ss( a, b )
#define  AKSIMD_MUL_V4F32(a, b)   _mm_mul_ps( a, b )
#define  AKSIMD_DIV_V4F32(a, b)   _mm_div_ps( a, b )
#define  AKSIMD_MUL_SS_V4F32(a, b)   _mm_mul_ss( a, b )
#define  AKSIMD_MADD_V4F32(__a__, __b__, __c__)   _mm_add_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) )
  Vector multiply-add operation.
#define  AKSIMD_MSUB_V4F32(__a__, __b__, __c__)   _mm_sub_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) )
#define  AKSIMD_MADD_SS_V4F32(__a__, __b__, __c__)   _mm_add_ss( _mm_mul_ss( (__a__), (__b__) ), (__c__) )
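  Scalar multiply-add operation on the lowest element only (see _mm_mul_ss and _mm_add_ss); the upper three elements are passed through from __a__.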
#define  AKSIMD_MIN_V4F32(a, b)   _mm_min_ps( a, b )
#define  AKSIMD_MAX_V4F32(a, b)   _mm_max_ps( a, b )
#define  AKSIMD_ABS_V4F32(a)   _mm_andnot_ps(_mm_set1_ps(-0.f), a)
  Computes the absolute value.
#define  AKSIMD_NEG_V4F32(__a__)   _mm_xor_ps(_mm_set1_ps(-0.f), __a__)
  Changes the sign.
#define  AKSIMD_SQRT_V4F32(__a__)   _mm_sqrt_ps( (__a__) )
  Vector square root approximation (see _mm_sqrt_ps).
#define  AKSIMD_RSQRT_V4F32(__a__)   _mm_rsqrt_ps( (__a__) )
  Vector reciprocal square root approximation 1/sqrt(a), or equivalently, sqrt(1/a).
#define  AKSIMD_ASSERTFLUSHZEROMODE   AKASSERT( _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON )
static AkForceInline void  AKSIMD_HORIZONTALADD (AKSIMD_V4F32 &vVec)
static AkForceInline AKSIMD_V4F32  AKSIMD_DOTPRODUCT (AKSIMD_V4F32 &vVec, const AKSIMD_V4F32 &vfSigns)
static AkForceInline AKSIMD_V4F32  AKSIMD_COMPLEXMUL (const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2)
  Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary parts.

AKSIMD vector comparison

Apart from AKSIMD_SEL_GTEQ_V4F32, these implementations are limited to a few platforms.



#define  AKSIMD_CMP_CTRLMASK   __m128
#define  AKSIMD_LTEQ_V4F32(__a__, __b__)   _mm_cmple_ps( (__a__), (__b__) )
  Vector "<=" operation (see _mm_cmple_ps).
#define  AKSIMD_LT_V4F32(__a__, __b__)   _mm_cmplt_ps( (__a__), (__b__) )
#define  AKSIMD_GTEQ_V4F32(__a__, __b__)   _mm_cmpge_ps( (__a__), (__b__) )
  Vector ">=" operation (see _mm_cmpge_ps).
#define  AKSIMD_GT_V4F32(__a__, __b__)   _mm_cmpgt_ps( (__a__), (__b__) )
#define  AKSIMD_EQ_V4F32(__a__, __b__)   _mm_cmpeq_ps( (__a__), (__b__) )
  Vector "==" operation (see _mm_cmpeq_ps).
#define  AKSIMD_SEL_GTEQ_V4F32(__a__, __b__, __cond1__, __cond2__)   AKSIMD_VSEL_V4F32( __a__, __b__, AKSIMD_GTEQ_V4F32( __cond1__, __cond2__ ) )
#define  AKSIMD_SEL_GTEZ_V4F32(__a__, __b__, __c__)   AKSIMD_VSEL_V4F32( (__c__), (__b__), AKSIMD_GTEQ_V4F32( __a__, _mm_set1_ps(0) ) )
#define  AKSIMD_SPLAT_V4F32(var, idx)   AKSIMD_SHUFFLE_V4F32(var,var, AKSIMD_SHUFFLE(idx,idx,idx,idx))
#define  AKSIMD_MASK_V4F32(__a__)   _mm_movemask_ps( __a__ )
static AkForceInline AKSIMD_V4F32  AKSIMD_VSEL_V4F32 (AKSIMD_V4F32 vA, AKSIMD_V4F32 vB, AKSIMD_V4F32 vMask)
  Returns vA where the control mask is 0 and vB where the control mask is non-zero. The control mask is passed in vMask and is usually produced by the comparison operations above.

説明

AKSIMD - SSE implementation

AkSimd.h で定義されています。