Wwise SDK 2015.1.9
include/AK/SoundEngine/Platforms/arm_neon/AkSimd.h
//
// Copyright (c) 2006 Audiokinetic Inc. / All Rights Reserved
//

// AkSimd.h

#ifndef _AKSIMD_ARM_NEON_H_
#define _AKSIMD_ARM_NEON_H_

#include <arm_neon.h>
#include <AK/SoundEngine/Common/AkTypes.h>

// Platform specific defines for prefetching

/*
// ??????
#define AKSIMD_ARCHCACHELINESIZE (64)    ///< Assumed cache line width for architectures on this platform
// ??????
#define AKSIMD_ARCHMAXPREFETCHSIZE (512) ///< Use this to control how much prefetching maximum is desirable (assuming 8-way cache)
// ??????
#define AKSIMD_PREFETCHMEMORY( __offset__, __add__ ) _mm_prefetch(((char *)(__add__))+(__offset__), _MM_HINT_NTA )
*/

// Platform-specific memory size alignment for allocation purposes.
#define AKSIMD_ALIGNSIZE( __Size__ ) (((__Size__) + 15) & ~15)

// AKSIMD vector types for this platform.

typedef int32x4_t   AKSIMD_V4I32;
typedef int16x8_t   AKSIMD_V8I16;
typedef int16x4_t   AKSIMD_V4I16;
typedef uint32x4_t  AKSIMD_V4UI32;
typedef uint32x2_t  AKSIMD_V2UI32;
typedef int32x2_t   AKSIMD_V2I32;
typedef float32_t   AKSIMD_F32;
typedef float32x2_t AKSIMD_V2F32;
typedef float32x4_t AKSIMD_V4F32;

typedef uint32x4_t  AKSIMD_V4COND;
typedef uint32x4_t  AKSIMD_V4ICOND;
typedef uint32x4_t  AKSIMD_V4FCOND;

#if defined(AK_CPU_ARM_NEON)
typedef float32x2x2_t AKSIMD_V2F32X2;
typedef float32x4x2_t AKSIMD_V4F32X2;
typedef float32x4x4_t AKSIMD_V4F32X4;
#endif

#define AKSIMD_V4F32_SUPPORTED

// AKSIMD loading / setting.

// Loads four single-precision floating-point values.
#define AKSIMD_LOAD_V4F32( __addr__ ) vld1q_f32( (float32_t*)(__addr__) )

// Loads four single-precision floating-point values from an unaligned address.
#define AKSIMD_LOADU_V4F32( __addr__ ) vld1q_f32( (float32_t*)(__addr__) )

// Loads a single single-precision floating-point value, copying it into all four elements.
#define AKSIMD_LOAD1_V4F32( __scalar__ ) vld1q_dup_f32( (float32_t*)(&(__scalar__)) )

// Sets all four elements of the vector to the given scalar.
#define AKSIMD_SET_V4F32( __scalar__ ) vdupq_n_f32( __scalar__ )

#define AKSIMD_SET_V4I32( __scalar__ ) vdupq_n_s32( __scalar__ )

// Sets all four elements of the vector to zero.
#define AKSIMD_SETZERO_V4F32() AKSIMD_SET_V4F32( 0 )

// Loads one single-precision floating-point value into the low element, clearing the three upper elements.
#define AKSIMD_LOAD_SS_V4F32( __addr__ ) vld1q_lane_f32( (float32_t*)(__addr__), AKSIMD_SETZERO_V4F32(), 0 );

// Loads four 32-bit signed integers.
#define AKSIMD_LOAD_V4I32( __addr__ ) vld1q_s32( (const int32_t*)(__addr__) )

// Loads eight 16-bit signed integers.
#define AKSIMD_LOAD_V8I16( __addr__ ) vld1q_s16( (const int16_t*)(__addr__) )

// Loads four 16-bit signed integers.
#define AKSIMD_LOAD_V4I16( __addr__ ) vld1_s16( (const int16_t*)(__addr__) )
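// Illustrative usage sketch (not part of the SDK header): loading four samples
// and broadcasting a gain with the macros above. The function and parameter
// names are hypothetical; the multiply uses the raw NEON intrinsic because
// AKSIMD_MUL_V4F32 is only defined further down in this file.
static AkForceInline AKSIMD_V4F32 AkSimdExample_LoadScaledInput( const AkReal32* in_pSamples, AkReal32 in_fGain )
{
    AKSIMD_V4F32 vSamples = AKSIMD_LOAD_V4F32( in_pSamples ); // four contiguous floats
    AKSIMD_V4F32 vGain = AKSIMD_SET_V4F32( in_fGain );        // gain broadcast to all four lanes
    return vmulq_f32( vSamples, vGain );
}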
// Loads four 32-bit signed integers from an unaligned address.
#if defined(AK_VITA)
// Due to a compiler bug in Sony SDK 1.8 and 2.0, this workaround is required. Remove when fixed.
#define AKSIMD_LOADU_V4I32( __addr__ ) *__addr__
#else
#define AKSIMD_LOADU_V4I32( __addr__ ) vld1q_s32( (const int32_t*)(__addr__) )
#endif

#define AKSIMD_SETZERO_V4I32() vdupq_n_s32( 0 )

// Loads two single-precision floating-point values.
#define AKSIMD_LOAD_V2F32( __addr__ ) vld1_f32( (float32_t*)(__addr__) )
#define AKSIMD_LOAD_V2F32_LANE( __addr__, __vec__, __lane__ ) vld1_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) );

// Sets both elements of the vector to the given scalar.
#define AKSIMD_SET_V2F32( __scalar__ ) vdup_n_f32( __scalar__ )

// Sets both elements of the vector to zero.
#define AKSIMD_SETZERO_V2F32() AKSIMD_SET_V2F32( 0 )

// De-interleaving loads of two-channel data.
#define AKSIMD_LOAD_V4F32X2( __addr__ ) vld2q_f32( (float32_t*)(__addr__) )
#define AKSIMD_LOAD_V2F32X2( __addr__ ) vld2_f32( (float32_t*)(__addr__) )

#define AKSIMD_LOAD_V2F32X2_LANE( __addr__, __vec__, __lane__ ) vld2_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) );
#define AKSIMD_LOAD_V4F32X4_LANE( __addr__, __vec__, __lane__ ) vld4q_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) );

// AKSIMD storing.

// Stores four single-precision floating-point values.
#define AKSIMD_STORE_V4F32( __addr__, __vName__ ) vst1q_f32( (float32_t*)(__addr__), (__vName__) )

// Stores four single-precision floating-point values to an unaligned address.
#define AKSIMD_STOREU_V4F32( __addr__, __vec__ ) vst1q_f32( (float32_t*)(__addr__), (__vec__) )

// Stores the low element only.
#define AKSIMD_STORE1_V4F32( __addr__, __vec__ ) vst1q_lane_f32( (float32_t*)(__addr__), (__vec__), 0 )

// Stores four 32-bit signed integers.
#define AKSIMD_STORE_V4I32( __addr__, __vec__ ) vst1q_s32( (int32_t*)(__addr__), (__vec__) )

// Stores four 32-bit signed integers to an unaligned address.
#define AKSIMD_STOREU_V4I32( __addr__, __vec__ ) vst1q_s32( (int32_t*)(__addr__), (__vec__) )

// Stores four 32-bit unsigned integers to an unaligned address.
#define AKSIMD_STOREU_V4UI32( __addr__, __vec__ ) vst1q_u32( (uint32_t*)(__addr__), (__vec__) )

// Stores two single-precision floating-point values.
#define AKSIMD_STORE_V2F32( __addr__, __vName__ ) vst1_f32( (AkReal32*)(__addr__), (__vName__) )

// Interleaving stores of two-channel data.
#define AKSIMD_STORE_V4F32X2( __addr__, __vName__ ) vst2q_f32( (float32_t*)(__addr__), (__vName__) )
#define AKSIMD_STORE_V2F32X2( __addr__, __vName__ ) vst2_f32( (float32_t*)(__addr__), (__vName__) )

// AKSIMD conversion.

// Converts four signed 32-bit integers to four single-precision floating-point values.
#define AKSIMD_CONVERT_V4I32_TO_V4F32( __vec__ ) vcvtq_f32_s32( __vec__ )

// Converts four single-precision floating-point values to four signed 32-bit integers.
#define AKSIMD_CONVERT_V4F32_TO_V4I32( __vec__ ) vcvtq_s32_f32( __vec__ )

// Truncates four single-precision floating-point values to four signed 32-bit integers.
#define AKSIMD_TRUNCATE_V4F32_TO_V4I32( __vec__ ) vcvtq_s32_f32( (__vec__) )

// Converts two single-precision floating-point values to two signed 32-bit integers.
#define AKSIMD_CONVERT_V2F32_TO_V2I32( __vec__ ) vcvt_s32_f32( __vec__ )

// AKSIMD logical operations.

// Bitwise AND of two 32-bit integer vectors.
#define AKSIMD_AND_V4I32( __a__, __b__ ) vandq_s32( (__a__), (__b__) )

// Lane-wise greater-than comparison of eight 16-bit signed integers.
#define AKSIMD_CMPGT_V8I16( __a__, __b__ ) \
    vreinterpretq_s32_u16( vcgtq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ) )

// Lane-wise less-than-or-equal comparison of four single-precision floating-point values.
#define AKSIMD_CMPLE_V4F32( __a__, __b__ ) vcleq_f32( (__a__), (__b__) )

// AKSIMD shifting.

// Shifts each 32-bit lane left by a constant number of bits (the intrinsic takes an immediate).
#define AKSIMD_SHIFTLEFT_V4I32( __vec__, __shiftBy__ ) \
    vshlq_n_s32( (__vec__), (__shiftBy__) )

// Arithmetic (rounding) right shift of each 32-bit lane by a constant number of bits.
#define AKSIMD_SHIFTRIGHTARITH_V4I32( __vec__, __shiftBy__ ) \
    vrshrq_n_s32( (__vec__), (__shiftBy__) )
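// Illustrative usage sketch (not part of the SDK header): converting normalized
// float samples to Q15-style 32-bit integers with the conversion macro above.
// The name is hypothetical; note that vcvtq_s32_f32 truncates toward zero.
static AkForceInline AKSIMD_V4I32 AkSimdExample_FloatToQ15( AKSIMD_V4F32 in_vSamples )
{
    AKSIMD_V4F32 vScaled = vmulq_n_f32( in_vSamples, 32767.f ); // map [-1,1] to [-32767,32767]
    return AKSIMD_CONVERT_V4F32_TO_V4I32( vScaled );            // one integer per lane
}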
// AKSIMD shuffling.

// Macro for combining two vectors of 2 elements into one vector of 4 elements.
#define AKSIMD_COMBINE_V2F32( a, b ) vcombine_f32( a, b )

// Macro for the shuffle parameter of AKSIMD_SHUFFLE_V4F32() (see _MM_SHUFFLE).
#define AKSIMD_SHUFFLE( fp3, fp2, fp1, fp0 ) \
    (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))

// Usage: AKSIMD_SHUFFLE_V4F32( vec1, vec2, AKSIMD_SHUFFLE( z, y, x, w ) )
// If you get a link error, it's probably because the required
// _AKSIMD_LOCAL::SHUFFLE_V4F32< zyxw > is not implemented in
// <AK/SoundEngine/Platforms/arm_neon/AkSimdShuffle.h>.
#define AKSIMD_SHUFFLE_V4F32( a, b, zyxw ) \
    _AKSIMD_LOCAL::SHUFFLE_V4F32< zyxw >( a, b )

// The various combinations of zyxw for _AKSIMD_LOCAL::SHUFFLE_V4F32< zyxw > are
// implemented in a separate header file to keep this one cleaner:
#include <AK/SoundEngine/Platforms/arm_neon/AkSimdShuffle.h>

// Moves the upper two elements of xyzw into the lower half of the result, and
// the upper two elements of abcd into the upper half: (x,y,z,w), (a,b,c,d) -> (z,w,c,d).
inline AKSIMD_V4F32 AKSIMD_MOVEHL_V4F32( const AKSIMD_V4F32 abcd, const AKSIMD_V4F32 xyzw )
{
    //return akshuffle_zwcd( xyzw, abcd );
    AKSIMD_V2F32 zw = vget_high_f32( xyzw );
    AKSIMD_V2F32 cd = vget_high_f32( abcd );
    AKSIMD_V4F32 zwcd = vcombine_f32( zw, cd );
    return zwcd;
}

// Moves the lower two elements of xyzw into the lower half of the result, and
// the lower two elements of abcd into the upper half: (x,y,z,w), (a,b,c,d) -> (x,y,a,b).
inline AKSIMD_V4F32 AKSIMD_MOVELH_V4F32( const AKSIMD_V4F32& xyzw, const AKSIMD_V4F32& abcd )
{
    return vcombine_f32( vget_low_f32( xyzw ), vget_low_f32( abcd ) );
}

// Swaps the elements within each pair: (a,b,c,d) -> (b,a,d,c).
//#define AKSIMD_SHUFFLE_BADC( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(2,3,0,1))
#define AKSIMD_SHUFFLE_BADC( __a__ ) vrev64q_f32( __a__ )

// Swaps the two halves of the vector: (a,b,c,d) -> (c,d,a,b).
//#define AKSIMD_SHUFFLE_CDAB( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(1,0,3,2))
#define AKSIMD_SHUFFLE_CDAB( __a__ ) vcombine_f32( vget_high_f32(__a__), vget_low_f32(__a__) )

// Duplicates the odd-indexed elements: (a,b,c,d) -> (b,b,d,d).
#define AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1))

// Duplicates the even-indexed elements: (a,b,c,d) -> (a,a,c,c).
#define AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0))

// AKSIMD arithmetic.

// Lane-wise subtraction of four single-precision floating-point values (a - b).
#define AKSIMD_SUB_V4F32( __a__, __b__ ) vsubq_f32( (__a__), (__b__) )

// Lane-wise subtraction of two single-precision floating-point values (a - b).
#define AKSIMD_SUB_V2F32( __a__, __b__ ) vsub_f32( (__a__), (__b__) )

// Subtracts the lower elements only; the upper elements of a pass through.
#define AKSIMD_SUB_SS_V4F32( __a__, __b__ ) \
    vsubq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) );

// Lane-wise addition of four single-precision floating-point values (a + b).
#define AKSIMD_ADD_V4F32( __a__, __b__ ) vaddq_f32( (__a__), (__b__) )

// Lane-wise addition of two single-precision floating-point values (a + b).
#define AKSIMD_ADD_V2F32( __a__, __b__ ) vadd_f32( (__a__), (__b__) )

// Lane-wise addition of four 32-bit signed integers.
#define AKSIMD_ADD_V4I32( __a__, __b__ ) vaddq_s32( (__a__), (__b__) )

// Lane-wise equality comparison of four single-precision floating-point values.
#define AKSIMD_COMP_V4F32( __a__, __b__ ) vceqq_f32( (__a__), (__b__) )

// Lane-wise equality comparison of two single-precision floating-point values.
#define AKSIMD_COMP_V2F32( __a__, __b__ ) vceq_f32( (__a__), (__b__) )

// Adds the lower elements only; the upper elements of a pass through.
#define AKSIMD_ADD_SS_V4F32( __a__, __b__ ) \
    vaddq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) )

// Lane-wise multiplication of four single-precision floating-point values (a * b).
#define AKSIMD_MUL_V4F32( __a__, __b__ ) vmulq_f32( (__a__), (__b__) )

// Multiplies each lane of a by the scalar b.
#define AKSIMD_MUL_V4F32_SCALAR( __a__, __b__ ) vmulq_n_f32( (__a__), (__b__) )

// Lane-wise (approximate) division of four single-precision floating-point values (a / b).
AkForceInline AKSIMD_V4F32 AKSIMD_DIV_V4F32( AKSIMD_V4F32 a, AKSIMD_V4F32 b )
{
    AKSIMD_V4F32 inv = vrecpeq_f32(b);
    AKSIMD_V4F32 restep = vrecpsq_f32(b, inv);
    inv = vmulq_f32(restep, inv);
    return vmulq_f32(a, inv);
}
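// Illustrative sketch (not part of the SDK header): AKSIMD_DIV_V4F32 above
// refines the coarse vrecpeq_f32 reciprocal estimate with a single
// Newton-Raphson step, so the quotient is approximate. A caller needing more
// accuracy could add a second refinement step; the name is hypothetical.
static AkForceInline AKSIMD_V4F32 AkSimdExample_DivPrecise( AKSIMD_V4F32 a, AKSIMD_V4F32 b )
{
    AKSIMD_V4F32 inv = vrecpeq_f32( b );           // initial reciprocal estimate
    inv = vmulq_f32( vrecpsq_f32( b, inv ), inv ); // first Newton-Raphson step
    inv = vmulq_f32( vrecpsq_f32( b, inv ), inv ); // second step for extra precision
    return vmulq_f32( a, inv );
}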
// Lane-wise multiplication of two single-precision floating-point values (a * b).
#define AKSIMD_MUL_V2F32( __a__, __b__ ) vmul_f32( (__a__), (__b__) )

// Multiplies each lane of a by the scalar b.
#define AKSIMD_MUL_V2F32_SCALAR( __a__, __b__ ) vmul_n_f32( (__a__), (__b__) )

// Multiplies the lower elements only; the upper elements of a pass through.
#define AKSIMD_MUL_SS_V4F32( __a__, __b__ ) \
    vmulq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) )

// Multiply-add and multiply-subtract: (a * b) + c and (a * b) - c.
#define AKSIMD_MADD_V4F32( __a__, __b__, __c__ ) \
    AKSIMD_ADD_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) )

#define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) \
    AKSIMD_SUB_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) )

#define AKSIMD_MADD_V2F32( __a__, __b__, __c__ ) \
    AKSIMD_ADD_V2F32( AKSIMD_MUL_V2F32( (__a__), (__b__) ), (__c__) )

#define AKSIMD_MSUB_V2F32( __a__, __b__, __c__ ) \
    AKSIMD_SUB_V2F32( AKSIMD_MUL_V2F32( (__a__), (__b__) ), (__c__) )

// Variants using the NEON multiply-accumulate instructions.
#define AKSIMD_MADD_V4F32_INST( __a__, __b__, __c__ ) vmlaq_f32( (__c__), (__a__), (__b__) )
#define AKSIMD_MADD_V2F32_INST( __a__, __b__, __c__ ) vmla_f32( (__c__), (__a__), (__b__) )
//#define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) vmlsq_f32( (__c__), (__a__), (__b__) )
//#define AKSIMD_MSUB_V2F32( __a__, __b__, __c__ ) vmls_f32( (__c__), (__a__), (__b__) )

#define AKSIMD_MADD_V4F32_SCALAR( __a__, __b__, __c__ ) vmlaq_n_f32( (__c__), (__a__), (__b__) )
#define AKSIMD_MADD_V2F32_SCALAR( __a__, __b__, __c__ ) vmla_n_f32( (__c__), (__a__), (__b__) )

// Multiply-add of the lower elements only.
AkForceInline AKSIMD_V4F32 AKSIMD_MADD_SS_V4F32( const AKSIMD_V4F32& __a__, const AKSIMD_V4F32& __b__, const AKSIMD_V4F32& __c__ )
{
    return AKSIMD_ADD_SS_V4F32( AKSIMD_MUL_SS_V4F32( __a__, __b__ ), __c__ );
}

// Lane-wise minimum of four single-precision floating-point values.
#define AKSIMD_MIN_V4F32( __a__, __b__ ) vminq_f32( (__a__), (__b__) )

// Lane-wise minimum of two single-precision floating-point values.
#define AKSIMD_MIN_V2F32( __a__, __b__ ) vmin_f32( (__a__), (__b__) )

// Lane-wise maximum of four single-precision floating-point values.
#define AKSIMD_MAX_V4F32( __a__, __b__ ) vmaxq_f32( (__a__), (__b__) )

// Lane-wise maximum of two single-precision floating-point values.
#define AKSIMD_MAX_V2F32( __a__, __b__ ) vmax_f32( (__a__), (__b__) )

// Lane-wise absolute value.
#define AKSIMD_ABS_V4F32( __a__ ) vabsq_f32((__a__))

// Lane-wise negation.
#define AKSIMD_NEG_V2F32( __a__ ) vneg_f32( (__a__) )
#define AKSIMD_NEG_V4F32( __a__ ) vnegq_f32( (__a__) )

// Lane-wise (approximate) square root, computed as 1 / (1 / sqrt(x)) from the NEON estimate instructions.
#define AKSIMD_SQRT_V4F32( __vec__ ) vrecpeq_f32( vrsqrteq_f32( __vec__ ) )

#define AKSIMD_SQRT_V2F32( __vec__ ) vrecpe_f32( vrsqrte_f32( __vec__ ) )

// Adds the four lanes of vVec together; the total is left in lane 0 (the other lanes hold partial sums).
static AkForceInline void AKSIMD_HORIZONTALADD( AKSIMD_V4F32 & vVec )
{
    AKSIMD_V4F32 vHighLow = AKSIMD_MOVEHL_V4F32(vVec, vVec);
    vVec = AKSIMD_ADD_V4F32(vVec, vHighLow);
    vHighLow = AKSIMD_SHUFFLE_V4F32(vVec, vVec, 0x55);
    vVec = AKSIMD_ADD_V4F32(vVec, vHighLow);
}
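// Illustrative usage sketch (not part of the SDK header): summing the four
// lanes of a vector with AKSIMD_HORIZONTALADD above. The name is hypothetical.
static AkForceInline AkReal32 AkSimdExample_Sum4( AKSIMD_V4F32 in_vValues )
{
    AKSIMD_HORIZONTALADD( in_vValues );       // lane 0 now holds the sum of all four lanes
    return vgetq_lane_f32( in_vValues, 0 );   // extract the total
}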
// Multiplication of complex numbers stored with interleaved real and imaginary parts.

#if defined(AK_IOS) || defined(AK_VITA)

// V2 implementation (faster, because these ARM processors actually have an x2 pipeline).

static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL( AKSIMD_V4F32 vCIn1, AKSIMD_V4F32 vCIn2 )
{
    static const AKSIMD_V2F32 vSign = { -1.f, 1.f };

    AKSIMD_V2F32 vCIn1a = vget_low_f32( vCIn1 );
    AKSIMD_V2F32 vCIn2a = vget_low_f32( vCIn2 );
    AKSIMD_V2F32 vTmpa0 = vmul_n_f32( vCIn2a, vCIn1a[0] );
    AKSIMD_V2F32 vTmpa1 = vmul_n_f32( vCIn2a, vCIn1a[1] );
    vTmpa1 = vrev64_f32( vTmpa1 );
    vTmpa1 = vmul_f32( vTmpa1, vSign );
    vTmpa0 = vadd_f32( vTmpa0, vTmpa1 );

    AKSIMD_V2F32 vCIn1b = vget_high_f32( vCIn1 );
    AKSIMD_V2F32 vCIn2b = vget_high_f32( vCIn2 );
    AKSIMD_V2F32 vTmpb0 = vmul_n_f32( vCIn2b, vCIn1b[0] );
    AKSIMD_V2F32 vTmpb1 = vmul_n_f32( vCIn2b, vCIn1b[1] );
    vTmpb1 = vrev64_f32( vTmpb1 );
    vTmpb1 = vmul_f32( vTmpb1, vSign );
    vTmpb0 = vadd_f32( vTmpb0, vTmpb1 );

    return vcombine_f32( vTmpa0, vTmpb0 );
}

#else

// V4 implementation (kept in case future ARM processors actually have an x4 pipeline).

static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL( AKSIMD_V4F32 vCIn1, AKSIMD_V4F32 vCIn2 )
{
#ifdef AKSIMD_DECLARE_V4F32
    static const AKSIMD_DECLARE_V4F32( vSign, 1.f, -1.f, 1.f, -1.f );
#else
    static const AKSIMD_V4F32 vSign = { 1.f, -1.f, 1.f, -1.f };
#endif

    AKSIMD_V4F32 vTmp1 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(2,2,0,0));
    vTmp1 = AKSIMD_MUL_V4F32( vTmp1, vCIn2 );
    AKSIMD_V4F32 vTmp2 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(3,3,1,1));
    vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vSign );
    vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vCIn2 );
    vTmp2 = AKSIMD_SHUFFLE_BADC( vTmp2 );
    vTmp2 = AKSIMD_ADD_V4F32( vTmp2, vTmp1 );
    return vTmp2;
}

#endif
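// Illustrative usage sketch (not part of the SDK header): AKSIMD_COMPLEXMUL
// above expects interleaved complex data, i.e. each vector holds
// (re0, im0, re1, im1), and returns the two products in the same layout.
// The function and parameter names are hypothetical.
static AkForceInline AKSIMD_V4F32 AkSimdExample_ComplexMul( const AkReal32* in_pA, const AkReal32* in_pB )
{
    AKSIMD_V4F32 vA = AKSIMD_LOAD_V4F32( in_pA ); // (re0, im0, re1, im1)
    AKSIMD_V4F32 vB = AKSIMD_LOAD_V4F32( in_pB );
    return AKSIMD_COMPLEXMUL( vA, vB );           // two complex products, same layout
}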
// AKSIMD packing / unpacking.

// Interleaves the lower four 16-bit lanes of a and b.
#define AKSIMD_UNPACKLO_VECTOR8I16( __a__, __b__ ) vreinterpretq_s32_s16( vzipq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ).val[0] )

// Interleaves the upper four 16-bit lanes of a and b.
#define AKSIMD_UNPACKHI_VECTOR8I16( __a__, __b__ ) vreinterpretq_s32_s16( vzipq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ).val[1] )

// Interleaves the lower halves: (x,y,z,w), (a,b,c,d) -> (x,a,y,b).
AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKLO_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
{
    // sce_vectormath_xayb(in_vec1, in_vec2)
    float32x2_t xy = vget_low_f32( in_vec1 /*xyzw*/ );
    float32x2_t ab = vget_low_f32( in_vec2 /*abcd*/ );
    float32x2x2_t xa_yb = vtrn_f32( xy, ab );
    AKSIMD_V4F32 xayb = vcombine_f32( xa_yb.val[0], xa_yb.val[1] );
    return xayb;
}

// Interleaves the upper halves: (x,y,z,w), (a,b,c,d) -> (z,c,w,d).
AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKHI_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
{
    //return sce_vectormath_zcwd( in_vec1, in_vec2 );
    float32x2_t zw = vget_high_f32( in_vec1 /*xyzw*/ );
    float32x2_t cd = vget_high_f32( in_vec2 /*abcd*/ );
    float32x2x2_t zc_wd = vtrn_f32( zw, cd );
    AKSIMD_V4F32 zcwd = vcombine_f32( zc_wd.val[0], zc_wd.val[1] );
    return zcwd;
}

// Packs eight 32-bit signed integers into eight 16-bit signed integers, with saturation.
AkForceInline AKSIMD_V4I32 AKSIMD_PACKS_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
{
    int16x4_t vec1_16 = vqmovn_s32( in_vec1 );
    int16x4_t vec2_16 = vqmovn_s32( in_vec2 );
    int16x8_t result = vcombine_s16( vec1_16, vec2_16 );
    return vreinterpretq_s32_s16( result );
}

// Returns the high element of in_vec1 followed by the low element of in_vec2.
#define AKSIMD_HILO_V2F32( in_vec1, in_vec2 ) vreinterpret_f32_u32( vext_u32( vreinterpret_u32_f32( in_vec1 ), vreinterpret_u32_f32( in_vec2 ), 1 ) )

// Transposition of pairs of vectors.
#define AKSIMD_TRANSPOSE_V2F32( in_vec1, in_vec2 ) vtrn_f32( in_vec1, in_vec2 )

#define AKSIMD_TRANSPOSE_V4F32( in_vec1, in_vec2 ) vtrnq_f32( in_vec1, in_vec2 )

// Swaps the two elements of the vector.
#define AKSIMD_SWAP_V2F32( in_vec ) vrev64_f32( in_vec )

// AKSIMD vector comparison.

#define AKSIMD_CMP_CTRLMASK uint32x4_t

// Lane-wise a >= b for four single-precision floating-point values.
#define AKSIMD_GTEQ_V4F32( __a__, __b__ ) vcgeq_f32( (__a__), (__b__))

// Lane-wise a >= b for four 32-bit signed integers.
#define AKSIMD_GTEQ_V4I32( __a__, __b__ ) vcgeq_s32( (__a__), (__b__))

// Lane-wise a == b for four single-precision floating-point values.
#define AKSIMD_EQ_V4F32( __a__, __b__ ) vceqq_f32( (__a__), (__b__))

// Lane-wise a == b for four 32-bit signed integers.
#define AKSIMD_EQ_V4I32( __a__, __b__ ) vceqq_s32( (__a__), (__b__))

// Lane-wise select: returns b where the condition mask c is set, a elsewhere.
#define AKSIMD_VSEL_V4F32( __a__, __b__, __c__ ) vbslq_f32( (__c__), (__b__), (__a__) )

// (cond1 >= cond2) ? b : a.
#define AKSIMD_SEL_GTEQ_V4F32( __a__, __b__, __cond1__, __cond2__ ) AKSIMD_VSEL_V4F32( __a__, __b__, AKSIMD_GTEQ_V4F32( __cond1__, __cond2__ ) )

// a >= 0 ? b : c, with the operands in the same order as the C++ ternary operator.
#define AKSIMD_SEL_GTEZ_V4F32( __a__, __b__, __c__ ) AKSIMD_VSEL_V4F32( (__c__), (__b__), AKSIMD_GTEQ_V4F32( __a__, AKSIMD_SETZERO_V4F32() ) )

// Broadcasts lane idx of var to all four lanes.
#define AKSIMD_SPLAT_V4F32(var, idx) vmovq_n_f32(vgetq_lane_f32(var, idx))

#endif //_AKSIMD_ARM_NEON_H_
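As a closing illustration, here is a minimal sketch (not from the SDK; the function and parameter names are hypothetical) of how these macros compose into a branch-free DSP loop, applying a gain and a floor to a buffer four samples at a time:

#include <AK/SoundEngine/Platforms/arm_neon/AkSimd.h>

// Apply a gain, then clamp each sample to a floor, four samples per iteration.
// Assumes in_uNumSamples is a multiple of 4.
static void AkSimdExample_ApplyGainWithFloor( AkReal32* io_pBuf, AkUInt32 in_uNumSamples, AkReal32 in_fGain, AkReal32 in_fFloor )
{
    AKSIMD_V4F32 vGain = AKSIMD_SET_V4F32( in_fGain );
    AKSIMD_V4F32 vFloor = AKSIMD_SET_V4F32( in_fFloor );
    for ( AkUInt32 i = 0; i < in_uNumSamples; i += 4 )
    {
        AKSIMD_V4F32 v = AKSIMD_LOAD_V4F32( io_pBuf + i );
        v = AKSIMD_MUL_V4F32( v, vGain );
        v = AKSIMD_MAX_V4F32( v, vFloor );  // branch-free clamp
        AKSIMD_STORE_V4F32( io_pBuf + i, v );
    }
}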