Wwise SDK 2015.1.9
버전
menu_open
link
include/AK/SoundEngine/Platforms/arm_neon/AkSimd.h
Go to the documentation of this file.00001 00002 // 00003 // Copyright (c) 2006 Audiokinetic Inc. / All Rights Reserved 00004 // 00006 00007 // AkSimd.h 00008 00011 00012 #ifndef _AKSIMD_ARM_NEON_H_ 00013 #define _AKSIMD_ARM_NEON_H_ 00014 00015 #include <arm_neon.h> 00016 #include <AK/SoundEngine/Common/AkTypes.h> 00017 00018 // Platform specific defines for prefetching 00019 00020 /* 00021 // ?????? 00022 #define AKSIMD_ARCHCACHELINESIZE (64) ///< Assumed cache line width for architectures on this platform 00023 // ?????? 00024 #define AKSIMD_ARCHMAXPREFETCHSIZE (512) ///< Use this to control how much prefetching maximum is desirable (assuming 8-way cache) 00026 // ?????? 00027 #define AKSIMD_PREFETCHMEMORY( __offset__, __add__ ) _mm_prefetch(((char *)(__add__))+(__offset__), _MM_HINT_NTA ) 00028 */ 00029 00032 00033 00034 #define AKSIMD_ALIGNSIZE( __Size__ ) (((__Size__) + 15) & ~15) 00035 00037 00038 00041 00042 00043 typedef int32x4_t AKSIMD_V4I32; 00044 typedef int16x8_t AKSIMD_V8I16; 00045 typedef int16x4_t AKSIMD_V4I16; 00046 typedef uint32x4_t AKSIMD_V4UI32; 00047 typedef uint32x2_t AKSIMD_V2UI32; 00048 typedef int32x2_t AKSIMD_V2I32; 00049 typedef float32_t AKSIMD_F32; 00050 typedef float32x2_t AKSIMD_V2F32; 00051 typedef float32x4_t AKSIMD_V4F32; 00052 00053 typedef uint32x4_t AKSIMD_V4COND; 00054 typedef uint32x4_t AKSIMD_V4ICOND; 00055 typedef uint32x4_t AKSIMD_V4FCOND; 00056 00057 #if defined(AK_CPU_ARM_NEON) 00058 typedef float32x2x2_t AKSIMD_V2F32X2; 00059 typedef float32x4x2_t AKSIMD_V4F32X2; 00060 typedef float32x4x4_t AKSIMD_V4F32X4; 00061 #endif 00062 00063 #define AKSIMD_V4F32_SUPPORTED 00064 00065 00067 00068 00071 00072 00074 #define AKSIMD_LOAD_V4F32( __addr__ ) vld1q_f32( (float32_t*)(__addr__) ) 00075 00078 #define AKSIMD_LOADU_V4F32( __addr__ ) vld1q_f32( (float32_t*)(__addr__) ) 00079 00082 #define AKSIMD_LOAD1_V4F32( __scalar__ ) vld1q_dup_f32( (float32_t*)(&(__scalar__)) ) 00083 00086 #define AKSIMD_SET_V4F32( __scalar__ ) vdupq_n_f32( __scalar__ ) 00087 00089 #define AKSIMD_SET_V4I32( __scalar__ ) vdupq_n_s32( __scalar__ ) 00090 00093 #define AKSIMD_SETZERO_V4F32() AKSIMD_SET_V4F32( 0 ) 00094 00098 #define AKSIMD_LOAD_SS_V4F32( __addr__ ) vld1q_lane_f32( (float32_t*)(__addr__), AKSIMD_SETZERO_V4F32(), 0 ); 00099 00101 #define AKSIMD_LOAD_V4I32( __addr__ ) vld1q_s32( (const int32_t*)(__addr__) ) 00102 00104 #define AKSIMD_LOAD_V8I16( __addr__ ) vld1q_s16( (const int16_t*)(__addr__) ) 00105 00107 #define AKSIMD_LOAD_V4I16( __addr__ ) vld1_s16( (const int16_t*)(__addr__) ) 00108 00110 #if defined(AK_VITA) 00111 // Due to a compiler bug in sony sdk 1.8 and 2.0, this workaround is required. Removed when fixed. 00112 #define AKSIMD_LOADU_V4I32( __addr__ ) *__addr__ 00113 #else 00114 #define AKSIMD_LOADU_V4I32( __addr__ ) vld1q_s32( (const int32_t*)(__addr__)) 00115 #endif 00116 00117 #define AKSIMD_SETZERO_V4I32() vdupq_n_s32( 0 ) 00118 00120 #define AKSIMD_LOAD_V2F32( __addr__ ) vld1_f32( (float32_t*)(__addr__) ) 00121 #define AKSIMD_LOAD_V2F32_LANE( __addr__, __vec__, __lane__ ) vld1_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) ); 00122 00124 #define AKSIMD_SET_V2F32( __scalar__ ) vdup_n_f32( __scalar__ ) 00125 00127 #define AKSIMD_SETZERO_V2F32() AKSIMD_SET_V2F32( 0 ) 00128 00130 #define AKSIMD_LOAD_V4F32X2( __addr__ ) vld2q_f32( (float32_t*)(__addr__) ) 00131 #define AKSIMD_LOAD_V2F32X2( __addr__ ) vld2_f32( (float32_t*)(__addr__) ) 00132 00134 #define AKSIMD_LOAD_V2F32X2_LANE( __addr__, __vec__, __lane__ ) vld2_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) ); 00135 #define AKSIMD_LOAD_V4F32X4_LANE( __addr__, __vec__, __lane__ ) vld4q_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) ); 00136 00138 00139 00140 00143 00144 00146 #define AKSIMD_STORE_V4F32( __addr__, __vName__ ) vst1q_f32( (float32_t*)(__addr__), (__vName__) ) 00147 00149 #define AKSIMD_STOREU_V4F32( __addr__, __vec__ ) vst1q_f32( (float32_t*)(__addr__), (__vec__) ) 00150 00153 #define AKSIMD_STORE1_V4F32( __addr__, __vec__ ) vst1q_lane_f32( (float32_t*)(__addr__), (__vec__), 0 ) 00154 00156 #define AKSIMD_STORE_V4I32( __addr__, __vec__ ) vst1q_s32( (int32_t*)(__addr__), (__vec__) ) 00157 00159 #define AKSIMD_STOREU_V4I32( __addr__, __vec__ ) vst1q_s32( (int32_t*)(__addr__), (__vec__) ) 00160 00162 #define AKSIMD_STOREU_V4UI32( __addr__, __vec__ ) vst1q_u32( (uint32_t*)(__addr__), (__vec__) ) 00163 00165 #define AKSIMD_STORE_V2F32( __addr__, __vName__ ) vst1_f32( (AkReal32*)(__addr__), (__vName__) ) 00166 00168 #define AKSIMD_STORE_V4F32X2( __addr__, __vName__ ) vst2q_f32( (float32_t*)(__addr__), (__vName__) ) 00169 #define AKSIMD_STORE_V2F32X2( __addr__, __vName__ ) vst2_f32( (float32_t*)(__addr__), (__vName__) ) 00170 00172 00173 00174 00177 00178 00181 #define AKSIMD_CONVERT_V4I32_TO_V4F32( __vec__ ) vcvtq_f32_s32( __vec__ ) 00182 00185 #define AKSIMD_CONVERT_V4F32_TO_V4I32( __vec__ ) vcvtq_s32_f32( __vec__ ) 00186 00189 #define AKSIMD_TRUNCATE_V4F32_TO_V4I32( __vec__ ) vcvtq_s32_f32( (__vec__) ) 00190 00193 #define AKSIMD_CONVERT_V2F32_TO_V2I32( __vec__ ) vcvt_s32_f32( __vec__ ) 00194 00196 00197 00198 00201 00202 00205 #define AKSIMD_AND_V4I32( __a__, __b__ ) vandq_s32( (__a__), (__b__) ) 00206 00209 #define AKSIMD_CMPGT_V8I16( __a__, __b__ ) \ 00210 vreinterpretq_s32_u16( vcgtq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ) ) 00211 00213 #define AKSIMD_CMPLE_V4F32( __a__, __b__ ) vcleq_f32( (__a__), (__b__) ) 00214 00216 00217 00218 00221 00222 00225 #define AKSIMD_SHIFTLEFT_V4I32( __vec__, __shiftBy__ ) \ 00226 vshlq_n_s32( (__vec__), (__shiftBy__) ) 00227 00230 #define AKSIMD_SHIFTRIGHTARITH_V4I32( __vec__, __shiftBy__ ) \ 00231 vrshrq_n_s32( (__vec__), (__shiftBy__) ) 00232 00234 00235 00236 00239 00240 00241 // Macro for combining two vector of 2 elements into one vector of 00242 // 4 elements. 00243 #define AKSIMD_COMBINE_V2F32( a, b ) vcombine_f32( a, b ) 00244 00245 // Macro for shuffle parameter for AKSIMD_SHUFFLE_V4F32() (see _MM_SHUFFLE) 00246 #define AKSIMD_SHUFFLE( fp3, fp2, fp1, fp0 ) \ 00247 (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) 00248 00251 // Usage: AKSIMD_SHUFFLE_V4F32( vec1, vec2, AKSIMD_SHUFFLE( z, y, x, w ) ) 00252 // If you get a link error, it's probably because the required 00253 // _AKSIMD_LOCAL::SHUFFLE_V4F32< zyxw > is not implemented in 00254 // <AK/SoundEngine/Platforms/arm_neon/AkSimdShuffle.h>. 00255 #define AKSIMD_SHUFFLE_V4F32( a, b, zyxw ) \ 00256 _AKSIMD_LOCAL::SHUFFLE_V4F32< zyxw >( a, b ) 00257 00258 // Various combinations of zyxw for _AKSIMD_LOCAL::SHUFFLE_V4F32< zyxw > are 00259 // implemented in a separate header file to keep this one cleaner: 00260 #include <AK/SoundEngine/Platforms/arm_neon/AkSimdShuffle.h> 00261 00267 inline AKSIMD_V4F32 AKSIMD_MOVEHL_V4F32( const AKSIMD_V4F32 abcd, const AKSIMD_V4F32 xyzw ) 00268 { 00269 //return akshuffle_zwcd( xyzw, abcd ); 00270 AKSIMD_V2F32 zw = vget_high_f32( xyzw ); 00271 AKSIMD_V2F32 cd = vget_high_f32( abcd ); 00272 AKSIMD_V4F32 zwcd = vcombine_f32( zw , cd ); 00273 return zwcd; 00274 } 00275 00281 inline AKSIMD_V4F32 AKSIMD_MOVELH_V4F32( const AKSIMD_V4F32& xyzw, const AKSIMD_V4F32& abcd ) 00282 { 00283 return vcombine_f32( vget_low_f32( xyzw ) , vget_low_f32( abcd ) ); 00284 } 00285 00287 //#define AKSIMD_SHUFFLE_BADC( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(2,3,0,1)) 00288 #define AKSIMD_SHUFFLE_BADC( __a__ ) vrev64q_f32( __a__ ) 00289 00291 //#define AKSIMD_SHUFFLE_CDAB( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(1,0,3,2)) 00292 #define AKSIMD_SHUFFLE_CDAB( __a__ ) vcombine_f32( vget_high_f32(__a__), vget_low_f32(__a__) ) 00293 00295 #define AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1)) 00296 00298 #define AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0)) 00299 00301 00302 00303 00306 00307 00310 #define AKSIMD_SUB_V4F32( __a__, __b__ ) vsubq_f32( (__a__), (__b__) ) 00311 00314 #define AKSIMD_SUB_V2F32( __a__, __b__ ) vsub_f32( (__a__), (__b__) ) 00315 00319 #define AKSIMD_SUB_SS_V4F32( __a__, __b__ ) \ 00320 vsubq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) ); 00321 00324 #define AKSIMD_ADD_V4F32( __a__, __b__ ) vaddq_f32( (__a__), (__b__) ) 00325 00328 #define AKSIMD_ADD_V2F32( __a__, __b__ ) vadd_f32( (__a__), (__b__) ) 00329 00331 #define AKSIMD_ADD_V4I32( __a__, __b__ ) vaddq_s32( (__a__), (__b__) ) 00332 00335 #define AKSIMD_COMP_V4F32( __a__, __b__ ) vceqq_f32( (__a__), (__b__) ) 00336 00339 #define AKSIMD_COMP_V2F32( __a__, __b__ ) vceq_f32( (__a__), (__b__) ) 00340 00344 #define AKSIMD_ADD_SS_V4F32( __a__, __b__ ) \ 00345 vaddq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) ) 00346 00349 #define AKSIMD_MUL_V4F32( __a__, __b__ ) vmulq_f32( (__a__), (__b__) ) 00350 00353 #define AKSIMD_MUL_V4F32_SCALAR( __a__, __b__ ) vmulq_n_f32( (__a__), (__b__) ) 00354 00356 AkForceInline AKSIMD_V4F32 AKSIMD_DIV_V4F32( AKSIMD_V4F32 a, AKSIMD_V4F32 b ) 00357 { 00358 AKSIMD_V4F32 inv = vrecpeq_f32(b); 00359 AKSIMD_V4F32 restep = vrecpsq_f32(b, inv); 00360 inv = vmulq_f32(restep, inv); 00361 return vmulq_f32(a, inv); 00362 } 00363 00366 #define AKSIMD_MUL_V2F32( __a__, __b__ ) vmul_f32( (__a__), (__b__) ) 00367 00370 #define AKSIMD_MUL_V2F32_SCALAR( __a__, __b__ ) vmul_n_f32( (__a__), (__b__) ) 00371 00376 #define AKSIMD_MUL_SS_V4F32( __a__, __b__ ) \ 00377 vmulq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) ) 00378 00380 #define AKSIMD_MADD_V4F32( __a__, __b__, __c__ ) \ 00381 AKSIMD_ADD_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) 00382 00383 #define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) \ 00384 AKSIMD_SUB_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) 00385 00386 #define AKSIMD_MADD_V2F32( __a__, __b__, __c__ ) \ 00387 AKSIMD_ADD_V2F32( AKSIMD_MUL_V2F32( (__a__), (__b__) ), (__c__) ) 00388 00389 #define AKSIMD_MSUB_V2F32( __a__, __b__, __c__ ) \ 00390 AKSIMD_SUB_V2F32( AKSIMD_MUL_V2F32( (__a__), (__b__) ), (__c__) ) 00391 00392 #define AKSIMD_MADD_V4F32_INST( __a__, __b__, __c__ ) vmlaq_f32( (__c__), (__a__), (__b__) ) 00393 #define AKSIMD_MADD_V2F32_INST( __a__, __b__, __c__ ) vmla_f32( (__c__), (__a__), (__b__) ) 00394 //#define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) vmlsq_f32( (__c__), (__a__), (__b__) ) 00395 //#define AKSIMD_MSUB_V2F32( __a__, __b__, __c__ ) vmls_f32( (__c__), (__a__), (__b__) ) 00396 00397 #define AKSIMD_MADD_V4F32_SCALAR( __a__, __b__, __c__ ) vmlaq_n_f32( (__c__), (__a__), (__b__) ) 00398 #define AKSIMD_MADD_V2F32_SCALAR( __a__, __b__, __c__ ) vmla_n_f32( (__c__), (__a__), (__b__) ) 00399 00401 AkForceInline AKSIMD_V4F32 AKSIMD_MADD_SS_V4F32( const AKSIMD_V4F32& __a__, const AKSIMD_V4F32& __b__, const AKSIMD_V4F32& __c__ ) 00402 { 00403 return AKSIMD_ADD_SS_V4F32( AKSIMD_MUL_SS_V4F32( __a__, __b__ ), __c__ ); 00404 } 00405 00408 #define AKSIMD_MIN_V4F32( __a__, __b__ ) vminq_f32( (__a__), (__b__) ) 00409 00412 #define AKSIMD_MIN_V2F32( __a__, __b__ ) vmin_f32( (__a__), (__b__) ) 00413 00416 #define AKSIMD_MAX_V4F32( __a__, __b__ ) vmaxq_f32( (__a__), (__b__) ) 00417 00420 #define AKSIMD_MAX_V2F32( __a__, __b__ ) vmax_f32( (__a__), (__b__) ) 00421 00423 #define AKSIMD_ABS_V4F32( __a__ ) vabsq_f32((__a__)) 00424 00426 #define AKSIMD_NEG_V2F32( __a__ ) vneg_f32( (__a__) ) 00427 #define AKSIMD_NEG_V4F32( __a__ ) vnegq_f32( (__a__) ) 00428 00430 #define AKSIMD_SQRT_V4F32( __vec__ ) vrecpeq_f32( vrsqrteq_f32( __vec__ ) ) 00431 00433 #define AKSIMD_SQRT_V2F32( __vec__ ) vrecpe_f32( vrsqrte_f32( __vec__ ) ) 00434 00439 static AkForceInline void AKSIMD_HORIZONTALADD( AKSIMD_V4F32 & vVec ) 00440 { 00441 AKSIMD_V4F32 vHighLow = AKSIMD_MOVEHL_V4F32(vVec, vVec); 00442 vVec = AKSIMD_ADD_V4F32(vVec, vHighLow); 00443 vHighLow = AKSIMD_SHUFFLE_V4F32(vVec, vVec, 0x55); 00444 vVec = AKSIMD_ADD_V4F32(vVec, vHighLow); 00445 } 00446 00448 00449 #if defined(AK_IOS) || defined(AK_VITA) 00450 00451 // V2 implementation (faster 'cause ARM processors actually have an x2 pipeline) 00452 00453 static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL( AKSIMD_V4F32 vCIn1, AKSIMD_V4F32 vCIn2 ) 00454 { 00455 static const AKSIMD_V2F32 vSign = { -1.f, 1.f }; 00456 00457 AKSIMD_V2F32 vCIn1a = vget_low_f32( vCIn1 ); 00458 AKSIMD_V2F32 vCIn2a = vget_low_f32( vCIn2 ); 00459 AKSIMD_V2F32 vTmpa0 = vmul_n_f32( vCIn2a, vCIn1a[0] ); 00460 AKSIMD_V2F32 vTmpa1 = vmul_n_f32( vCIn2a, vCIn1a[1] ); 00461 vTmpa1 = vrev64_f32( vTmpa1 ); 00462 vTmpa1 = vmul_f32( vTmpa1, vSign ); 00463 vTmpa0 = vadd_f32( vTmpa0, vTmpa1 ); 00464 00465 AKSIMD_V2F32 vCIn1b = vget_high_f32( vCIn1 ); 00466 AKSIMD_V2F32 vCIn2b = vget_high_f32( vCIn2 ); 00467 AKSIMD_V2F32 vTmpb0 = vmul_n_f32( vCIn2b, vCIn1b[0] ); 00468 AKSIMD_V2F32 vTmpb1 = vmul_n_f32( vCIn2b, vCIn1b[1] ); 00469 vTmpb1 = vrev64_f32( vTmpb1 ); 00470 vTmpb1 = vmul_f32( vTmpb1, vSign ); 00471 vTmpb0 = vadd_f32( vTmpb0, vTmpb1 ); 00472 00473 return vcombine_f32( vTmpa0, vTmpb0 ); 00474 } 00475 00476 #else 00477 00478 // V4 implementation (kept in case future ARM processors actually have an x4 pipeline) 00479 00480 static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL( AKSIMD_V4F32 vCIn1, AKSIMD_V4F32 vCIn2 ) 00481 { 00482 #ifdef AKSIMD_DECLARE_V4F32 00483 static const AKSIMD_DECLARE_V4F32( vSign, 1.f, -1.f, 1.f, -1.f ); 00484 #else 00485 static const AKSIMD_V4F32 vSign = { 1.f, -1.f, 1.f, -1.f }; 00486 #endif 00487 00488 AKSIMD_V4F32 vTmp1 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(2,2,0,0)); 00489 vTmp1 = AKSIMD_MUL_V4F32( vTmp1, vCIn2 ); 00490 AKSIMD_V4F32 vTmp2 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(3,3,1,1)); 00491 vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vSign ); 00492 vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vCIn2 ); 00493 vTmp2 = AKSIMD_SHUFFLE_BADC( vTmp2 ); 00494 vTmp2 = AKSIMD_ADD_V4F32( vTmp2, vTmp1 ); 00495 return vTmp2; 00496 } 00497 00498 #endif 00499 00501 00502 00503 00506 00507 00510 #define AKSIMD_UNPACKLO_VECTOR8I16( __a__, __b__ ) vreinterpretq_s32_s16( vzipq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ).val[0] ) 00511 00514 #define AKSIMD_UNPACKHI_VECTOR8I16( __a__, __b__ ) vreinterpretq_s32_s16( vzipq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ).val[1] ) 00515 00518 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKLO_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00519 { 00520 // sce_vectormath_xayb(in_vec1, in_vec2) 00521 float32x2_t xy = vget_low_f32( in_vec1 /*xyzw*/ ); 00522 float32x2_t ab = vget_low_f32( in_vec2 /*abcd*/ ); 00523 float32x2x2_t xa_yb = vtrn_f32( xy, ab ); 00524 AKSIMD_V4F32 xayb = vcombine_f32( xa_yb.val[0], xa_yb.val[1] ); 00525 return xayb; 00526 } 00527 00530 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKHI_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00531 { 00532 //return sce_vectormath_zcwd( in_vec1, in_vec2 ); 00533 float32x2_t zw = vget_high_f32( in_vec1 /*xyzw*/ ); 00534 float32x2_t cd = vget_high_f32( in_vec2 /*abcd*/ ); 00535 float32x2x2_t zc_wd = vtrn_f32( zw, cd ); 00536 AKSIMD_V4F32 zcwd = vcombine_f32( zc_wd.val[0], zc_wd.val[1] ); 00537 return zcwd; 00538 } 00539 00542 AkForceInline AKSIMD_V4I32 AKSIMD_PACKS_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00543 { 00544 int16x4_t vec1_16 = vqmovn_s32( in_vec1 ); 00545 int16x4_t vec2_16 = vqmovn_s32( in_vec2 ); 00546 int16x8_t result = vcombine_s16( vec1_16, vec2_16 ); 00547 return vreinterpretq_s32_s16( result ); 00548 } 00549 00552 #define AKSIMD_HILO_V2F32( in_vec1, in_vec2 ) vreinterpret_f32_u32( vext_u32( vreinterpret_u32_f32( in_vec1 ), vreinterpret_u32_f32( in_vec2 ), 1 ) ) 00553 00556 #define AKSIMD_TRANSPOSE_V2F32( in_vec1, in_vec2 ) vtrn_f32( in_vec1, in_vec2 ) 00557 00558 #define AKSIMD_TRANSPOSE_V4F32( in_vec1, in_vec2 ) vtrnq_f32( in_vec1, in_vec2 ) 00559 00561 #define AKSIMD_SWAP_V2F32( in_vec ) vrev64_f32( in_vec ) 00562 00564 00565 00569 00570 00571 #define AKSIMD_CMP_CTRLMASK uint32x4_t 00572 00574 #define AKSIMD_GTEQ_V4F32( __a__, __b__ ) vcgeq_f32( (__a__), (__b__)) 00575 00577 #define AKSIMD_GTEQ_V4I32( __a__, __b__ ) vcgeq_s32( (__a__), (__b__)) 00578 00580 #define AKSIMD_EQ_V4F32( __a__, __b__ ) vceqq_f32( (__a__), (__b__)) 00581 00583 #define AKSIMD_EQ_V4I32( __a__, __b__ ) vceqq_s32( (__a__), (__b__)) 00584 00586 #define AKSIMD_VSEL_V4F32( __a__, __b__, __c__ ) vbslq_f32( (__c__), (__b__), (__a__) ) 00587 00588 // (cond1 >= cond2) ? b : a. 00589 #define AKSIMD_SEL_GTEQ_V4F32( __a__, __b__, __cond1__, __cond2__ ) AKSIMD_VSEL_V4F32( __a__, __b__, AKSIMD_GTEQ_V4F32( __cond1__, __cond2__ ) ) 00590 00591 // a >= 0 ? b : c ... Written, like, you know, the normal C++ operator syntax. 00592 #define AKSIMD_SEL_GTEZ_V4F32( __a__, __b__, __c__ ) AKSIMD_VSEL_V4F32( (__c__), (__b__), AKSIMD_GTEQ_V4F32( __a__, AKSIMD_SETZERO_V4F32() ) ) 00593 00594 #define AKSIMD_SPLAT_V4F32(var, idx) vmovq_n_f32(vgetq_lane_f32(var, idx)) 00595 00597 00598 00599 #endif //_AKSIMD_ARM_NEON_H_ 00600
이 페이지가 도움이 되었나요?
작업하는 프로젝트에 대해 알려주세요. 언제든지 도와드릴 준비가 되어 있습니다.
프로젝트를 등록하세요. 아무런 조건이나 의무 사항 없이 빠른 시작을 도와드리겠습니다.
Wwise를 시작해 보세요