Wwise SDK 2016.2.6
버전
menu_open
link
include/AK/SoundEngine/Platforms/Generic/AkSimd.h
Go to the documentation of this file.00001 /******************************************************************************* 00002 The content of this file includes portions of the AUDIOKINETIC Wwise Technology 00003 released in source code form as part of the SDK installer package. 00004 00005 Commercial License Usage 00006 00007 Licensees holding valid commercial licenses to the AUDIOKINETIC Wwise Technology 00008 may use this file in accordance with the end user license agreement provided 00009 with the software or, alternatively, in accordance with the terms contained in a 00010 written agreement between you and Audiokinetic Inc. 00011 00012 Apache License Usage 00013 00014 Alternatively, this file may be used under the Apache License, Version 2.0 (the 00015 "Apache License"); you may not use this file except in compliance with the 00016 Apache License. You may obtain a copy of the Apache License at 00017 http://www.apache.org/licenses/LICENSE-2.0. 00018 00019 Unless required by applicable law or agreed to in writing, software distributed 00020 under the Apache License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES 00021 OR CONDITIONS OF ANY KIND, either express or implied. See the Apache License for 00022 the specific language governing permissions and limitations under the License. 00023 00024 Version: <VERSION> Build: <BUILDNUMBER> 00025 Copyright (c) <COPYRIGHTYEAR> Audiokinetic Inc. 00026 *******************************************************************************/ 00027 00028 // AkSimd.h 00029 00032 00033 #ifndef _AKSIMD_GENERIC_H_ 00034 #define _AKSIMD_GENERIC_H_ 00035 00036 #include <math.h> 00037 #include <string.h> 00038 #include <AK/SoundEngine/Common/AkTypes.h> 00039 #include <AK/Tools/Common/AkPlatformFuncs.h> 00040 00043 00044 typedef AkInt32 AKSIMD_I32; 00045 typedef struct { AkInt32 m_data[4]; } AKSIMD_V4I32; 00046 typedef struct { AkUInt32 m_data[4]; } AKSIMD_V4UI32; 00047 typedef AkReal32 AKSIMD_F32; 00048 typedef struct { AkReal32 m_data[2]; } AKSIMD_V2F32; 00049 typedef struct { AkReal32 m_data[4]; } AKSIMD_V4F32; 00050 typedef AKSIMD_V4UI32 AKSIMD_V4COND; 00051 00052 00053 typedef struct { AkInt32 m_data[4]; } __attribute__((__packed__)) AKSIMD_V4I32_UNALIGNED; 00054 typedef struct { AkUInt32 m_data[4]; } __attribute__((__packed__)) AKSIMD_V4UI32_UNALIGNED; 00055 typedef struct { AkReal32 m_data[2]; } __attribute__((__packed__)) AKSIMD_V2F32_UNALIGNED; 00056 typedef struct { AkReal32 m_data[4]; } __attribute__((__packed__)) AKSIMD_V4F32_UNALIGNED; 00057 00059 00060 00061 #ifndef AKSIMD_GETELEMENT_V4F32 00062 #define AKSIMD_GETELEMENT_V4F32( __vName, __num__ ) (__vName).m_data[(__num__)] 00063 #endif 00064 00065 #ifndef AKSIMD_GETELEMENT_V2F32 00066 #define AKSIMD_GETELEMENT_V2F32( __vName, __num__ ) (__vName).m_data[(__num__)] 00067 #endif 00068 00069 #ifndef AKSIMD_GETELEMENT_V4I32 00070 #define AKSIMD_GETELEMENT_V4I32( __vName, __num__ ) (__vName).m_data[(__num__)] 00071 #endif 00072 00075 00076 #define AKSIMD_ALIGNSIZE( __Size__ ) (((__Size__) + 15) & ~15) 00077 00078 00079 00082 00083 #define AKSIMD_LOADU_V4I32( in_pData ) (*(in_pData)) 00084 00085 #define AKSIMD_LOADU_V4F32( in_pValue ) (*(AKSIMD_V4F32*)(in_pValue)) 00086 00087 #define AKSIMD_LOAD_V4F32( in_pValue ) (*(AKSIMD_V4F32*)(in_pValue)) 00088 00089 AkForceInline AKSIMD_V4F32 AKSIMD_LOAD1_V4F32( AKSIMD_F32 in_value ) 00090 { 00091 AKSIMD_V4F32 vector; 00092 vector.m_data[0] = in_value; 00093 vector.m_data[1] = in_value; 00094 vector.m_data[2] = in_value; 00095 vector.m_data[3] = in_value; 00096 00097 return vector; 00098 } 00099 00100 // _mm_set_ps1 00101 AkForceInline AKSIMD_V4F32 AKSIMD_SET_V4F32( AKSIMD_F32 in_value ) 00102 { 00103 AKSIMD_V4F32 vector; 00104 vector.m_data[0] = in_value; 00105 vector.m_data[1] = in_value; 00106 vector.m_data[2] = in_value; 00107 vector.m_data[3] = in_value; 00108 00109 return vector; 00110 } 00111 00112 00113 AkForceInline AKSIMD_V2F32 AKSIMD_SET_V2F32( AKSIMD_F32 in_value ) 00114 { 00115 AKSIMD_V2F32 vector; 00116 vector.m_data[0] = in_value; 00117 vector.m_data[1] = in_value; 00118 00119 return vector; 00120 } 00121 00122 // _mm_setzero_ps() 00123 AkForceInline AKSIMD_V4F32 AKSIMD_SETZERO_V4F32() 00124 { 00125 AKSIMD_V4F32 vector; 00126 vector.m_data[0] = 0.f; 00127 vector.m_data[1] = 0.f; 00128 vector.m_data[2] = 0.f; 00129 vector.m_data[3] = 0.f; 00130 00131 return vector; 00132 } 00133 00134 AkForceInline AKSIMD_V2F32 AKSIMD_SETZERO_V2F32() 00135 { 00136 AKSIMD_V2F32 vector; 00137 vector.m_data[0] = 0.f; 00138 vector.m_data[1] = 0.f; 00139 00140 return vector; 00141 } 00142 // _mm_setzero_si128() 00143 AkForceInline AKSIMD_V4I32 AKSIMD_SETZERO_V4I32() 00144 { 00145 AKSIMD_V4I32 vector; 00146 vector.m_data[0] = 0; 00147 vector.m_data[1] = 0; 00148 vector.m_data[2] = 0; 00149 vector.m_data[3] = 0; 00150 00151 return vector; 00152 } 00153 00154 00158 AkForceInline AKSIMD_V4F32 AKSIMD_LOAD_SS_V4F32( const AKSIMD_F32* in_pData ) 00159 { 00160 AKSIMD_V4F32 vector; 00161 vector.m_data[0] = *in_pData; 00162 vector.m_data[1] = 0.f; 00163 vector.m_data[2] = 0.f; 00164 vector.m_data[3] = 0.f; 00165 00166 return vector; 00167 } 00168 00170 00171 00174 00175 00176 // _mm_storeu_ps -- The address does not need to be 16-byte aligned. 00177 #define AKSIMD_STOREU_V4F32( in_pTo, in_vec ) (*(AKSIMD_V4F32*)(in_pTo)) = (in_vec) 00178 00179 // _mm_store_ps -- The address must be 16-byte aligned. 00180 // ????? _mm_storeu_ps vs _mm_store_ps ????? 00181 #define AKSIMD_STORE_V4F32( __addr__, __vName__ ) AKSIMD_STOREU_V4F32(__addr__, __vName__) 00182 00183 // _mm_storeu_si128 00184 #define AKSIMD_STOREU_V4I32( in_pTo, in_vec ) (*(AKSIMD_V4I32*)(in_pTo)) = (in_vec) 00185 00188 AkForceInline void AKSIMD_STORE1_V4F32( AKSIMD_F32* in_pTo, const AKSIMD_V4F32& in_vec ) 00189 { 00190 ((AKSIMD_V4F32*)in_pTo)->m_data[0] = in_vec.m_data[0]; 00191 } 00192 00194 00195 00198 00199 00200 // _mm_cvtepi32_ps 00201 AkForceInline AKSIMD_V4F32 AKSIMD_CONVERT_V4I32_TO_V4F32( const AKSIMD_V4I32& in_from ) 00202 { 00203 AKSIMD_V4F32 vector; 00204 vector.m_data[0] = (AkReal32)in_from.m_data[0]; 00205 vector.m_data[1] = (AkReal32)in_from.m_data[1]; 00206 vector.m_data[2] = (AkReal32)in_from.m_data[2]; 00207 vector.m_data[3] = (AkReal32)in_from.m_data[3]; 00208 00209 return vector; 00210 } 00211 // _mm_cvtps_epi32 00212 AkForceInline AKSIMD_V4I32 AKSIMD_CONVERT_V4F32_TO_V4I32( const AKSIMD_V4F32& in_from ) 00213 { 00214 AKSIMD_V4I32 vector; 00215 vector.m_data[0] = (AkInt32)in_from.m_data[0]; 00216 vector.m_data[1] = (AkInt32)in_from.m_data[1]; 00217 vector.m_data[2] = (AkInt32)in_from.m_data[2]; 00218 vector.m_data[3] = (AkInt32)in_from.m_data[3]; 00219 00220 return vector; 00221 } 00222 00224 00225 00228 00229 00230 // _mm_and_si128 00231 AkForceInline AKSIMD_V4I32 AKSIMD_AND_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00232 { 00233 AKSIMD_V4I32 vector; 00234 vector.m_data[0] = in_vec1.m_data[0] & in_vec2.m_data[0]; 00235 vector.m_data[1] = in_vec1.m_data[1] & in_vec2.m_data[1]; 00236 vector.m_data[2] = in_vec1.m_data[2] & in_vec2.m_data[2]; 00237 vector.m_data[3] = in_vec1.m_data[3] & in_vec2.m_data[3]; 00238 00239 return vector; 00240 } 00241 00244 AkForceInline AKSIMD_V4I32 AKSIMD_CMPGT_V8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00245 { 00246 AKSIMD_V4I32 vector; 00247 00248 AkInt16 *pVec1,*pVec2,*pVec3; 00249 pVec1 = (AkInt16*)&in_vec1; 00250 pVec2 = (AkInt16*)&in_vec2; 00251 pVec3 = (AkInt16*)&vector; 00252 00253 pVec3[0] = (pVec1[0] > pVec2[0]) ? 0xffff : 0x0; 00254 pVec3[1] = (pVec1[1] > pVec2[1]) ? 0xffff : 0x0; 00255 pVec3[2] = (pVec1[2] > pVec2[2]) ? 0xffff : 0x0; 00256 pVec3[3] = (pVec1[3] > pVec2[3]) ? 0xffff : 0x0; 00257 pVec3[4] = (pVec1[4] > pVec2[4]) ? 0xffff : 0x0; 00258 pVec3[5] = (pVec1[5] > pVec2[5]) ? 0xffff : 0x0; 00259 pVec3[6] = (pVec1[6] > pVec2[6]) ? 0xffff : 0x0; 00260 pVec3[7] = (pVec1[7] > pVec2[7]) ? 0xffff : 0x0; 00261 00262 return vector; 00263 } 00264 00266 AkForceInline AKSIMD_V4UI32 AKSIMD_CMPLE_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00267 { 00268 AKSIMD_V4UI32 vector; 00269 00270 vector.m_data[0] = (in_vec1.m_data[0] <= in_vec2.m_data[0]) ? 0xffffffff : 0x0; 00271 vector.m_data[1] = (in_vec1.m_data[1] <= in_vec2.m_data[1]) ? 0xffffffff : 0x0; 00272 vector.m_data[2] = (in_vec1.m_data[2] <= in_vec2.m_data[2]) ? 0xffffffff : 0x0; 00273 vector.m_data[3] = (in_vec1.m_data[3] <= in_vec2.m_data[3]) ? 0xffffffff : 0x0; 00274 00275 return vector; 00276 } 00277 00278 00279 AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTLEFT_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy) 00280 { 00281 in_vector.m_data[0] <<= in_shiftBy; 00282 in_vector.m_data[1] <<= in_shiftBy; 00283 in_vector.m_data[2] <<= in_shiftBy; 00284 in_vector.m_data[3] <<= in_shiftBy; 00285 00286 return in_vector; 00287 } 00288 00289 AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTRIGHTARITH_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy) 00290 { 00291 in_vector.m_data[0] >>= in_shiftBy; 00292 in_vector.m_data[1] >>= in_shiftBy; 00293 in_vector.m_data[2] >>= in_shiftBy; 00294 in_vector.m_data[3] >>= in_shiftBy; 00295 00296 return in_vector; 00297 } 00298 00300 00301 00302 00305 00306 // _mm_sub_ps 00307 AkForceInline AKSIMD_V4F32 AKSIMD_SUB_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00308 { 00309 AKSIMD_V4F32 vector; 00310 00311 vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0]; 00312 vector.m_data[1] = in_vec1.m_data[1] - in_vec2.m_data[1]; 00313 vector.m_data[2] = in_vec1.m_data[2] - in_vec2.m_data[2]; 00314 vector.m_data[3] = in_vec1.m_data[3] - in_vec2.m_data[3]; 00315 00316 return vector; 00317 } 00318 00322 00323 AkForceInline AKSIMD_V4F32 AKSIMD_SUB_SS_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00324 { 00325 AKSIMD_V4F32 vector; 00326 00327 vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0]; 00328 vector.m_data[1] = in_vec1.m_data[1]; 00329 vector.m_data[2] = in_vec1.m_data[2]; 00330 vector.m_data[3] = in_vec1.m_data[3]; 00331 00332 return vector; 00333 } 00334 00335 // _mm_add_ps 00336 AkForceInline AKSIMD_V4F32 AKSIMD_ADD_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00337 { 00338 AKSIMD_V4F32 vector; 00339 00340 vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0]; 00341 vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1]; 00342 vector.m_data[2] = in_vec1.m_data[2] + in_vec2.m_data[2]; 00343 vector.m_data[3] = in_vec1.m_data[3] + in_vec2.m_data[3]; 00344 00345 return vector; 00346 } 00347 00348 AkForceInline AKSIMD_V2F32 AKSIMD_ADD_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00349 { 00350 AKSIMD_V2F32 vector; 00351 00352 vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0]; 00353 vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1]; 00354 00355 return vector; 00356 } 00357 00361 AkForceInline AKSIMD_V4F32 AKSIMD_ADD_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b ) 00362 { 00363 AKSIMD_V4F32 vector; 00364 00365 vector.m_data[0] = a.m_data[0] + b.m_data[0]; 00366 vector.m_data[1] = a.m_data[1]; 00367 vector.m_data[2] = a.m_data[2]; 00368 vector.m_data[3] = a.m_data[3]; 00369 00370 return vector; 00371 } 00372 00373 // _mm_mul_ps 00374 AkForceInline AKSIMD_V4F32 AKSIMD_MUL_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00375 { 00376 AKSIMD_V4F32 vector; 00377 00378 vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0]; 00379 vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1]; 00380 vector.m_data[2] = in_vec1.m_data[2] * in_vec2.m_data[2]; 00381 vector.m_data[3] = in_vec1.m_data[3] * in_vec2.m_data[3]; 00382 00383 return vector; 00384 } 00385 00386 AkForceInline AKSIMD_V2F32 AKSIMD_MUL_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00387 { 00388 AKSIMD_V2F32 vector; 00389 00390 vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0]; 00391 vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1]; 00392 00393 return vector; 00394 } 00395 00400 AkForceInline AKSIMD_V4F32 AKSIMD_MUL_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b ) 00401 { 00402 AKSIMD_V4F32 vector; 00403 00404 vector.m_data[0] = a.m_data[0] * b.m_data[0]; 00405 vector.m_data[1] = a.m_data[1]; 00406 vector.m_data[2] = a.m_data[2]; 00407 vector.m_data[3] = a.m_data[3]; 00408 00409 return vector; 00410 } 00411 00413 #define AKSIMD_MADD_V4F32( __a__, __b__, __c__ ) AKSIMD_ADD_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) 00414 #define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) AKSIMD_SUB_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) 00415 00417 #define AKSIMD_MADD_SS_V4F32( __a__, __b__, __c__ ) AKSIMD_ADD_SS_V4F32( AKSIMD_MUL_SS_V4F32( (__a__), (__b__) ), (__c__) ) 00418 00419 // _mm_min_ps 00420 AkForceInline AKSIMD_V4F32 AKSIMD_MIN_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00421 { 00422 AKSIMD_V4F32 vector; 00423 00424 vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]); 00425 vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]); 00426 vector.m_data[2] = AkMin(in_vec1.m_data[2], in_vec2.m_data[2]); 00427 vector.m_data[3] = AkMin(in_vec1.m_data[3], in_vec2.m_data[3]); 00428 00429 return vector; 00430 } 00431 00432 AkForceInline AKSIMD_V2F32 AKSIMD_MIN_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00433 { 00434 AKSIMD_V2F32 vector; 00435 00436 vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]); 00437 vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]); 00438 00439 return vector; 00440 } 00441 00442 // _mm_max_ps 00443 AkForceInline AKSIMD_V4F32 AKSIMD_MAX_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00444 { 00445 AKSIMD_V4F32 vector; 00446 00447 vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]); 00448 vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]); 00449 vector.m_data[2] = AkMax(in_vec1.m_data[2], in_vec2.m_data[2]); 00450 vector.m_data[3] = AkMax(in_vec1.m_data[3], in_vec2.m_data[3]); 00451 00452 return vector; 00453 } 00454 00455 AkForceInline AKSIMD_V2F32 AKSIMD_MAX_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 ) 00456 { 00457 AKSIMD_V2F32 vector; 00458 00459 vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]); 00460 vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]); 00461 00462 return vector; 00463 } 00464 00465 AkForceInline AKSIMD_V4F32 AKSIMD_ABS_V4F32( const AKSIMD_V4F32& in_vec1 ) 00466 { 00467 AKSIMD_V4F32 vector; 00468 vector.m_data[0] = fabs(in_vec1.m_data[0]); 00469 vector.m_data[1] = fabs(in_vec1.m_data[1]); 00470 vector.m_data[2] = fabs(in_vec1.m_data[2]); 00471 vector.m_data[3] = fabs(in_vec1.m_data[3]); 00472 return vector; 00473 } 00474 00475 AkForceInline AKSIMD_V4F32 AKSIMD_NEG_V4F32( const AKSIMD_V4F32& in_vec1 ) 00476 { 00477 AKSIMD_V4F32 vector; 00478 vector.m_data[0] = -in_vec1.m_data[0]; 00479 vector.m_data[1] = -in_vec1.m_data[1]; 00480 vector.m_data[2] = -in_vec1.m_data[2]; 00481 vector.m_data[3] = -in_vec1.m_data[3]; 00482 return vector; 00483 } 00484 00485 // _mm_sqrt_ps 00486 AkForceInline AKSIMD_V4F32 AKSIMD_SQRT_V4F32( const AKSIMD_V4F32& in_vec ) 00487 { 00488 AKSIMD_V4F32 vCompare; 00489 AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,0) ); 00490 AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,1) ); 00491 AKSIMD_GETELEMENT_V4F32(vCompare,2) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,2) ); 00492 AKSIMD_GETELEMENT_V4F32(vCompare,3) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,3) ); 00493 00494 //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) ); 00495 00496 return vCompare /*res*/; 00497 } 00498 00499 AkForceInline AKSIMD_V2F32 AKSIMD_SQRT_V2F32( const AKSIMD_V2F32& in_vec ) 00500 { 00501 AKSIMD_V2F32 vCompare; 00502 AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,0) ); 00503 AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,1) ); 00504 00505 //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) ); 00506 00507 return vCompare /*res*/; 00508 } 00509 00511 00512 00513 00516 00517 00518 // 00519 // _mm_unpacklo_epi16 00520 // r0 := a0 00521 // r1 := b0 00522 // r2 := a1 00523 // r3 := b1 00524 // r4 := a2 00525 // r5 := b2 00526 // r6 := a3 00527 // r7 := b3 00528 AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKLO_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00529 { 00530 AKSIMD_V4I32 vector; 00531 AkInt16 *pVec1,*pVec2,*pDest; 00532 pVec1 = (AkInt16*)&in_vec1; 00533 pVec2 = (AkInt16*)&in_vec2; 00534 pDest = (AkInt16*)&vector; 00535 00536 pDest[0] = pVec1[0]; 00537 pDest[1] = pVec2[0]; 00538 pDest[2] = pVec1[1]; 00539 pDest[3] = pVec2[1]; 00540 pDest[4] = pVec1[2]; 00541 pDest[5] = pVec2[2]; 00542 pDest[6] = pVec1[3]; 00543 pDest[7] = pVec2[3]; 00544 00545 return vector; 00546 } 00547 00548 // _mm_unpackhi_epi16 00549 AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKHI_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00550 { 00551 AKSIMD_V4I32 vector; 00552 AkInt16 *pVec1,*pVec2,*pDest; 00553 pVec1 = (AkInt16*)&in_vec1; 00554 pVec2 = (AkInt16*)&in_vec2; 00555 pDest = (AkInt16*)&vector; 00556 00557 pDest[0] = pVec1[4]; 00558 pDest[1] = pVec2[4]; 00559 pDest[2] = pVec1[5]; 00560 pDest[3] = pVec2[5]; 00561 pDest[4] = pVec1[6]; 00562 pDest[5] = pVec2[6]; 00563 pDest[6] = pVec1[7]; 00564 pDest[7] = pVec2[7]; 00565 00566 return vector; 00567 } 00568 00569 // _mm_unpacklo_ps 00570 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKLO_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00571 { 00572 AKSIMD_V4F32 vector; 00573 vector.m_data[0] = in_vec1.m_data[0]; 00574 vector.m_data[1] = in_vec2.m_data[0]; 00575 vector.m_data[2] = in_vec1.m_data[1]; 00576 vector.m_data[3] = in_vec2.m_data[1]; 00577 00578 return vector; 00579 } 00580 00581 // _mm_unpackhi_ps 00582 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKHI_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 00583 { 00584 AKSIMD_V4F32 vector; 00585 vector.m_data[0] = in_vec1.m_data[2]; 00586 vector.m_data[1] = in_vec2.m_data[2]; 00587 vector.m_data[2] = in_vec1.m_data[3]; 00588 vector.m_data[3] = in_vec2.m_data[3]; 00589 00590 return vector; 00591 } 00592 00593 // _mm_packs_epi32 00594 AkForceInline AKSIMD_V4I32 AKSIMD_PACKS_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 ) 00595 { 00596 AKSIMD_V4I32 vector; 00597 AkInt16 *pDest = (AkInt16*)&vector; 00598 00599 pDest[0] = AkClamp( in_vec1.m_data[0], -32768, 32767); 00600 pDest[1] = AkClamp( in_vec1.m_data[1], -32768, 32767); 00601 pDest[2] = AkClamp( in_vec1.m_data[2], -32768, 32767); 00602 pDest[3] = AkClamp( in_vec1.m_data[3], -32768, 32767); 00603 pDest[4] = AkClamp( in_vec2.m_data[0], -32768, 32767); 00604 pDest[5] = AkClamp( in_vec2.m_data[1], -32768, 32767); 00605 pDest[6] = AkClamp( in_vec2.m_data[2], -32768, 32767); 00606 pDest[7] = AkClamp( in_vec2.m_data[3], -32768, 32767); 00607 00608 return vector; 00609 } 00610 00612 00613 00614 00615 //#define AKSIMD_GET_ITEM( vec, index ) vec[index] 00616 00617 00618 00619 00622 00623 00624 // See _MM_SHUFFLE 00625 #define AKSIMD_SHUFFLE( fp3, fp2, fp1, fp0 ) \ 00626 (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) 00627 00628 // See _mm_shuffle_ps 00629 // Usage: AKSIMD_SHUFFLE_V4F32( vec1, vec2, AKSIMD_SHUFFLE( z, y, x, w ) ) 00630 //#define AKSIMD_SHUFFLE_V4F32( a, b, zyxw ) 00631 00632 AkForceInline AKSIMD_V4F32 AKSIMD_SHUFFLE_V4F32( const AKSIMD_V4F32& xyzw, const AKSIMD_V4F32& abcd, int mask ) 00633 { 00634 AKSIMD_V4F32 vector; 00635 vector.m_data[0] = xyzw.m_data[(mask) & 0x3]; 00636 vector.m_data[1] = xyzw.m_data[(mask >> 2) & 0x3]; 00637 vector.m_data[2] = abcd.m_data[(mask >> 4) & 0x3]; 00638 vector.m_data[3] = abcd.m_data[(mask >> 6) & 0x3]; 00639 00640 return vector; 00641 } 00642 00643 00649 #define AKSIMD_MOVEHL_V4F32( a, b ) \ 00650 AKSIMD_SHUFFLE_V4F32( (b), (a), AKSIMD_SHUFFLE(3, 2, 3, 2) ) 00651 00657 #define AKSIMD_MOVELH_V4F32( a, b ) \ 00658 AKSIMD_SHUFFLE_V4F32( (a), (b), AKSIMD_SHUFFLE(1, 0, 1, 0) ) 00659 00661 #define AKSIMD_SHUFFLE_BADC( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(2,3,0,1)); 00662 00664 #define AKSIMD_SHUFFLE_CDAB( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(1,0,3,2)); 00665 00667 #define AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1)) 00668 00670 #define AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0)) 00671 00672 00673 //#include <AK/SoundEngine/Platforms/Generic/AkSimdShuffle.h> 00674 00676 00677 00678 // Old AKSIMD -- will search-and-replace later 00679 #define AkReal32Vector AKSIMD_V4F32 00680 #define AKSIMD_LOAD1( __scalar__ ) AKSIMD_LOAD1_V4F32( &__scalar__ ) 00681 #define AKSIMD_LOADVEC(v) AKSIMD_LOAD_V4F32((const AKSIMD_F32*)((v))) 00682 #define AKSIMD_MUL AKSIMD_MUL_V4F32 00683 #define AKSIMD_STOREVEC AKSIMD_STORE_V4F32 00684 00689 static AkForceInline void AKSIMD_HORIZONTALADD( AKSIMD_V4F32 & vVec ) 00690 { 00691 AKSIMD_V4F32 vHighLow = AKSIMD_MOVEHL_V4F32(vVec, vVec); 00692 vVec = AKSIMD_ADD_V4F32(vVec, vHighLow); 00693 vHighLow = AKSIMD_SHUFFLE_V4F32(vVec, vVec, 0x55); 00694 vVec = AKSIMD_ADD_V4F32(vVec, vHighLow); 00695 } 00696 00698 static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL( const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2 ) 00699 { 00700 static const AKSIMD_V4F32 vSign = { 1.f, -1.f, 1.f, -1.f }; 00701 00702 AKSIMD_V4F32 vTmp1 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(2,2,0,0)); 00703 vTmp1 = AKSIMD_MUL_V4F32( vTmp1, vCIn2 ); 00704 AKSIMD_V4F32 vTmp2 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(3,3,1,1)); 00705 vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vSign ); 00706 vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vCIn2 ); 00707 vTmp2 = AKSIMD_SHUFFLE_BADC( vTmp2 ); 00708 vTmp2 = AKSIMD_ADD_V4F32( vTmp2, vTmp1 ); 00709 return vTmp2; 00710 } 00711 00712 #define AKSIMD_SPLAT_V4F32(var, idx) AKSIMD_SHUFFLE_V4F32(var,var, AKSIMD_SHUFFLE(idx,idx,idx,idx)) 00713 00714 #endif //_AKSIMD_GENERIC_H_ 00715
이 페이지가 도움이 되었나요?
작업하는 프로젝트에 대해 알려주세요. 언제든지 도와드릴 준비가 되어 있습니다.
프로젝트를 등록하세요. 아무런 조건이나 의무 사항 없이 빠른 시작을 도와드리겠습니다.
Wwise를 시작해 보세요