버전
menu_open
link

include/AK/SoundEngine/Platforms/Generic/AkSimd.h

Go to the documentation of this file.
00001 
00002 //
00003 // Copyright (c) 2006 Audiokinetic Inc. / All Rights Reserved
00004 //
00006 
00007 // AkSimd.h
00008 
00011 
00012 #ifndef _AKSIMD_GENERIC_H_
00013 #define _AKSIMD_GENERIC_H_
00014 
00015 #include <math.h>
00016 #include <string.h>
00017 #include <AK/SoundEngine/Common/AkTypes.h>
00018 #include <AK/Tools/Common/AkPlatformFuncs.h>
00019 
00022 
00023 typedef AkInt32 AKSIMD_I32;                                 
00024 typedef struct { AkInt32 m_data[4]; } AKSIMD_V4I32;         
00025 typedef struct { AkUInt32 m_data[4]; } AKSIMD_V4UI32;       
00026 typedef AkReal32 AKSIMD_F32;                                
00027 typedef struct { AkReal32 m_data[2]; } AKSIMD_V2F32;        
00028 typedef struct { AkReal32 m_data[4]; } AKSIMD_V4F32;        
00029 typedef AKSIMD_V4UI32   AKSIMD_V4COND;                      
00030 
00031 
00032 typedef struct { AkInt32 m_data[4]; }  __attribute__((__packed__)) AKSIMD_V4I32_UNALIGNED;      
00033 typedef struct { AkUInt32 m_data[4]; } __attribute__((__packed__)) AKSIMD_V4UI32_UNALIGNED;     
00034 typedef struct { AkReal32 m_data[2]; } __attribute__((__packed__)) AKSIMD_V2F32_UNALIGNED;      
00035 typedef struct { AkReal32 m_data[4]; } __attribute__((__packed__)) AKSIMD_V4F32_UNALIGNED;      
00036 
00038 
00039 
// Lane accessors: each expands to the lvalue m_data[in_lane] of in_vector, so it
// can be read from or assigned to. A definition made before this point wins.
#ifndef AKSIMD_GETELEMENT_V4F32
#define AKSIMD_GETELEMENT_V4F32( in_vector, in_lane )   (in_vector).m_data[(in_lane)]
#endif

#ifndef AKSIMD_GETELEMENT_V2F32
#define AKSIMD_GETELEMENT_V2F32( in_vector, in_lane )   (in_vector).m_data[(in_lane)]
#endif

#ifndef AKSIMD_GETELEMENT_V4I32
#define AKSIMD_GETELEMENT_V4I32( in_vector, in_lane )   (in_vector).m_data[(in_lane)]
#endif
00051 
00054 
// Rounds in_byteCount up to the next multiple of 16 (values already on a multiple are unchanged).
#define AKSIMD_ALIGNSIZE( in_byteCount ) (((in_byteCount) + 15) & ~15)
00056 
00057 
00058 
00061 
// Loads are plain dereferences in this implementation.
// AKSIMD_LOADU_V4I32 dereferences the pointer as given (no cast is applied).
#define AKSIMD_LOADU_V4I32( in_pSrc ) (*(in_pSrc))

// Reads four consecutive floats starting at in_pSrc as one AKSIMD_V4F32.
#define AKSIMD_LOADU_V4F32( in_pSrc ) (*(AKSIMD_V4F32*)(in_pSrc))

// Same expansion as AKSIMD_LOADU_V4F32.
#define AKSIMD_LOAD_V4F32( in_pSrc ) (*(AKSIMD_V4F32*)(in_pSrc))
00067 
00068 AkForceInline AKSIMD_V4F32 AKSIMD_LOAD1_V4F32( AKSIMD_F32 in_value )
00069 {
00070     AKSIMD_V4F32 vector;
00071     vector.m_data[0] = in_value;
00072     vector.m_data[1] = in_value;
00073     vector.m_data[2] = in_value;
00074     vector.m_data[3] = in_value;
00075     
00076     return vector;
00077 }
00078 
00079 // _mm_set_ps1
00080 AkForceInline AKSIMD_V4F32 AKSIMD_SET_V4F32( AKSIMD_F32 in_value )
00081 {
00082     AKSIMD_V4F32 vector;
00083     vector.m_data[0] = in_value;
00084     vector.m_data[1] = in_value;
00085     vector.m_data[2] = in_value;
00086     vector.m_data[3] = in_value;
00087     
00088     return vector;
00089 }
00090 
00091 
00092 AkForceInline AKSIMD_V2F32 AKSIMD_SET_V2F32( AKSIMD_F32 in_value )
00093 {
00094     AKSIMD_V2F32 vector;
00095     vector.m_data[0] = in_value;
00096     vector.m_data[1] = in_value;
00097     
00098     return vector;
00099 }
00100 
00101 // _mm_setzero_ps()
00102 AkForceInline AKSIMD_V4F32 AKSIMD_SETZERO_V4F32()
00103 {
00104     AKSIMD_V4F32 vector;
00105     vector.m_data[0] = 0.f;
00106     vector.m_data[1] = 0.f;
00107     vector.m_data[2] = 0.f;
00108     vector.m_data[3] = 0.f;
00109     
00110     return vector;
00111 }
00112 
00113 AkForceInline AKSIMD_V2F32 AKSIMD_SETZERO_V2F32()
00114 {
00115     AKSIMD_V2F32 vector;
00116     vector.m_data[0] = 0.f;
00117     vector.m_data[1] = 0.f;
00118     
00119     return vector;
00120 }
00121 // _mm_setzero_si128()
00122 AkForceInline AKSIMD_V4I32 AKSIMD_SETZERO_V4I32()
00123 {
00124     AKSIMD_V4I32 vector;
00125     vector.m_data[0] = 0;
00126     vector.m_data[1] = 0;
00127     vector.m_data[2] = 0;
00128     vector.m_data[3] = 0;
00129     
00130     return vector;
00131 }
00132 
00133 
00137 AkForceInline AKSIMD_V4F32 AKSIMD_LOAD_SS_V4F32( const AKSIMD_F32* in_pData )
00138 {
00139     AKSIMD_V4F32 vector;
00140     vector.m_data[0] = *in_pData;
00141     vector.m_data[1] = 0.f;
00142     vector.m_data[2] = 0.f;
00143     vector.m_data[3] = 0.f;
00144     
00145     return vector;
00146 }
00147 
00149 
00150 
00153 
00154 
// _mm_storeu_ps -- The address does not need to be 16-byte aligned.
// Expands to an assignment of in_value through in_pDest viewed as an AKSIMD_V4F32*.
#define AKSIMD_STOREU_V4F32( in_pDest, in_value ) (*(AKSIMD_V4F32*)(in_pDest)) = (in_value)

// _mm_store_ps -- The intrinsic requires a 16-byte aligned address.
// This implementation makes no distinction: the aligned store simply forwards
// to AKSIMD_STOREU_V4F32.
#define AKSIMD_STORE_V4F32( in_pDest, in_value ) AKSIMD_STOREU_V4F32(in_pDest, in_value)

// _mm_storeu_si128
// Expands to an assignment of in_value through in_pDest viewed as an AKSIMD_V4I32*.
#define AKSIMD_STOREU_V4I32( in_pDest, in_value ) (*(AKSIMD_V4I32*)(in_pDest)) = (in_value)
00164 
00167 AkForceInline void AKSIMD_STORE1_V4F32( AKSIMD_F32* in_pTo, const AKSIMD_V4F32& in_vec )
00168 {
00169     ((AKSIMD_V4F32*)in_pTo)->m_data[0] = in_vec.m_data[0];
00170 }
00171 
00173 
00174 
00177 
00178 
00179 // _mm_cvtepi32_ps
00180 AkForceInline AKSIMD_V4F32 AKSIMD_CONVERT_V4I32_TO_V4F32( const AKSIMD_V4I32& in_from )
00181 {
00182     AKSIMD_V4F32 vector;
00183     vector.m_data[0] = (AkReal32)in_from.m_data[0];
00184     vector.m_data[1] = (AkReal32)in_from.m_data[1];
00185     vector.m_data[2] = (AkReal32)in_from.m_data[2];
00186     vector.m_data[3] = (AkReal32)in_from.m_data[3];
00187     
00188     return vector;
00189 }
00190 // _mm_cvtps_epi32
00191 AkForceInline AKSIMD_V4I32 AKSIMD_CONVERT_V4F32_TO_V4I32( const AKSIMD_V4F32& in_from )
00192 {
00193     AKSIMD_V4I32 vector;
00194     vector.m_data[0] = (AkInt32)in_from.m_data[0];
00195     vector.m_data[1] = (AkInt32)in_from.m_data[1];
00196     vector.m_data[2] = (AkInt32)in_from.m_data[2];
00197     vector.m_data[3] = (AkInt32)in_from.m_data[3];
00198     
00199     return vector;
00200 }
00201 
00203 
00204 
00207 
00208 
00209 // _mm_and_si128
00210 AkForceInline AKSIMD_V4I32 AKSIMD_AND_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
00211 {
00212     AKSIMD_V4I32 vector;
00213     vector.m_data[0] = in_vec1.m_data[0] & in_vec2.m_data[0];
00214     vector.m_data[1] = in_vec1.m_data[1] & in_vec2.m_data[1];
00215     vector.m_data[2] = in_vec1.m_data[2] & in_vec2.m_data[2];
00216     vector.m_data[3] = in_vec1.m_data[3] & in_vec2.m_data[3];
00217     
00218     return vector;
00219 }
00220 
00223 AkForceInline AKSIMD_V4I32 AKSIMD_CMPGT_V8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
00224 {
00225     AKSIMD_V4I32 vector;
00226     
00227     AkInt16 *pVec1,*pVec2,*pVec3;
00228     pVec1 = (AkInt16*)&in_vec1;
00229     pVec2 = (AkInt16*)&in_vec2;
00230     pVec3 = (AkInt16*)&vector;
00231     
00232     pVec3[0] = (pVec1[0] > pVec2[0]) ? 0xffff : 0x0;
00233     pVec3[1] = (pVec1[1] > pVec2[1]) ? 0xffff : 0x0;
00234     pVec3[2] = (pVec1[2] > pVec2[2]) ? 0xffff : 0x0;
00235     pVec3[3] = (pVec1[3] > pVec2[3]) ? 0xffff : 0x0;
00236     pVec3[4] = (pVec1[4] > pVec2[4]) ? 0xffff : 0x0;
00237     pVec3[5] = (pVec1[5] > pVec2[5]) ? 0xffff : 0x0;
00238     pVec3[6] = (pVec1[6] > pVec2[6]) ? 0xffff : 0x0;
00239     pVec3[7] = (pVec1[7] > pVec2[7]) ? 0xffff : 0x0;
00240 
00241     return vector;
00242 }
00243 
00245 AkForceInline AKSIMD_V4UI32 AKSIMD_CMPLE_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00246 {
00247     AKSIMD_V4UI32 vector;
00248     
00249     vector.m_data[0] = (in_vec1.m_data[0] <= in_vec2.m_data[0]) ? 0xffffffff : 0x0;
00250     vector.m_data[1] = (in_vec1.m_data[1] <= in_vec2.m_data[1]) ? 0xffffffff : 0x0;
00251     vector.m_data[2] = (in_vec1.m_data[2] <= in_vec2.m_data[2]) ? 0xffffffff : 0x0;
00252     vector.m_data[3] = (in_vec1.m_data[3] <= in_vec2.m_data[3]) ? 0xffffffff : 0x0;
00253     
00254     return vector;
00255 }
00256 
00257 
00258 AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTLEFT_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy)
00259 {
00260     in_vector.m_data[0] <<= in_shiftBy;
00261     in_vector.m_data[1] <<= in_shiftBy;
00262     in_vector.m_data[2] <<= in_shiftBy;
00263     in_vector.m_data[3] <<= in_shiftBy;
00264     
00265     return in_vector;
00266 }
00267 
00268 AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTRIGHTARITH_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy)
00269 {
00270     in_vector.m_data[0] >>= in_shiftBy;
00271     in_vector.m_data[1] >>= in_shiftBy;
00272     in_vector.m_data[2] >>= in_shiftBy;
00273     in_vector.m_data[3] >>= in_shiftBy;
00274     
00275     return in_vector;
00276 }
00277 
00279 
00280 
00281 
00284 
00285 // _mm_sub_ps
00286 AkForceInline AKSIMD_V4F32 AKSIMD_SUB_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00287 {
00288     AKSIMD_V4F32 vector;
00289     
00290     vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0];
00291     vector.m_data[1] = in_vec1.m_data[1] - in_vec2.m_data[1];
00292     vector.m_data[2] = in_vec1.m_data[2] - in_vec2.m_data[2];
00293     vector.m_data[3] = in_vec1.m_data[3] - in_vec2.m_data[3];
00294     
00295     return vector;
00296 }
00297 
00301 
00302 AkForceInline AKSIMD_V4F32 AKSIMD_SUB_SS_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00303 {
00304     AKSIMD_V4F32 vector;
00305     
00306     vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0];
00307     vector.m_data[1] = in_vec1.m_data[1];
00308     vector.m_data[2] = in_vec1.m_data[2];
00309     vector.m_data[3] = in_vec1.m_data[3];
00310     
00311     return vector;
00312 }
00313 
00314 // _mm_add_ps
00315 AkForceInline AKSIMD_V4F32 AKSIMD_ADD_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00316 {
00317     AKSIMD_V4F32 vector;
00318     
00319     vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0];
00320     vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1];
00321     vector.m_data[2] = in_vec1.m_data[2] + in_vec2.m_data[2];
00322     vector.m_data[3] = in_vec1.m_data[3] + in_vec2.m_data[3];
00323     
00324     return vector;
00325 }
00326 
00327 AkForceInline AKSIMD_V2F32 AKSIMD_ADD_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 )
00328 {
00329     AKSIMD_V2F32 vector;
00330     
00331     vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0];
00332     vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1];
00333     
00334     return vector;
00335 }
00336 
00340 AkForceInline AKSIMD_V4F32 AKSIMD_ADD_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b )
00341 {
00342     AKSIMD_V4F32 vector;
00343     
00344     vector.m_data[0] = a.m_data[0] + b.m_data[0];
00345     vector.m_data[1] = a.m_data[1];
00346     vector.m_data[2] = a.m_data[2];
00347     vector.m_data[3] = a.m_data[3];
00348     
00349     return vector;
00350 }
00351 
00352 // _mm_mul_ps
00353 AkForceInline AKSIMD_V4F32 AKSIMD_MUL_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00354 {
00355     AKSIMD_V4F32 vector;
00356     
00357     vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0];
00358     vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1];
00359     vector.m_data[2] = in_vec1.m_data[2] * in_vec2.m_data[2];
00360     vector.m_data[3] = in_vec1.m_data[3] * in_vec2.m_data[3];
00361     
00362     return vector;
00363 }
00364 
00365 AkForceInline AKSIMD_V2F32 AKSIMD_MUL_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 )
00366 {
00367     AKSIMD_V2F32 vector;
00368     
00369     vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0];
00370     vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1];
00371     
00372     return vector;
00373 }
00374 
00379 AkForceInline AKSIMD_V4F32 AKSIMD_MUL_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b )
00380 {
00381     AKSIMD_V4F32 vector;
00382     
00383     vector.m_data[0] = a.m_data[0] * b.m_data[0];
00384     vector.m_data[1] = a.m_data[1];
00385     vector.m_data[2] = a.m_data[2];
00386     vector.m_data[3] = a.m_data[3];
00387     
00388     return vector;
00389 }
00390 
// Lane-wise multiply-add / multiply-subtract: (in_a * in_b) + in_c and (in_a * in_b) - in_c.
#define AKSIMD_MADD_V4F32( in_a, in_b, in_c ) AKSIMD_ADD_V4F32( AKSIMD_MUL_V4F32( (in_a), (in_b) ), (in_c) )
#define AKSIMD_MSUB_V4F32( in_a, in_b, in_c ) AKSIMD_SUB_V4F32( AKSIMD_MUL_V4F32( (in_a), (in_b) ), (in_c) )

// Lane-0 multiply-add: lane 0 is in_a[0] * in_b[0] + in_c[0]; lanes 1-3 come from in_a.
#define AKSIMD_MADD_SS_V4F32( in_a, in_b, in_c ) AKSIMD_ADD_SS_V4F32( AKSIMD_MUL_SS_V4F32( (in_a), (in_b) ), (in_c) )
00397 
00398 // _mm_min_ps
00399 AkForceInline AKSIMD_V4F32 AKSIMD_MIN_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00400 {
00401     AKSIMD_V4F32 vector;
00402     
00403     vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]);
00404     vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]);
00405     vector.m_data[2] = AkMin(in_vec1.m_data[2], in_vec2.m_data[2]);
00406     vector.m_data[3] = AkMin(in_vec1.m_data[3], in_vec2.m_data[3]);
00407     
00408     return vector;
00409 }
00410 
00411 AkForceInline AKSIMD_V2F32 AKSIMD_MIN_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 )
00412 {
00413     AKSIMD_V2F32 vector;
00414     
00415     vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]);
00416     vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]);
00417     
00418     return vector;
00419 }
00420 
00421 // _mm_max_ps
00422 AkForceInline AKSIMD_V4F32 AKSIMD_MAX_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00423 {
00424     AKSIMD_V4F32 vector;
00425     
00426     vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]);
00427     vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]);
00428     vector.m_data[2] = AkMax(in_vec1.m_data[2], in_vec2.m_data[2]);
00429     vector.m_data[3] = AkMax(in_vec1.m_data[3], in_vec2.m_data[3]);
00430     
00431     return vector;
00432 }
00433 
00434 AkForceInline AKSIMD_V2F32 AKSIMD_MAX_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 )
00435 {
00436     AKSIMD_V2F32 vector;
00437     
00438     vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]);
00439     vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]);
00440     
00441     return vector;
00442 }
00443 
00444 AkForceInline AKSIMD_V4F32 AKSIMD_ABS_V4F32( const AKSIMD_V4F32& in_vec1 )
00445 {
00446     AKSIMD_V4F32 vector;
00447     vector.m_data[0] = fabs(in_vec1.m_data[0]);
00448     vector.m_data[1] = fabs(in_vec1.m_data[1]);
00449     vector.m_data[2] = fabs(in_vec1.m_data[2]);
00450     vector.m_data[3] = fabs(in_vec1.m_data[3]);
00451     return vector;
00452 }
00453 
00454 AkForceInline AKSIMD_V4F32 AKSIMD_NEG_V4F32( const AKSIMD_V4F32& in_vec1 )
00455 {
00456     AKSIMD_V4F32 vector;
00457     vector.m_data[0] = -in_vec1.m_data[0];
00458     vector.m_data[1] = -in_vec1.m_data[1];
00459     vector.m_data[2] = -in_vec1.m_data[2];
00460     vector.m_data[3] = -in_vec1.m_data[3];
00461     return vector;
00462 }
00463 
00464 // _mm_sqrt_ps
00465 AkForceInline AKSIMD_V4F32 AKSIMD_SQRT_V4F32( const AKSIMD_V4F32& in_vec )
00466 {
00467         AKSIMD_V4F32 vCompare;
00468         AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,0) );
00469         AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,1) );
00470         AKSIMD_GETELEMENT_V4F32(vCompare,2) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,2) );
00471         AKSIMD_GETELEMENT_V4F32(vCompare,3) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,3) );
00472 
00473         //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) );
00474 
00475         return vCompare /*res*/;
00476 }
00477 
00478 AkForceInline AKSIMD_V2F32 AKSIMD_SQRT_V2F32( const AKSIMD_V2F32& in_vec )
00479 {
00480     AKSIMD_V2F32 vCompare;
00481     AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,0) );
00482     AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,1) );
00483     
00484     //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) );
00485     
00486     return vCompare /*res*/;
00487 }
00488 
00490 
00491 
00492 
00495 
00496 
00497 //
00498 // _mm_unpacklo_epi16
00499 // r0 := a0
00500 // r1 := b0
00501 // r2 := a1
00502 // r3 := b1
00503 // r4 := a2
00504 // r5 := b2
00505 // r6 := a3
00506 // r7 := b3
00507 AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKLO_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
00508 {
00509     AKSIMD_V4I32 vector;
00510     AkInt16 *pVec1,*pVec2,*pDest;
00511     pVec1 = (AkInt16*)&in_vec1;
00512     pVec2 = (AkInt16*)&in_vec2;
00513     pDest = (AkInt16*)&vector;
00514     
00515     pDest[0] = pVec1[0];
00516     pDest[1] = pVec2[0];    
00517     pDest[2] = pVec1[1];    
00518     pDest[3] = pVec2[1];
00519     pDest[4] = pVec1[2];
00520     pDest[5] = pVec2[2];
00521     pDest[6] = pVec1[3];
00522     pDest[7] = pVec2[3];
00523     
00524     return vector;
00525 }
00526 
00527 // _mm_unpackhi_epi16
00528 AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKHI_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
00529 {
00530     AKSIMD_V4I32 vector;
00531     AkInt16 *pVec1,*pVec2,*pDest;
00532     pVec1 = (AkInt16*)&in_vec1;
00533     pVec2 = (AkInt16*)&in_vec2;
00534     pDest = (AkInt16*)&vector;
00535     
00536     pDest[0] = pVec1[4];
00537     pDest[1] = pVec2[4];    
00538     pDest[2] = pVec1[5];    
00539     pDest[3] = pVec2[5];
00540     pDest[4] = pVec1[6];
00541     pDest[5] = pVec2[6];
00542     pDest[6] = pVec1[7];
00543     pDest[7] = pVec2[7];
00544     
00545     return vector;
00546 }
00547 
00548 // _mm_unpacklo_ps
00549 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKLO_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00550 {
00551     AKSIMD_V4F32 vector;
00552     vector.m_data[0] = in_vec1.m_data[0];
00553     vector.m_data[1] = in_vec2.m_data[0];
00554     vector.m_data[2] = in_vec1.m_data[1];
00555     vector.m_data[3] = in_vec2.m_data[1];
00556     
00557     return vector;
00558 }
00559 
00560 // _mm_unpackhi_ps
00561 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKHI_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00562 {
00563     AKSIMD_V4F32 vector;
00564     vector.m_data[0] = in_vec1.m_data[2];
00565     vector.m_data[1] = in_vec2.m_data[2];
00566     vector.m_data[2] = in_vec1.m_data[3];
00567     vector.m_data[3] = in_vec2.m_data[3];
00568     
00569     return vector;
00570 }
00571 
00572 // _mm_packs_epi32
00573 AkForceInline AKSIMD_V4I32 AKSIMD_PACKS_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
00574 {
00575     AKSIMD_V4I32 vector;
00576     AkInt16 *pDest = (AkInt16*)&vector;
00577     
00578     pDest[0] = AkClamp( in_vec1.m_data[0], -32768, 32767);
00579     pDest[1] = AkClamp( in_vec1.m_data[1], -32768, 32767);  
00580     pDest[2] = AkClamp( in_vec1.m_data[2], -32768, 32767);  
00581     pDest[3] = AkClamp( in_vec1.m_data[3], -32768, 32767);
00582     pDest[4] = AkClamp( in_vec2.m_data[0], -32768, 32767);
00583     pDest[5] = AkClamp( in_vec2.m_data[1], -32768, 32767);
00584     pDest[6] = AkClamp( in_vec2.m_data[2], -32768, 32767);
00585     pDest[7] = AkClamp( in_vec2.m_data[3], -32768, 32767);
00586     
00587     return vector;
00588 }
00589 
00591 
00592 
00593 
00594 //#define AKSIMD_GET_ITEM( vec, index ) vec[index]
00595 
00596 
00597 
00598 
00601 
00602 
// See _MM_SHUFFLE
// Packs four 2-bit lane selectors into one mask: in_sel0 occupies bits 0-1,
// in_sel1 bits 2-3, in_sel2 bits 4-5 and in_sel3 bits 6-7.
#define AKSIMD_SHUFFLE( in_sel3, in_sel2, in_sel1, in_sel0 ) (((in_sel3) << 6) | ((in_sel2) << 4) | ((in_sel1) << 2) | ((in_sel0)))
00606 
00607 // See _mm_shuffle_ps
00608 // Usage: AKSIMD_SHUFFLE_V4F32( vec1, vec2, AKSIMD_SHUFFLE( z, y, x, w ) )
00609 //#define AKSIMD_SHUFFLE_V4F32( a, b, zyxw )
00610 
00611  AkForceInline AKSIMD_V4F32 AKSIMD_SHUFFLE_V4F32( const AKSIMD_V4F32& xyzw, const AKSIMD_V4F32& abcd, int mask )
00612 {
00613     AKSIMD_V4F32 vector;
00614     vector.m_data[0] = xyzw.m_data[(mask) & 0x3];
00615     vector.m_data[1] = xyzw.m_data[(mask >> 2) & 0x3];
00616     vector.m_data[2] = abcd.m_data[(mask >> 4) & 0x3];
00617     vector.m_data[3] = abcd.m_data[(mask >> 6) & 0x3];
00618     
00619     return vector;
00620 }
00621 
00622 
// Result is ( in_b[2], in_b[3], in_a[2], in_a[3] ): the upper two lanes of in_b
// land in the lower half, the upper two lanes of in_a stay in the upper half.
#define AKSIMD_MOVEHL_V4F32( in_a, in_b ) AKSIMD_SHUFFLE_V4F32( (in_b), (in_a), AKSIMD_SHUFFLE(3, 2, 3, 2) )

// Result is ( in_a[0], in_a[1], in_b[0], in_b[1] ): the lower two lanes of in_a
// stay in the lower half, the lower two lanes of in_b land in the upper half.
#define AKSIMD_MOVELH_V4F32( in_a, in_b ) AKSIMD_SHUFFLE_V4F32( (in_a), (in_b), AKSIMD_SHUFFLE(1, 0, 1, 0) )
00638 
// Swaps the lanes within each adjacent pair: (a, b, c, d) -> (b, a, d, c).
// No trailing semicolon, so the macro can also be used inside a larger expression.
#define AKSIMD_SHUFFLE_BADC( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(2,3,0,1))

// Swaps the lower and upper halves: (a, b, c, d) -> (c, d, a, b).
// No trailing semicolon, so the macro can also be used inside a larger expression.
#define AKSIMD_SHUFFLE_CDAB( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(1,0,3,2))
00644 
// Duplicates the odd-indexed lanes: (a, b, c, d) -> (b, b, d, d).
#define AKSIMD_DUP_ODD(in_v) AKSIMD_SHUFFLE_V4F32(in_v, in_v, AKSIMD_SHUFFLE(3,3,1,1))

// Duplicates the even-indexed lanes: (a, b, c, d) -> (a, a, c, c).
#define AKSIMD_DUP_EVEN(in_v) AKSIMD_SHUFFLE_V4F32(in_v, in_v, AKSIMD_SHUFFLE(2,2,0,0))
00650 
00651 
00652 //#include <AK/SoundEngine/Platforms/Generic/AkSimdShuffle.h>
00653 
00655 
00656 
// Old AKSIMD -- will search-and-replace later
// Legacy names kept as aliases of the current AKSIMD_* API.
#define AkReal32Vector AKSIMD_V4F32
// AKSIMD_LOAD1_V4F32 takes its scalar by value in this implementation, so the
// value itself is forwarded (passing its address does not compile).
#define AKSIMD_LOAD1( __scalar__ ) AKSIMD_LOAD1_V4F32( (__scalar__) )
#define AKSIMD_LOADVEC(v) AKSIMD_LOAD_V4F32((const AKSIMD_F32*)((v)))
#define AKSIMD_MUL AKSIMD_MUL_V4F32
#define AKSIMD_STOREVEC AKSIMD_STORE_V4F32
00663 
00668 static AkForceInline void AKSIMD_HORIZONTALADD( AKSIMD_V4F32 & vVec )
00669 {   
00670     AKSIMD_V4F32 vHighLow = AKSIMD_MOVEHL_V4F32(vVec, vVec);
00671     vVec = AKSIMD_ADD_V4F32(vVec, vHighLow);
00672     vHighLow = AKSIMD_SHUFFLE_V4F32(vVec, vVec, 0x55);
00673     vVec = AKSIMD_ADD_V4F32(vVec, vHighLow);
00674 } 
00675 
00677 static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL( const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2 )
00678 {
00679     static const AKSIMD_V4F32 vSign = { 1.f, -1.f, 1.f, -1.f }; 
00680 
00681     AKSIMD_V4F32 vTmp1 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(2,2,0,0)); 
00682     vTmp1 = AKSIMD_MUL_V4F32( vTmp1, vCIn2 );
00683     AKSIMD_V4F32 vTmp2 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(3,3,1,1)); 
00684     vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vSign );
00685     vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vCIn2 );
00686     vTmp2 = AKSIMD_SHUFFLE_BADC( vTmp2 ); 
00687     vTmp2 = AKSIMD_ADD_V4F32( vTmp2, vTmp1 );
00688     return vTmp2;
00689 }
00690 
// Broadcasts lane in_lane of in_v to all four lanes.
#define AKSIMD_SPLAT_V4F32(in_v, in_lane) AKSIMD_SHUFFLE_V4F32(in_v,in_v, AKSIMD_SHUFFLE(in_lane,in_lane,in_lane,in_lane))
00692 
00693 #endif //_AKSIMD_GENERIC_H_
00694 

이 페이지가 도움이 되었나요?

지원이 필요하신가요?

질문이 있으신가요? 문제를 겪고 계신가요? 더 많은 정보가 필요하신가요? 저희에게 문의해주시면 도와드리겠습니다!

지원 페이지를 방문해 주세요

작업하는 프로젝트에 대해 알려주세요. 언제든지 도와드릴 준비가 되어 있습니다.

프로젝트를 등록하세요. 아무런 조건이나 의무 사항 없이 빠른 시작을 도와드리겠습니다.

Wwise를 시작해 보세요