Version
menu_open
link

include/AK/SoundEngine/Platforms/Generic/AkSimd.h

Go to the documentation of this file.
00001 /*******************************************************************************
00002 The content of this file includes portions of the AUDIOKINETIC Wwise Technology
00003 released in source code form as part of the SDK installer package.
00004 
00005 Commercial License Usage
00006 
00007 Licensees holding valid commercial licenses to the AUDIOKINETIC Wwise Technology
00008 may use this file in accordance with the end user license agreement provided 
00009 with the software or, alternatively, in accordance with the terms contained in a
00010 written agreement between you and Audiokinetic Inc.
00011 
00012 Apache License Usage
00013 
00014 Alternatively, this file may be used under the Apache License, Version 2.0 (the 
00015 "Apache License"); you may not use this file except in compliance with the 
00016 Apache License. You may obtain a copy of the Apache License at 
00017 http://www.apache.org/licenses/LICENSE-2.0.
00018 
00019 Unless required by applicable law or agreed to in writing, software distributed
00020 under the Apache License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
00021 OR CONDITIONS OF ANY KIND, either express or implied. See the Apache License for
00022 the specific language governing permissions and limitations under the License.
00023 
00024   Version: <VERSION>  Build: <BUILDNUMBER>
00025   Copyright (c) <COPYRIGHTYEAR> Audiokinetic Inc.
00026 *******************************************************************************/
00027 
00028 // AkSimd.h
00029 
00030 /// \file 
00031 /// AKSIMD - Generic (no SIMD support) implementation
00032 
00033 #ifndef _AKSIMD_GENERIC_H_
00034 #define _AKSIMD_GENERIC_H_
00035 
00036 #include <math.h>
00037 #include <string.h>
00038 #include <AK/SoundEngine/Common/AkTypes.h>
00039 #include <AK/Tools/Common/AkPlatformFuncs.h>
00040 
00041 ////////////////////////////////////////////////////////////////////////
00042 /// @name AKSIMD types
00043 //@{
00044 typedef AkInt32 AKSIMD_I32;                                 ///< 32-bit signed integer
00045 typedef struct { AkInt32 m_data[4]; } AKSIMD_V4I32;         ///< Vector of 4 32-bit signed integers
00046 typedef struct { AkUInt32 m_data[4]; } AKSIMD_V4UI32;       ///< Vector of 4 32-bit unsigned integers
00047 typedef AkReal32 AKSIMD_F32;                                ///< 32-bit float
00048 typedef struct { AkReal32 m_data[2]; } AKSIMD_V2F32;        ///< Vector of 2 32-bit floats
00049 typedef struct { AkReal32 m_data[4]; } AKSIMD_V4F32;        ///< Vector of 4 32-bit floats
00050 typedef AKSIMD_V4UI32   AKSIMD_V4COND;                      ///< Vector of 4 comparison results (same layout as AKSIMD_V4UI32)
00051 
00052 #pragma pack(push,1)
00053 typedef struct { AkInt32 m_data[4]; }  AKSIMD_V4I32_UNALIGNED;      ///< Unaligned (1-byte packed) vector of 4 32-bit signed integers
00054 typedef struct { AkUInt32 m_data[4]; } AKSIMD_V4UI32_UNALIGNED;     ///< Unaligned (1-byte packed) vector of 4 32-bit unsigned integers
00055 typedef struct { AkReal32 m_data[2]; } AKSIMD_V2F32_UNALIGNED;      ///< Unaligned (1-byte packed) vector of 2 32-bit floats
00056 typedef struct { AkReal32 m_data[4]; } AKSIMD_V4F32_UNALIGNED;      ///< Unaligned (1-byte packed) vector of 4 32-bit floats
00057 #pragma pack(pop)
00058 
00059 //@}
00060 ////////////////////////////////////////////////////////////////////////
00061 
00062 #ifndef AKSIMD_GETELEMENT_V4F32
00063 #define AKSIMD_GETELEMENT_V4F32( __vName, __num__ )             (__vName).m_data[(__num__)] ///< Lane __num__ of an AKSIMD_V4F32; expands to an lvalue when __vName is modifiable
00064 #endif // AKSIMD_GETELEMENT_V4F32 (only defined if not already provided)
00065 
00066 #ifndef AKSIMD_GETELEMENT_V2F32
00067 #define AKSIMD_GETELEMENT_V2F32( __vName, __num__ )             (__vName).m_data[(__num__)] ///< Lane __num__ of an AKSIMD_V2F32; expands to an lvalue when __vName is modifiable
00068 #endif // AKSIMD_GETELEMENT_V2F32 (only defined if not already provided)
00069 
00070 #ifndef AKSIMD_GETELEMENT_V4I32
00071 #define AKSIMD_GETELEMENT_V4I32( __vName, __num__ )             (__vName).m_data[(__num__)] ///< Lane __num__ of an AKSIMD_V4I32; expands to an lvalue when __vName is modifiable
00072 #endif // AKSIMD_GETELEMENT_V4I32 (only defined if not already provided)
00073 
00074 ////////////////////////////////////////////////////////////////////////
00075 /// @name Platform specific memory size alignment for allocation purposes
00076 //@{
00077 #define AKSIMD_ALIGNSIZE( __Size__ ) (((__Size__) + 15) & ~15) ///< Rounds __Size__ up to the next multiple of 16
00078 //@}
00079 ////////////////////////////////////////////////////////////////////////
00080 
00081 ////////////////////////////////////////////////////////////////////////
00082 /// @name AKSIMD loading / setting
00083 //@{
00084 #define AKSIMD_LOADU_V4I32( in_pData ) (*(in_pData)) ///< Dereferences in_pData as-is; the pointer must already point to a vector type
00085 
00086 #define AKSIMD_LOADU_V4F32( in_pValue ) (*(AKSIMD_V4F32*)(in_pValue)) ///< Reads 4 floats starting at in_pValue by viewing the address as an AKSIMD_V4F32
00087 
00088 #define AKSIMD_LOAD_V4F32( in_pValue ) (*(AKSIMD_V4F32*)(in_pValue)) ///< Same expansion as AKSIMD_LOADU_V4F32 in this generic implementation
00089 
00090 AkForceInline AKSIMD_V4F32 AKSIMD_LOAD1_V4F32( AKSIMD_F32 in_value )
00091 {
00092     AKSIMD_V4F32 vector;
00093     vector.m_data[0] = in_value;
00094     vector.m_data[1] = in_value;
00095     vector.m_data[2] = in_value;
00096     vector.m_data[3] = in_value;
00097     
00098     return vector;
00099 }
00100 
00101 // _mm_set_ps1
00102 AkForceInline AKSIMD_V4F32 AKSIMD_SET_V4F32( AKSIMD_F32 in_value )
00103 {
00104     AKSIMD_V4F32 vector;
00105     vector.m_data[0] = in_value;
00106     vector.m_data[1] = in_value;
00107     vector.m_data[2] = in_value;
00108     vector.m_data[3] = in_value;
00109     
00110     return vector;
00111 }
00112 
00113 
00114 AkForceInline AKSIMD_V2F32 AKSIMD_SET_V2F32( AKSIMD_F32 in_value )
00115 {
00116     AKSIMD_V2F32 vector;
00117     vector.m_data[0] = in_value;
00118     vector.m_data[1] = in_value;
00119     
00120     return vector;
00121 }
00122 
00123 // _mm_setzero_ps()
00124 AkForceInline AKSIMD_V4F32 AKSIMD_SETZERO_V4F32()
00125 {
00126     AKSIMD_V4F32 vector;
00127     vector.m_data[0] = 0.f;
00128     vector.m_data[1] = 0.f;
00129     vector.m_data[2] = 0.f;
00130     vector.m_data[3] = 0.f;
00131     
00132     return vector;
00133 }
00134 
00135 AkForceInline AKSIMD_V2F32 AKSIMD_SETZERO_V2F32()
00136 {
00137     AKSIMD_V2F32 vector;
00138     vector.m_data[0] = 0.f;
00139     vector.m_data[1] = 0.f;
00140     
00141     return vector;
00142 }
00143 // _mm_setzero_si128()
00144 AkForceInline AKSIMD_V4I32 AKSIMD_SETZERO_V4I32()
00145 {
00146     AKSIMD_V4I32 vector;
00147     vector.m_data[0] = 0;
00148     vector.m_data[1] = 0;
00149     vector.m_data[2] = 0;
00150     vector.m_data[3] = 0;
00151     
00152     return vector;
00153 }
00154 
00155 
00156 /// Loads a single-precision, floating-point value into the low word
00157 /// and clears the upper three words.
00158 /// r0 := *p; r1 := 0.0 ; r2 := 0.0 ; r3 := 0.0 (see _mm_load_ss)
00159 AkForceInline AKSIMD_V4F32 AKSIMD_LOAD_SS_V4F32( const AKSIMD_F32* in_pData )
00160 {
00161     AKSIMD_V4F32 vector;
00162     vector.m_data[0] = *in_pData;
00163     vector.m_data[1] = 0.f;
00164     vector.m_data[2] = 0.f;
00165     vector.m_data[3] = 0.f;
00166     
00167     return vector;
00168 }
00169 
00170 //@}
00171 ////////////////////////////////////////////////////////////////////////
00172 
00173 ////////////////////////////////////////////////////////////////////////
00174 /// @name AKSIMD storing
00175 //@{
00176 
// _mm_storeu_ps -- The address does not need to be 16-byte aligned.
// The expansion is wrapped in parentheses so the store behaves as a single expression.
#define AKSIMD_STOREU_V4F32( in_pTo, in_vec ) ((*(AKSIMD_V4F32*)(in_pTo)) = (in_vec))

// _mm_store_ps -- The address must be 16-byte aligned on SIMD platforms.
// This generic implementation simply forwards to the unaligned store.
#define AKSIMD_STORE_V4F32( __addr__, __vName__ ) AKSIMD_STOREU_V4F32( (__addr__), (__vName__) )

// _mm_storeu_si128
#define AKSIMD_STOREU_V4I32( in_pTo, in_vec ) ((*(AKSIMD_V4I32*)(in_pTo)) = (in_vec))
00186 
00187 /// Stores the lower single-precision, floating-point value.
00188 /// *p := a0 (see _mm_store_ss)
00189 AkForceInline void AKSIMD_STORE1_V4F32( AKSIMD_F32* in_pTo, const AKSIMD_V4F32& in_vec )
00190 {
00191     ((AKSIMD_V4F32*)in_pTo)->m_data[0] = in_vec.m_data[0];
00192 }
00193 
00194 //@}
00195 ////////////////////////////////////////////////////////////////////////
00196 
00197 ////////////////////////////////////////////////////////////////////////
00198 /// @name AKSIMD conversion
00199 //@{
00200 
00201 // _mm_cvtepi32_ps
00202 AkForceInline AKSIMD_V4F32 AKSIMD_CONVERT_V4I32_TO_V4F32( const AKSIMD_V4I32& in_from )
00203 {
00204     AKSIMD_V4F32 vector;
00205     vector.m_data[0] = (AkReal32)in_from.m_data[0];
00206     vector.m_data[1] = (AkReal32)in_from.m_data[1];
00207     vector.m_data[2] = (AkReal32)in_from.m_data[2];
00208     vector.m_data[3] = (AkReal32)in_from.m_data[3];
00209     
00210     return vector;
00211 }
00212 // _mm_cvtps_epi32
00213 AkForceInline AKSIMD_V4I32 AKSIMD_CONVERT_V4F32_TO_V4I32( const AKSIMD_V4F32& in_from )
00214 {
00215     AKSIMD_V4I32 vector;
00216     vector.m_data[0] = (AkInt32)in_from.m_data[0];
00217     vector.m_data[1] = (AkInt32)in_from.m_data[1];
00218     vector.m_data[2] = (AkInt32)in_from.m_data[2];
00219     vector.m_data[3] = (AkInt32)in_from.m_data[3];
00220     
00221     return vector;
00222 }
00223 
00224 //@}
00225 ////////////////////////////////////////////////////////////////////////
00226 
00227 ////////////////////////////////////////////////////////////////////////
00228 /// @name AKSIMD logical operations
00229 //@{
00230 
00231 // _mm_and_si128
00232 AkForceInline AKSIMD_V4I32 AKSIMD_AND_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
00233 {
00234     AKSIMD_V4I32 vector;
00235     vector.m_data[0] = in_vec1.m_data[0] & in_vec2.m_data[0];
00236     vector.m_data[1] = in_vec1.m_data[1] & in_vec2.m_data[1];
00237     vector.m_data[2] = in_vec1.m_data[2] & in_vec2.m_data[2];
00238     vector.m_data[3] = in_vec1.m_data[3] & in_vec2.m_data[3];
00239     
00240     return vector;
00241 }
00242 
00243 /// Compares the 8 signed 16-bit integers in a and the 8 signed
00244 /// 16-bit integers in b for greater than (see _mm_cmpgt_epi16)
00245 AkForceInline AKSIMD_V4I32 AKSIMD_CMPGT_V8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
00246 {
00247     AKSIMD_V4I32 vector;
00248     
00249     AkInt16 *pVec1,*pVec2,*pVec3;
00250     pVec1 = (AkInt16*)&in_vec1;
00251     pVec2 = (AkInt16*)&in_vec2;
00252     pVec3 = (AkInt16*)&vector;
00253     
00254     pVec3[0] = (pVec1[0] > pVec2[0]) ? 0xffff : 0x0;
00255     pVec3[1] = (pVec1[1] > pVec2[1]) ? 0xffff : 0x0;
00256     pVec3[2] = (pVec1[2] > pVec2[2]) ? 0xffff : 0x0;
00257     pVec3[3] = (pVec1[3] > pVec2[3]) ? 0xffff : 0x0;
00258     pVec3[4] = (pVec1[4] > pVec2[4]) ? 0xffff : 0x0;
00259     pVec3[5] = (pVec1[5] > pVec2[5]) ? 0xffff : 0x0;
00260     pVec3[6] = (pVec1[6] > pVec2[6]) ? 0xffff : 0x0;
00261     pVec3[7] = (pVec1[7] > pVec2[7]) ? 0xffff : 0x0;
00262 
00263     return vector;
00264 }
00265 
00266 /// Compares for less than or equal (see _mm_cmple_ps)
00267 AkForceInline AKSIMD_V4UI32 AKSIMD_CMPLE_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00268 {
00269     AKSIMD_V4UI32 vector;
00270     
00271     vector.m_data[0] = (in_vec1.m_data[0] <= in_vec2.m_data[0]) ? 0xffffffff : 0x0;
00272     vector.m_data[1] = (in_vec1.m_data[1] <= in_vec2.m_data[1]) ? 0xffffffff : 0x0;
00273     vector.m_data[2] = (in_vec1.m_data[2] <= in_vec2.m_data[2]) ? 0xffffffff : 0x0;
00274     vector.m_data[3] = (in_vec1.m_data[3] <= in_vec2.m_data[3]) ? 0xffffffff : 0x0;
00275     
00276     return vector;
00277 }
00278 
00279 AkForceInline AKSIMD_V4F32 AKSIMD_XOR_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00280 {
00281     AKSIMD_V4F32 vector;
00282     
00283     vector.m_data[0] = (AkReal32)(((AkUInt32)in_vec1.m_data[0]) ^ ((AkUInt32)in_vec2.m_data[0]));
00284     vector.m_data[1] = (AkReal32)(((AkUInt32)in_vec1.m_data[1]) ^ ((AkUInt32)in_vec2.m_data[1]));
00285     vector.m_data[2] = (AkReal32)(((AkUInt32)in_vec1.m_data[2]) ^ ((AkUInt32)in_vec2.m_data[2]));
00286     vector.m_data[3] = (AkReal32)(((AkUInt32)in_vec1.m_data[3]) ^ ((AkUInt32)in_vec2.m_data[3]));
00287     
00288     return vector;
00289 }
00290 
00291 AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTLEFT_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy)
00292 {
00293     in_vector.m_data[0] <<= in_shiftBy;
00294     in_vector.m_data[1] <<= in_shiftBy;
00295     in_vector.m_data[2] <<= in_shiftBy;
00296     in_vector.m_data[3] <<= in_shiftBy;
00297     
00298     return in_vector;
00299 }
00300 
00301 AkForceInline AKSIMD_V4I32 AKSIMD_SHIFTRIGHTARITH_V4I32( AKSIMD_V4I32 in_vector, int in_shiftBy)
00302 {
00303     in_vector.m_data[0] >>= in_shiftBy;
00304     in_vector.m_data[1] >>= in_shiftBy;
00305     in_vector.m_data[2] >>= in_shiftBy;
00306     in_vector.m_data[3] >>= in_shiftBy;
00307     
00308     return in_vector;
00309 }
00310 
00311 //@}
00312 ////////////////////////////////////////////////////////////////////////
00313 
00314 
00315 ////////////////////////////////////////////////////////////////////////
00316 /// @name AKSIMD arithmetic
00317 //@{
00318 // _mm_sub_ps
00319 AkForceInline AKSIMD_V4F32 AKSIMD_SUB_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00320 {
00321     AKSIMD_V4F32 vector;
00322     
00323     vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0];
00324     vector.m_data[1] = in_vec1.m_data[1] - in_vec2.m_data[1];
00325     vector.m_data[2] = in_vec1.m_data[2] - in_vec2.m_data[2];
00326     vector.m_data[3] = in_vec1.m_data[3] - in_vec2.m_data[3];
00327     
00328     return vector;
00329 }
00330 
00331 /// Subtracts the lower single-precision, floating-point values of a and b.
00332 /// The upper three single-precision, floating-point values are passed through from a.
00333 /// r0 := a0 - b0 ; r1 := a1 ; r2 := a2 ; r3 := a3 (see _mm_sub_ss)
00334 
00335 AkForceInline AKSIMD_V4F32 AKSIMD_SUB_SS_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00336 {
00337     AKSIMD_V4F32 vector;
00338     
00339     vector.m_data[0] = in_vec1.m_data[0] - in_vec2.m_data[0];
00340     vector.m_data[1] = in_vec1.m_data[1];
00341     vector.m_data[2] = in_vec1.m_data[2];
00342     vector.m_data[3] = in_vec1.m_data[3];
00343     
00344     return vector;
00345 }
00346 
00347 // _mm_add_ps
00348 AkForceInline AKSIMD_V4F32 AKSIMD_ADD_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00349 {
00350     AKSIMD_V4F32 vector;
00351     
00352     vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0];
00353     vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1];
00354     vector.m_data[2] = in_vec1.m_data[2] + in_vec2.m_data[2];
00355     vector.m_data[3] = in_vec1.m_data[3] + in_vec2.m_data[3];
00356     
00357     return vector;
00358 }
00359 
00360 AkForceInline AKSIMD_V4F32 AKSIMD_DIV_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 ) 
00361 {
00362     AKSIMD_V4F32 vector;
00363     
00364     vector.m_data[0] = in_vec1.m_data[0] / in_vec2.m_data[0];
00365     vector.m_data[1] = in_vec1.m_data[1] / in_vec2.m_data[1];
00366     vector.m_data[2] = in_vec1.m_data[2] / in_vec2.m_data[2];
00367     vector.m_data[3] = in_vec1.m_data[3] / in_vec2.m_data[3];
00368     
00369     return vector;
00370 }
00371 
00372 AkForceInline AKSIMD_V2F32 AKSIMD_ADD_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 )
00373 {
00374     AKSIMD_V2F32 vector;
00375     
00376     vector.m_data[0] = in_vec1.m_data[0] + in_vec2.m_data[0];
00377     vector.m_data[1] = in_vec1.m_data[1] + in_vec2.m_data[1];
00378     
00379     return vector;
00380 }
00381 
00382 /// Adds the lower single-precision, floating-point values of a and b; the
00383 /// upper three single-precision, floating-point values are passed through from a.
00384 /// r0 := a0 + b0; r1 := a1; r2 := a2; r3 := a3 (see _mm_add_ss)
00385 AkForceInline AKSIMD_V4F32 AKSIMD_ADD_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b )
00386 {
00387     AKSIMD_V4F32 vector;
00388     
00389     vector.m_data[0] = a.m_data[0] + b.m_data[0];
00390     vector.m_data[1] = a.m_data[1];
00391     vector.m_data[2] = a.m_data[2];
00392     vector.m_data[3] = a.m_data[3];
00393     
00394     return vector;
00395 }
00396 
00397 // _mm_mul_ps
00398 AkForceInline AKSIMD_V4F32 AKSIMD_MUL_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00399 {
00400     AKSIMD_V4F32 vector;
00401     
00402     vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0];
00403     vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1];
00404     vector.m_data[2] = in_vec1.m_data[2] * in_vec2.m_data[2];
00405     vector.m_data[3] = in_vec1.m_data[3] * in_vec2.m_data[3];
00406     
00407     return vector;
00408 }
00409 
00410 AkForceInline AKSIMD_V2F32 AKSIMD_MUL_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 )
00411 {
00412     AKSIMD_V2F32 vector;
00413     
00414     vector.m_data[0] = in_vec1.m_data[0] * in_vec2.m_data[0];
00415     vector.m_data[1] = in_vec1.m_data[1] * in_vec2.m_data[1];
00416     
00417     return vector;
00418 }
00419 
00420 /// Multiplies the lower single-precision, floating-point values of
00421 /// a and b; the upper three single-precision, floating-point values
00422 /// are passed through from a.
00423 /// r0 := a0 * b0; r1 := a1; r2 := a2; r3 := a3 (see _mm_add_ss)
00424 AkForceInline AKSIMD_V4F32 AKSIMD_MUL_SS_V4F32( const AKSIMD_V4F32& a, const AKSIMD_V4F32& b )
00425 {
00426     AKSIMD_V4F32 vector;
00427     
00428     vector.m_data[0] = a.m_data[0] * b.m_data[0];
00429     vector.m_data[1] = a.m_data[1];
00430     vector.m_data[2] = a.m_data[2];
00431     vector.m_data[3] = a.m_data[3];
00432     
00433     return vector;
00434 }
00435 
00436 /// Vector multiply-add (a * b + c) and multiply-subtract (a * b - c), lane by lane.
00437 #define AKSIMD_MADD_V4F32( __a__, __b__, __c__ ) AKSIMD_ADD_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) )
00438 #define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) AKSIMD_SUB_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) )
00439 
00440 /// Scalar multiply-add on the low lane: r0 := a0 * b0 + c0; lanes 1-3 are passed through from a.
00441 #define AKSIMD_MADD_SS_V4F32( __a__, __b__, __c__ ) AKSIMD_ADD_SS_V4F32( AKSIMD_MUL_SS_V4F32( (__a__), (__b__) ), (__c__) )
00442 
00443 // _mm_min_ps
00444 AkForceInline AKSIMD_V4F32 AKSIMD_MIN_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00445 {
00446     AKSIMD_V4F32 vector;
00447     
00448     vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]);
00449     vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]);
00450     vector.m_data[2] = AkMin(in_vec1.m_data[2], in_vec2.m_data[2]);
00451     vector.m_data[3] = AkMin(in_vec1.m_data[3], in_vec2.m_data[3]);
00452     
00453     return vector;
00454 }
00455 
00456 AkForceInline AKSIMD_V2F32 AKSIMD_MIN_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 )
00457 {
00458     AKSIMD_V2F32 vector;
00459     
00460     vector.m_data[0] = AkMin(in_vec1.m_data[0], in_vec2.m_data[0]);
00461     vector.m_data[1] = AkMin(in_vec1.m_data[1], in_vec2.m_data[1]);
00462     
00463     return vector;
00464 }
00465 
00466 // _mm_max_ps
00467 AkForceInline AKSIMD_V4F32 AKSIMD_MAX_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00468 {
00469     AKSIMD_V4F32 vector;
00470     
00471     vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]);
00472     vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]);
00473     vector.m_data[2] = AkMax(in_vec1.m_data[2], in_vec2.m_data[2]);
00474     vector.m_data[3] = AkMax(in_vec1.m_data[3], in_vec2.m_data[3]);
00475     
00476     return vector;
00477 }
00478 
00479 AkForceInline AKSIMD_V2F32 AKSIMD_MAX_V2F32( const AKSIMD_V2F32& in_vec1, const AKSIMD_V2F32& in_vec2 )
00480 {
00481     AKSIMD_V2F32 vector;
00482     
00483     vector.m_data[0] = AkMax(in_vec1.m_data[0], in_vec2.m_data[0]);
00484     vector.m_data[1] = AkMax(in_vec1.m_data[1], in_vec2.m_data[1]);
00485     
00486     return vector;
00487 }
00488 
00489 AkForceInline AKSIMD_V4F32 AKSIMD_ABS_V4F32( const AKSIMD_V4F32& in_vec1 )
00490 {
00491     AKSIMD_V4F32 vector;
00492     vector.m_data[0] = fabs(in_vec1.m_data[0]);
00493     vector.m_data[1] = fabs(in_vec1.m_data[1]);
00494     vector.m_data[2] = fabs(in_vec1.m_data[2]);
00495     vector.m_data[3] = fabs(in_vec1.m_data[3]);
00496     return vector;
00497 }
00498 
00499 AkForceInline AKSIMD_V4F32 AKSIMD_NEG_V4F32( const AKSIMD_V4F32& in_vec1 )
00500 {
00501     AKSIMD_V4F32 vector;
00502     vector.m_data[0] = -in_vec1.m_data[0];
00503     vector.m_data[1] = -in_vec1.m_data[1];
00504     vector.m_data[2] = -in_vec1.m_data[2];
00505     vector.m_data[3] = -in_vec1.m_data[3];
00506     return vector;
00507 }
00508 
00509 // _mm_sqrt_ps
00510 AkForceInline AKSIMD_V4F32 AKSIMD_SQRT_V4F32( const AKSIMD_V4F32& in_vec )
00511 {
00512         AKSIMD_V4F32 vCompare;
00513         AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,0) );
00514         AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,1) );
00515         AKSIMD_GETELEMENT_V4F32(vCompare,2) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,2) );
00516         AKSIMD_GETELEMENT_V4F32(vCompare,3) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,3) );
00517 
00518         //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) );
00519 
00520         return vCompare /*res*/;
00521 }
00522 
00523 AkForceInline AKSIMD_V2F32 AKSIMD_SQRT_V2F32( const AKSIMD_V2F32& in_vec )
00524 {
00525     AKSIMD_V2F32 vCompare;
00526     AKSIMD_GETELEMENT_V4F32(vCompare,0) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,0) );
00527     AKSIMD_GETELEMENT_V4F32(vCompare,1) = sqrt( AKSIMD_GETELEMENT_V4F32(in_vec,1) );
00528     
00529     //AKSIMD_V4F32 res = vrecpeq_f32( vrsqrteq_f32( in_vec ) );
00530     
00531     return vCompare /*res*/;
00532 }
00533 
00534 //@}
00535 ////////////////////////////////////////////////////////////////////////
00536 
00537 
00538 ////////////////////////////////////////////////////////////////////////
00539 /// @name AKSIMD packing / unpacking
00540 //@{
00541 
00542 //
00543 // _mm_unpacklo_epi16
00544 // r0 := a0
00545 // r1 := b0
00546 // r2 := a1
00547 // r3 := b1
00548 // r4 := a2
00549 // r5 := b2
00550 // r6 := a3
00551 // r7 := b3
00552 AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKLO_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
00553 {
00554     AKSIMD_V4I32 vector;
00555     AkInt16 *pVec1,*pVec2,*pDest;
00556     pVec1 = (AkInt16*)&in_vec1;
00557     pVec2 = (AkInt16*)&in_vec2;
00558     pDest = (AkInt16*)&vector;
00559     
00560     pDest[0] = pVec1[0];
00561     pDest[1] = pVec2[0];    
00562     pDest[2] = pVec1[1];    
00563     pDest[3] = pVec2[1];
00564     pDest[4] = pVec1[2];
00565     pDest[5] = pVec2[2];
00566     pDest[6] = pVec1[3];
00567     pDest[7] = pVec2[3];
00568     
00569     return vector;
00570 }
00571 
00572 // _mm_unpackhi_epi16
00573 AkForceInline AKSIMD_V4I32 AKSIMD_UNPACKHI_VECTOR8I16( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
00574 {
00575     AKSIMD_V4I32 vector;
00576     AkInt16 *pVec1,*pVec2,*pDest;
00577     pVec1 = (AkInt16*)&in_vec1;
00578     pVec2 = (AkInt16*)&in_vec2;
00579     pDest = (AkInt16*)&vector;
00580     
00581     pDest[0] = pVec1[4];
00582     pDest[1] = pVec2[4];    
00583     pDest[2] = pVec1[5];    
00584     pDest[3] = pVec2[5];
00585     pDest[4] = pVec1[6];
00586     pDest[5] = pVec2[6];
00587     pDest[6] = pVec1[7];
00588     pDest[7] = pVec2[7];
00589     
00590     return vector;
00591 }
00592 
00593 // _mm_unpacklo_ps
00594 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKLO_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00595 {
00596     AKSIMD_V4F32 vector;
00597     vector.m_data[0] = in_vec1.m_data[0];
00598     vector.m_data[1] = in_vec2.m_data[0];
00599     vector.m_data[2] = in_vec1.m_data[1];
00600     vector.m_data[3] = in_vec2.m_data[1];
00601     
00602     return vector;
00603 }
00604 
00605 // _mm_unpackhi_ps
00606 AkForceInline AKSIMD_V4F32 AKSIMD_UNPACKHI_V4F32( const AKSIMD_V4F32& in_vec1, const AKSIMD_V4F32& in_vec2 )
00607 {
00608     AKSIMD_V4F32 vector;
00609     vector.m_data[0] = in_vec1.m_data[2];
00610     vector.m_data[1] = in_vec2.m_data[2];
00611     vector.m_data[2] = in_vec1.m_data[3];
00612     vector.m_data[3] = in_vec2.m_data[3];
00613     
00614     return vector;
00615 }
00616 
00617 // _mm_packs_epi32
00618 AkForceInline AKSIMD_V4I32 AKSIMD_PACKS_V4I32( const AKSIMD_V4I32& in_vec1, const AKSIMD_V4I32& in_vec2 )
00619 {
00620     AKSIMD_V4I32 vector;
00621     AkInt16 *pDest = (AkInt16*)&vector;
00622     
00623     pDest[0] = AkClamp( in_vec1.m_data[0], -32768, 32767);
00624     pDest[1] = AkClamp( in_vec1.m_data[1], -32768, 32767);  
00625     pDest[2] = AkClamp( in_vec1.m_data[2], -32768, 32767);  
00626     pDest[3] = AkClamp( in_vec1.m_data[3], -32768, 32767);
00627     pDest[4] = AkClamp( in_vec2.m_data[0], -32768, 32767);
00628     pDest[5] = AkClamp( in_vec2.m_data[1], -32768, 32767);
00629     pDest[6] = AkClamp( in_vec2.m_data[2], -32768, 32767);
00630     pDest[7] = AkClamp( in_vec2.m_data[3], -32768, 32767);
00631     
00632     return vector;
00633 }
00634 
00635 //@}
00636 ////////////////////////////////////////////////////////////////////////
00637 
00638 
00639 //#define AKSIMD_GET_ITEM( vec, index ) vec[index]
00640 
00641 
00642 
00643 
00644 ////////////////////////////////////////////////////////////////////////
00645 /// @name AKSIMD shuffling
00646 //@{
00647 
00648 // See _MM_SHUFFLE. Packs four 2-bit lane selectors into one mask: fp0 in bits 0-1, fp1 in bits 2-3, fp2 in bits 4-5, fp3 in bits 6-7.
00649 #define AKSIMD_SHUFFLE( fp3, fp2, fp1, fp0 ) \
00650     (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))
00651 
00652 // See _mm_shuffle_ps
00653 // Usage: AKSIMD_SHUFFLE_V4F32( vec1, vec2, AKSIMD_SHUFFLE( z, y, x, w ) )
00654 //#define AKSIMD_SHUFFLE_V4F32( a, b, zyxw )
00655 
00656  AkForceInline AKSIMD_V4F32 AKSIMD_SHUFFLE_V4F32( const AKSIMD_V4F32& xyzw, const AKSIMD_V4F32& abcd, int mask )
00657 {
00658     AKSIMD_V4F32 vector;
00659     vector.m_data[0] = xyzw.m_data[(mask) & 0x3];
00660     vector.m_data[1] = xyzw.m_data[(mask >> 2) & 0x3];
00661     vector.m_data[2] = abcd.m_data[(mask >> 4) & 0x3];
00662     vector.m_data[3] = abcd.m_data[(mask >> 6) & 0x3];
00663     
00664     return vector;
00665 }
00666 
00667 
00668 /// Moves the upper two single-precision, floating-point values of b to
00669 /// the lower two single-precision, floating-point values of the result.
00670 /// The upper two single-precision, floating-point values of a are passed
00671 /// through to the result. Note that b is passed as the first shuffle operand.
00672 /// r3 := a3; r2 := a2; r1 := b3; r0 := b2 (see _mm_movehl_ps)
00673 #define AKSIMD_MOVEHL_V4F32( a, b ) \
00674     AKSIMD_SHUFFLE_V4F32( (b), (a), AKSIMD_SHUFFLE(3, 2, 3, 2) )
00675 
00676 /// Moves the lower two single-precision, floating-point values of b to
00677 /// the upper two single-precision, floating-point values of the result.
00678 /// The lower two single-precision, floating-point values of a are passed
00679 /// through to the result.
00680 /// r3 := b1 ; r2 := b0 ; r1 := a1 ; r0 := a0 (see _mm_movelh_ps)
00681 #define AKSIMD_MOVELH_V4F32( a, b ) \
00682     AKSIMD_SHUFFLE_V4F32( (a), (b), AKSIMD_SHUFFLE(1, 0, 1, 0) )
00683 
/// Swap the 2 lower floats together and the 2 higher floats together
/// (a b c d -> b a d c).
/// No trailing semicolon in the expansion, so the macro can be used inside expressions.
#define AKSIMD_SHUFFLE_BADC( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(2,3,0,1))

/// Swap the 2 lower floats with the 2 higher floats (a b c d -> c d a b).
/// No trailing semicolon in the expansion, so the macro can be used inside expressions.
#define AKSIMD_SHUFFLE_CDAB( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(1,0,3,2))

/// Barrel-shift all floats by one (a b c d -> b c d a).
#define AKSIMD_SHUFFLE_BCDA( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(0,3,2,1))

/// Duplicates the odd items into the even items (d c b a -> d d b b )
#define AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32( (__vv), (__vv), AKSIMD_SHUFFLE(3,3,1,1))

/// Duplicates the even items into the odd items (d c b a -> c c a a )
#define AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32( (__vv), (__vv), AKSIMD_SHUFFLE(2,2,0,0))
00698 
00699 
00700 //#include <AK/SoundEngine/Platforms/Generic/AkSimdShuffle.h>
00701 
00702 //@}
00703 ////////////////////////////////////////////////////////////////////////
00704 
// Old AKSIMD names, kept as aliases of the current ones -- will search-and-replace later
#define AkReal32Vector AKSIMD_V4F32
// AKSIMD_LOAD1_V4F32 takes the scalar by value in this generic implementation,
// so the value is forwarded as-is rather than by address.
#define AKSIMD_LOAD1( __scalar__ ) AKSIMD_LOAD1_V4F32( (__scalar__) )
#define AKSIMD_LOADVEC(v) AKSIMD_LOAD_V4F32((const AKSIMD_F32*)((v)))
#define AKSIMD_MUL AKSIMD_MUL_V4F32
#define AKSIMD_STOREVEC AKSIMD_STORE_V4F32
00711 
00712 /// Faked in-place vector horizontal add. 
00713 /// \akwarning 
00714 /// Don't expect this to be very efficient. 
00715 /// /endakwarning
00716 static AkForceInline void AKSIMD_HORIZONTALADD( AKSIMD_V4F32 & vVec )
00717 {   
00718     AKSIMD_V4F32 vHighLow = AKSIMD_MOVEHL_V4F32(vVec, vVec);
00719     vVec = AKSIMD_ADD_V4F32(vVec, vHighLow);
00720     vHighLow = AKSIMD_SHUFFLE_V4F32(vVec, vVec, 0x55);
00721     vVec = AKSIMD_ADD_V4F32(vVec, vHighLow);
00722 } 
00723 
00724 /// Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary parts
00725 static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL( const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2 )
00726 {
00727     static const AKSIMD_V4F32 vSign = { 1.f, -1.f, 1.f, -1.f }; 
00728 
00729     AKSIMD_V4F32 vTmp1 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(2,2,0,0)); 
00730     vTmp1 = AKSIMD_MUL_V4F32( vTmp1, vCIn2 );
00731     AKSIMD_V4F32 vTmp2 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(3,3,1,1)); 
00732     vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vSign );
00733     vTmp2 = AKSIMD_MUL_V4F32( vTmp2, vCIn2 );
00734     vTmp2 = AKSIMD_SHUFFLE_BADC( vTmp2 ); 
00735     vTmp2 = AKSIMD_ADD_V4F32( vTmp2, vTmp1 );
00736     return vTmp2;
00737 }
00738 
/// Broadcasts lane idx of var into all four lanes of the result.
/// Arguments are parenthesized so that expressions can be passed safely.
#define AKSIMD_SPLAT_V4F32(var, idx) AKSIMD_SHUFFLE_V4F32( (var), (var), AKSIMD_SHUFFLE((idx),(idx),(idx),(idx)))
00740 
00741 #define AK_SIGN_BIT( val ) (((AkUInt32)val) >> 31)
00742 
00743 static AkForceInline int AKSIMD_MASK_V4F32( const AKSIMD_V4F32& in_vec )
00744 {
00745     return AK_SIGN_BIT(in_vec.m_data[0]) | AK_SIGN_BIT(in_vec.m_data[1]) << 1 | AK_SIGN_BIT(in_vec.m_data[2]) << 2 |  AK_SIGN_BIT(in_vec.m_data[3]) << 3;
00746 }
00747 
00748 #endif //_AKSIMD_GENERIC_H_
00749 

Cette page a-t-elle été utile ?

Besoin d'aide ?

Des questions ? Des problèmes ? Besoin de plus d'informations ? Contactez-nous, nous pouvons vous aider !

Visitez notre page d'Aide

Décrivez-nous votre projet. Nous sommes là pour vous aider.

Enregistrez votre projet et nous vous aiderons à démarrer sans aucune obligation !

Partir du bon pied avec Wwise