00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 #ifndef __Random123_ars_dot_hpp__
00033 #define __Random123_ars_dot_hpp__
00034 
00035 #include "features/compilerfeatures.h"
00036 #include "array.h"
00037 
00038 #if R123_USE_AES_NI
00039 
/* Default ARS round count; a build may predefine this macro to override it. */
#if !defined(ARS1xm128i_DEFAULT_ROUNDS)
#  define ARS1xm128i_DEFAULT_ROUNDS 7
#endif
00043 
00045 enum r123_enum_ars1xm128i {ars1xm128i_rounds = ARS1xm128i_DEFAULT_ROUNDS};
00046 
00047 
/*
 * ARS1xm128i uses the same one-element __m128i array type for all three
 * roles: the key supplied by the user, the key consumed by the round
 * function, and the counter (which is also the output type).
 */
typedef struct r123array1xm128i ars1xm128i_ukey_t; /* user-supplied key */
typedef struct r123array1xm128i ars1xm128i_key_t;  /* key used by the rounds */
typedef struct r123array1xm128i ars1xm128i_ctr_t;  /* counter in, random block out */
00051 R123_STATIC_INLINE ars1xm128i_key_t ars1xm128ikeyinit(ars1xm128i_ukey_t uk) { return uk; }
00052 R123_STATIC_INLINE ars1xm128i_ctr_t ars1xm128i_R(unsigned int Nrounds, ars1xm128i_ctr_t in, ars1xm128i_key_t k){
00053     __m128i kweyl = _mm_set_epi64x(R123_64BIT(0xBB67AE8584CAA73B), 
00054                                    R123_64BIT(0x9E3779B97F4A7C15)); 
00055     
00056 
00057 
00058     __m128i kk = k.v[0].m;
00059     __m128i v = _mm_xor_si128(in.v[0].m, kk);
00060     ars1xm128i_ctr_t ret;
00061     R123_ASSERT(Nrounds<=10);
00062     if( Nrounds>1 ){
00063         kk = _mm_add_epi64(kk, kweyl);
00064         v = _mm_aesenc_si128(v, kk);
00065     }
00066     if( Nrounds>2 ){
00067         kk = _mm_add_epi64(kk, kweyl);
00068         v = _mm_aesenc_si128(v, kk);
00069     }
00070     if( Nrounds>3 ){
00071         kk = _mm_add_epi64(kk, kweyl);
00072         v = _mm_aesenc_si128(v, kk);
00073     }
00074     if( Nrounds>4 ){
00075         kk = _mm_add_epi64(kk, kweyl);
00076         v = _mm_aesenc_si128(v, kk);
00077     }
00078     if( Nrounds>5 ){
00079         kk = _mm_add_epi64(kk, kweyl);
00080         v = _mm_aesenc_si128(v, kk);
00081     }
00082     if( Nrounds>6 ){
00083         kk = _mm_add_epi64(kk, kweyl);
00084         v = _mm_aesenc_si128(v, kk);
00085     }
00086     if( Nrounds>7 ){
00087         kk = _mm_add_epi64(kk, kweyl);
00088         v = _mm_aesenc_si128(v, kk);
00089     }
00090     if( Nrounds>8 ){
00091         kk = _mm_add_epi64(kk, kweyl);
00092         v = _mm_aesenc_si128(v, kk);
00093     }
00094     if( Nrounds>9 ){
00095         kk = _mm_add_epi64(kk, kweyl);
00096         v = _mm_aesenc_si128(v, kk);
00097     }
00098     kk = _mm_add_epi64(kk, kweyl);
00099     v = _mm_aesenclast_si128(v, kk);
00100     ret.v[0].m = v;
00101     return ret;
00102 }
00103 
/* Run ars1xm128i_R() on counter c and key k with the default round count. */
#define ars1xm128i(c, k) ars1xm128i_R(ars1xm128i_rounds, (c), (k))
00108 
/*
 * The 4x32 flavour of ARS uses the r123array4x32 array type for the user
 * key, the working key and the counter/output alike.
 */
typedef struct r123array4x32 ars4x32_ukey_t; /* user-supplied key */
typedef struct r123array4x32 ars4x32_key_t;  /* key used by the rounds */
typedef struct r123array4x32 ars4x32_ctr_t;  /* counter in, random block out */
00116 enum r123_enum_ars4x32 {ars4x32_rounds = ARS1xm128i_DEFAULT_ROUNDS};
00118 R123_STATIC_INLINE ars4x32_key_t ars4x32keyinit(ars4x32_ukey_t uk) { return uk; }
00120 R123_STATIC_INLINE ars4x32_ctr_t ars4x32_R(unsigned int Nrounds, ars4x32_ctr_t c, ars4x32_key_t k){
00121     ars1xm128i_ctr_t c128;
00122     ars1xm128i_key_t k128;
00123     c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);
00124     k128.v[0].m = _mm_set_epi32(k.v[3], k.v[2], k.v[1], k.v[0]);
00125     c128 = ars1xm128i_R(Nrounds, c128, k128);
00126     _mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m);
00127     return c;
00128 }
00129 
/* Run ars4x32_R() on counter c and key k with the default round count. */
#define ars4x32(c, k) ars4x32_R(ars4x32_rounds, (c), (k))
00134 
00135 #ifdef __cplusplus
00136 namespace r123{
00158 template<unsigned int ROUNDS>
00159 struct ARS1xm128i_R{
00160     typedef ars1xm128i_ctr_t ctr_type;
00161     typedef ars1xm128i_key_t key_type;
00162     typedef ars1xm128i_key_t ukey_type;
00163     static const unsigned int rounds=ROUNDS;
00164     R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
00165         return ars1xm128i_R(ROUNDS, ctr, key);
00166     }
00167 };
00168 
00173 template<unsigned int ROUNDS>
00174 struct ARS4x32_R{
00175     typedef ars4x32_ctr_t ctr_type;
00176     typedef ars4x32_key_t key_type;
00177     typedef ars4x32_key_t ukey_type;
00178     static const unsigned int rounds=ROUNDS;
00179     R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
00180         return ars4x32_R(ROUNDS, ctr, key);
00181     }
00182 };
00191 typedef ARS1xm128i_R<ars1xm128i_rounds> ARS1xm128i;
00192 typedef ARS4x32_R<ars4x32_rounds> ARS4x32;
00193 } 
00194 
00195 #endif 
00196 
00197 #endif 
00198 
00199 #endif