Go to the documentation of this file.00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 #ifndef _r123array_dot_h__
00033 #define _r123array_dot_h__
00034 #include "features/compilerfeatures.h"
00035 #include "features/sse.h"
00036 
00037 #ifndef __cplusplus
00038 #define CXXMETHODS(_N, W, T)
00039 #define CXXOVERLOADS(_N, W, T)
00040 #else
00041 
00042 #include <stddef.h>
00043 #include <algorithm>
00044 #include <stdexcept>
00045 #include <iterator>
00046 #include <limits>
00047 #include <iostream>
00048 
/**
 * Assemble one value_type from consecutive 32-bit words, least significant
 * word first.
 *
 * Reads (3+sizeof(value_type))/4 words starting at p32 and ORs word i into
 * bit position 32*i of the result.  When value_type is narrower than 32 bits
 * exactly one word is read and converted (truncated) to value_type.
 *
 * @param p32  first word to read; must point to at least
 *             (3+sizeof(value_type))/4 readable words.  The words are not
 *             modified.
 * @return the assembled value.
 */
template <typename value_type>
inline R123_CUDA_DEVICE value_type assemble_from_u32(const uint32_t *p32){
    const size_t nwords = (3+sizeof(value_type))/4;
    value_type v=0;
    for(size_t i=0; i<nwords; ++i)
        v |= static_cast<value_type>(p32[i]) << (32*i);
    return v;
}
00075 
00076 
/*
 * CXXMETHODS(_N, W, T) expands to the C++ member declarations of
 * struct r123array<_N>x<W>, which stores its elements in `T v[_N]`:
 *   - std::array-like typedefs, element access, iterators, size queries,
 *     comparison, fill() and swap();
 *   - incr(n), which adds n to the array treated as one multi-word
 *     counter with v[0] as the least significant word;
 *   - seed(ss), which fills a new array from a SeedSeq-style generator.
 * The element-wise operations are written as plain loops over v rather
 * than calls into <algorithm>.
 *
 * Every physical line of the definition, including comment-only lines,
 * must end in a backslash so that the macro body stays in one piece.
 */
#define CXXMETHODS(_N, W, T)                                            \
    /* Container-style member typedefs. */                              \
    typedef T value_type;                                               \
    typedef T* iterator;                                                \
    typedef const T* const_iterator;                                    \
    typedef value_type& reference;                                      \
    typedef const value_type& const_reference;                          \
    typedef size_t size_type;                                           \
    typedef ptrdiff_t difference_type;                                  \
    typedef T* pointer;                                                 \
    typedef const T* const_pointer;                                     \
    typedef std::reverse_iterator<iterator> reverse_iterator;           \
    typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
    /* Element access: operator[] is unchecked, at() range-checks i. */ \
    R123_CUDA_DEVICE reference operator[](size_type i){return v[i];}                     \
    R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];}        \
    R123_CUDA_DEVICE reference at(size_type i){ if(i >=  _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
    R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >=  _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
    /* Size queries: the element count is always _N. */                 \
    R123_CUDA_DEVICE size_type size() const { return  _N; }                              \
    R123_CUDA_DEVICE size_type max_size() const { return _N; }                           \
    R123_CUDA_DEVICE bool empty() const { return _N==0; }                                \
    /* Iterators are raw pointers into v. */                            \
    R123_CUDA_DEVICE iterator begin() { return &v[0]; }                                  \
    R123_CUDA_DEVICE iterator end() { return &v[_N]; }                                   \
    R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; }                      \
    R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; }                       \
    R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; }                     \
    R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; }                      \
    R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); }        \
    R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
    R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); }        \
    R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
    R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
    R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); } \
    R123_CUDA_DEVICE pointer data(){ return &v[0]; }                                     \
    R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; }                         \
    R123_CUDA_DEVICE reference front(){ return v[0]; }                                   \
    R123_CUDA_DEVICE const_reference front() const{ return v[0]; }                       \
    R123_CUDA_DEVICE reference back(){ return v[_N-1]; }                                 \
    R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; }                     \
    /* Element-wise equality, written as a plain loop. */               \
    R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \
        for (size_t i = 0; i < _N; ++i) \
            if (v[i] != rhs.v[i]) return false; \
        return true; \
    } \
    R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \
    /* fill(val): assign val to every element. */                       \
    R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \
    /* swap(rhs): exchange contents element by element. */              \
    R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \
        for (size_t i = 0; i < _N; ++i) { \
            T tmp = v[i]; \
            v[i] = rhs.v[i]; \
            rhs.v[i] = tmp; \
        } \
    } \
    /* incr(n): add n to the array treated as a multi-word counter    */ \
    /* whose least significant word is v[0]; returns *this.           */ \
    R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){                         \
        /* If n has bits above the width of T, take the general path.  */ \
        /* The size test is repeated inside the shift count so that    */ \
        /* the count is 0, never the full width of n, when T is at     */ \
        /* least as wide as n.                                         */ \
        if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \
            return incr_carefully(n);                                   \
        if(n==1){                                                       \
            ++v[0];                                                     \
            /* A nonzero v[0] after the increment means no carry. */    \
            if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this;   \
        }else{                                                          \
            v[0] += n;                                                  \
            /* The add did not wrap iff the sum is at least n. */       \
            if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this;  \
        }                                                               \
        /* Carry out of v[0]: ripple it upward one word at a time.     */ \
        /* The _N==k tests are compile-time constants, so only the     */ \
        /* !!v[k] checks depend on runtime data.  The subscripts       */ \
        /* v[_N>k?k:0] clamp the index to a valid element when _N is   */ \
        /* small; those statements are never reached in that case      */ \
        /* because of the preceding _N==k returns (presumably the      */ \
        /* clamp avoids out-of-range subscript diagnostics).           */ \
        ++v[_N>1?1:0];                                                  \
        if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \
        ++v[_N>2?2:0];                                                  \
        if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this;  \
        ++v[_N>3?3:0];                                                  \
        for(size_t i=4; i<_N; ++i){                                     \
            if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this;        \
            ++v[i];                                                     \
        }                                                               \
        return *this;                                                   \
    }                                                                   \
    /* seed(ss): build an array from ss.generate(), which is asked    */ \
    /* for enough 32-bit words to cover all _N elements.  Each        */ \
    /* element is then assembled from its own run of words by         */ \
    /* assemble_from_u32.                                             */ \
    template <typename SeedSeq>                                         \
    R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){      \
        r123array##_N##x##W ret;                                        \
        const size_t words_per_elem = (3+sizeof(value_type))/4;         \
        const size_t Ngen = _N*words_per_elem;                          \
        uint32_t u32[Ngen];                                             \
        uint32_t *p32 = &u32[0];                                        \
        ss.generate(p32, p32 + Ngen);                                   \
        for(size_t i=0; i<_N; ++i){                                     \
            ret.v[i] = assemble_from_u32<value_type>(p32);              \
            p32 += words_per_elem;                                      \
        }                                                               \
        return ret;                                                     \
    }                                                                   \
protected:                                                              \
    /* incr_carefully(n): general add, used by incr() when n has bits */ \
    /* above the width of T.  Each word receives the matching slice   */ \
    /* of n plus the carry out of the word below it.                  */ \
    R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ \
        /* vtn: the amount just added to the previous word, truncated */ \
        /* to value_type, kept for the carry test in the loop.        */ \
        value_type vtn;                                                 \
        vtn = n;                                                        \
        v[0] += n;                                                      \
        /* Bits of n consumed per word; 0 means T is at least as wide */ \
        /* as n, so nothing of n is left after the first word.        */ \
        const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \
        for(size_t i=1; i<_N; ++i){                                     \
            if(rshift){                                                 \
                n >>= rshift;                                           \
            }else{                                                      \
                n=0;                                                    \
            }                                                           \
            /* For unsigned T the previous add wrapped iff the sum is */ \
            /* below the amount that was added.                       */ \
            if( v[i-1] < vtn )                                          \
                ++n;                                                    \
            if( n==0 ) break;                                           \
            vtn = n;                                                    \
            v[i] += n;                                                  \
        }                                                               \
        return *this;                                                   \
    }
00201                                                                         
00202 
00203 
00204 
00205 
00206 
00207 
00208 
00209 
00210 
// r123arrayinsertable<T> wraps a reference to one array element so that the
// way it is written to a stream can be chosen per element type.
// The primary template forwards to the element type's own operator<<.
template<typename T>
struct r123arrayinsertable{
    const T& v;
    r123arrayinsertable(const T& elem) : v(elem) {}
    friend std::ostream& operator<<(std::ostream& out, const r123arrayinsertable<T>& wrapped){
        out << wrapped.v;
        return out;
    }
};

// uint8_t elements are written as integers: inserting a uint8_t directly
// would emit it as a character.
template<>
struct r123arrayinsertable<uint8_t>{
    const uint8_t& v;
    r123arrayinsertable(const uint8_t& elem) : v(elem) {}
    friend std::ostream& operator<<(std::ostream& out, const r123arrayinsertable<uint8_t>& wrapped){
        const int as_int = static_cast<int>(wrapped.v);
        out << as_int;
        return out;
    }
};
00228 
// r123arrayextractable<T> wraps a reference to one array element so that the
// way it is read from a stream can be chosen per element type.
// The primary template forwards to the element type's own operator>>.
template<typename T>
struct r123arrayextractable{
    T& v;
    r123arrayextractable(T& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){
        return is >> t.v;
    }
};

// uint8_t elements are read as integers: extracting into a uint8_t directly
// would read a single character.
template<>
struct r123arrayextractable<uint8_t>{
    uint8_t& v;
    r123arrayextractable(uint8_t& t_) : v(t_) {}
    // Reads an int and stores it in the wrapped uint8_t.
    // If the number parsed is outside [0, 255] the stream's failbit is set
    // and the stored value is clamped to the nearer bound, instead of being
    // silently truncated.  If no number could be parsed the stream is
    // already in a failed state and 0 is stored.
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
        int i = 0;  // defined even if the extraction leaves it untouched
        is >> i;
        const int hi = std::numeric_limits<uint8_t>::max();
        if(!is.fail() && (i < 0 || i > hi)){
            is.setstate(std::ios_base::failbit);
            i = (i < 0) ? 0 : hi;
        }
        t.v = static_cast<uint8_t>(i);
        return is;
    }
};
00249 
/*
 * CXXOVERLOADS(_N, W, T) expands to the free functions and alias that go
 * with struct r123array<_N>x<W>: stream insertion, stream extraction, and
 * the typedef r123::Array<_N>x<W>.  Elements pass through the
 * r123arrayinsertable / r123arrayextractable wrappers rather than being
 * streamed directly.
 */
#define CXXOVERLOADS(_N, W, T)                                          \
                                                                        \
/* Write the _N elements of a, separated by single spaces. */           \
inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){   \
    for(size_t k=0; k<_N; ++k){                                         \
        if(k != 0)                                                      \
            os << " ";                                                  \
        os << r123arrayinsertable<T>(a.v[k]);                           \
    }                                                                   \
    return os;                                                          \
}                                                                       \
                                                                        \
/* Read _N elements into a, in index order. */                          \
inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){         \
    for(size_t k=0; k<_N; ++k){                                         \
        r123arrayextractable<T> elem(a.v[k]);                           \
        is >> elem;                                                     \
    }                                                                   \
    return is;                                                          \
}                                                                       \
                                                                        \
/* Alias inside namespace r123. */                                      \
namespace r123{                                                        \
 typedef r123array##_N##x##W Array##_N##x##W;                          \
}
00270                                                                         
00271 #endif 
00272 
00273 
00274 
00275 
00276 
00277 
00278 
00279 
00280 
00281 
00282 
00283 
00284 
00285 
/*
 * _r123array_tpl(_N, W, T) defines struct r123array<_N>x<W>, whose only
 * data member is `T v[_N]`.  CXXMETHODS supplies the member functions and
 * CXXOVERLOADS the accompanying free functions; both expand to nothing
 * when the header is not compiled as C++, leaving a plain struct.
 */
#define _r123array_tpl(_N, W, T)                    \
struct r123array##_N##x##W{                         \
    T v[_N];                                        \
    CXXMETHODS(_N, W, T)                            \
};                                                  \
                                                    \
CXXOVERLOADS(_N, W, T)
00295 
00298 _r123array_tpl(1, 32, uint32_t)  
00299 _r123array_tpl(2, 32, uint32_t)  
00300 _r123array_tpl(4, 32, uint32_t)  
00301 _r123array_tpl(8, 32, uint32_t)  
00302 
00303 _r123array_tpl(1, 64, uint64_t)  
00304 _r123array_tpl(2, 64, uint64_t)  
00305 _r123array_tpl(4, 64, uint64_t)  
00306 
00307 _r123array_tpl(16, 8, uint8_t)  
00308 
00309 #if R123_USE_SSE
00310 _r123array_tpl(1, m128i, r123m128i) 
00311 #endif
00312 
00313 
00314 
00315 
00316 
/*
 * R123_W(a): 8 times the size in bytes of one element of a's `v` member,
 * where `a` is a type name.  The null pointer is only used inside sizeof,
 * so it is never dereferenced.
 */
#define R123_W(a)   (sizeof(((a *)0)->v[0])*8)
00318 
00323 #endif
00324