libstdc++
opt_random.h
Go to the documentation of this file.
00001 // Optimizations for random number functions, x86 version -*- C++ -*-
00002 
00003 // Copyright (C) 2012-2013 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file bits/opt_random.h
00026  *  This is an internal header file, included by other library headers.
00027  *  Do not attempt to use it directly. @headername{random}
00028  */
00029 
00030 #ifndef _BITS_OPT_RANDOM_H
00031 #define _BITS_OPT_RANDOM_H 1
00032 
00033 #include <x86intrin.h>
00034 
00035 
00036 #pragma GCC system_header
00037 
00038 
00039 namespace std _GLIBCXX_VISIBILITY(default)
00040 {
00041 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00042 
00043 #ifdef __SSE3__
00044   template<>
00045     template<typename _UniformRandomNumberGenerator>
00046       void
00047       normal_distribution<double>::
00048       __generate(typename normal_distribution<double>::result_type* __f,
00049          typename normal_distribution<double>::result_type* __t,
00050          _UniformRandomNumberGenerator& __urng,
00051          const param_type& __param)
00052       {
00053     typedef uint64_t __uctype;
00054 
00055     if (__f == __t)
00056       return;
00057 
00058     if (_M_saved_available)
00059       {
00060         _M_saved_available = false;
00061         *__f++ = _M_saved * __param.stddev() + __param.mean();
00062 
00063         if (__f == __t)
00064           return;
00065       }
00066 
00067     constexpr uint64_t __maskval = 0xfffffffffffffull;
00068     static const __m128i __mask = _mm_set1_epi64x(__maskval);
00069     static const __m128i __two = _mm_set1_epi64x(0x4000000000000000ull);
00070     static const __m128d __three = _mm_set1_pd(3.0);
00071     const __m128d __av = _mm_set1_pd(__param.mean());
00072 
00073     const __uctype __urngmin = __urng.min();
00074     const __uctype __urngmax = __urng.max();
00075     const __uctype __urngrange = __urngmax - __urngmin;
00076     const __uctype __uerngrange = __urngrange + 1;
00077 
00078     while (__f + 1 < __t)
00079       {
00080         double __le;
00081         __m128d __x;
00082         do
00083           {
00084                 union
00085                 {
00086                   __m128i __i;
00087                   __m128d __d;
00088         } __v;
00089 
00090         if (__urngrange > __maskval)
00091           {
00092             if (__detail::_Power_of_2(__uerngrange))
00093               __v.__i = _mm_and_si128(_mm_set_epi64x(__urng(),
00094                                  __urng()),
00095                           __mask);
00096             else
00097               {
00098             const __uctype __uerange = __maskval + 1;
00099             const __uctype __scaling = __urngrange / __uerange;
00100             const __uctype __past = __uerange * __scaling;
00101             uint64_t __v1;
00102             do
00103               __v1 = __uctype(__urng()) - __urngmin;
00104             while (__v1 >= __past);
00105             __v1 /= __scaling;
00106             uint64_t __v2;
00107             do
00108               __v2 = __uctype(__urng()) - __urngmin;
00109             while (__v2 >= __past);
00110             __v2 /= __scaling;
00111 
00112             __v.__i = _mm_set_epi64x(__v1, __v2);
00113               }
00114           }
00115         else if (__urngrange == __maskval)
00116           __v.__i = _mm_set_epi64x(__urng(), __urng());
00117         else if ((__urngrange + 2) * __urngrange >= __maskval
00118              && __detail::_Power_of_2(__uerngrange))
00119           {
00120             uint64_t __v1 = __urng() * __uerngrange + __urng();
00121             uint64_t __v2 = __urng() * __uerngrange + __urng();
00122 
00123             __v.__i = _mm_and_si128(_mm_set_epi64x(__v1, __v2),
00124                         __mask);
00125           }
00126         else
00127           {
00128             size_t __nrng = 2;
00129             __uctype __high = __maskval / __uerngrange / __uerngrange;
00130             while (__high > __uerngrange)
00131               {
00132             ++__nrng;
00133             __high /= __uerngrange;
00134               }
00135             const __uctype __highrange = __high + 1;
00136             const __uctype __scaling = __urngrange / __highrange;
00137             const __uctype __past = __highrange * __scaling;
00138             __uctype __tmp;
00139 
00140             uint64_t __v1;
00141             do
00142               {
00143             do
00144               __tmp = __uctype(__urng()) - __urngmin;
00145             while (__tmp >= __past);
00146             __v1 = __tmp / __scaling;
00147             for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
00148               {
00149                 __tmp = __v1;
00150                 __v1 *= __uerngrange;
00151                 __v1 += __uctype(__urng()) - __urngmin;
00152               }
00153               }
00154             while (__v1 > __maskval || __v1 < __tmp);
00155 
00156             uint64_t __v2;
00157             do
00158               {
00159             do
00160               __tmp = __uctype(__urng()) - __urngmin;
00161             while (__tmp >= __past);
00162             __v2 = __tmp / __scaling;
00163             for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
00164               {
00165                 __tmp = __v2;
00166                 __v2 *= __uerngrange;
00167                 __v2 += __uctype(__urng()) - __urngmin;
00168               }
00169               }
00170             while (__v2 > __maskval || __v2 < __tmp);
00171             
00172             __v.__i = _mm_set_epi64x(__v1, __v2);
00173           }
00174 
00175         __v.__i = _mm_or_si128(__v.__i, __two);
00176         __x = _mm_sub_pd(__v.__d, __three);
00177         __m128d __m = _mm_mul_pd(__x, __x);
00178         __le = _mm_cvtsd_f64(_mm_hadd_pd (__m, __m));
00179               }
00180             while (__le == 0.0 || __le >= 1.0);
00181 
00182             double __mult = (std::sqrt(-2.0 * std::log(__le) / __le)
00183                              * __param.stddev());
00184 
00185             __x = _mm_add_pd(_mm_mul_pd(__x, _mm_set1_pd(__mult)), __av);
00186 
00187             _mm_storeu_pd(__f, __x);
00188             __f += 2;
00189           }
00190 
00191         if (__f != __t)
00192           {
00193             result_type __x, __y, __r2;
00194 
00195             __detail::_Adaptor<_UniformRandomNumberGenerator, result_type>
00196               __aurng(__urng);
00197 
00198             do
00199               {
00200                 __x = result_type(2.0) * __aurng() - 1.0;
00201                 __y = result_type(2.0) * __aurng() - 1.0;
00202                 __r2 = __x * __x + __y * __y;
00203               }
00204             while (__r2 > 1.0 || __r2 == 0.0);
00205 
00206             const result_type __mult = std::sqrt(-2 * std::log(__r2) / __r2);
00207             _M_saved = __x * __mult;
00208             _M_saved_available = true;
00209             *__f = __y * __mult * __param.stddev() + __param.mean();
00210           }
00211       }
00212 #endif
00213 
00214 
00215 _GLIBCXX_END_NAMESPACE_VERSION
00216 } // namespace
00217 
00218 
00219 #endif // _BITS_OPT_RANDOM_H