36 #ifndef PCG_UINT128_HPP_INCLUDED
37 #define PCG_UINT128_HPP_INCLUDED 1
44 #include <initializer_list>
45 #include <type_traits>
53 #ifndef PCG_LITTLE_ENDIAN
54 #if defined(__BYTE_ORDER__)
55 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
56 #define PCG_LITTLE_ENDIAN 1
57 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
58 #define PCG_LITTLE_ENDIAN 0
60 #error __BYTE_ORDER__ does not match a standard endian, pick a side
62 #elif __LITTLE_ENDIAN__ || _LITTLE_ENDIAN
63 #define PCG_LITTLE_ENDIAN 1
64 #elif __BIG_ENDIAN__ || _BIG_ENDIAN
65 #define PCG_LITTLE_ENDIAN 0
66 #elif __x86_64 || __x86_64__ || _M_X64 || __i386 || __i386__ || _M_IX86
67 #define PCG_LITTLE_ENDIAN 1
68 #elif __powerpc__ || __POWERPC__ || __ppc__ || __PPC__ \
69 || __m68k__ || __mc68000__
70 #define PCG_LITTLE_ENDIAN 0
72 #error Unable to determine target endianness
83 #ifndef PCG_BITCOUNT_T
95 #if defined(__GNUC__) // Any GNU-compatible compiler supporting C++11 has
100 return 31 - __builtin_clz(v);
105 return __builtin_ctz(v);
110 #if UINT64_MAX == ULONG_MAX
111 return 63 - __builtin_clzl(v);
112 #elif UINT64_MAX == ULLONG_MAX
113 return 63 - __builtin_clzll(v);
115 #error Cannot find a function for uint64_t
121 #if UINT64_MAX == ULONG_MAX
122 return __builtin_ctzl(v);
123 #elif UINT64_MAX == ULLONG_MAX
124 return __builtin_ctzll(v);
126 #error Cannot find a function for uint64_t
130 #elif defined(_MSC_VER) // Use MSVC++ intrinsics
134 #pragma intrinsic(_BitScanReverse, _BitScanForward)
135 #if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
136 #pragma intrinsic(_BitScanReverse64, _BitScanForward64)
142 _BitScanReverse(&i, v);
149 _BitScanForward(&i, v);
155 #if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
157 _BitScanReverse64(&i, v);
161 uint32_t high = v >> 32;
162 uint32_t low = uint32_t(v);
169 #if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
171 _BitScanForward64(&i, v);
175 uint32_t high = v >> 32;
176 uint32_t low = uint32_t(v);
181 #else // Otherwise, we fall back to bit twiddling
189 static const uint8_t multiplyDeBruijnBitPos[32] = {
190 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
191 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
200 return multiplyDeBruijnBitPos[(uint32_t)(v * 0x07C4ACDDU) >> 27];
205 static const uint8_t multiplyDeBruijnBitPos[32] = {
206 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
207 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
210 return multiplyDeBruijnBitPos[((uint32_t)((v & -v) * 0x077CB531U)) >> 27];
215 uint32_t high = v >> 32;
216 uint32_t low = uint32_t(v);
223 uint32_t high = v >> 32;
224 uint32_t low = uint32_t(v);
231 template <
typename UInt>
234 return flog2(v) + ((v & (-v)) != v);
237 template <
typename UInt>
240 UInt half_result = y + carryin;
241 UInt result = x + half_result;
242 *carryout = (half_result < y) || (result < x);
246 template <
typename UInt>
249 UInt half_result = y + carryin;
250 UInt result = x - half_result;
251 *carryout = (half_result < y) || (result > x);
256 template <
typename UInt,
typename UIntX2>
261 #if PCG_LITTLE_ENDIAN
286 #if PCG_LITTLE_ENDIAN
296 #if PCG_LITTLE_ENDIAN
305 template<
class Integral,
306 typename std::enable_if<(std::is_integral<Integral>::value
307 &&
sizeof(Integral) <=
sizeof(UIntX2))
310 #if PCG_LITTLE_ENDIAN
319 explicit constexpr
operator uint64_t()
const
324 explicit constexpr
operator uint32_t()
const
329 explicit constexpr
operator int()
const
334 explicit constexpr
operator uint16_t()
const
339 explicit constexpr
operator uint8_t()
const
344 typedef typename std::conditional<std::is_same<uint64_t,
345 unsigned long>::value,
355 explicit constexpr
operator bool()
const
357 return d.v01 ||
d.v23;
360 template<
typename U,
typename V>
363 template<
typename U,
typename V>
367 template<
typename U,
typename V>
370 template<
typename U,
typename V>
373 template<
typename U,
typename V>
376 template<
typename U,
typename V>
379 template<
typename U,
typename V>
382 template<
typename U,
typename V>
385 template<
typename U,
typename V>
388 template<
typename U,
typename V>
391 template<
typename U,
typename V>
394 template<
typename U,
typename V>
397 template<
typename U,
typename V>
400 template<
typename U,
typename V>
403 template<
typename U,
typename V>
406 template<
typename U,
typename V>
409 template<
typename U,
typename V>
412 template<
typename U,
typename V>
415 template<
typename U,
typename V>
421 return *
this = result;
427 return *
this = result;
433 return *
this = result;
439 return *
this = result;
445 return *
this = result;
451 return *
this = result;
457 return *
this = result;
463 return *
this = result;
468 uint_x4 result = *
this >> shift;
469 return *
this = result;
474 uint_x4 result = *
this << shift;
475 return *
this = result;
480 template<
typename U,
typename V>
483 #if PCG_LITTLE_ENDIAN
484 for (uint8_t i = 4; i !=0; ) {
487 for (uint8_t i = 0; i < 4; ++i) {
491 return flog2(v.
wa[i]) + (
sizeof(U)*CHAR_BIT)*i;
496 template<
typename U,
typename V>
499 #if PCG_LITTLE_ENDIAN
500 for (uint8_t i = 0; i < 4; ++i) {
502 for (uint8_t i = 4; i !=0; ) {
508 return (
sizeof(U)*CHAR_BIT)*4;
511 template <
typename UInt,
typename UIntX2>
520 if (orig_dividend < divisor)
523 auto dividend = orig_dividend;
525 auto log2_divisor =
flog2(divisor);
526 auto log2_dividend =
flog2(dividend);
528 bitcount_t logdiff = log2_dividend - log2_divisor;
532 return { ONE, dividend - divisor };
541 auto qfactor = ONE << logdiff;
542 auto factor = divisor << logdiff;
547 while (dividend < factor) {
551 }
while (dividend >= divisor);
553 return { quotient, dividend };
556 template <
typename UInt,
typename UIntX2>
560 return divmod(dividend, divisor).first;
563 template <
typename UInt,
typename UIntX2>
567 return divmod(dividend, divisor).second;
571 template <
typename UInt,
typename UIntX2>
576 bool carryin =
false;
578 UIntX2 a0b0 = UIntX2(a.
w.v0) * UIntX2(b.
w.v0);
580 r.
w.v1 = UInt(a0b0 >> 32);
582 UIntX2 a1b0 = UIntX2(a.
w.v1) * UIntX2(b.
w.v0);
583 r.
w.v2 = UInt(a1b0 >> 32);
590 UIntX2 a0b1 = UIntX2(a.
w.v0) * UIntX2(b.
w.v1);
592 r.
w.v2 =
addwithcarry(r.
w.v2, UInt(a0b1 >> 32), carryin, &carryout);
603 UIntX2 a1b1 = UIntX2(a.
w.v1) * UIntX2(b.
w.v1);
607 r.
w.v3 =
addwithcarry(r.
w.v3, UInt(a1b1 >> 32), carryin, &carryout);
609 r.
d.v23 += a.
d.v01 * b.
d.v23 + a.
d.v23 * b.
d.v01;
615 template <
typename UInt,
typename UIntX2>
621 bool carryin =
false;
634 template <
typename UInt,
typename UIntX2>
640 bool carryin =
false;
654 template <
typename UInt,
typename UIntX2>
661 template <
typename UInt,
typename UIntX2>
668 template <
typename UInt,
typename UIntX2>
675 template <
typename UInt,
typename UIntX2>
681 template <
typename UInt,
typename UIntX2>
687 template <
typename UInt,
typename UIntX2>
690 return (a.
d.v01 == b.
d.v01) && (a.
d.v23 == b.
d.v23);
693 template <
typename UInt,
typename UIntX2>
700 template <
typename UInt,
typename UIntX2>
703 return (a.
d.v23 < b.
d.v23)
704 || ((a.
d.v23 == b.
d.v23) && (a.
d.v01 < b.
d.v01));
707 template <
typename UInt,
typename UIntX2>
713 template <
typename UInt,
typename UIntX2>
719 template <
typename UInt,
typename UIntX2>
727 template <
typename UInt,
typename UIntX2>
732 const bitcount_t bits =
sizeof(UInt) * CHAR_BIT;
739 #if PCG_LITTLE_ENDIAN
740 for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
742 for (uint8_t out = 4-shiftdiv, in = 4; out != 0; ) {
745 r.
wa[out] = (v.
wa[in] << shiftmod) | carryover;
746 carryover = (v.
wa[in] >> (bits - shiftmod));
749 #if PCG_LITTLE_ENDIAN
750 for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
752 for (uint8_t out = 4-shiftdiv, in = 4; out != 0; ) {
755 r.
wa[out] = v.
wa[in];
762 template <
typename UInt,
typename UIntX2>
767 const bitcount_t bits =
sizeof(UInt) * CHAR_BIT;
774 #if PCG_LITTLE_ENDIAN
775 for (uint8_t out = 4-shiftdiv, in = 4; out != 0; ) {
778 for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
780 r.
wa[out] = (v.
wa[in] >> shiftmod) | carryover;
781 carryover = (v.
wa[in] << (bits - shiftmod));
784 #if PCG_LITTLE_ENDIAN
785 for (uint8_t out = 4-shiftdiv, in = 4; out != 0; ) {
788 for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
790 r.
wa[out] = v.
wa[in];
799 #endif // PCG_UINT128_HPP_INCLUDED