hops
pcg_uint128.hpp
Go to the documentation of this file.
1 /*
2  * PCG Random Number Generation for C++
3  *
4  * Copyright 2014-2017 Melissa O'Neill <oneill@pcg-random.org>,
5  * and the PCG Project contributors.
6  *
7  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
8  *
9  * Licensed under the Apache License, Version 2.0 (provided in
10  * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
11  * or under the MIT license (provided in LICENSE-MIT.txt and at
12  * http://opensource.org/licenses/MIT), at your option. This file may not
13  * be copied, modified, or distributed except according to those terms.
14  *
15  * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
16  * express or implied. See your chosen license for details.
17  *
18  * For additional information about the PCG random number generation scheme,
19  * visit http://www.pcg-random.org/.
20  */
21 
22 /*
23  * This code provides a a C++ class that can provide 128-bit (or higher)
24  * integers. To produce 2K-bit integers, it uses two K-bit integers,
25  * placed in a union that allowes the code to also see them as four K/2 bit
26  * integers (and access them either directly name, or by index).
27  *
28  * It may seem like we're reinventing the wheel here, because several
29  * libraries already exist that support large integers, but most existing
30  * libraries provide a very generic multiprecision code, but here we're
31  * operating at a fixed size. Also, most other libraries are fairly
32  * heavyweight. So we use a direct implementation. Sadly, it's much slower
33  * than hand-coded assembly or direct CPU support.
34  */
35 
36 #ifndef PCG_UINT128_HPP_INCLUDED
37 #define PCG_UINT128_HPP_INCLUDED 1
38 
39 #include <cstdint>
40 #include <cstdio>
41 #include <cassert>
42 #include <climits>
43 #include <utility>
44 #include <initializer_list>
45 #include <type_traits>
46 
47 /*
48  * We want to lay the type out the same way that a native type would be laid
49  * out, which means we must know the machine's endian, at compile time.
50  * This ugliness attempts to do so.
51  */
52 
53 #ifndef PCG_LITTLE_ENDIAN
54  #if defined(__BYTE_ORDER__)
55  #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
56  #define PCG_LITTLE_ENDIAN 1
57  #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
58  #define PCG_LITTLE_ENDIAN 0
59  #else
60  #error __BYTE_ORDER__ does not match a standard endian, pick a side
61  #endif
62  #elif __LITTLE_ENDIAN__ || _LITTLE_ENDIAN
63  #define PCG_LITTLE_ENDIAN 1
64  #elif __BIG_ENDIAN__ || _BIG_ENDIAN
65  #define PCG_LITTLE_ENDIAN 0
66  #elif __x86_64 || __x86_64__ || _M_X64 || __i386 || __i386__ || _M_IX86
67  #define PCG_LITTLE_ENDIAN 1
68  #elif __powerpc__ || __POWERPC__ || __ppc__ || __PPC__ \
69  || __m68k__ || __mc68000__
70  #define PCG_LITTLE_ENDIAN 0
71  #else
72  #error Unable to determine target endianness
73  #endif
74 #endif
75 
76 namespace pcg_extras {
77 
78 // Recent versions of GCC have intrinsics we can use to quickly compute
79 // the number of leading and trailing zeros in a number. If possible, we
80 // use them, otherwise we fall back to old-fashioned bit twiddling to figure
81 // them out.
82 
83 #ifndef PCG_BITCOUNT_T
84  typedef uint8_t bitcount_t;
85 #else
86  typedef PCG_BITCOUNT_T bitcount_t;
87 #endif
88 
89 /*
90  * Provide some useful helper functions
91  * * flog2 floor(log2(x))
92  * * trailingzeros number of trailing zero bits
93  */
94 
95 #if defined(__GNUC__) // Any GNU-compatible compiler supporting C++11 has
96  // some useful intrinsics we can use.
97 
98 inline bitcount_t flog2(uint32_t v)
99 {
100  return 31 - __builtin_clz(v);
101 }
102 
103 inline bitcount_t trailingzeros(uint32_t v)
104 {
105  return __builtin_ctz(v);
106 }
107 
108 inline bitcount_t flog2(uint64_t v)
109 {
110 #if UINT64_MAX == ULONG_MAX
111  return 63 - __builtin_clzl(v);
112 #elif UINT64_MAX == ULLONG_MAX
113  return 63 - __builtin_clzll(v);
114 #else
115  #error Cannot find a function for uint64_t
116 #endif
117 }
118 
119 inline bitcount_t trailingzeros(uint64_t v)
120 {
121 #if UINT64_MAX == ULONG_MAX
122  return __builtin_ctzl(v);
123 #elif UINT64_MAX == ULLONG_MAX
124  return __builtin_ctzll(v);
125 #else
126  #error Cannot find a function for uint64_t
127 #endif
128 }
129 
130 #elif defined(_MSC_VER) // Use MSVC++ intrinsics
131 
132 #include <intrin.h>
133 
134 #pragma intrinsic(_BitScanReverse, _BitScanForward)
135 #if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
136 #pragma intrinsic(_BitScanReverse64, _BitScanForward64)
137 #endif
138 
139 inline bitcount_t flog2(uint32_t v)
140 {
141  unsigned long i;
142  _BitScanReverse(&i, v);
143  return i;
144 }
145 
146 inline bitcount_t trailingzeros(uint32_t v)
147 {
148  unsigned long i;
149  _BitScanForward(&i, v);
150  return i;
151 }
152 
153 inline bitcount_t flog2(uint64_t v)
154 {
155 #if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
156  unsigned long i;
157  _BitScanReverse64(&i, v);
158  return i;
159 #else
160  // 32-bit x86
161  uint32_t high = v >> 32;
162  uint32_t low = uint32_t(v);
163  return high ? 32+flog2(high) : flog2(low);
164 #endif
165 }
166 
167 inline bitcount_t trailingzeros(uint64_t v)
168 {
169 #if defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
170  unsigned long i;
171  _BitScanForward64(&i, v);
172  return i;
173 #else
174  // 32-bit x86
175  uint32_t high = v >> 32;
176  uint32_t low = uint32_t(v);
177  return low ? trailingzeros(low) : trailingzeros(high)+32;
178 #endif
179 }
180 
181 #else // Otherwise, we fall back to bit twiddling
182  // implementations
183 
184 inline bitcount_t flog2(uint32_t v)
185 {
186  // Based on code by Eric Cole and Mark Dickinson, which appears at
187  // https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
188 
189  static const uint8_t multiplyDeBruijnBitPos[32] = {
190  0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
191  8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
192  };
193 
194  v |= v >> 1; // first round down to one less than a power of 2
195  v |= v >> 2;
196  v |= v >> 4;
197  v |= v >> 8;
198  v |= v >> 16;
199 
200  return multiplyDeBruijnBitPos[(uint32_t)(v * 0x07C4ACDDU) >> 27];
201 }
202 
203 inline bitcount_t trailingzeros(uint32_t v)
204 {
205  static const uint8_t multiplyDeBruijnBitPos[32] = {
206  0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
207  31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
208  };
209 
210  return multiplyDeBruijnBitPos[((uint32_t)((v & -v) * 0x077CB531U)) >> 27];
211 }
212 
213 inline bitcount_t flog2(uint64_t v)
214 {
215  uint32_t high = v >> 32;
216  uint32_t low = uint32_t(v);
217 
218  return high ? 32+flog2(high) : flog2(low);
219 }
220 
221 inline bitcount_t trailingzeros(uint64_t v)
222 {
223  uint32_t high = v >> 32;
224  uint32_t low = uint32_t(v);
225 
226  return low ? trailingzeros(low) : trailingzeros(high)+32;
227 }
228 
229 #endif
230 
231 template <typename UInt>
232 inline bitcount_t clog2(UInt v)
233 {
234  return flog2(v) + ((v & (-v)) != v);
235 }
236 
237 template <typename UInt>
238 inline UInt addwithcarry(UInt x, UInt y, bool carryin, bool* carryout)
239 {
240  UInt half_result = y + carryin;
241  UInt result = x + half_result;
242  *carryout = (half_result < y) || (result < x);
243  return result;
244 }
245 
246 template <typename UInt>
247 inline UInt subwithcarry(UInt x, UInt y, bool carryin, bool* carryout)
248 {
249  UInt half_result = y + carryin;
250  UInt result = x - half_result;
251  *carryout = (half_result < y) || (result > x);
252  return result;
253 }
254 
255 
256 template <typename UInt, typename UIntX2>
257 class uint_x4 {
258 // private:
259 public:
260  union {
261 #if PCG_LITTLE_ENDIAN
262  struct {
263  UInt v0, v1, v2, v3;
264  } w;
265  struct {
266  UIntX2 v01, v23;
267  } d;
268 #else
269  struct {
270  UInt v3, v2, v1, v0;
271  } w;
272  struct {
273  UIntX2 v23, v01;
274  } d;
275 #endif
276  // For the array access versions, the code that uses the array
277  // must handle endian itself. Yuck.
278  UInt wa[4];
279  UIntX2 da[2];
280  };
281 
282 public:
283  uint_x4() = default;
284 
285  constexpr uint_x4(UInt v3, UInt v2, UInt v1, UInt v0)
286 #if PCG_LITTLE_ENDIAN
287  : w{v0, v1, v2, v3}
288 #else
289  : w{v3, v2, v1, v0}
290 #endif
291  {
292  // Nothing (else) to do
293  }
294 
295  constexpr uint_x4(UIntX2 v23, UIntX2 v01)
296 #if PCG_LITTLE_ENDIAN
297  : d{v01,v23}
298 #else
299  : d{v23,v01}
300 #endif
301  {
302  // Nothing (else) to do
303  }
304 
305  template<class Integral,
306  typename std::enable_if<(std::is_integral<Integral>::value
307  && sizeof(Integral) <= sizeof(UIntX2))
308  >::type* = nullptr>
309  constexpr uint_x4(Integral v01)
310 #if PCG_LITTLE_ENDIAN
311  : d{UIntX2(v01),0UL}
312 #else
313  : d{0UL,UIntX2(v01)}
314 #endif
315  {
316  // Nothing (else) to do
317  }
318 
319  explicit constexpr operator uint64_t() const
320  {
321  return d.v01;
322  }
323 
324  explicit constexpr operator uint32_t() const
325  {
326  return w.v0;
327  }
328 
329  explicit constexpr operator int() const
330  {
331  return w.v0;
332  }
333 
334  explicit constexpr operator uint16_t() const
335  {
336  return w.v0;
337  }
338 
339  explicit constexpr operator uint8_t() const
340  {
341  return w.v0;
342  }
343 
344  typedef typename std::conditional<std::is_same<uint64_t,
345  unsigned long>::value,
346  unsigned long long,
347  unsigned long>::type
349 
350  explicit constexpr operator uint_missing_t() const
351  {
352  return d.v01;
353  }
354 
355  explicit constexpr operator bool() const
356  {
357  return d.v01 || d.v23;
358  }
359 
360  template<typename U, typename V>
361  friend uint_x4<U,V> operator*(const uint_x4<U,V>&, const uint_x4<U,V>&);
362 
363  template<typename U, typename V>
364  friend std::pair< uint_x4<U,V>,uint_x4<U,V> >
365  divmod(const uint_x4<U,V>&, const uint_x4<U,V>&);
366 
367  template<typename U, typename V>
368  friend uint_x4<U,V> operator+(const uint_x4<U,V>&, const uint_x4<U,V>&);
369 
370  template<typename U, typename V>
371  friend uint_x4<U,V> operator-(const uint_x4<U,V>&, const uint_x4<U,V>&);
372 
373  template<typename U, typename V>
374  friend uint_x4<U,V> operator<<(const uint_x4<U,V>&, const uint_x4<U,V>&);
375 
376  template<typename U, typename V>
377  friend uint_x4<U,V> operator>>(const uint_x4<U,V>&, const uint_x4<U,V>&);
378 
379  template<typename U, typename V>
380  friend uint_x4<U,V> operator&(const uint_x4<U,V>&, const uint_x4<U,V>&);
381 
382  template<typename U, typename V>
383  friend uint_x4<U,V> operator|(const uint_x4<U,V>&, const uint_x4<U,V>&);
384 
385  template<typename U, typename V>
386  friend uint_x4<U,V> operator^(const uint_x4<U,V>&, const uint_x4<U,V>&);
387 
388  template<typename U, typename V>
389  friend bool operator==(const uint_x4<U,V>&, const uint_x4<U,V>&);
390 
391  template<typename U, typename V>
392  friend bool operator!=(const uint_x4<U,V>&, const uint_x4<U,V>&);
393 
394  template<typename U, typename V>
395  friend bool operator<(const uint_x4<U,V>&, const uint_x4<U,V>&);
396 
397  template<typename U, typename V>
398  friend bool operator<=(const uint_x4<U,V>&, const uint_x4<U,V>&);
399 
400  template<typename U, typename V>
401  friend bool operator>(const uint_x4<U,V>&, const uint_x4<U,V>&);
402 
403  template<typename U, typename V>
404  friend bool operator>=(const uint_x4<U,V>&, const uint_x4<U,V>&);
405 
406  template<typename U, typename V>
407  friend uint_x4<U,V> operator~(const uint_x4<U,V>&);
408 
409  template<typename U, typename V>
410  friend uint_x4<U,V> operator-(const uint_x4<U,V>&);
411 
412  template<typename U, typename V>
413  friend bitcount_t flog2(const uint_x4<U,V>&);
414 
415  template<typename U, typename V>
416  friend bitcount_t trailingzeros(const uint_x4<U,V>&);
417 
419  {
420  uint_x4 result = *this * rhs;
421  return *this = result;
422  }
423 
425  {
426  uint_x4 result = *this / rhs;
427  return *this = result;
428  }
429 
431  {
432  uint_x4 result = *this % rhs;
433  return *this = result;
434  }
435 
437  {
438  uint_x4 result = *this + rhs;
439  return *this = result;
440  }
441 
443  {
444  uint_x4 result = *this - rhs;
445  return *this = result;
446  }
447 
449  {
450  uint_x4 result = *this & rhs;
451  return *this = result;
452  }
453 
455  {
456  uint_x4 result = *this | rhs;
457  return *this = result;
458  }
459 
461  {
462  uint_x4 result = *this ^ rhs;
463  return *this = result;
464  }
465 
467  {
468  uint_x4 result = *this >> shift;
469  return *this = result;
470  }
471 
473  {
474  uint_x4 result = *this << shift;
475  return *this = result;
476  }
477 
478 };
479 
480 template<typename U, typename V>
482 {
483 #if PCG_LITTLE_ENDIAN
484  for (uint8_t i = 4; i !=0; /* dec in loop */) {
485  --i;
486 #else
487  for (uint8_t i = 0; i < 4; ++i) {
488 #endif
489  if (v.wa[i] == 0)
490  continue;
491  return flog2(v.wa[i]) + (sizeof(U)*CHAR_BIT)*i;
492  }
493  abort();
494 }
495 
496 template<typename U, typename V>
498 {
499 #if PCG_LITTLE_ENDIAN
500  for (uint8_t i = 0; i < 4; ++i) {
501 #else
502  for (uint8_t i = 4; i !=0; /* dec in loop */) {
503  --i;
504 #endif
505  if (v.wa[i] != 0)
506  return trailingzeros(v.wa[i]) + (sizeof(U)*CHAR_BIT)*i;
507  }
508  return (sizeof(U)*CHAR_BIT)*4;
509 }
510 
511 template <typename UInt, typename UIntX2>
512 std::pair< uint_x4<UInt,UIntX2>, uint_x4<UInt,UIntX2> >
513  divmod(const uint_x4<UInt,UIntX2>& orig_dividend,
514  const uint_x4<UInt,UIntX2>& divisor)
515 {
516  // If the dividend is less than the divisor, the answer is always zero.
517  // This takes care of boundary cases like 0/x (which would otherwise be
518  // problematic because we can't take the log of zero. (The boundary case
519  // of division by zero is undefined.)
520  if (orig_dividend < divisor)
521  return { uint_x4<UInt,UIntX2>(0UL), orig_dividend };
522 
523  auto dividend = orig_dividend;
524 
525  auto log2_divisor = flog2(divisor);
526  auto log2_dividend = flog2(dividend);
527  // assert(log2_dividend >= log2_divisor);
528  bitcount_t logdiff = log2_dividend - log2_divisor;
529 
530  constexpr uint_x4<UInt,UIntX2> ONE(1UL);
531  if (logdiff == 0)
532  return { ONE, dividend - divisor };
533 
534  // Now we change the log difference to
535  // floor(log2(divisor)) - ceil(log2(dividend))
536  // to ensure that we *underestimate* the result.
537  logdiff -= 1;
538 
539  uint_x4<UInt,UIntX2> quotient(0UL);
540 
541  auto qfactor = ONE << logdiff;
542  auto factor = divisor << logdiff;
543 
544  do {
545  dividend -= factor;
546  quotient += qfactor;
547  while (dividend < factor) {
548  factor >>= 1;
549  qfactor >>= 1;
550  }
551  } while (dividend >= divisor);
552 
553  return { quotient, dividend };
554 }
555 
556 template <typename UInt, typename UIntX2>
558  const uint_x4<UInt,UIntX2>& divisor)
559 {
560  return divmod(dividend, divisor).first;
561 }
562 
563 template <typename UInt, typename UIntX2>
565  const uint_x4<UInt,UIntX2>& divisor)
566 {
567  return divmod(dividend, divisor).second;
568 }
569 
570 
571 template <typename UInt, typename UIntX2>
573  const uint_x4<UInt,UIntX2>& b)
574 {
575  uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
576  bool carryin = false;
577  bool carryout;
578  UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(b.w.v0);
579  r.w.v0 = UInt(a0b0);
580  r.w.v1 = UInt(a0b0 >> 32);
581 
582  UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(b.w.v0);
583  r.w.v2 = UInt(a1b0 >> 32);
584  r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout);
585  carryin = carryout;
586  r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout);
587  carryin = carryout;
588  r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
589 
590  UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b.w.v1);
591  carryin = false;
592  r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> 32), carryin, &carryout);
593  carryin = carryout;
594  r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
595 
596  carryin = false;
597  r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout);
598  carryin = carryout;
599  r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout);
600  carryin = carryout;
601  r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
602 
603  UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b.w.v1);
604  carryin = false;
605  r.w.v2 = addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout);
606  carryin = carryout;
607  r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> 32), carryin, &carryout);
608 
609  r.d.v23 += a.d.v01 * b.d.v23 + a.d.v23 * b.d.v01;
610 
611  return r;
612 }
613 
614 
615 template <typename UInt, typename UIntX2>
617  const uint_x4<UInt,UIntX2>& b)
618 {
619  uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
620 
621  bool carryin = false;
622  bool carryout;
623  r.w.v0 = addwithcarry(a.w.v0, b.w.v0, carryin, &carryout);
624  carryin = carryout;
625  r.w.v1 = addwithcarry(a.w.v1, b.w.v1, carryin, &carryout);
626  carryin = carryout;
627  r.w.v2 = addwithcarry(a.w.v2, b.w.v2, carryin, &carryout);
628  carryin = carryout;
629  r.w.v3 = addwithcarry(a.w.v3, b.w.v3, carryin, &carryout);
630 
631  return r;
632 }
633 
634 template <typename UInt, typename UIntX2>
636  const uint_x4<UInt,UIntX2>& b)
637 {
638  uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
639 
640  bool carryin = false;
641  bool carryout;
642  r.w.v0 = subwithcarry(a.w.v0, b.w.v0, carryin, &carryout);
643  carryin = carryout;
644  r.w.v1 = subwithcarry(a.w.v1, b.w.v1, carryin, &carryout);
645  carryin = carryout;
646  r.w.v2 = subwithcarry(a.w.v2, b.w.v2, carryin, &carryout);
647  carryin = carryout;
648  r.w.v3 = subwithcarry(a.w.v3, b.w.v3, carryin, &carryout);
649 
650  return r;
651 }
652 
653 
654 template <typename UInt, typename UIntX2>
656  const uint_x4<UInt,UIntX2>& b)
657 {
658  return uint_x4<UInt,UIntX2>(a.d.v23 & b.d.v23, a.d.v01 & b.d.v01);
659 }
660 
661 template <typename UInt, typename UIntX2>
663  const uint_x4<UInt,UIntX2>& b)
664 {
665  return uint_x4<UInt,UIntX2>(a.d.v23 | b.d.v23, a.d.v01 | b.d.v01);
666 }
667 
668 template <typename UInt, typename UIntX2>
670  const uint_x4<UInt,UIntX2>& b)
671 {
672  return uint_x4<UInt,UIntX2>(a.d.v23 ^ b.d.v23, a.d.v01 ^ b.d.v01);
673 }
674 
675 template <typename UInt, typename UIntX2>
677 {
678  return uint_x4<UInt,UIntX2>(~v.d.v23, ~v.d.v01);
679 }
680 
681 template <typename UInt, typename UIntX2>
683 {
684  return uint_x4<UInt,UIntX2>(0UL,0UL) - v;
685 }
686 
687 template <typename UInt, typename UIntX2>
689 {
690  return (a.d.v01 == b.d.v01) && (a.d.v23 == b.d.v23);
691 }
692 
693 template <typename UInt, typename UIntX2>
695 {
696  return !operator==(a,b);
697 }
698 
699 
700 template <typename UInt, typename UIntX2>
702 {
703  return (a.d.v23 < b.d.v23)
704  || ((a.d.v23 == b.d.v23) && (a.d.v01 < b.d.v01));
705 }
706 
707 template <typename UInt, typename UIntX2>
709 {
710  return operator<(b,a);
711 }
712 
713 template <typename UInt, typename UIntX2>
715 {
716  return !(operator<(b,a));
717 }
718 
719 template <typename UInt, typename UIntX2>
721 {
722  return !(operator<(a,b));
723 }
724 
725 
726 
727 template <typename UInt, typename UIntX2>
729  const bitcount_t shift)
730 {
731  uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
732  const bitcount_t bits = sizeof(UInt) * CHAR_BIT;
733  const bitcount_t bitmask = bits - 1;
734  const bitcount_t shiftdiv = shift / bits;
735  const bitcount_t shiftmod = shift & bitmask;
736 
737  if (shiftmod) {
738  UInt carryover = 0;
739 #if PCG_LITTLE_ENDIAN
740  for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
741 #else
742  for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
743  --out, --in;
744 #endif
745  r.wa[out] = (v.wa[in] << shiftmod) | carryover;
746  carryover = (v.wa[in] >> (bits - shiftmod));
747  }
748  } else {
749 #if PCG_LITTLE_ENDIAN
750  for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
751 #else
752  for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
753  --out, --in;
754 #endif
755  r.wa[out] = v.wa[in];
756  }
757  }
758 
759  return r;
760 }
761 
762 template <typename UInt, typename UIntX2>
764  const bitcount_t shift)
765 {
766  uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
767  const bitcount_t bits = sizeof(UInt) * CHAR_BIT;
768  const bitcount_t bitmask = bits - 1;
769  const bitcount_t shiftdiv = shift / bits;
770  const bitcount_t shiftmod = shift & bitmask;
771 
772  if (shiftmod) {
773  UInt carryover = 0;
774 #if PCG_LITTLE_ENDIAN
775  for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
776  --out, --in;
777 #else
778  for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
779 #endif
780  r.wa[out] = (v.wa[in] >> shiftmod) | carryover;
781  carryover = (v.wa[in] << (bits - shiftmod));
782  }
783  } else {
784 #if PCG_LITTLE_ENDIAN
785  for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
786  --out, --in;
787 #else
788  for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
789 #endif
790  r.wa[out] = v.wa[in];
791  }
792  }
793 
794  return r;
795 }
796 
797 } // namespace pcg_extras
798 
799 #endif // PCG_UINT128_HPP_INCLUDED
pcg_extras::flog2
bitcount_t flog2(uint32_t v)
Definition: pcg_uint128.hpp:184
pcg_extras::operator<
bool operator<(const uint_x4< UInt, UIntX2 > &a, const uint_x4< UInt, UIntX2 > &b)
Definition: pcg_uint128.hpp:701
pcg_extras::uint_x4::v0
UInt v0
Definition: pcg_uint128.hpp:270
pcg_extras::uint_x4::v23
UIntX2 v23
Definition: pcg_uint128.hpp:273
pcg_extras::uint_x4::operator+
friend uint_x4< U, V > operator+(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::uint_x4::operator^=
uint_x4 & operator^=(const uint_x4 &rhs)
Definition: pcg_uint128.hpp:460
pcg_extras::addwithcarry
UInt addwithcarry(UInt x, UInt y, bool carryin, bool *carryout)
Definition: pcg_uint128.hpp:238
pcg_extras::divmod
std::pair< uint_x4< UInt, UIntX2 >, uint_x4< UInt, UIntX2 > > divmod(const uint_x4< UInt, UIntX2 > &orig_dividend, const uint_x4< UInt, UIntX2 > &divisor)
Definition: pcg_uint128.hpp:513
pcg_extras::uint_x4::v01
UIntX2 v01
Definition: pcg_uint128.hpp:273
pcg_extras::operator<<
std::basic_ostream< CharT, Traits > & operator<<(std::basic_ostream< CharT, Traits > &out, pcg128_t value)
Definition: pcg_extras.hpp:121
pcg_extras::subwithcarry
UInt subwithcarry(UInt x, UInt y, bool carryin, bool *carryout)
Definition: pcg_uint128.hpp:247
pcg_extras::uint_x4::operator>=
friend bool operator>=(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::uint_x4::operator!=
friend bool operator!=(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::uint_x4::divmod
friend std::pair< uint_x4< U, V >, uint_x4< U, V > > divmod(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::uint_x4::operator+=
uint_x4 & operator+=(const uint_x4 &rhs)
Definition: pcg_uint128.hpp:436
pcg_extras::operator/
uint_x4< UInt, UIntX2 > operator/(const uint_x4< UInt, UIntX2 > &dividend, const uint_x4< UInt, UIntX2 > &divisor)
Definition: pcg_uint128.hpp:557
pcg_extras::uint_x4::uint_x4
constexpr uint_x4(UInt v3, UInt v2, UInt v1, UInt v0)
Definition: pcg_uint128.hpp:285
pcg_extras::uint_x4::uint_missing_t
std::conditional< std::is_same< uint64_t, unsigned long >::value, unsigned long long, unsigned long >::type uint_missing_t
Definition: pcg_uint128.hpp:348
pcg_extras::operator*
uint_x4< UInt, UIntX2 > operator*(const uint_x4< UInt, UIntX2 > &a, const uint_x4< UInt, UIntX2 > &b)
Definition: pcg_uint128.hpp:572
pcg_extras::uint_x4::operator&
friend uint_x4< U, V > operator&(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::uint_x4::operator|
friend uint_x4< U, V > operator|(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::uint_x4::v2
UInt v2
Definition: pcg_uint128.hpp:270
pcg_extras::operator~
uint_x4< UInt, UIntX2 > operator~(const uint_x4< UInt, UIntX2 > &v)
Definition: pcg_uint128.hpp:676
pcg_extras::uint_x4::uint_x4
constexpr uint_x4(UIntX2 v23, UIntX2 v01)
Definition: pcg_uint128.hpp:295
pcg_extras::uint_x4
Definition: pcg_uint128.hpp:257
pcg_extras::trailingzeros
bitcount_t trailingzeros(uint32_t v)
Definition: pcg_uint128.hpp:203
pcg_extras::operator^
uint_x4< UInt, UIntX2 > operator^(const uint_x4< UInt, UIntX2 > &a, const uint_x4< UInt, UIntX2 > &b)
Definition: pcg_uint128.hpp:669
pcg_extras::operator&
uint_x4< UInt, UIntX2 > operator&(const uint_x4< UInt, UIntX2 > &a, const uint_x4< UInt, UIntX2 > &b)
Definition: pcg_uint128.hpp:655
pcg_extras::uint_x4::v1
UInt v1
Definition: pcg_uint128.hpp:270
pcg_extras::clog2
bitcount_t clog2(UInt v)
Definition: pcg_uint128.hpp:232
pcg_extras::operator-
uint_x4< UInt, UIntX2 > operator-(const uint_x4< UInt, UIntX2 > &a, const uint_x4< UInt, UIntX2 > &b)
Definition: pcg_uint128.hpp:635
pcg_extras::uint_x4::operator-=
uint_x4 & operator-=(const uint_x4 &rhs)
Definition: pcg_uint128.hpp:442
pcg_extras::uint_x4::operator%=
uint_x4 & operator%=(const uint_x4 &rhs)
Definition: pcg_uint128.hpp:430
pcg_extras::operator|
uint_x4< UInt, UIntX2 > operator|(const uint_x4< UInt, UIntX2 > &a, const uint_x4< UInt, UIntX2 > &b)
Definition: pcg_uint128.hpp:662
pcg_extras::uint_x4::operator*
friend uint_x4< U, V > operator*(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::uint_x4::operator<<
friend uint_x4< U, V > operator<<(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::uint_x4::operator~
friend uint_x4< U, V > operator~(const uint_x4< U, V > &)
pcg_extras::uint_x4::operator>>=
uint_x4 & operator>>=(bitcount_t shift)
Definition: pcg_uint128.hpp:466
pcg_extras::uint_x4::operator*=
uint_x4 & operator*=(const uint_x4 &rhs)
Definition: pcg_uint128.hpp:418
pcg_extras::uint_x4::operator<=
friend bool operator<=(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::uint_x4::operator|=
uint_x4 & operator|=(const uint_x4 &rhs)
Definition: pcg_uint128.hpp:454
pcg_extras::uint_x4::operator<<=
uint_x4 & operator<<=(bitcount_t shift)
Definition: pcg_uint128.hpp:472
pcg_extras::bitcount_t
uint8_t bitcount_t
Definition: pcg_extras.hpp:105
pcg_extras::operator!=
bool operator!=(const uint_x4< UInt, UIntX2 > &a, const uint_x4< UInt, UIntX2 > &b)
Definition: pcg_uint128.hpp:694
pcg_extras::uint_x4::operator-
friend uint_x4< U, V > operator-(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::operator<=
bool operator<=(const uint_x4< UInt, UIntX2 > &a, const uint_x4< UInt, UIntX2 > &b)
Definition: pcg_uint128.hpp:714
pcg_extras::uint_x4::v3
UInt v3
Definition: pcg_uint128.hpp:270
pcg_extras::operator%
uint_x4< UInt, UIntX2 > operator%(const uint_x4< UInt, UIntX2 > &dividend, const uint_x4< UInt, UIntX2 > &divisor)
Definition: pcg_uint128.hpp:564
pcg_extras::uint_x4::flog2
friend bitcount_t flog2(const uint_x4< U, V > &)
Definition: pcg_uint128.hpp:481
pcg_extras::uint_x4::operator^
friend uint_x4< U, V > operator^(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::uint_x4::operator<
friend bool operator<(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::uint_x4::d
struct pcg_extras::uint_x4::@1::@4 d
pcg_extras::uint_x4::operator&=
uint_x4 & operator&=(const uint_x4 &rhs)
Definition: pcg_uint128.hpp:448
pcg_extras::uint_x4::wa
UInt wa[4]
Definition: pcg_uint128.hpp:278
pcg_extras::uint_x4::uint_x4
constexpr uint_x4(Integral v01)
Definition: pcg_uint128.hpp:309
pcg_extras::operator+
uint_x4< UInt, UIntX2 > operator+(const uint_x4< UInt, UIntX2 > &a, const uint_x4< UInt, UIntX2 > &b)
Definition: pcg_uint128.hpp:616
pcg_extras::uint_x4::operator==
friend bool operator==(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras
Definition: pcg_extras.hpp:85
pcg_extras::uint_x4::da
UIntX2 da[2]
Definition: pcg_uint128.hpp:279
pcg_extras::uint_x4::operator>>
friend uint_x4< U, V > operator>>(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::operator>
bool operator>(const uint_x4< UInt, UIntX2 > &a, const uint_x4< UInt, UIntX2 > &b)
Definition: pcg_uint128.hpp:708
pcg_extras::operator>>
std::basic_istream< CharT, Traits > & operator>>(std::basic_istream< CharT, Traits > &in, pcg128_t &value)
Definition: pcg_extras.hpp:165
pcg_extras::uint_x4::operator>
friend bool operator>(const uint_x4< U, V > &, const uint_x4< U, V > &)
pcg_extras::uint_x4::uint_x4
uint_x4()=default
pcg_extras::operator>=
bool operator>=(const uint_x4< UInt, UIntX2 > &a, const uint_x4< UInt, UIntX2 > &b)
Definition: pcg_uint128.hpp:720
pcg_extras::uint_x4::trailingzeros
friend bitcount_t trailingzeros(const uint_x4< U, V > &)
Definition: pcg_uint128.hpp:497
pcg_extras::uint_x4::operator/=
uint_x4 & operator/=(const uint_x4 &rhs)
Definition: pcg_uint128.hpp:424
pcg_extras::uint_x4::w
struct pcg_extras::uint_x4::@1::@3 w
pcg_extras::operator==
bool operator==(const uint_x4< UInt, UIntX2 > &a, const uint_x4< UInt, UIntX2 > &b)
Definition: pcg_uint128.hpp:688