1 // Definition of the public simd interfaces -*- C++ -*-
2 
3 // Copyright (C) 2020-2023 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_H
27 
28 #if __cplusplus >= 201703L
29 
30 #include "simd_detail.h"
31 #include "numeric_traits.h"
32 #include <bit>
33 #include <bitset>
34 #ifdef _GLIBCXX_DEBUG_UB
35 #include <cstdio> // for stderr
36 #endif
37 #include <cstring>
38 #include <cmath>
39 #include <functional>
40 #include <iosfwd>
41 #include <utility>
42 
43 #if _GLIBCXX_SIMD_X86INTRIN
44 #include <x86intrin.h>
45 #elif _GLIBCXX_SIMD_HAVE_NEON
46 #pragma GCC diagnostic push
47 // narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned int'} to
48 // 'int64x1_t' {aka 'long long int'} [-Wnarrowing]
49 #pragma GCC diagnostic ignored "-Wnarrowing"
50 #include <arm_neon.h>
51 #pragma GCC diagnostic pop
52 #endif
53 
54 /** @ingroup ts_simd
55  * @{
56  */
57 /* There are several closely related types, with the following naming
58  * convention:
59  * _Tp: vectorizable (arithmetic) type (or any type)
60  * _TV: __vector_type_t<_Tp, _Np>
61  * _TW: _SimdWrapper<_Tp, _Np>
62  * _TI: __intrinsic_type_t<_Tp, _Np>
63  * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
64  * If one additional type is needed use _U instead of _T.
65 * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
66  *
67  * More naming conventions:
68  * _Ap or _Abi: An ABI tag from the simd_abi namespace
69  * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
70  * _IV, _IW as for _TV, _TW
71  * _Np: number of elements (not bytes)
72  * _Bytes: number of bytes
73  *
74  * Variable names:
75  * __k: mask object (vector- or bitmask)
76  */
77 _GLIBCXX_SIMD_BEGIN_NAMESPACE
78 
79 #if !_GLIBCXX_SIMD_X86INTRIN
80 using __m128 [[__gnu__::__vector_size__(16)]] = float;
81 using __m128d [[__gnu__::__vector_size__(16)]] = double;
82 using __m128i [[__gnu__::__vector_size__(16)]] = long long;
83 using __m256 [[__gnu__::__vector_size__(32)]] = float;
84 using __m256d [[__gnu__::__vector_size__(32)]] = double;
85 using __m256i [[__gnu__::__vector_size__(32)]] = long long;
86 using __m512 [[__gnu__::__vector_size__(64)]] = float;
87 using __m512d [[__gnu__::__vector_size__(64)]] = double;
88 using __m512i [[__gnu__::__vector_size__(64)]] = long long;
89 #endif
90 
91 namespace simd_abi {
92 // simd_abi forward declarations {{{
93 // implementation details:
94 struct _Scalar;
95 
96 template <int _Np>
97  struct _Fixed;
98 
99 // There are two major ABIs that appear on different architectures.
100 // Both have non-boolean values packed into an N-byte register
101 // -> #elements = N / sizeof(T)
102 // Masks differ:
103 // 1. Use value vector registers for masks (all 0 or all 1)
104 // 2. Use bitmasks (mask registers) with one bit per value in the corresponding
105 // value vector
106 //
107 // Both can be partially used, masking off the rest when doing horizontal
108 // operations or operations that can trap (e.g. FP_INVALID or integer division
109 // by 0). This is encoded as the number of used bytes.
110 template <int _UsedBytes>
111  struct _VecBuiltin;
112 
113 template <int _UsedBytes>
114  struct _VecBltnBtmsk;
115 
116 template <typename _Tp, int _Np>
117  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;
118 
119 template <int _UsedBytes = 16>
120  using _Sse = _VecBuiltin<_UsedBytes>;
121 
122 template <int _UsedBytes = 32>
123  using _Avx = _VecBuiltin<_UsedBytes>;
124 
125 template <int _UsedBytes = 64>
126  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;
127 
128 template <int _UsedBytes = 16>
129  using _Neon = _VecBuiltin<_UsedBytes>;
130 
131 // implementation-defined:
132 using __sse = _Sse<>;
133 using __avx = _Avx<>;
134 using __avx512 = _Avx512<>;
135 using __neon = _Neon<>;
136 using __neon128 = _Neon<16>;
137 using __neon64 = _Neon<8>;
138 
139 // standard:
140 template <typename _Tp, size_t _Np, typename...>
141  struct deduce;
142 
143 template <int _Np>
144  using fixed_size = _Fixed<_Np>;
145 
146 using scalar = _Scalar;
147 
148 // }}}
149 } // namespace simd_abi
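// Example (illustrative sketch, not part of the header): an ABI tag fixes the
// register size and therefore the element count. Assuming a target with
// 16-byte _VecBuiltin support:
//   using _V = simd<float, simd_abi::_VecBuiltin<16>>; // 16 / 4 = 4 elements
//   static_assert(_V::size() == 4);
//   using _F = simd<float, simd_abi::fixed_size<4>>;   // same count, stable ABI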
150 // forward declarations is_simd(_mask), simd(_mask), simd_size {{{
151 template <typename _Tp>
152  struct is_simd;
153 
154 template <typename _Tp>
155  struct is_simd_mask;
156 
157 template <typename _Tp, typename _Abi>
158  class simd;
159 
160 template <typename _Tp, typename _Abi>
161  class simd_mask;
162 
163 template <typename _Tp, typename _Abi>
164  struct simd_size;
165 
166 // }}}
167 // load/store flags {{{
168 struct element_aligned_tag
169 {
170  template <typename _Tp, typename _Up = typename _Tp::value_type>
171  static constexpr size_t _S_alignment = alignof(_Up);
172 
173  template <typename _Tp, typename _Up>
174  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
175  _S_apply(_Up* __ptr)
176  { return __ptr; }
177 };
178 
179 struct vector_aligned_tag
180 {
181  template <typename _Tp, typename _Up = typename _Tp::value_type>
182  static constexpr size_t _S_alignment
183  = std::__bit_ceil(sizeof(_Up) * _Tp::size());
184 
185  template <typename _Tp, typename _Up>
186  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
187  _S_apply(_Up* __ptr)
188  { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); }
189 };
190 
191 template <size_t _Np> struct overaligned_tag
192 {
193  template <typename _Tp, typename _Up = typename _Tp::value_type>
194  static constexpr size_t _S_alignment = _Np;
195 
196  template <typename _Tp, typename _Up>
197  _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
198  _S_apply(_Up* __ptr)
199  { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
200 };
201 
202 inline constexpr element_aligned_tag element_aligned = {};
203 
204 inline constexpr vector_aligned_tag vector_aligned = {};
205 
206 template <size_t _Np>
207  inline constexpr overaligned_tag<_Np> overaligned = {};
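// Example (illustrative only): the tags choose the alignment contract for
// copy_from/copy_to. With vector_aligned the implementation may assume
// alignment to the full vector width:
//   alignas(memory_alignment_v<simd<float>>)
//     float __buf[2 * simd<float>::size()] = {};
//   simd<float> __v;
//   __v.copy_from(__buf, vector_aligned);      // aligned load permitted
//   __v.copy_from(__buf + 1, element_aligned); // only alignof(float) assumed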
208 
209 // }}}
210 template <size_t _Xp>
211  using _SizeConstant = integral_constant<size_t, _Xp>;
212 // constexpr feature detection{{{
213 constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
214 constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
215 constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
216 constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
217 constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
218 constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
219 constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
220 constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
221 constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
222 constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
223 constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
224 constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
225 constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
226 constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
227 constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
228 constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
229 constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
230 constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
231 constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
232 constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
233 constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
234 constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
235 constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
236 constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
237 constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
238 constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
239 constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
240 constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
241 constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
242 constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
243 constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
244 constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;
245 
246 constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
247 constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
248 constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
249 constexpr inline bool __support_neon_float =
250 #if defined __GCC_IEC_559
251  __GCC_IEC_559 == 0;
252 #elif defined __FAST_MATH__
253  true;
254 #else
255  false;
256 #endif
257 
258 #ifdef _ARCH_PWR10
259 constexpr inline bool __have_power10vec = true;
260 #else
261 constexpr inline bool __have_power10vec = false;
262 #endif
263 #ifdef __POWER9_VECTOR__
264 constexpr inline bool __have_power9vec = true;
265 #else
266 constexpr inline bool __have_power9vec = false;
267 #endif
268 #if defined __POWER8_VECTOR__
269 constexpr inline bool __have_power8vec = true;
270 #else
271 constexpr inline bool __have_power8vec = __have_power9vec;
272 #endif
273 #if defined __VSX__
274 constexpr inline bool __have_power_vsx = true;
275 #else
276 constexpr inline bool __have_power_vsx = __have_power8vec;
277 #endif
278 #if defined __ALTIVEC__
279 constexpr inline bool __have_power_vmx = true;
280 #else
281 constexpr inline bool __have_power_vmx = __have_power_vsx;
282 #endif
283 
284 // }}}
285 
286 namespace __detail
287 {
288 #ifdef math_errhandling
289  // Determines _S_handle_fpexcept from math_errhandling if it is defined and expands to a constant
290  // expression. math_errhandling may expand to an extern symbol, in which case a constexpr value
291  // must be guessed.
292  template <int = math_errhandling>
293  constexpr bool
294  __handle_fpexcept_impl(int)
295  { return math_errhandling & MATH_ERREXCEPT; }
296 #endif
297 
298  // Fallback if math_errhandling doesn't work: with fast-math assume floating-point exceptions are
299  // ignored, otherwise implement correct exception behavior.
300  constexpr bool
301  __handle_fpexcept_impl(float)
302  {
303 #if defined __FAST_MATH__
304  return false;
305 #else
306  return true;
307 #endif
308  }
309 
310  /// True if math functions must raise floating-point exceptions as specified by C17.
311  static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0);
312 
313  constexpr std::uint_least64_t
314  __floating_point_flags()
315  {
316  std::uint_least64_t __flags = 0;
317  if constexpr (_S_handle_fpexcept)
318  __flags |= 1;
319 #ifdef __FAST_MATH__
320  __flags |= 1 << 1;
321 #elif __FINITE_MATH_ONLY__
322  __flags |= 2 << 1;
323 #elif __GCC_IEC_559 < 2
324  __flags |= 3 << 1;
325 #endif
326  __flags |= (__FLT_EVAL_METHOD__ + 1) << 3;
327  return __flags;
328  }
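 // Worked example (assuming __GCC_IEC_559 == 2, no -ffast-math,
 // __FLT_EVAL_METHOD__ == 0, and _S_handle_fpexcept == true): only bit 0 and
 // the FLT_EVAL_METHOD field are set, so
 //   __floating_point_flags() == (1 | ((0 + 1) << 3)) == 9.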
329 
330  constexpr std::uint_least64_t
331  __machine_flags()
332  {
333  if constexpr (__have_mmx || __have_sse)
334  return __have_mmx
335  | (__have_sse << 1)
336  | (__have_sse2 << 2)
337  | (__have_sse3 << 3)
338  | (__have_ssse3 << 4)
339  | (__have_sse4_1 << 5)
340  | (__have_sse4_2 << 6)
341  | (__have_xop << 7)
342  | (__have_avx << 8)
343  | (__have_avx2 << 9)
344  | (__have_bmi << 10)
345  | (__have_bmi2 << 11)
346  | (__have_lzcnt << 12)
347  | (__have_sse4a << 13)
348  | (__have_fma << 14)
349  | (__have_fma4 << 15)
350  | (__have_f16c << 16)
351  | (__have_popcnt << 17)
352  | (__have_avx512f << 18)
353  | (__have_avx512dq << 19)
354  | (__have_avx512vl << 20)
355  | (__have_avx512bw << 21)
356  | (__have_avx512bitalg << 22)
357  | (__have_avx512vbmi2 << 23)
358  | (__have_avx512vbmi << 24)
359  | (__have_avx512ifma << 25)
360  | (__have_avx512cd << 26)
361  | (__have_avx512vnni << 27)
362  | (__have_avx512vpopcntdq << 28)
363  | (__have_avx512vp2intersect << 29);
364  else if constexpr (__have_neon)
365  return __have_neon
366  | (__have_neon_a32 << 1)
367  | (__have_neon_a64 << 2)
369  | (__support_neon_float << 3);
370  else if constexpr (__have_power_vmx)
371  return __have_power_vmx
372  | (__have_power_vsx << 1)
373  | (__have_power8vec << 2)
374  | (__have_power9vec << 3)
375  | (__have_power10vec << 4);
376  else
377  return 0;
378  }
379 
380  namespace
381  {
382  struct _OdrEnforcer {};
383  }
384 
385  template <std::uint_least64_t...>
386  struct _MachineFlagsTemplate {};
387 
388  /**@internal
389  * Use this type as default template argument to all function templates that
390  * are not declared always_inline. It ensures that a function
391  * specialization, which the compiler decides not to inline, has a unique symbol
392  * (_OdrEnforcer) or a symbol matching the machine/architecture flags
393  * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where
394  * users link TUs compiled with different flags. This is especially important
395  * for using simd in libraries.
396  */
397  using __odr_helper
398  = conditional_t<__machine_flags() == 0, _OdrEnforcer,
399  _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;
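 // Example (illustrative; __some_impl_function is hypothetical): a non-inline
 // implementation function would be declared as
 //   template <typename _Tp, typename = __odr_helper>
 //     _Tp __some_impl_function(_Tp);
 // so that TUs compiled with different machine flags instantiate distinct
 // symbols.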
400 
401  struct _Minimum
402  {
403  template <typename _Tp>
404  _GLIBCXX_SIMD_INTRINSIC constexpr
405  _Tp
406  operator()(_Tp __a, _Tp __b) const
407  {
408  using std::min;
409  return min(__a, __b);
410  }
411  };
412 
413  struct _Maximum
414  {
415  template <typename _Tp>
416  _GLIBCXX_SIMD_INTRINSIC constexpr
417  _Tp
418  operator()(_Tp __a, _Tp __b) const
419  {
420  using std::max;
421  return max(__a, __b);
422  }
423  };
424 } // namespace __detail
425 
426 // unrolled/pack execution helpers
427 // __execute_n_times{{{
428 template <typename _Fp, size_t... _I>
429  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
430  void
431  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
432  { ((void)__f(_SizeConstant<_I>()), ...); }
433 
434 template <typename _Fp>
435  _GLIBCXX_SIMD_INTRINSIC constexpr void
436  __execute_on_index_sequence(_Fp&&, index_sequence<>)
437  { }
438 
439 template <size_t _Np, typename _Fp>
440  _GLIBCXX_SIMD_INTRINSIC constexpr void
441  __execute_n_times(_Fp&& __f)
442  {
443  __execute_on_index_sequence(static_cast<_Fp&&>(__f),
444  make_index_sequence<_Np>{});
445  }
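// Example (illustrative only): unrolls a fixed-trip-count loop at compile
// time; this fills the first four array elements with their index:
//   int __arr[4];
//   __execute_n_times<4>([&](auto __i) { __arr[__i] = __i; });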
446 
447 // }}}
448 // __generate_from_n_evaluations{{{
449 template <typename _R, typename _Fp, size_t... _I>
450  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
451  _R
452  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
453  { return _R{__f(_SizeConstant<_I>())...}; }
454 
455 template <size_t _Np, typename _R, typename _Fp>
456  _GLIBCXX_SIMD_INTRINSIC constexpr _R
457  __generate_from_n_evaluations(_Fp&& __f)
458  {
459  return __execute_on_index_sequence_with_return<_R>(
460  static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
461  }
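// Example (illustrative only): aggregate-initializes an _R from _Np calls,
// here a std::array holding 0, 2, 4, 6:
//   auto __a = __generate_from_n_evaluations<4, std::array<int, 4>>(
//                [](auto __i) { return int(__i) * 2; });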
462 
463 // }}}
464 // __call_with_n_evaluations{{{
465 template <size_t... _I, typename _F0, typename _FArgs>
466  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
467  auto
468  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
469  { return __f0(__fargs(_SizeConstant<_I>())...); }
470 
471 template <size_t _Np, typename _F0, typename _FArgs>
472  _GLIBCXX_SIMD_INTRINSIC constexpr auto
473  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
474  {
475  return __call_with_n_evaluations(make_index_sequence<_Np>{},
476  static_cast<_F0&&>(__f0),
477  static_cast<_FArgs&&>(__fargs));
478  }
479 
480 // }}}
481 // __call_with_subscripts{{{
482 template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
483  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
484  auto
485  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
486  { return __fun(__x[_First + _It]...); }
487 
488 template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
489  _GLIBCXX_SIMD_INTRINSIC constexpr auto
490  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
491  {
492  return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
493  make_index_sequence<_Np>(),
494  static_cast<_Fp&&>(__fun));
495  }
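// Example (illustrative only): both helpers pass a generated argument pack to
// a single callable; a horizontal sum over four elements could read
//   float __x[4] = {1.f, 2.f, 3.f, 4.f};
//   auto __sum = __call_with_subscripts<4>(
//                  __x, [](auto... __vs) { return (__vs + ...); }); // 10.f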
496 
497 // }}}
498 
499 // vvv ---- type traits ---- vvv
500 // integer type aliases{{{
501 using _UChar = unsigned char;
502 using _SChar = signed char;
503 using _UShort = unsigned short;
504 using _UInt = unsigned int;
505 using _ULong = unsigned long;
506 using _ULLong = unsigned long long;
507 using _LLong = long long;
508 
509 //}}}
510 // __first_of_pack{{{
511 template <typename _T0, typename...>
512  struct __first_of_pack
513  { using type = _T0; };
514 
515 template <typename... _Ts>
516  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;
517 
518 //}}}
519 // __value_type_or_identity_t {{{
520 template <typename _Tp>
521  typename _Tp::value_type
522  __value_type_or_identity_impl(int);
523 
524 template <typename _Tp>
525  _Tp
526  __value_type_or_identity_impl(float);
527 
528 template <typename _Tp>
529  using __value_type_or_identity_t
530  = decltype(__value_type_or_identity_impl<_Tp>(int()));
531 
532 // }}}
533 // __is_vectorizable {{{
534 template <typename _Tp>
535  struct __is_vectorizable : public is_arithmetic<_Tp> {};
536 
537 template <>
538  struct __is_vectorizable<bool> : public false_type {};
539 
540 template <typename _Tp>
541  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;
542 
543 // Deduces to a vectorizable type
544 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
545  using _Vectorizable = _Tp;
546 
547 // }}}
548 // _LoadStorePtr / __is_possible_loadstore_conversion {{{
549 template <typename _Ptr, typename _ValueType>
550  struct __is_possible_loadstore_conversion
551  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};
552 
553 template <>
554  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};
555 
556 // Deduces to a type allowed for load/store with the given value type.
557 template <typename _Ptr, typename _ValueType,
558  typename = enable_if_t<
559  __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
560  using _LoadStorePtr = _Ptr;
561 
562 // }}}
563 // __is_bitmask{{{
564 template <typename _Tp, typename = void_t<>>
565  struct __is_bitmask : false_type {};
566 
567 template <typename _Tp>
568  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;
569 
570 // the __mmaskXX case:
571 template <typename _Tp>
572  struct __is_bitmask<_Tp,
573  void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
574  : true_type {};
575 
576 // }}}
577 // __int_for_sizeof{{{
578 #pragma GCC diagnostic push
579 #pragma GCC diagnostic ignored "-Wpedantic"
580 template <size_t _Bytes>
581  constexpr auto
582  __int_for_sizeof()
583  {
584  static_assert(_Bytes > 0);
585  if constexpr (_Bytes == sizeof(int))
586  return int();
587  #ifdef __clang__
588  else if constexpr (_Bytes == sizeof(char))
589  return char();
590  #else
591  else if constexpr (_Bytes == sizeof(_SChar))
592  return _SChar();
593  #endif
594  else if constexpr (_Bytes == sizeof(short))
595  return short();
596  #ifndef __clang__
597  else if constexpr (_Bytes == sizeof(long))
598  return long();
599  #endif
600  else if constexpr (_Bytes == sizeof(_LLong))
601  return _LLong();
602  #ifdef __SIZEOF_INT128__
603  else if constexpr (_Bytes == sizeof(__int128))
604  return __int128();
605  #endif // __SIZEOF_INT128__
606  else if constexpr (_Bytes % sizeof(int) == 0)
607  {
608  constexpr size_t _Np = _Bytes / sizeof(int);
609  struct _Ip
610  {
611  int _M_data[_Np];
612 
613  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
614  operator&(_Ip __rhs) const
615  {
616  return __generate_from_n_evaluations<_Np, _Ip>(
617  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
618  return __rhs._M_data[__i] & _M_data[__i];
619  });
620  }
621 
622  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
623  operator|(_Ip __rhs) const
624  {
625  return __generate_from_n_evaluations<_Np, _Ip>(
626  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
627  return __rhs._M_data[__i] | _M_data[__i];
628  });
629  }
630 
631  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
632  operator^(_Ip __rhs) const
633  {
634  return __generate_from_n_evaluations<_Np, _Ip>(
635  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
636  return __rhs._M_data[__i] ^ _M_data[__i];
637  });
638  }
639 
640  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
641  operator~() const
642  {
643  return __generate_from_n_evaluations<_Np, _Ip>(
644  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; });
645  }
646  };
647  return _Ip{};
648  }
649  else
650  static_assert(_Bytes == 0, "this should be unreachable");
651  }
652 #pragma GCC diagnostic pop
653 
654 template <typename _Tp>
655  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());
656 
657 template <size_t _Np>
658  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());
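// Example (illustrative, assuming the common sizeof(float) == sizeof(int)):
//   static_assert(is_same_v<__int_for_sizeof_t<float>, int>);
//   static_assert(sizeof(__int_with_sizeof_t<16>) == 16); // __int128 or struct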
659 
660 // }}}
661 // __is_fixed_size_abi{{{
662 template <typename _Tp>
663  struct __is_fixed_size_abi : false_type {};
664 
665 template <int _Np>
666  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};
667 
668 template <typename _Tp>
669  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;
670 
671 // }}}
672 // __is_scalar_abi {{{
673 template <typename _Abi>
674  constexpr bool
675  __is_scalar_abi()
676  { return is_same_v<simd_abi::scalar, _Abi>; }
677 
678 // }}}
679 // __abi_bytes_v {{{
680 template <template <int> class _Abi, int _Bytes>
681  constexpr int
682  __abi_bytes_impl(_Abi<_Bytes>*)
683  { return _Bytes; }
684 
685 template <typename _Tp>
686  constexpr int
687  __abi_bytes_impl(_Tp*)
688  { return -1; }
689 
690 template <typename _Abi>
691  inline constexpr int __abi_bytes_v
692  = __abi_bytes_impl(static_cast<_Abi*>(nullptr));
693 
694 // }}}
695 // __is_builtin_bitmask_abi {{{
696 template <typename _Abi>
697  constexpr bool
698  __is_builtin_bitmask_abi()
699  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }
700 
701 // }}}
702 // __is_sse_abi {{{
703 template <typename _Abi>
704  constexpr bool
705  __is_sse_abi()
706  {
707  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
708  return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
709  }
710 
711 // }}}
712 // __is_avx_abi {{{
713 template <typename _Abi>
714  constexpr bool
715  __is_avx_abi()
716  {
717  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
718  return _Bytes > 16 && _Bytes <= 32
719  && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
720  }
721 
722 // }}}
723 // __is_avx512_abi {{{
724 template <typename _Abi>
725  constexpr bool
726  __is_avx512_abi()
727  {
728  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
729  return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
730  }
731 
732 // }}}
733 // __is_neon_abi {{{
734 template <typename _Abi>
735  constexpr bool
736  __is_neon_abi()
737  {
738  constexpr auto _Bytes = __abi_bytes_v<_Abi>;
739  return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
740  }
741 
742 // }}}
743 // __make_dependent_t {{{
744 template <typename, typename _Up>
745  struct __make_dependent
746  { using type = _Up; };
747 
748 template <typename _Tp, typename _Up>
749  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;
750 
751 // }}}
752 // ^^^ ---- type traits ---- ^^^
753 
754 // __invoke_ub{{{
755 template <typename... _Args>
756  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
757  __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args)
758  {
759 #ifdef _GLIBCXX_DEBUG_UB
760  __builtin_fprintf(stderr, __msg, __args...);
761  __builtin_trap();
762 #else
763  __builtin_unreachable();
764 #endif
765  }
766 
767 // }}}
768 // __assert_unreachable{{{
769 template <typename _Tp>
770  struct __assert_unreachable
771  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };
772 
773 // }}}
774 // __size_or_zero_v {{{
775 template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
776  constexpr size_t
777  __size_or_zero_dispatch(int)
778  { return _Np; }
779 
780 template <typename _Tp, typename _Ap>
781  constexpr size_t
782  __size_or_zero_dispatch(float)
783  { return 0; }
784 
785 template <typename _Tp, typename _Ap>
786  inline constexpr size_t __size_or_zero_v
787  = __size_or_zero_dispatch<_Tp, _Ap>(0);
788 
789 // }}}
790 // __div_roundup {{{
791 inline constexpr size_t
792 __div_roundup(size_t __a, size_t __b)
793 { return (__a + __b - 1) / __b; }
794 
795 // }}}
796 // _ExactBool{{{
797 class _ExactBool
798 {
799  const bool _M_data;
800 
801 public:
802  _GLIBCXX_SIMD_INTRINSIC constexpr
803  _ExactBool(bool __b) : _M_data(__b) {}
804 
805  _ExactBool(int) = delete;
806 
807  _GLIBCXX_SIMD_INTRINSIC constexpr
808  operator bool() const
809  { return _M_data; }
810 };
811 
812 // }}}
813 // __may_alias{{{
814 /**@internal
815  * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
816  * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
817  * that support it).
818  */
819 template <typename _Tp>
820  using __may_alias [[__gnu__::__may_alias__]] = _Tp;
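// Example (illustrative only): reads an int object representation out of a
// char buffer without violating strict aliasing:
//   const char* __bytes = /* externally filled buffer */;
//   int __v = *reinterpret_cast<const __may_alias<int>*>(__bytes);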
821 
822 // }}}
823 // _UnsupportedBase {{{
824 // simd and simd_mask base for unsupported <_Tp, _Abi>
825 struct _UnsupportedBase
826 {
827  _UnsupportedBase() = delete;
828  _UnsupportedBase(const _UnsupportedBase&) = delete;
829  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
830  ~_UnsupportedBase() = delete;
831 };
832 
833 // }}}
834 // _InvalidTraits {{{
835 /**
836  * @internal
837  * Defines the implementation of a given <_Tp, _Abi>.
838  *
839  * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
840  * possible. Static assertions in the type definition do not suffice. It is
841  * important that SFINAE works.
842  */
843 struct _InvalidTraits
844 {
845  using _IsValid = false_type;
846  using _SimdBase = _UnsupportedBase;
847  using _MaskBase = _UnsupportedBase;
848 
849  static constexpr size_t _S_full_size = 0;
850  static constexpr bool _S_is_partial = false;
851 
852  static constexpr size_t _S_simd_align = 1;
853  struct _SimdImpl;
854  struct _SimdMember {};
855  struct _SimdCastType;
856 
857  static constexpr size_t _S_mask_align = 1;
858  struct _MaskImpl;
859  struct _MaskMember {};
860  struct _MaskCastType;
861 };
862 
863 // }}}
864 // _SimdTraits {{{
865 template <typename _Tp, typename _Abi, typename = void_t<>>
866  struct _SimdTraits : _InvalidTraits {};
867 
868 // }}}
869 // __private_init, __bitset_init{{{
870 /**
871  * @internal
872  * Tag used for private init constructor of simd and simd_mask
873  */
874 inline constexpr struct _PrivateInit {} __private_init = {};
875 
876 inline constexpr struct _BitsetInit {} __bitset_init = {};
877 
878 // }}}
879 // __is_narrowing_conversion<_From, _To>{{{
880 template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
881  bool = is_arithmetic_v<_To>>
882  struct __is_narrowing_conversion;
883 
884 // ignore "signed/unsigned mismatch" in the following trait.
885 // The implicit conversions will do the right thing here.
886 template <typename _From, typename _To>
887  struct __is_narrowing_conversion<_From, _To, true, true>
888  : public __bool_constant<(
889  __digits_v<_From> > __digits_v<_To>
890  || __finite_max_v<_From> > __finite_max_v<_To>
891  || __finite_min_v<_From> < __finite_min_v<_To>
892  || (is_signed_v<_From> && is_unsigned_v<_To>))> {};
893 
894 template <typename _Tp>
895  struct __is_narrowing_conversion<_Tp, bool, true, true>
896  : public true_type {};
897 
898 template <>
899  struct __is_narrowing_conversion<bool, bool, true, true>
900  : public false_type {};
901 
902 template <typename _Tp>
903  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
904  : public false_type {};
905 
906 template <typename _From, typename _To>
907  struct __is_narrowing_conversion<_From, _To, false, true>
908  : public negation<is_convertible<_From, _To>> {};
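// Example (illustrative only): the trait mirrors the rules for braced
// initialization:
//   static_assert(__is_narrowing_conversion<int, short>::value);
//   static_assert(!__is_narrowing_conversion<short, int>::value);
//   static_assert(__is_narrowing_conversion<int, unsigned>::value); // sign change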
909 
910 // }}}
911 // __converts_to_higher_integer_rank{{{
912 template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
913  struct __converts_to_higher_integer_rank : public true_type {};
914 
915 // this may fail for char -> short if sizeof(char) == sizeof(short)
916 template <typename _From, typename _To>
917  struct __converts_to_higher_integer_rank<_From, _To, false>
918  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};
919 
920 // }}}
921 // __data(simd/simd_mask) {{{
922 template <typename _Tp, typename _Ap>
923  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
924  __data(const simd<_Tp, _Ap>& __x);
925 
926 template <typename _Tp, typename _Ap>
927  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
928  __data(simd<_Tp, _Ap>& __x);
929 
930 template <typename _Tp, typename _Ap>
931  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
932  __data(const simd_mask<_Tp, _Ap>& __x);
933 
934 template <typename _Tp, typename _Ap>
935  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
936  __data(simd_mask<_Tp, _Ap>& __x);
937 
938 // }}}
939 // _SimdConverter {{{
940 template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
941  typename = void>
942  struct _SimdConverter;
943 
944 template <typename _Tp, typename _Ap>
945  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
946  {
947  template <typename _Up>
948  _GLIBCXX_SIMD_INTRINSIC const _Up&
949  operator()(const _Up& __x)
950  { return __x; }
951  };
952 
953 // }}}
954 // __to_value_type_or_member_type {{{
955 template <typename _V>
956  _GLIBCXX_SIMD_INTRINSIC constexpr auto
957  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
958  { return __data(__x); }
959 
960 template <typename _V>
961  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
962  __to_value_type_or_member_type(const typename _V::value_type& __x)
963  { return __x; }
964 
965 // }}}
966 // __bool_storage_member_type{{{
967 template <size_t _Size>
968  struct __bool_storage_member_type;
969 
970 template <size_t _Size>
971  using __bool_storage_member_type_t =
972  typename __bool_storage_member_type<_Size>::type;
973 
974 // }}}
975 // _SimdTuple {{{
976 // why not tuple?
977 // 1. tuple gives no guarantee about the storage order, but I require
978 //    storage equivalent to array<_Tp, _Np>
980 // 2. direct access to the element type (first template argument)
981 // 3. enforces equal element type, only different _Abi types are allowed
982 template <typename _Tp, typename... _Abis>
983  struct _SimdTuple;
984 
985 //}}}
986 // __fixed_size_storage_t {{{
987 template <typename _Tp, int _Np>
988  struct __fixed_size_storage;
989 
990 template <typename _Tp, int _Np>
991  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;
992 
993 // }}}
994 // _SimdWrapper fwd decl{{{
995 template <typename _Tp, size_t _Size, typename = void_t<>>
996  struct _SimdWrapper;
997 
998 template <typename _Tp>
999  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
1000 template <typename _Tp>
1001  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
1002 template <typename _Tp>
1003  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
1004 template <typename _Tp>
1005  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;
1006 
1007 // }}}
1008 // __is_simd_wrapper {{{
1009 template <typename _Tp>
1010  struct __is_simd_wrapper : false_type {};
1011 
1012 template <typename _Tp, size_t _Np>
1013  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};
1014 
1015 template <typename _Tp>
1016  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;
1017 
1018 // }}}
1019 // _BitOps {{{
1020 struct _BitOps
1021 {
1022  // _S_bit_iteration {{{
1023  template <typename _Tp, typename _Fp>
1024  static void
1025  _S_bit_iteration(_Tp __mask, _Fp&& __f)
1026  {
1027  static_assert(sizeof(_ULLong) >= sizeof(_Tp));
1028  conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
1029  if constexpr (is_convertible_v<_Tp, decltype(__k)>)
1030  __k = __mask;
1031  else
1032  __k = __mask.to_ullong();
1033  while (__k)
1034  {
1035  __f(std::__countr_zero(__k));
1036  __k &= (__k - 1);
1037  }
1038  }
1039 
1040  //}}}
1041 };
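// Example (illustrative only): _S_bit_iteration visits the index of every set
// bit, lowest first; for 0b1010 the callback sees 1, then 3:
//   _BitOps::_S_bit_iteration(0b1010u, [](auto __i) { /* __i: 1, 3 */ });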
1042 
1043 //}}}
1044 // __increment, __decrement {{{
1045 template <typename _Tp = void>
1046  struct __increment
1047  { constexpr _Tp operator()(_Tp __a) const { return ++__a; } };
1048 
1049 template <>
1050  struct __increment<void>
1051  {
1052  template <typename _Tp>
1053  constexpr _Tp
1054  operator()(_Tp __a) const
1055  { return ++__a; }
1056  };
1057 
1058 template <typename _Tp = void>
1059  struct __decrement
1060  { constexpr _Tp operator()(_Tp __a) const { return --__a; } };
1061 
1062 template <>
1063  struct __decrement<void>
1064  {
1065  template <typename _Tp>
1066  constexpr _Tp
1067  operator()(_Tp __a) const
1068  { return --__a; }
1069  };
1070 
1071 // }}}
1072 // _ValuePreserving(OrInt) {{{
1073 template <typename _From, typename _To,
1074  typename = enable_if_t<negation<
1075  __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
1076  using _ValuePreserving = _From;
1077 
1078 template <typename _From, typename _To,
1079  typename _DecayedFrom = __remove_cvref_t<_From>,
1080  typename = enable_if_t<conjunction<
1081  is_convertible<_From, _To>,
1082  disjunction<
1083  is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
1084  conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
1085  negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
1086  using _ValuePreservingOrInt = _From;
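// Example (illustrative only): these aliases SFINAE away lossy conversions in
// the simd broadcast constructor:
//   simd<float> __a = 1;      // OK, int is always allowed
//   simd<float> __b = 1.f;    // OK, value-preserving
//   // simd<float> __c = 1.0; // ill-formed: double -> float narrows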
1087 
1088 // }}}
1089 // __intrinsic_type {{{
1090 template <typename _Tp, size_t _Bytes, typename = void_t<>>
1091  struct __intrinsic_type;
1092 
1093 template <typename _Tp, size_t _Size>
1094  using __intrinsic_type_t =
1095  typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;
1096 
1097 template <typename _Tp>
1098  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
1099 template <typename _Tp>
1100  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
1101 template <typename _Tp>
1102  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
1103 template <typename _Tp>
1104  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
1105 template <typename _Tp>
1106  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
1107 template <typename _Tp>
1108  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;
1109 
1110 // }}}
1111 // _BitMask {{{
1112 template <size_t _Np, bool _Sanitized = false>
1113  struct _BitMask;
1114 
1115 template <size_t _Np, bool _Sanitized>
1116  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};
1117 
1118 template <size_t _Np>
1119  using _SanitizedBitMask = _BitMask<_Np, true>;
1120 
1121 template <size_t _Np, bool _Sanitized>
1122  struct _BitMask
1123  {
1124  static_assert(_Np > 0);
1125 
1126  static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);
1127 
1128  using _Tp = conditional_t<_Np == 1, bool,
1129  make_unsigned_t<__int_with_sizeof_t<std::min(
1130  sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;
1131 
1132  static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));
1133 
1134  _Tp _M_bits[_S_array_size];
1135 
1136  static constexpr int _S_unused_bits
1137  = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;
1138 
1139  static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;
1140 
1141  constexpr _BitMask() noexcept = default;
1142 
1143  constexpr _BitMask(unsigned long long __x) noexcept
1144  : _M_bits{static_cast<_Tp>(__x)} {}
1145 
1146  _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}
1147 
1148  constexpr _BitMask(const _BitMask&) noexcept = default;
1149 
1150  template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
1151  && _Sanitized == true>>
1152  constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
1153  : _BitMask(__rhs._M_sanitized()) {}
1154 
1155  constexpr operator _SimdWrapper<bool, _Np>() const noexcept
1156  {
1157  static_assert(_S_array_size == 1);
1158  return _M_bits[0];
1159  }
1160 
1161  // precondition: is sanitized
1162  constexpr _Tp
1163  _M_to_bits() const noexcept
1164  {
1165  static_assert(_S_array_size == 1);
1166  return _M_bits[0];
1167  }
1168 
1169  // precondition: is sanitized
1170  constexpr unsigned long long
1171  to_ullong() const noexcept
1172  {
1173  static_assert(_S_array_size == 1);
1174  return _M_bits[0];
1175  }
1176 
1177  // precondition: is sanitized
1178  constexpr unsigned long
1179  to_ulong() const noexcept
1180  {
1181  static_assert(_S_array_size == 1);
1182  return _M_bits[0];
1183  }
1184 
1185  constexpr bitset<_Np>
1186  _M_to_bitset() const noexcept
1187  {
1188  static_assert(_S_array_size == 1);
1189  return _M_bits[0];
1190  }
1191 
1192  constexpr decltype(auto)
1193  _M_sanitized() const noexcept
1194  {
1195  if constexpr (_Sanitized)
1196  return *this;
1197  else if constexpr (_Np == 1)
1198  return _SanitizedBitMask<_Np>(_M_bits[0]);
1199  else
1200  {
1201  _SanitizedBitMask<_Np> __r = {};
1202  for (int __i = 0; __i < _S_array_size; ++__i)
1203  __r._M_bits[__i] = _M_bits[__i];
1204  if constexpr (_S_unused_bits > 0)
1205  __r._M_bits[_S_array_size - 1] &= _S_bitmask;
1206  return __r;
1207  }
1208  }
1209 
1210  template <size_t _Mp, bool _LSanitized>
1211  constexpr _BitMask<_Np + _Mp, _Sanitized>
1212  _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
1213  {
1214  constexpr size_t _RN = _Np + _Mp;
1215  using _Rp = _BitMask<_RN, _Sanitized>;
1216  if constexpr (_Rp::_S_array_size == 1)
1217  {
1218  _Rp __r{{_M_bits[0]}};
1219  __r._M_bits[0] <<= _Mp;
1220  __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
1221  return __r;
1222  }
1223  else
1224  __assert_unreachable<_Rp>();
1225  }
1226 
1227  // Return a new _BitMask with size _NewSize while dropping _DropLsb least
1228  // significant bits. If the operation implicitly produces a sanitized bitmask,
1229  // the result type will have _Sanitized set.
1230  template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
1231  constexpr auto
1232  _M_extract() const noexcept
1233  {
1234  static_assert(_Np > _DropLsb);
1235  static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
1236  "not implemented for bitmasks larger than one ullong");
1237  if constexpr (_NewSize == 1)
1238  // must sanitize because the return _Tp is bool
1239  return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
1240  else
1241  return _BitMask<_NewSize,
1242  ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
1243  && _NewSize + _DropLsb <= _Np)
1244  || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
1245  && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
1246  >> _DropLsb);
1247  }
1248 
1249  // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
1250  constexpr bool
1251  all() const noexcept
1252  {
1253  if constexpr (_Np == 1)
1254  return _M_bits[0];
1255  else if constexpr (!_Sanitized)
1256  return _M_sanitized().all();
1257  else
1258  {
1259  constexpr _Tp __allbits = ~_Tp();
1260  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1261  if (_M_bits[__i] != __allbits)
1262  return false;
1263  return _M_bits[_S_array_size - 1] == _S_bitmask;
1264  }
1265  }
1266 
1267  // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
1268  // false.
1269  constexpr bool
1270  any() const noexcept
1271  {
1272  if constexpr (_Np == 1)
1273  return _M_bits[0];
1274  else if constexpr (!_Sanitized)
1275  return _M_sanitized().any();
1276  else
1277  {
1278  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1279  if (_M_bits[__i] != 0)
1280  return true;
1281  return _M_bits[_S_array_size - 1] != 0;
1282  }
1283  }
1284 
1285  // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
1286  constexpr bool
1287  none() const noexcept
1288  {
1289  if constexpr (_Np == 1)
1290  return !_M_bits[0];
1291  else if constexpr (!_Sanitized)
1292  return _M_sanitized().none();
1293  else
1294  {
1295  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1296  if (_M_bits[__i] != 0)
1297  return false;
1298  return _M_bits[_S_array_size - 1] == 0;
1299  }
1300  }
1301 
1302  // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
1303  // false.
1304  constexpr int
1305  count() const noexcept
1306  {
1307  if constexpr (_Np == 1)
1308  return _M_bits[0];
1309  else if constexpr (!_Sanitized)
1310  return _M_sanitized().count();
1311  else
1312  {
1313  int __result = __builtin_popcountll(_M_bits[0]);
1314  for (int __i = 1; __i < _S_array_size; ++__i)
1315  __result += __builtin_popcountll(_M_bits[__i]);
1316  return __result;
1317  }
1318  }
1319 
1320  // Returns the bit at offset __i as bool.
1321  constexpr bool
1322  operator[](size_t __i) const noexcept
1323  {
1324  if constexpr (_Np == 1)
1325  return _M_bits[0];
1326  else if constexpr (_S_array_size == 1)
1327  return (_M_bits[0] >> __i) & 1;
1328  else
1329  {
1330  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1331  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1332  return (_M_bits[__j] >> __shift) & 1;
1333  }
1334  }
1335 
1336  template <size_t __i>
1337  constexpr bool
1338  operator[](_SizeConstant<__i>) const noexcept
1339  {
1340  static_assert(__i < _Np);
1341  constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1342  constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1343  return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
1344  }
1345 
1346  // Set the bit at offset __i to __x.
1347  constexpr void
1348  set(size_t __i, bool __x) noexcept
1349  {
1350  if constexpr (_Np == 1)
1351  _M_bits[0] = __x;
1352  else if constexpr (_S_array_size == 1)
1353  {
1354  _M_bits[0] &= ~_Tp(_Tp(1) << __i);
1355  _M_bits[0] |= _Tp(_Tp(__x) << __i);
1356  }
1357  else
1358  {
1359  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1360  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1361  _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
1362  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1363  }
1364  }
1365 
1366  template <size_t __i>
1367  constexpr void
1368  set(_SizeConstant<__i>, bool __x) noexcept
1369  {
1370  static_assert(__i < _Np);
1371  if constexpr (_Np == 1)
1372  _M_bits[0] = __x;
1373  else
1374  {
1375  constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1376  constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1377  constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
1378  _M_bits[__j] &= __mask;
1379  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1380  }
1381  }
1382 
1383  // Inverts all bits. Sanitized input leads to sanitized output.
1384  constexpr _BitMask
1385  operator~() const noexcept
1386  {
1387  if constexpr (_Np == 1)
1388  return !_M_bits[0];
1389  else
1390  {
1391  _BitMask __result{};
1392  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1393  __result._M_bits[__i] = ~_M_bits[__i];
1394  if constexpr (_Sanitized)
1395  __result._M_bits[_S_array_size - 1]
1396  = _M_bits[_S_array_size - 1] ^ _S_bitmask;
1397  else
1398  __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
1399  return __result;
1400  }
1401  }
1402 
1403  constexpr _BitMask&
1404  operator^=(const _BitMask& __b) & noexcept
1405  {
1406  __execute_n_times<_S_array_size>(
1407  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; });
1408  return *this;
1409  }
1410 
1411  constexpr _BitMask&
1412  operator|=(const _BitMask& __b) & noexcept
1413  {
1414  __execute_n_times<_S_array_size>(
1415  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; });
1416  return *this;
1417  }
1418 
1419  constexpr _BitMask&
1420  operator&=(const _BitMask& __b) & noexcept
1421  {
1422  __execute_n_times<_S_array_size>(
1423  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; });
1424  return *this;
1425  }
1426 
1427  friend constexpr _BitMask
1428  operator^(const _BitMask& __a, const _BitMask& __b) noexcept
1429  {
1430  _BitMask __r = __a;
1431  __r ^= __b;
1432  return __r;
1433  }
1434 
1435  friend constexpr _BitMask
1436  operator|(const _BitMask& __a, const _BitMask& __b) noexcept
1437  {
1438  _BitMask __r = __a;
1439  __r |= __b;
1440  return __r;
1441  }
1442 
1443  friend constexpr _BitMask
1444  operator&(const _BitMask& __a, const _BitMask& __b) noexcept
1445  {
1446  _BitMask __r = __a;
1447  __r &= __b;
1448  return __r;
1449  }
1450 
1451  _GLIBCXX_SIMD_INTRINSIC
1452  constexpr bool
1453  _M_is_constprop() const
1454  {
1455  if constexpr (_S_array_size == 1)
1456  return __builtin_constant_p(_M_bits[0]);
1457  else
1458  {
1459  for (int __i = 0; __i < _S_array_size; ++__i)
1460  if (!__builtin_constant_p(_M_bits[__i]))
1461  return false;
1462  return true;
1463  }
1464  }
1465  };
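// Example (illustrative only): an 8-bit mask with bits 0 and 2 set; count()
// sanitizes implicitly and _M_extract drops least-significant bits:
//   constexpr _BitMask<8> __k(0b00000101);
//   static_assert(__k[_SizeConstant<2>()]);
//   static_assert(__k.count() == 2);
//   static_assert(__k._M_extract<1>().count() == 1); // bit 0 dropped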
1466 
1467 // }}}
1468 
1469 // vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
1470 // __min_vector_size {{{
1471 template <typename _Tp = void>
1472  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);
1473 
1474 #if _GLIBCXX_SIMD_HAVE_NEON
1475 template <>
1476  inline constexpr int __min_vector_size<void> = 8;
1477 #else
1478 template <>
1479  inline constexpr int __min_vector_size<void> = 16;
1480 #endif
1481 
1482 // }}}
1483 // __vector_type {{{
1484 template <typename _Tp, size_t _Np, typename = void>
1485  struct __vector_type_n {};
1486 
1487 // substitution failure for 0-element case
1488 template <typename _Tp>
1489  struct __vector_type_n<_Tp, 0, void> {};
1490 
1491 // special case 1-element to be _Tp itself
1492 template <typename _Tp>
1493  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
1494  { using type = _Tp; };
1495 
1496 // else, use GNU-style builtin vector types
1497 template <typename _Tp, size_t _Np>
1498  struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
1499  {
1500  static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));
1501 
1502  static constexpr size_t _S_Bytes =
1503 #ifdef __i386__
1504  // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
1505  // those objects are passed via MMX registers and nothing ever calls EMMS.
1506  _S_Np2 == 8 ? 16 :
1507 #endif
1508  _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
1509  : _S_Np2;
1510 
1511  using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
1512  };
1513 
1514 template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
1515  struct __vector_type;
1516 
1517 template <typename _Tp, size_t _Bytes>
1518  struct __vector_type<_Tp, _Bytes, 0>
1519  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};
1520 
1521 template <typename _Tp, size_t _Size>
1522  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;
1523 
1524 template <typename _Tp>
1525  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
1526 template <typename _Tp>
1527  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
1528 template <typename _Tp>
1529  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
1530 template <typename _Tp>
1531  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
1532 template <typename _Tp>
1533  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
1534 template <typename _Tp>
1535  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;
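// Example (illustrative only): sizes round up to a power of two, with a
// minimum of __min_vector_size<_Tp> bytes; the 1-element case stays scalar:
//   static_assert(sizeof(__vector_type_t<float, 4>) == 16);
//   static_assert(sizeof(__vector_type_t<float, 3>) == 16); // rounded up
//   static_assert(is_same_v<__vector_type_t<float, 1>, float>);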
1536 
1537 // }}}
1538 // __is_vector_type {{{
1539 template <typename _Tp, typename = void_t<>>
1540  struct __is_vector_type : false_type {};
1541 
1542 template <typename _Tp>
1543  struct __is_vector_type<
1544  _Tp, void_t<typename __vector_type<
1545  remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
1546  : is_same<_Tp, typename __vector_type<
1547  remove_reference_t<decltype(declval<_Tp>()[0])>,
1548  sizeof(_Tp)>::type> {};
1549 
1550 template <typename _Tp>
1551  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;
1552 
1553 // }}}
1554 // __is_intrinsic_type {{{
1555 #if _GLIBCXX_SIMD_HAVE_SSE_ABI
1556 template <typename _Tp>
1557  using __is_intrinsic_type = __is_vector_type<_Tp>;
1558 #else // not SSE (x86)
1559 template <typename _Tp, typename = void_t<>>
1560  struct __is_intrinsic_type : false_type {};
1561 
1562 template <typename _Tp>
1563  struct __is_intrinsic_type<
1564  _Tp, void_t<typename __intrinsic_type<
1565  remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
1566  : is_same<_Tp, typename __intrinsic_type<
1567  remove_reference_t<decltype(declval<_Tp>()[0])>,
1568  sizeof(_Tp)>::type> {};
1569 #endif
1570 
1571 template <typename _Tp>
1572  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;
1573 
1574 // }}}
1575 // _VectorTraits{{{
1576 template <typename _Tp, typename = void_t<>>
1577  struct _VectorTraitsImpl;
1578 
1579 template <typename _Tp>
1580  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
1581  || __is_intrinsic_type_v<_Tp>>>
1582  {
1583  using type = _Tp;
1584  using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
1585  static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
1586  using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
1587  template <typename _Up, int _W = _S_full_size>
1588  static constexpr bool _S_is
1589  = is_same_v<value_type, _Up> && _W == _S_full_size;
1590  };
1591 
1592 template <typename _Tp, size_t _Np>
1593  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
1594  void_t<__vector_type_t<_Tp, _Np>>>
1595  {
1596  using type = __vector_type_t<_Tp, _Np>;
1597  using value_type = _Tp;
1598  static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
1599  using _Wrapper = _SimdWrapper<_Tp, _Np>;
1600  static constexpr bool _S_is_partial = (_Np < _S_full_size);
1601  static constexpr int _S_partial_width = _Np;
1602  template <typename _Up, int _W = _S_full_size>
1603  static constexpr bool _S_is
1604  = is_same_v<value_type, _Up> && _W == _S_full_size;
1605  };
1606 
1607 template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
1608  using _VectorTraits = _VectorTraitsImpl<_Tp>;
1609 
1610 // }}}
1611 // __as_vector{{{
1612 template <typename _V>
1613  _GLIBCXX_SIMD_INTRINSIC constexpr auto
1614  __as_vector(_V __x)
1615  {
1616  if constexpr (__is_vector_type_v<_V>)
1617  return __x;
1618  else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1619  return __data(__x)._M_data;
1620  else if constexpr (__is_vectorizable_v<_V>)
1621  return __vector_type_t<_V, 2>{__x};
1622  else
1623  return __x._M_data;
1624  }
1625 
1626 // }}}
1627 // __as_wrapper{{{
1628 template <size_t _Np = 0, typename _V>
1629  _GLIBCXX_SIMD_INTRINSIC constexpr auto
1630  __as_wrapper(_V __x)
1631  {
1632  if constexpr (__is_vector_type_v<_V>)
1633  return _SimdWrapper<typename _VectorTraits<_V>::value_type,
1634  (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
1635  else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1636  {
1637  static_assert(_V::size() == _Np);
1638  return __data(__x);
1639  }
1640  else
1641  {
1642  static_assert(_V::_S_size == _Np);
1643  return __x;
1644  }
1645  }
1646 
1647 // }}}
1648 // __intrin_bitcast{{{
1649 template <typename _To, typename _From>
1650  _GLIBCXX_SIMD_INTRINSIC constexpr _To
1651  __intrin_bitcast(_From __v)
1652  {
1653  static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
1654  && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
1655  if constexpr (sizeof(_To) == sizeof(_From))
1656  return reinterpret_cast<_To>(__v);
1657  else if constexpr (sizeof(_From) > sizeof(_To))
1658  if constexpr (sizeof(_To) >= 16)
1659  return reinterpret_cast<const __may_alias<_To>&>(__v);
1660  else
1661  {
1662  _To __r;
1663  __builtin_memcpy(&__r, &__v, sizeof(_To));
1664  return __r;
1665  }
1666 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1667  else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
1668  return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
1669  reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1670  else if constexpr (__have_avx512f && sizeof(_From) == 16
1671  && sizeof(_To) == 64)
1672  return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
1673  reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1674  else if constexpr (__have_avx512f && sizeof(_From) == 32
1675  && sizeof(_To) == 64)
1676  return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
1677  reinterpret_cast<__vector_type_t<float, 8>>(__v)));
1678 #endif // _GLIBCXX_SIMD_X86INTRIN
1679  else if constexpr (sizeof(__v) <= 8)
1680  return reinterpret_cast<_To>(
1681  __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
1682  reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
1683  else
1684  {
1685  static_assert(sizeof(_To) > sizeof(_From));
1686  _To __r = {};
1687  __builtin_memcpy(&__r, &__v, sizeof(_From));
1688  return __r;
1689  }
1690  }
1691 
1692 // }}}
1693 // __vector_bitcast{{{
1694 template <typename _To, size_t _NN = 0, typename _From,
1695  typename _FromVT = _VectorTraits<_From>,
1696  size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
1697  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1698  __vector_bitcast(_From __x)
1699  {
1700  using _R = __vector_type_t<_To, _Np>;
1701  return __intrin_bitcast<_R>(__x);
1702  }
1703 
1704 template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
1705  size_t _Np
1706  = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
1707  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1708  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
1709  {
1710  static_assert(_Np > 1);
1711  return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
1712  }
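// Example (illustrative only): reinterprets the bytes of a float vector as
// ints, e.g. to inspect IEEE-754 bit patterns elementwise:
//   __vector_type_t<float, 4> __f = {1.f, 2.f, 3.f, 4.f};
//   auto __i = __vector_bitcast<int>(__f); // __vector_type_t<int, 4>
//   // __i[0] == 0x3f800000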
1713 
1714 // }}}
1715 // __convert_x86 declarations {{{
1716 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
1717 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1718  _To __convert_x86(_Tp);
1719 
1720 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1721  _To __convert_x86(_Tp, _Tp);
1722 
1723 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1724  _To __convert_x86(_Tp, _Tp, _Tp, _Tp);
1725 
1726 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1727  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);
1728 
1729 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1730  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
1731  _Tp, _Tp, _Tp, _Tp);
1732 #endif // _GLIBCXX_SIMD_WORKAROUND_PR85048
1733 
1734 //}}}
1735 // __bit_cast {{{
1736 template <typename _To, typename _From>
1737  _GLIBCXX_SIMD_INTRINSIC constexpr _To
1738  __bit_cast(const _From __x)
1739  {
1740 #if __has_builtin(__builtin_bit_cast)
1741  return __builtin_bit_cast(_To, __x);
1742 #else
1743  static_assert(sizeof(_To) == sizeof(_From));
1744  constexpr bool __to_is_vectorizable
1745  = is_arithmetic_v<_To> || is_enum_v<_To>;
1746  constexpr bool __from_is_vectorizable
1747  = is_arithmetic_v<_From> || is_enum_v<_From>;
1748  if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
1749  return reinterpret_cast<_To>(__x);
1750  else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
1751  {
1752  using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
1753  return reinterpret_cast<_To>(_FV{__x});
1754  }
1755  else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
1756  {
1757  using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
1758  using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
1759  return reinterpret_cast<_TV>(_FV{__x})[0];
1760  }
1761  else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
1762  {
1763  using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
1764  return reinterpret_cast<_TV>(__x)[0];
1765  }
1766  else
1767  {
1768  _To __r;
1769  __builtin_memcpy(reinterpret_cast<char*>(&__r),
1770  reinterpret_cast<const char*>(&__x), sizeof(_To));
1771  return __r;
1772  }
1773 #endif
1774  }
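// Example (illustrative): type punning without UB, e.g. to inspect the bit
// representation of a float:
//   const unsigned __one_bits = __bit_cast<unsigned>(1.f);
//   // __one_bits == 0x3f800000 on IEC559 targets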
1775 
1776 // }}}
1777 // __to_intrin {{{
1778 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
1779  typename _R = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
1780  _GLIBCXX_SIMD_INTRINSIC constexpr _R
1781  __to_intrin(_Tp __x)
1782  {
1783  static_assert(sizeof(__x) <= sizeof(_R),
1784  "__to_intrin may never drop values off the end");
1785  if constexpr (sizeof(__x) == sizeof(_R))
1786  return reinterpret_cast<_R>(__as_vector(__x));
1787  else
1788  {
1789  using _Up = __int_for_sizeof_t<_Tp>;
1790  return reinterpret_cast<_R>(
1791  __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
1792  }
1793  }
1794 
1795 // }}}
1796 // __make_vector{{{
1797 template <typename _Tp, typename... _Args>
1798  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
1799  __make_vector(const _Args&... __args)
1800  { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }
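// Example (illustrative): the element count is deduced from the argument
// count, with each argument converted to _Tp:
//   __make_vector<float>(1, 2, 3, 4) // __vector_type_t<float, 4>{1, 2, 3, 4}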
1801 
1802 // }}}
1803 // __vector_broadcast{{{
1804 template <size_t _Np, typename _Tp, size_t... _I>
1805  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1806  __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
1807  { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }
1808 
1809 template <size_t _Np, typename _Tp>
1810  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1811  __vector_broadcast(_Tp __x)
1812  { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }
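// Example (illustrative):
//   __vector_broadcast<4>(1.f) // __vector_type_t<float, 4>{1, 1, 1, 1}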
1813 
1814 // }}}
1815 // __generate_vector{{{
1816 template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
1817  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1818  __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
1819  { return __vector_type_t<_Tp, _Np>{static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; }
1820 
1821 template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
1822  _GLIBCXX_SIMD_INTRINSIC constexpr _V
1823  __generate_vector(_Gp&& __gen)
1824  {
1825  if constexpr (__is_vector_type_v<_V>)
1826  return __generate_vector_impl<typename _VVT::value_type,
1827  _VVT::_S_full_size>(
1828  static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
1829  else
1830  return __generate_vector_impl<typename _VVT::value_type,
1831  _VVT::_S_partial_width>(
1832  static_cast<_Gp&&>(__gen),
1833  make_index_sequence<_VVT::_S_partial_width>());
1834  }
1835 
1836 template <typename _Tp, size_t _Np, typename _Gp>
1837  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1838  __generate_vector(_Gp&& __gen)
1839  {
1840  return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
1841  make_index_sequence<_Np>());
1842  }
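// Example (illustrative): the generator is invoked with _SizeConstant<_I>,
// so the element index is usable as a constant expression:
//   auto __iota = __generate_vector<int, 4>(
//     [](auto __i) { return int(__i); }); // {0, 1, 2, 3}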
1843 
1844 // }}}
1845 // __xor{{{
1846 template <typename _TW>
1847  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1848  __xor(_TW __a, _TW __b) noexcept
1849  {
1850  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1851  {
1852  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1853  _VectorTraitsImpl<_TW>>::value_type;
1854  if constexpr (is_floating_point_v<_Tp>)
1855  {
1856  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1857  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1858  ^ __vector_bitcast<_Ip>(__b));
1859  }
1860  else if constexpr (__is_vector_type_v<_TW>)
1861  return __a ^ __b;
1862  else
1863  return __a._M_data ^ __b._M_data;
1864  }
1865  else
1866  return __a ^ __b;
1867  }
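// Example (illustrative): for floating-point element types the XOR is
// applied to the bit representation, which e.g. flips sign bits. Given a
// __vector_type_t<float, 4> __v:
//   __xor(__v, __vector_broadcast<4>(-0.f)) // negates all 4 elements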
1868 
1869 // }}}
1870 // __or{{{
1871 template <typename _TW>
1872  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1873  __or(_TW __a, _TW __b) noexcept
1874  {
1875  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1876  {
1877  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1878  _VectorTraitsImpl<_TW>>::value_type;
1879  if constexpr (is_floating_point_v<_Tp>)
1880  {
1881  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1882  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1883  | __vector_bitcast<_Ip>(__b));
1884  }
1885  else if constexpr (__is_vector_type_v<_TW>)
1886  return __a | __b;
1887  else
1888  return __a._M_data | __b._M_data;
1889  }
1890  else
1891  return __a | __b;
1892  }
1893 
1894 // }}}
1895 // __and{{{
1896 template <typename _TW>
1897  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1898  __and(_TW __a, _TW __b) noexcept
1899  {
1900  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1901  {
1902  using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1903  _VectorTraitsImpl<_TW>>::value_type;
1904  if constexpr (is_floating_point_v<_Tp>)
1905  {
1906  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1907  return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1908  & __vector_bitcast<_Ip>(__b));
1909  }
1910  else if constexpr (__is_vector_type_v<_TW>)
1911  return __a & __b;
1912  else
1913  return __a._M_data & __b._M_data;
1914  }
1915  else
1916  return __a & __b;
1917  }
1918 
1919 // }}}
1920 // __andnot{{{
1921 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1922 static constexpr struct
1923 {
1924  _GLIBCXX_SIMD_INTRINSIC __v4sf
1925  operator()(__v4sf __a, __v4sf __b) const noexcept
1926  { return __builtin_ia32_andnps(__a, __b); }
1927 
1928  _GLIBCXX_SIMD_INTRINSIC __v2df
1929  operator()(__v2df __a, __v2df __b) const noexcept
1930  { return __builtin_ia32_andnpd(__a, __b); }
1931 
1932  _GLIBCXX_SIMD_INTRINSIC __v2di
1933  operator()(__v2di __a, __v2di __b) const noexcept
1934  { return __builtin_ia32_pandn128(__a, __b); }
1935 
1936  _GLIBCXX_SIMD_INTRINSIC __v8sf
1937  operator()(__v8sf __a, __v8sf __b) const noexcept
1938  { return __builtin_ia32_andnps256(__a, __b); }
1939 
1940  _GLIBCXX_SIMD_INTRINSIC __v4df
1941  operator()(__v4df __a, __v4df __b) const noexcept
1942  { return __builtin_ia32_andnpd256(__a, __b); }
1943 
1944  _GLIBCXX_SIMD_INTRINSIC __v4di
1945  operator()(__v4di __a, __v4di __b) const noexcept
1946  {
1947  if constexpr (__have_avx2)
1948  return __builtin_ia32_andnotsi256(__a, __b);
1949  else
1950  return reinterpret_cast<__v4di>(
1951  __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
1952  reinterpret_cast<__v4df>(__b)));
1953  }
1954 
1955  _GLIBCXX_SIMD_INTRINSIC __v16sf
1956  operator()(__v16sf __a, __v16sf __b) const noexcept
1957  {
1958  if constexpr (__have_avx512dq)
1959  return _mm512_andnot_ps(__a, __b);
1960  else
1961  return reinterpret_cast<__v16sf>(
1962  _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1963  reinterpret_cast<__v8di>(__b)));
1964  }
1965 
1966  _GLIBCXX_SIMD_INTRINSIC __v8df
1967  operator()(__v8df __a, __v8df __b) const noexcept
1968  {
1969  if constexpr (__have_avx512dq)
1970  return _mm512_andnot_pd(__a, __b);
1971  else
1972  return reinterpret_cast<__v8df>(
1973  _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1974  reinterpret_cast<__v8di>(__b)));
1975  }
1976 
1977  _GLIBCXX_SIMD_INTRINSIC __v8di
1978  operator()(__v8di __a, __v8di __b) const noexcept
1979  { return _mm512_andnot_si512(__a, __b); }
1980 } _S_x86_andnot;
1981 #endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1982 
1983 template <typename _TW>
1984  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1985  __andnot(_TW __a, _TW __b) noexcept
1986  {
1987  if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1988  {
1989  using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1990  _VectorTraitsImpl<_TW>>;
1991  using _Tp = typename _TVT::value_type;
1992 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1993  if constexpr (sizeof(_TW) >= 16)
1994  {
1995  const auto __ai = __to_intrin(__a);
1996  const auto __bi = __to_intrin(__b);
1997  if (!__builtin_is_constant_evaluated()
1998  && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
1999  {
2000  const auto __r = _S_x86_andnot(__ai, __bi);
2001  if constexpr (is_convertible_v<decltype(__r), _TW>)
2002  return __r;
2003  else
2004  return reinterpret_cast<typename _TVT::type>(__r);
2005  }
2006  }
2007 #endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
2008  using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
2009  return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
2010  & __vector_bitcast<_Ip>(__b));
2011  }
2012  else
2013  return ~__a & __b;
2014  }
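// Example (illustrative): note the operand order, matching the x86 ANDN
// instructions — the *first* operand is the one that is complemented:
//   __andnot(__k, __x) // ~__k & __x, i.e. __x with the bits of __k cleared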
2015 
2016 // }}}
2017 // __not{{{
2018 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2019  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2020  __not(_Tp __a) noexcept
2021  {
2022  if constexpr (is_floating_point_v<typename _TVT::value_type>)
2023  return reinterpret_cast<typename _TVT::type>(
2024  ~__vector_bitcast<unsigned>(__a));
2025  else
2026  return ~__a;
2027  }
2028 
2029 // }}}
2030 // __concat{{{
2031 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
2032  typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size * 2>>
2033  constexpr _R
2034  __concat(_Tp a_, _Tp b_)
2035  {
2036 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2037  using _W
2038  = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
2039  conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
2040  long long, typename _TVT::value_type>>;
2041  constexpr int input_width = sizeof(_Tp) / sizeof(_W);
2042  const auto __a = __vector_bitcast<_W>(a_);
2043  const auto __b = __vector_bitcast<_W>(b_);
2044  using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
2045 #else
2046  constexpr int input_width = _TVT::_S_full_size;
2047  const _Tp& __a = a_;
2048  const _Tp& __b = b_;
2049  using _Up = _R;
2050 #endif
2051  if constexpr (input_width == 2)
2052  return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
2053  else if constexpr (input_width == 4)
2054  return reinterpret_cast<_R>(
2055  _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
2056  else if constexpr (input_width == 8)
2057  return reinterpret_cast<_R>(
2058  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
2059  __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
2060  else if constexpr (input_width == 16)
2061  return reinterpret_cast<_R>(
2062  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2063  __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2064  __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
2065  __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
2066  __b[12], __b[13], __b[14], __b[15]});
2067  else if constexpr (input_width == 32)
2068  return reinterpret_cast<_R>(
2069  _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
2070  __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
2071  __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
2072  __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
2073  __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
2074  __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
2075  __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
2076  __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
2077  __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
2078  __b[31]});
2079  }
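// Example (illustrative): given two __vector_type_t<float, 4> values __lo
// and __hi:
//   __concat(__lo, __hi) // 8-element vector {__lo[0..3], __hi[0..3]}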
2080 
2081 // }}}
2082 // __zero_extend {{{
2083 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2084  struct _ZeroExtendProxy
2085  {
2086  using value_type = typename _TVT::value_type;
2087  static constexpr size_t _Np = _TVT::_S_full_size;
2088  const _Tp __x;
2089 
2090  template <typename _To, typename _ToVT = _VectorTraits<_To>,
2091  typename
2092  = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
2093  _GLIBCXX_SIMD_INTRINSIC operator _To() const
2094  {
2095  constexpr size_t _ToN = _ToVT::_S_full_size;
2096  if constexpr (_ToN == _Np)
2097  return __x;
2098  else if constexpr (_ToN == 2 * _Np)
2099  {
2100 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2101  if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
2102  return __vector_bitcast<value_type>(
2103  _mm256_insertf128_ps(__m256(), __x, 0));
2104  else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
2105  return __vector_bitcast<value_type>(
2106  _mm256_insertf128_pd(__m256d(), __x, 0));
2107  else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
2108  return __vector_bitcast<value_type>(
2109  _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
2110  else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
2111  {
2112  if constexpr (__have_avx512dq)
2113  return __vector_bitcast<value_type>(
2114  _mm512_insertf32x8(__m512(), __x, 0));
2115  else
2116  return reinterpret_cast<__m512>(
2117  _mm512_insertf64x4(__m512d(),
2118  reinterpret_cast<__m256d>(__x), 0));
2119  }
2120  else if constexpr (__have_avx512f
2121  && _TVT::template _S_is<double, 4>)
2122  return __vector_bitcast<value_type>(
2123  _mm512_insertf64x4(__m512d(), __x, 0));
2124  else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
2125  return __vector_bitcast<value_type>(
2126  _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
2127 #endif
2128  return __concat(__x, _Tp());
2129  }
2130  else if constexpr (_ToN == 4 * _Np)
2131  {
2132 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2133  if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
2134  {
2135  return __vector_bitcast<value_type>(
2136  _mm512_insertf64x2(__m512d(), __x, 0));
2137  }
2138  else if constexpr (__have_avx512f
2139  && is_floating_point_v<value_type>)
2140  {
2141  return __vector_bitcast<value_type>(
2142  _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
2143  0));
2144  }
2145  else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
2146  {
2147  return __vector_bitcast<value_type>(
2148  _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
2149  }
2150 #endif
2151  return __concat(__concat(__x, _Tp()),
2152  __vector_type_t<value_type, _Np * 2>());
2153  }
2154  else if constexpr (_ToN == 8 * _Np)
2155  return __concat(operator __vector_type_t<value_type, _Np * 4>(),
2156  __vector_type_t<value_type, _Np * 4>());
2157  else if constexpr (_ToN == 16 * _Np)
2158  return __concat(operator __vector_type_t<value_type, _Np * 8>(),
2159  __vector_type_t<value_type, _Np * 8>());
2160  else
2161  __assert_unreachable<_Tp>();
2162  }
2163  };
2164 
2165 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2166  _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
2167  __zero_extend(_Tp __x)
2168  { return {__x}; }
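// Example (illustrative): the returned proxy converts to any wider vector
// with the same value_type, zero-filling the new elements. Given a
// __vector_type_t<float, 4> __v4f:
//   __vector_type_t<float, 8> __w = __zero_extend(__v4f); // high half zero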
2169 
2170 // }}}
2171 // __extract<_Np, By>{{{
2172 template <int _Offset,
2173  int _SplitBy,
2174  typename _Tp,
2175  typename _TVT = _VectorTraits<_Tp>,
2176  typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size / _SplitBy>>
2177  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2178  __extract(_Tp __in)
2179  {
2180  using value_type = typename _TVT::value_type;
2181 #if _GLIBCXX_SIMD_X86INTRIN // {{{
2182  if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
2183  {
2184  if constexpr (__have_avx512dq && is_same_v<double, value_type>)
2185  return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
2186  else if constexpr (is_floating_point_v<value_type>)
2187  return __vector_bitcast<value_type>(
2188  _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
2189  else
2190  return reinterpret_cast<_R>(
2191  _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
2192  _Offset));
2193  }
2194  else
2195 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
2196  {
2197 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2198  using _W = conditional_t<
2199  is_floating_point_v<value_type>, double,
2200  conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
2201  static_assert(sizeof(_R) % sizeof(_W) == 0);
2202  constexpr int __return_width = sizeof(_R) / sizeof(_W);
2203  using _Up = __vector_type_t<_W, __return_width>;
2204  const auto __x = __vector_bitcast<_W>(__in);
2205 #else
2206  constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
2207  using _Up = _R;
2208  const __vector_type_t<value_type, _TVT::_S_full_size>& __x
2209  = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
2210 #endif
2211  constexpr int _O = _Offset * __return_width;
2212  return __call_with_subscripts<__return_width, _O>(
2213  __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2214  return reinterpret_cast<_R>(_Up{__entries...});
2215  });
2216  }
2217  }
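// Example (illustrative): splits the input into _SplitBy equal parts and
// returns part _Offset. For an 8-element vector __v:
//   __extract<1, 2>(__v) // 4-element vector {__v[4], __v[5], __v[6], __v[7]}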
2218 
2219 // }}}
2220 // __lo/__hi64[z]{{{
2221 template <typename _Tp,
2222  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2223  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2224  __lo64(_Tp __x)
2225  {
2226  _R __r{};
2227  __builtin_memcpy(&__r, &__x, 8);
2228  return __r;
2229  }
2230 
2231 template <typename _Tp,
2232  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2233  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2234  __hi64(_Tp __x)
2235  {
2236  static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
2237  _R __r{};
2238  __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2239  return __r;
2240  }
2241 
2242 template <typename _Tp,
2243  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2244  _GLIBCXX_SIMD_INTRINSIC constexpr _R
2245  __hi64z([[maybe_unused]] _Tp __x)
2246  {
2247  _R __r{};
2248  if constexpr (sizeof(_Tp) == 16)
2249  __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2250  return __r;
2251  }
2252 
2253 // }}}
2254 // __lo/__hi128{{{
2255 template <typename _Tp>
2256  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2257  __lo128(_Tp __x)
2258  { return __extract<0, sizeof(_Tp) / 16>(__x); }
2259 
2260 template <typename _Tp>
2261  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2262  __hi128(_Tp __x)
2263  {
2264  static_assert(sizeof(__x) == 32);
2265  return __extract<1, 2>(__x);
2266  }
2267 
2268 // }}}
2269 // __lo/__hi256{{{
2270 template <typename _Tp>
2271  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2272  __lo256(_Tp __x)
2273  {
2274  static_assert(sizeof(__x) == 64);
2275  return __extract<0, 2>(__x);
2276  }
2277 
2278 template <typename _Tp>
2279  _GLIBCXX_SIMD_INTRINSIC constexpr auto
2280  __hi256(_Tp __x)
2281  {
2282  static_assert(sizeof(__x) == 64);
2283  return __extract<1, 2>(__x);
2284  }
2285 
2286 // }}}
2287 // __auto_bitcast{{{
2288 template <typename _Tp>
2289  struct _AutoCast
2290  {
2291  static_assert(__is_vector_type_v<_Tp>);
2292 
2293  const _Tp __x;
2294 
2295  template <typename _Up, typename _UVT = _VectorTraits<_Up>>
2296  _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
2297  { return __intrin_bitcast<typename _UVT::type>(__x); }
2298  };
2299 
2300 template <typename _Tp>
2301  _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
2302  __auto_bitcast(const _Tp& __x)
2303  { return {__x}; }
2304 
2305 template <typename _Tp, size_t _Np>
2306  _GLIBCXX_SIMD_INTRINSIC constexpr
2307  _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
2308  __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
2309  { return {__x._M_data}; }
2310 
2311 // }}}
2312 // ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2313 
2314 #if _GLIBCXX_SIMD_HAVE_SSE_ABI
2315 // __bool_storage_member_type{{{
2316 #if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
2317 template <size_t _Size>
2318  struct __bool_storage_member_type
2319  {
2320  static_assert((_Size & (_Size - 1)) != 0,
2321  "This trait may only be used for non-power-of-2 sizes. "
2322  "Power-of-2 sizes must be specialized.");
2323  using type =
2324  typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
2325  };
2326 
2327 template <>
2328  struct __bool_storage_member_type<1> { using type = bool; };
2329 
2330 template <>
2331  struct __bool_storage_member_type<2> { using type = __mmask8; };
2332 
2333 template <>
2334  struct __bool_storage_member_type<4> { using type = __mmask8; };
2335 
2336 template <>
2337  struct __bool_storage_member_type<8> { using type = __mmask8; };
2338 
2339 template <>
2340  struct __bool_storage_member_type<16> { using type = __mmask16; };
2341 
2342 template <>
2343  struct __bool_storage_member_type<32> { using type = __mmask32; };
2344 
2345 template <>
2346  struct __bool_storage_member_type<64> { using type = __mmask64; };
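// Example (illustrative): non-power-of-2 sizes round up via __bit_ceil,
// e.g. __bool_storage_member_type<12>::type is __mmask16.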
2347 #endif // _GLIBCXX_SIMD_HAVE_AVX512F
2348 
2349 // }}}
2350 // __intrinsic_type (x86){{{
2351 // the following excludes bool via __is_vectorizable
2352 #if _GLIBCXX_SIMD_HAVE_SSE
2353 template <typename _Tp, size_t _Bytes>
2354  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
2355  {
2356  static_assert(!is_same_v<_Tp, long double>,
2357  "no __intrinsic_type support for long double on x86");
2358 
2359  static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16 : _Bytes <= 32 ? 32 : 64;
2360 
2361  using type [[__gnu__::__vector_size__(_S_VBytes)]]
2362  = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
2363  };
2364 #endif // _GLIBCXX_SIMD_HAVE_SSE
2365 
2366 // }}}
2367 #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2368 // __intrinsic_type (ARM){{{
2369 #if _GLIBCXX_SIMD_HAVE_NEON
2370 template <>
2371  struct __intrinsic_type<float, 8, void>
2372  { using type = float32x2_t; };
2373 
2374 template <>
2375  struct __intrinsic_type<float, 16, void>
2376  { using type = float32x4_t; };
2377 
2378 template <>
2379  struct __intrinsic_type<double, 8, void>
2380  {
2381 #if _GLIBCXX_SIMD_HAVE_NEON_A64
2382  using type = float64x1_t;
2383 #endif
2384  };
2385 
2386 template <>
2387  struct __intrinsic_type<double, 16, void>
2388  {
2389 #if _GLIBCXX_SIMD_HAVE_NEON_A64
2390  using type = float64x2_t;
2391 #endif
2392  };
2393 
2394 #define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
2395 template <> \
2396  struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
2397  _Np * _Bits / 8, void> \
2398  { using type = int##_Bits##x##_Np##_t; }; \
2399 template <> \
2400  struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \
2401  _Np * _Bits / 8, void> \
2402  { using type = uint##_Bits##x##_Np##_t; }
2403 _GLIBCXX_SIMD_ARM_INTRIN(8, 8);
2404 _GLIBCXX_SIMD_ARM_INTRIN(8, 16);
2405 _GLIBCXX_SIMD_ARM_INTRIN(16, 4);
2406 _GLIBCXX_SIMD_ARM_INTRIN(16, 8);
2407 _GLIBCXX_SIMD_ARM_INTRIN(32, 2);
2408 _GLIBCXX_SIMD_ARM_INTRIN(32, 4);
2409 _GLIBCXX_SIMD_ARM_INTRIN(64, 1);
2410 _GLIBCXX_SIMD_ARM_INTRIN(64, 2);
2411 #undef _GLIBCXX_SIMD_ARM_INTRIN
2412 
2413 template <typename _Tp, size_t _Bytes>
2414  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2415  {
2416  static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
2417 
2418  using _Ip = __int_for_sizeof_t<_Tp>;
2419 
2420  using _Up = conditional_t<
2421  is_floating_point_v<_Tp>, _Tp,
2422  conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
2423 
2424  static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
2425  "should use explicit specialization above");
2426 
2427  using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
2428  };
2429 #endif // _GLIBCXX_SIMD_HAVE_NEON
2430 
2431 // }}}
2432 // __intrinsic_type (PPC){{{
2433 #ifdef __ALTIVEC__
2434 template <typename _Tp>
2435  struct __intrinsic_type_impl;
2436 
2437 #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
2438  template <> \
2439  struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
2440 _GLIBCXX_SIMD_PPC_INTRIN(float);
2441 #ifdef __VSX__
2442 _GLIBCXX_SIMD_PPC_INTRIN(double);
2443 #endif
2444 _GLIBCXX_SIMD_PPC_INTRIN(signed char);
2445 _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
2446 _GLIBCXX_SIMD_PPC_INTRIN(signed short);
2447 _GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
2448 _GLIBCXX_SIMD_PPC_INTRIN(signed int);
2449 _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
2450 #if defined __VSX__ || __SIZEOF_LONG__ == 4
2451 _GLIBCXX_SIMD_PPC_INTRIN(signed long);
2452 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
2453 #endif
2454 #ifdef __VSX__
2455 _GLIBCXX_SIMD_PPC_INTRIN(signed long long);
2456 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
2457 #endif
2458 #undef _GLIBCXX_SIMD_PPC_INTRIN
2459 
2460 template <typename _Tp, size_t _Bytes>
2461  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2462  {
2463  static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
2464 
2465  // allow _Tp == long double with -mlong-double-64
2466  static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
2467  "no __intrinsic_type support for 128-bit floating point on PowerPC");
2468 
2469 #ifndef __VSX__
2470  static_assert(!(is_same_v<_Tp, double>
2471  || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
2472  "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
2473 #endif
2474 
2475  static constexpr auto __element_type()
2476  {
2477  if constexpr (is_floating_point_v<_Tp>)
2478  {
2479  if constexpr (_S_is_ldouble)
2480  return double {};
2481  else
2482  return _Tp {};
2483  }
2484  else if constexpr (is_signed_v<_Tp>)
2485  {
2486  if constexpr (sizeof(_Tp) == sizeof(_SChar))
2487  return _SChar {};
2488  else if constexpr (sizeof(_Tp) == sizeof(short))
2489  return short {};
2490  else if constexpr (sizeof(_Tp) == sizeof(int))
2491  return int {};
2492  else if constexpr (sizeof(_Tp) == sizeof(_LLong))
2493  return _LLong {};
2494  }
2495  else
2496  {
2497  if constexpr (sizeof(_Tp) == sizeof(_UChar))
2498  return _UChar {};
2499  else if constexpr (sizeof(_Tp) == sizeof(_UShort))
2500  return _UShort {};
2501  else if constexpr (sizeof(_Tp) == sizeof(_UInt))
2502  return _UInt {};
2503  else if constexpr (sizeof(_Tp) == sizeof(_ULLong))
2504  return _ULLong {};
2505  }
2506  }
2507 
2508  using type = typename __intrinsic_type_impl<decltype(__element_type())>::type;
2509  };
2510 #endif // __ALTIVEC__
2511 
2512 // }}}
2513 // _SimdWrapper<bool>{{{1
2514 template <size_t _Width>
2515  struct _SimdWrapper<bool, _Width,
2516  void_t<typename __bool_storage_member_type<_Width>::type>>
2517  {
2518  using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2519  using value_type = bool;
2520 
2521  static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2522 
2523  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2524  __as_full_vector() const
2525  { return _M_data; }
2526 
2527  _GLIBCXX_SIMD_INTRINSIC constexpr
2528  _SimdWrapper() = default;
2529 
2530  _GLIBCXX_SIMD_INTRINSIC constexpr
2531  _SimdWrapper(_BuiltinType __k) : _M_data(__k) {}
2532 
2533  _GLIBCXX_SIMD_INTRINSIC
2534  operator const _BuiltinType&() const
2535  { return _M_data; }
2536 
2537  _GLIBCXX_SIMD_INTRINSIC
2538  operator _BuiltinType&()
2539  { return _M_data; }
2540 
2541  _GLIBCXX_SIMD_INTRINSIC _BuiltinType
2542  __intrin() const
2543  { return _M_data; }
2544 
2545  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2546  operator[](size_t __i) const
2547  { return _M_data & (_BuiltinType(1) << __i); }
2548 
2549  template <size_t __i>
2550  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2551  operator[](_SizeConstant<__i>) const
2552  { return _M_data & (_BuiltinType(1) << __i); }
2553 
2554  _GLIBCXX_SIMD_INTRINSIC constexpr void
2555  _M_set(size_t __i, value_type __x)
2556  {
2557  if (__x)
2558  _M_data |= (_BuiltinType(1) << __i);
2559  else
2560  _M_data &= ~(_BuiltinType(1) << __i);
2561  }
2562 
2563  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2564  _M_is_constprop() const
2565  { return __builtin_constant_p(_M_data); }
2566 
2567  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2568  _M_is_constprop_none_of() const
2569  {
2570  if (__builtin_constant_p(_M_data))
2571  {
2572  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2573  constexpr _BuiltinType __active_mask
2574  = ~_BuiltinType() >> (__nbits - _Width);
2575  return (_M_data & __active_mask) == 0;
2576  }
2577  return false;
2578  }
2579 
2580  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2581  _M_is_constprop_all_of() const
2582  {
2583  if (__builtin_constant_p(_M_data))
2584  {
2585  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2586  constexpr _BuiltinType __active_mask
2587  = ~_BuiltinType() >> (__nbits - _Width);
2588  return (_M_data & __active_mask) == __active_mask;
2589  }
2590  return false;
2591  }
2592 
2593  _BuiltinType _M_data;
2594  };
2595 
2596 // _SimdWrapperBase{{{1
2597 template <bool _MustZeroInitPadding, typename _BuiltinType>
2598  struct _SimdWrapperBase;
2599 
2600 template <typename _BuiltinType>
2601  struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
2602  {
2603  _GLIBCXX_SIMD_INTRINSIC constexpr
2604  _SimdWrapperBase() = default;
2605 
2606  _GLIBCXX_SIMD_INTRINSIC constexpr
2607  _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2608 
2609  _BuiltinType _M_data;
2610  };
2611 
2612 template <typename _BuiltinType>
2613  struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
2614  // never become SNaN
2615  {
2616  _GLIBCXX_SIMD_INTRINSIC constexpr
2617  _SimdWrapperBase() : _M_data() {}
2618 
2619  _GLIBCXX_SIMD_INTRINSIC constexpr
2620  _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2621 
2622  _BuiltinType _M_data;
2623  };
2624 
2625 // }}}
2626 // _SimdWrapper{{{
2627 template <typename _Tp, size_t _Width>
2628  struct _SimdWrapper<
2629  _Tp, _Width,
2630  void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2631  : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2632  && sizeof(_Tp) * _Width
2633  == sizeof(__vector_type_t<_Tp, _Width>),
2634  __vector_type_t<_Tp, _Width>>
2635  {
2636  using _Base
2637  = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2638  && sizeof(_Tp) * _Width
2639  == sizeof(__vector_type_t<_Tp, _Width>),
2640  __vector_type_t<_Tp, _Width>>;
2641 
2642  static_assert(__is_vectorizable_v<_Tp>);
2643  static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then
2644  static_assert(_Width >= 2); // a width of 1 makes no sense, use _Tp directly instead
2645  using _BuiltinType = __vector_type_t<_Tp, _Width>;
2646  using value_type = _Tp;
2647 
2648  static inline constexpr size_t _S_full_size
2649  = sizeof(_BuiltinType) / sizeof(value_type);
2650  static inline constexpr int _S_size = _Width;
2651  static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2652 
2653  using _Base::_M_data;
2654 
2655  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2656  __as_full_vector() const
2657  { return _M_data; }
2658 
2659  _GLIBCXX_SIMD_INTRINSIC constexpr
2660  _SimdWrapper(initializer_list<_Tp> __init)
2661  : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2662  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2663  return __init.begin()[__i.value];
2664  })) {}
2665 
2666  _GLIBCXX_SIMD_INTRINSIC constexpr
2667  _SimdWrapper() = default;
2668 
2669  _GLIBCXX_SIMD_INTRINSIC constexpr
2670  _SimdWrapper(const _SimdWrapper&) = default;
2671 
2672  _GLIBCXX_SIMD_INTRINSIC constexpr
2673  _SimdWrapper(_SimdWrapper&&) = default;
2674 
2675  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2676  operator=(const _SimdWrapper&) = default;
2677 
2678  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2679  operator=(_SimdWrapper&&) = default;
2680 
2681  template <typename _V, typename = enable_if_t<disjunction_v<
2682  is_same<_V, __vector_type_t<_Tp, _Width>>,
2683  is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>>
2684  _GLIBCXX_SIMD_INTRINSIC constexpr
2685  _SimdWrapper(_V __x)
2686  // __vector_bitcast can convert e.g. __m128 to __vector(2) float
2687  : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2688 
2689  template <typename... _As,
2690  typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2691  && sizeof...(_As) <= _Width)>>
2692  _GLIBCXX_SIMD_INTRINSIC constexpr
2693  operator _SimdTuple<_Tp, _As...>() const
2694  {
2695  return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
2696  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
2697  { return _M_data[int(__i)]; });
2698  }
2699 
2700  _GLIBCXX_SIMD_INTRINSIC constexpr
2701  operator const _BuiltinType&() const
2702  { return _M_data; }
2703 
2704  _GLIBCXX_SIMD_INTRINSIC constexpr
2705  operator _BuiltinType&()
2706  { return _M_data; }
2707 
2708  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2709  operator[](size_t __i) const
2710  { return _M_data[__i]; }
2711 
2712  template <size_t __i>
2713  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2714  operator[](_SizeConstant<__i>) const
2715  { return _M_data[__i]; }
2716 
2717  _GLIBCXX_SIMD_INTRINSIC constexpr void
2718  _M_set(size_t __i, _Tp __x)
2719  {
2720  if (__builtin_is_constant_evaluated())
2721  _M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
2722  return __j == __i ? __x : _M_data[__j()];
2723  });
2724  else
2725  _M_data[__i] = __x;
2726  }
2727 
2728  _GLIBCXX_SIMD_INTRINSIC
2729  constexpr bool
2730  _M_is_constprop() const
2731  { return __builtin_constant_p(_M_data); }
2732 
2733  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2734  _M_is_constprop_none_of() const
2735  {
2736  if (__builtin_constant_p(_M_data))
2737  {
2738  bool __r = true;
2739  if constexpr (is_floating_point_v<_Tp>)
2740  {
2741  using _Ip = __int_for_sizeof_t<_Tp>;
2742  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2743  __execute_n_times<_Width>(
2744  [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2745  }
2746  else
2747  __execute_n_times<_Width>(
2748  [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2749  if (__builtin_constant_p(__r))
2750  return __r;
2751  }
2752  return false;
2753  }
2754 
2755  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2756  _M_is_constprop_all_of() const
2757  {
2758  if (__builtin_constant_p(_M_data))
2759  {
2760  bool __r = true;
2761  if constexpr (is_floating_point_v<_Tp>)
2762  {
2763  using _Ip = __int_for_sizeof_t<_Tp>;
2764  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2765  __execute_n_times<_Width>(
2766  [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2767  }
2768  else
2769  __execute_n_times<_Width>(
2770  [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2771  if (__builtin_constant_p(__r))
2772  return __r;
2773  }
2774  return false;
2775  }
2776  };
2777 
2778 // }}}
2779 
2780 // __vectorized_sizeof {{{
2781 template <typename _Tp>
2782  constexpr size_t
2783  __vectorized_sizeof()
2784  {
2785  if constexpr (!__is_vectorizable_v<_Tp>)
2786  return 0;
2787 
2788  if constexpr (sizeof(_Tp) <= 8)
2789  {
2790  // X86:
2791  if constexpr (__have_avx512bw)
2792  return 64;
2793  if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
2794  return 64;
2795  if constexpr (__have_avx2)
2796  return 32;
2797  if constexpr (__have_avx && is_floating_point_v<_Tp>)
2798  return 32;
2799  if constexpr (__have_sse2)
2800  return 16;
2801  if constexpr (__have_sse && is_same_v<_Tp, float>)
2802  return 16;
2803  /* The following is too much trouble because of mixed MMX and x87 code.
2804  * While nothing here explicitly uses MMX instructions or registers,
2805  * they are still emitted, but no EMMS cleanup is done.
2806  if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
2807  return 8;
2808  */
2809 
2810  // PowerPC:
2811  if constexpr (__have_power8vec
2812  || (__have_power_vmx && (sizeof(_Tp) < 8))
2813  || (__have_power_vsx && is_floating_point_v<_Tp>) )
2814  return 16;
2815 
2816  // ARM:
2817  if constexpr (__have_neon_a64)
2818  return 16;
2819  if constexpr (__have_neon_a32 and (not is_floating_point_v<_Tp>
2820  or is_same_v<_Tp, float>))
2821  return 16;
2822  if constexpr (__have_neon
2823  && sizeof(_Tp) < 8
2824  // Only allow fp if the user allows non-IEC559 fp (e.g.
2825  // via -ffast-math). ARMv7 NEON fp is not conforming to
2826  // IEC559.
2827  && (__support_neon_float || !is_floating_point_v<_Tp>))
2828  return 16;
2829  }
2830 
2831  return sizeof(_Tp);
2832  }
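// Example (illustrative): with AVX2 (but no AVX-512) enabled,
// __vectorized_sizeof<float>() is 32, so simd_abi::native<float> below
// selects _VecBuiltin<32>, i.e. 8 floats per object; without any vector
// ISA it returns sizeof(float) and native<float> is simd_abi::scalar.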
2833 
2834 // }}}
2835 namespace simd_abi {
2836 // most of simd_abi is defined in simd_detail.h
2837 template <typename _Tp>
2838  inline constexpr int max_fixed_size
2839  = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;
2840 
2841 // compatible {{{
2842 #if defined __x86_64__ || defined __aarch64__
2843 template <typename _Tp>
2844  using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
2845 #elif defined __ARM_NEON
2846 // FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
2847 // ABI?)
2848 template <typename _Tp>
2849  using compatible
2850  = conditional_t<(sizeof(_Tp) < 8
2851  && (__support_neon_float || !is_floating_point_v<_Tp>)),
2852  _VecBuiltin<16>, scalar>;
2853 #else
2854 template <typename>
2855  using compatible = scalar;
2856 #endif
2857 
2858 // }}}
2859 // native {{{
2860 template <typename _Tp>
2861  constexpr auto
2862  __determine_native_abi()
2863  {
2864  constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
2865  if constexpr (__bytes == sizeof(_Tp))
2866  return static_cast<scalar*>(nullptr);
2867  else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
2868  return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
2869  else
2870  return static_cast<_VecBuiltin<__bytes>*>(nullptr);
2871  }
2872 
2873 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
2874  using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
2875 
2876 // }}}
2877 // __default_abi {{{
2878 #if defined _GLIBCXX_SIMD_DEFAULT_ABI
2879 template <typename _Tp>
2880  using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
2881 #else
2882 template <typename _Tp>
2883  using __default_abi = compatible<_Tp>;
2884 #endif
2885 
2886 // }}}
2887 } // namespace simd_abi
2888 
2889 // traits {{{1
2890 template <typename _Tp>
2891  struct is_simd_flag_type
2892  : false_type
2893  {};
2894 
2895 template <>
2896  struct is_simd_flag_type<element_aligned_tag>
2897  : true_type
2898  {};
2899 
2900 template <>
2901  struct is_simd_flag_type<vector_aligned_tag>
2902  : true_type
2903  {};
2904 
2905 template <size_t _Np>
2906  struct is_simd_flag_type<overaligned_tag<_Np>>
2907  : __bool_constant<(_Np > 0) and __has_single_bit(_Np)>
2908  {};
2909 
2910 template <typename _Tp>
2911  inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;
2912 
2913 template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>>
2914  using _IsSimdFlagType = _Tp;
2915 
2916 // is_abi_tag {{{2
2917 template <typename _Tp, typename = void_t<>>
2918  struct is_abi_tag : false_type {};
2919 
2920 template <typename _Tp>
2921  struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
2922  : public _Tp::_IsValidAbiTag {};
2923 
2924 template <typename _Tp>
2925  inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
2926 
2927 // is_simd(_mask) {{{2
2928 template <typename _Tp>
2929  struct is_simd : public false_type {};
2930 
2931 template <typename _Tp>
2932  inline constexpr bool is_simd_v = is_simd<_Tp>::value;
2933 
2934 template <typename _Tp>
2935  struct is_simd_mask : public false_type {};
2936 
2937 template <typename _Tp>
2938 inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
2939 
2940 // simd_size {{{2
2941 template <typename _Tp, typename _Abi, typename = void>
2942  struct __simd_size_impl {};
2943 
2944 template <typename _Tp, typename _Abi>
2945  struct __simd_size_impl<
2946  _Tp, _Abi,
2947  enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
2948  : _SizeConstant<_Abi::template _S_size<_Tp>> {};
2949 
2950 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2951  struct simd_size : __simd_size_impl<_Tp, _Abi> {};
2952 
2953 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2954  inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
2955 
2956 // simd_abi::deduce {{{2
2957 template <typename _Tp, size_t _Np, typename = void>
2958  struct __deduce_impl;
2959 
2960 namespace simd_abi {
2961 /**
2962  * @tparam _Tp The requested `value_type` for the elements.
2963  * @tparam _Np The requested number of elements.
2964  * @tparam _Abis This parameter is ignored, since this implementation cannot
2965  * make any use of it. Either a good native ABI is matched and used as `type`
2966  * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
2967  * the best matching native ABIs.
2968  */
2969 template <typename _Tp, size_t _Np, typename...>
2970  struct deduce : __deduce_impl<_Tp, _Np> {};
2971 
2972 template <typename _Tp, size_t _Np, typename... _Abis>
2973  using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
2974 } // namespace simd_abi
2975 
2976 // }}}2
2977 // rebind_simd {{{2
2978 template <typename _Tp, typename _V, typename = void>
2979  struct rebind_simd;
2980 
2981 template <typename _Tp, typename _Up, typename _Abi>
2982  struct rebind_simd<_Tp, simd<_Up, _Abi>,
2983  void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2984  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
2985 
2986 template <typename _Tp, typename _Up, typename _Abi>
2987  struct rebind_simd<_Tp, simd_mask<_Up, _Abi>,
2988  void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2989  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
2990 
2991 template <typename _Tp, typename _V>
2992  using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
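// Example (illustrative): rebind keeps the element count and deduces a
// suitable ABI tag for the new value_type. E.g.
// rebind_simd_t<double, native_simd<float>> is a simd<double, _Ap> with
// native_simd<float>::size() elements, for some deduced ABI tag _Ap.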
2993 
2994 // resize_simd {{{2
2995 template <int _Np, typename _V, typename = void>
2996  struct resize_simd;
2997 
2998 template <int _Np, typename _Tp, typename _Abi>
2999  struct resize_simd<_Np, simd<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
3000  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
3001 
3002 template <int _Np, typename _Tp, typename _Abi>
3003  struct resize_simd<_Np, simd_mask<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
3004  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
3005 
3006 template <int _Np, typename _V>
3007  using resize_simd_t = typename resize_simd<_Np, _V>::type;
3008 
3009 // }}}2
3010 // memory_alignment {{{2
3011 template <typename _Tp, typename _Up = typename _Tp::value_type>
3012  struct memory_alignment
3013  : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
3014 
3015 template <typename _Tp, typename _Up = typename _Tp::value_type>
3016  inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
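// Example (illustrative): allocate storage suitable for vector_aligned
// loads and stores:
//   alignas(memory_alignment_v<native_simd<float>>)
//     float __mem[native_simd<float>::size()];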
3017 
3018 // class template simd [simd] {{{1
3019 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3020  class simd;
3021 
3022 template <typename _Tp, typename _Abi>
3023  struct is_simd<simd<_Tp, _Abi>> : public true_type {};
3024 
3025 template <typename _Tp>
3026  using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
3027 
3028 template <typename _Tp, int _Np>
3029  using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
3030 
3031 template <typename _Tp, size_t _Np>
3032  using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3033 
3034 // class template simd_mask [simd_mask] {{{1
3035 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3036  class simd_mask;
3037 
3038 template <typename _Tp, typename _Abi>
3039  struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
3040 
3041 template <typename _Tp>
3042  using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
3043 
3044 template <typename _Tp, int _Np>
3045  using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
3046 
3047 template <typename _Tp, size_t _Np>
3048  using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3049 
3050 // casts [simd.casts] {{{1
3051 // static_simd_cast {{{2
3052 template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, typename = void>
3053  struct __static_simd_cast_return_type;
3054 
3055 template <typename _Tp, typename _A0, typename _Up, typename _Ap>
3056  struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, void>
3057  : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
3058 
3059 template <typename _Tp, typename _Up, typename _Ap>
3060  struct __static_simd_cast_return_type<
3061  _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
3062  { using type = _Tp; };
3063 
3064 template <typename _Tp, typename _Ap>
3065  struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
3066 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3067  enable_if_t<__is_vectorizable_v<_Tp>>
3068 #else
3069  void
3070 #endif
3071  >
3072  { using type = simd<_Tp, _Ap>; };
3073 
3074 template <typename _Tp, typename = void>
3075  struct __safe_make_signed { using type = _Tp; };
3076 
3077 template <typename _Tp>
3078  struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
3079  {
3080  // the extra make_unsigned_t is because of PR85951
3081  using type = make_signed_t<make_unsigned_t<_Tp>>;
3082  };
3083 
3084 template <typename _Tp>
3085  using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
3086 
3087 template <typename _Tp, typename _Up, typename _Ap>
3088  struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
3089 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3090  enable_if_t<__is_vectorizable_v<_Tp>>
3091 #else
3092  void
3093 #endif
3094  >
3095  {
3096  using type = conditional_t<
3097  (is_integral_v<_Up> && is_integral_v<_Tp> &&
3098 #ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
3099  is_signed_v<_Up> != is_signed_v<_Tp> &&
3100 #endif
3101  is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
3102  simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
3103  };
3104 
3105 template <typename _Tp, typename _Up, typename _Ap,
3106  typename _R
3107  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3108  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
3109  static_simd_cast(const simd<_Up, _Ap>& __x)
3110  {
3111  if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
3112  return __x;
3113  else
3114  {
3115  _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
3116  __c;
3117  return _R(__private_init, __c(__data(__x)));
3118  }
3119  }
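// Example (illustrative): element-wise static_cast with unchanged size.
// Given native_simd<float> __x:
//   auto __i = static_simd_cast<int>(__x);      // __x.size() ints
//   auto __u = static_simd_cast<unsigned>(__i); // same ABI: only the
//                                               // signedness changes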
3120 
3121 namespace __proposed {
3122 template <typename _Tp, typename _Up, typename _Ap,
3123  typename _R
3124  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3125  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
3126  static_simd_cast(const simd_mask<_Up, _Ap>& __x)
3127  {
3128  using _RM = typename _R::mask_type;
3129  return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
3130  typename _RM::simd_type::value_type>(__x)};
3131  }
3132 
3133 template <typename _To, typename _Up, typename _Abi>
3134  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3135  _To
3136  simd_bit_cast(const simd<_Up, _Abi>& __x)
3137  {
3138  using _Tp = typename _To::value_type;
3139  using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3140  using _From = simd<_Up, _Abi>;
3141  using _FromMember = typename _SimdTraits<_Up, _Abi>::_SimdMember;
3142  // with concepts, the following should be constraints
3143  static_assert(sizeof(_To) == sizeof(_From));
3144  static_assert(is_trivially_copyable_v<_Tp> && is_trivially_copyable_v<_Up>);
3145  static_assert(is_trivially_copyable_v<_ToMember> && is_trivially_copyable_v<_FromMember>);
3146 #if __has_builtin(__builtin_bit_cast)
3147  return {__private_init, __builtin_bit_cast(_ToMember, __data(__x))};
3148 #else
3149  return {__private_init, __bit_cast<_ToMember>(__data(__x))};
3150 #endif
3151  }
3152 
3153 template <typename _To, typename _Up, typename _Abi>
3154  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3155  _To
3156  simd_bit_cast(const simd_mask<_Up, _Abi>& __x)
3157  {
3158  using _From = simd_mask<_Up, _Abi>;
3159  static_assert(sizeof(_To) == sizeof(_From));
3160  static_assert(is_trivially_copyable_v<_From>);
3161  // _To can be simd<T, A>, specifically simd<T, fixed_size<N>> in which case _To is not trivially
3162  // copyable.
3163  if constexpr (is_simd_v<_To>)
3164  {
3165  using _Tp = typename _To::value_type;
3166  using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3167  static_assert(is_trivially_copyable_v<_ToMember>);
3168 #if __has_builtin(__builtin_bit_cast)
3169  return {__private_init, __builtin_bit_cast(_ToMember, __x)};
3170 #else
3171  return {__private_init, __bit_cast<_ToMember>(__x)};
3172 #endif
3173  }
3174  else
3175  {
3176  static_assert(is_trivially_copyable_v<_To>);
3177 #if __has_builtin(__builtin_bit_cast)
3178  return __builtin_bit_cast(_To, __x);
3179 #else
3180  return __bit_cast<_To>(__x);
3181 #endif
3182  }
3183  }
3184 } // namespace __proposed
3185 
3186 // simd_cast {{{2
3187 template <typename _Tp, typename _Up, typename _Ap,
3188  typename _To = __value_type_or_identity_t<_Tp>>
3189  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3190  simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
3191  -> decltype(static_simd_cast<_Tp>(__x))
3192  { return static_simd_cast<_Tp>(__x); }
3193 
3194 namespace __proposed {
3195 template <typename _Tp, typename _Up, typename _Ap,
3196  typename _To = __value_type_or_identity_t<_Tp>>
3197  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3198  simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
3199  -> decltype(static_simd_cast<_Tp>(__x))
3200  { return static_simd_cast<_Tp>(__x); }
3201 } // namespace __proposed
3202 
3203 // }}}2
3204 // resizing_simd_cast {{{
3205 namespace __proposed {
3206 /* Proposed spec:
3207 
3208 template <class T, class U, class Abi>
3209 T resizing_simd_cast(const simd<U, Abi>& x)
3210 
3211 p1 Constraints:
3212  - is_simd_v<T> is true and
3213  - T::value_type is the same type as U
3214 
3215 p2 Returns:
3216  A simd object with the i^th element initialized to x[i] for all i in the
3217  range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3218  than simd_size_v<U, Abi>, the remaining elements are value-initialized.
3219 
3220 template <class T, class U, class Abi>
3221 T resizing_simd_cast(const simd_mask<U, Abi>& x)
3222 
3223 p1 Constraints: is_simd_mask_v<T> is true
3224 
3225 p2 Returns:
3226  A simd_mask object with the i^th element initialized to x[i] for all i in
3227  the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3228  than simd_size_v<U, Abi>, the remaining elements are initialized to false.
3229 
3230  */
3231 
3232 template <typename _Tp, typename _Up, typename _Ap>
3233  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
3234  conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
3235  resizing_simd_cast(const simd<_Up, _Ap>& __x)
3236  {
3237  if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
3238  return __x;
3239  else if (__builtin_is_constant_evaluated())
3240  return _Tp([&](auto __i) constexpr {
3241  return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
3242  });
3243  else if constexpr (simd_size_v<_Up, _Ap> == 1)
3244  {
3245  _Tp __r{};
3246  __r[0] = __x[0];
3247  return __r;
3248  }
3249  else if constexpr (_Tp::size() == 1)
3250  return __x[0];
3251  else if constexpr (sizeof(_Tp) == sizeof(__x)
3252  && !__is_fixed_size_abi_v<_Ap>)
3253  return {__private_init,
3254  __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
3255  _Ap::_S_masked(__data(__x))._M_data)};
3256  else
3257  {
3258  _Tp __r{};
3259  __builtin_memcpy(&__data(__r), &__data(__x),
3260  sizeof(_Up)
3261  * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
3262  return __r;
3263  }
3264  }
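// Example (illustrative): widening value-initializes the new elements:
//   fixed_size_simd<float, 4> __a = ...;
//   auto __b = resizing_simd_cast<fixed_size_simd<float, 8>>(__a);
//   // __b[0..3] == __a[0..3], __b[4..7] == 0.f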
3265 
3266 template <typename _Tp, typename _Up, typename _Ap>
3267  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3268  enable_if_t<is_simd_mask_v<_Tp>, _Tp>
3269  resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
3270  {
3271  return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
3272  typename _Tp::simd_type::value_type>(__x)};
3273  }
3274 } // namespace __proposed
3275 
3276 // }}}
3277 // to_fixed_size {{{2
3278 template <typename _Tp, int _Np>
3279  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
3280  to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
3281  { return __x; }
3282 
3283 template <typename _Tp, int _Np>
3284  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
3285  to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
3286  { return __x; }
3287 
3288 template <typename _Tp, typename _Ap>
3289  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>
3290  to_fixed_size(const simd<_Tp, _Ap>& __x)
3291  {
3292  using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>;
3293  return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3294  }
3295 
3296 template <typename _Tp, typename _Ap>
3297  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>>
3298  to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
3299  {
3300  return {__private_init,
3301  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }};
3302  }
3303 
3304 // to_native {{{2
3305 template <typename _Tp, int _Np>
3306  _GLIBCXX_SIMD_INTRINSIC
3307  enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
3308  to_native(const fixed_size_simd<_Tp, _Np>& __x)
3309  {
3310  alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
3311  __x.copy_to(__mem, vector_aligned);
3312  return {__mem, vector_aligned};
3313  }
3314 
3315 template <typename _Tp, int _Np>
3316  _GLIBCXX_SIMD_INTRINSIC
3317  enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
3318  to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
3319  {
3320  return native_simd_mask<_Tp>(
3321  __private_init,
3322  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3323  }
3324 
3325 // to_compatible {{{2
3326 template <typename _Tp, int _Np>
3327  _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
3328  to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
3329  {
3330  alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
3331  __x.copy_to(__mem, vector_aligned);
3332  return {__mem, vector_aligned};
3333  }
3334 
3335 template <typename _Tp, int _Np>
3336  _GLIBCXX_SIMD_INTRINSIC
3337  enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
3338  to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
3339  {
3340  return simd_mask<_Tp>(
3341  __private_init,
3342  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3343  }
3344 
3345 // masked assignment [simd_mask.where] {{{1
3346 
3347 // where_expression {{{1
3348 // const_where_expression<M, T> {{{2
3349 template <typename _M, typename _Tp>
3350  class const_where_expression
3351  {
3352  using _V = _Tp;
3353  static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3354 
3355  struct _Wrapper { using value_type = _V; };
3356 
3357  protected:
3358  using _Impl = typename _V::_Impl;
3359 
3360  using value_type =
3361  typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3362 
3363  _GLIBCXX_SIMD_INTRINSIC friend const _M&
3364  __get_mask(const const_where_expression& __x)
3365  { return __x._M_k; }
3366 
3367  _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3368  __get_lvalue(const const_where_expression& __x)
3369  { return __x._M_value; }
3370 
3371  const _M& _M_k;
3372  _Tp& _M_value;
3373 
3374  public:
3375  const_where_expression(const const_where_expression&) = delete;
3376 
3377  const_where_expression& operator=(const const_where_expression&) = delete;
3378 
3379  _GLIBCXX_SIMD_INTRINSIC constexpr
3380  const_where_expression(const _M& __kk, const _Tp& dd)
3381  : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3382 
3383  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3384  operator-() const&&
3385  {
3386  return {__private_init,
3387  _Impl::template _S_masked_unary<negate>(__data(_M_k),
3388  __data(_M_value))};
3389  }
3390 
3391  template <typename _Up, typename _Flags>
3392  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3393  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3394  {
3395  return {__private_init,
3396  _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3397  _Flags::template _S_apply<_V>(__mem))};
3398  }
3399 
3400  template <typename _Up, typename _Flags>
3401  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3402  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3403  {
3404  _Impl::_S_masked_store(__data(_M_value),
3405  _Flags::template _S_apply<_V>(__mem),
3406  __data(_M_k));
3407  }
3408  };
3409 
3410 // const_where_expression<bool, T> {{{2
3411 template <typename _Tp>
3412  class const_where_expression<bool, _Tp>
3413  {
3414  using _M = bool;
3415  using _V = _Tp;
3416 
3417  static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3418 
3419  struct _Wrapper { using value_type = _V; };
3420 
3421  protected:
3422  using value_type
3423  = typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3424 
3425  _GLIBCXX_SIMD_INTRINSIC friend const _M&
3426  __get_mask(const const_where_expression& __x)
3427  { return __x._M_k; }
3428 
3429  _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3430  __get_lvalue(const const_where_expression& __x)
3431  { return __x._M_value; }
3432 
3433  const bool _M_k;
3434  _Tp& _M_value;
3435 
3436  public:
3437  const_where_expression(const const_where_expression&) = delete;
3438  const_where_expression& operator=(const const_where_expression&) = delete;
3439 
3440  _GLIBCXX_SIMD_INTRINSIC constexpr
3441  const_where_expression(const bool __kk, const _Tp& dd)
3442  : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3443 
3444  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3445  operator-() const&&
3446  { return _M_k ? -_M_value : _M_value; }
3447 
3448  template <typename _Up, typename _Flags>
3449  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3450  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3451  { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
3452 
3453  template <typename _Up, typename _Flags>
3454  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3455  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3456  {
3457  if (_M_k)
3458  __mem[0] = _M_value;
3459  }
3460  };
3461 
3462 // where_expression<M, T> {{{2
3463 template <typename _M, typename _Tp>
3464  class where_expression : public const_where_expression<_M, _Tp>
3465  {
3466  using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3467 
3468  static_assert(!is_const<_Tp>::value,
3469  "where_expression may only be instantiated with a non-const "
3470  "_Tp parameter");
3471 
3472  using typename const_where_expression<_M, _Tp>::value_type;
3473  using const_where_expression<_M, _Tp>::_M_k;
3474  using const_where_expression<_M, _Tp>::_M_value;
3475 
3476  static_assert(
3477  is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3478  static_assert(_M::size() == _Tp::size(), "");
3479 
3480  _GLIBCXX_SIMD_INTRINSIC friend constexpr _Tp&
3481  __get_lvalue(where_expression& __x)
3482  { return __x._M_value; }
3483 
3484  public:
3485  where_expression(const where_expression&) = delete;
3486  where_expression& operator=(const where_expression&) = delete;
3487 
3488  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3489  where_expression(const _M& __kk, _Tp& dd)
3490  : const_where_expression<_M, _Tp>(__kk, dd) {}
3491 
3492  template <typename _Up>
3493  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3494  operator=(_Up&& __x) &&
3495  {
3496  _Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3497  __to_value_type_or_member_type<_Tp>(
3498  static_cast<_Up&&>(__x)));
3499  }
3500 
3501 #define _GLIBCXX_SIMD_OP_(__op, __name) \
3502  template <typename _Up> \
3503  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3504  operator __op##=(_Up&& __x)&& \
3505  { \
3506  _Impl::template _S_masked_cassign( \
3507  __data(_M_k), __data(_M_value), \
3508  __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \
3509  [](auto __impl, auto __lhs, auto __rhs) \
3510  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
3511  { return __impl.__name(__lhs, __rhs); }); \
3512  } \
3513  static_assert(true)
3514  _GLIBCXX_SIMD_OP_(+, _S_plus);
3515  _GLIBCXX_SIMD_OP_(-, _S_minus);
3516  _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3517  _GLIBCXX_SIMD_OP_(/, _S_divides);
3518  _GLIBCXX_SIMD_OP_(%, _S_modulus);
3519  _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3520  _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3521  _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3522  _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3523  _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3524 #undef _GLIBCXX_SIMD_OP_
3525 
3526  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3527  operator++() &&
3528  {
3529  __data(_M_value)
3530  = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3531  }
3532 
3533  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3534  operator++(int) &&
3535  {
3536  __data(_M_value)
3537  = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3538  }
3539 
3540  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3541  operator--() &&
3542  {
3543  __data(_M_value)
3544  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3545  }
3546 
3547  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3548  operator--(int) &&
3549  {
3550  __data(_M_value)
3551  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3552  }
3553 
3554  // intentionally hides const_where_expression::copy_from
3555  template <typename _Up, typename _Flags>
3556  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3557  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3558  {
3559  __data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3560  _Flags::template _S_apply<_Tp>(__mem));
3561  }
3562  };
3563 
3564 // where_expression<bool, T> {{{2
3565 template <typename _Tp>
3566  class where_expression<bool, _Tp>
3567  : public const_where_expression<bool, _Tp>
3568  {
3569  using _M = bool;
3570  using typename const_where_expression<_M, _Tp>::value_type;
3571  using const_where_expression<_M, _Tp>::_M_k;
3572  using const_where_expression<_M, _Tp>::_M_value;
3573 
3574  public:
3575  where_expression(const where_expression&) = delete;
3576  where_expression& operator=(const where_expression&) = delete;
3577 
3578  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3579  where_expression(const _M& __kk, _Tp& dd)
3580  : const_where_expression<_M, _Tp>(__kk, dd) {}
3581 
3582 #define _GLIBCXX_SIMD_OP_(__op) \
3583  template <typename _Up> \
3584  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3585  operator __op(_Up&& __x)&& \
3586  { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3587 
3588  _GLIBCXX_SIMD_OP_(=)
3589  _GLIBCXX_SIMD_OP_(+=)
3590  _GLIBCXX_SIMD_OP_(-=)
3591  _GLIBCXX_SIMD_OP_(*=)
3592  _GLIBCXX_SIMD_OP_(/=)
3593  _GLIBCXX_SIMD_OP_(%=)
3594  _GLIBCXX_SIMD_OP_(&=)
3595  _GLIBCXX_SIMD_OP_(|=)
3596  _GLIBCXX_SIMD_OP_(^=)
3597  _GLIBCXX_SIMD_OP_(<<=)
3598  _GLIBCXX_SIMD_OP_(>>=)
3599 #undef _GLIBCXX_SIMD_OP_
3600 
3601  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3602  operator++() &&
3603  { if (_M_k) ++_M_value; }
3604 
3605  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3606  operator++(int) &&
3607  { if (_M_k) ++_M_value; }
3608 
3609  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3610  operator--() &&
3611  { if (_M_k) --_M_value; }
3612 
3613  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3614  operator--(int) &&
3615  { if (_M_k) --_M_value; }
3616 
3617  // intentionally hides const_where_expression::copy_from
3618  template <typename _Up, typename _Flags>
3619  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3620  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3621  { if (_M_k) _M_value = __mem[0]; }
3622  };
3623 
3624 // where {{{1
3625 template <typename _Tp, typename _Ap>
3626  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3627  where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3628  where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
3629  { return {__k, __value}; }
3630 
3631 template <typename _Tp, typename _Ap>
3632  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3633  const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3634  where(const typename simd<_Tp, _Ap>::mask_type& __k, const simd<_Tp, _Ap>& __value)
3635  { return {__k, __value}; }
3636 
3637 template <typename _Tp, typename _Ap>
3638  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3639  where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3640  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, simd_mask<_Tp, _Ap>& __value)
3641  { return {__k, __value}; }
3642 
3643 template <typename _Tp, typename _Ap>
3644  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3645  const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3646  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, const simd_mask<_Tp, _Ap>& __value)
3647  { return {__k, __value}; }
3648 
3649 template <typename _Tp>
3650  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR where_expression<bool, _Tp>
3651  where(_ExactBool __k, _Tp& __value)
3652  { return {__k, __value}; }
3653 
3654 template <typename _Tp>
3655  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR const_where_expression<bool, _Tp>
3656  where(_ExactBool __k, const _Tp& __value)
3657  { return {__k, __value}; }
3658 
3659 template <typename _Tp, typename _Ap>
3660  _GLIBCXX_SIMD_CONSTEXPR void
3661  where(bool __k, simd<_Tp, _Ap>& __value) = delete;
3662 
3663 template <typename _Tp, typename _Ap>
3664  _GLIBCXX_SIMD_CONSTEXPR void
3665  where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
3666 
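// Usage sketch (illustrative): where() selects which elements a subsequent
// (compound) assignment applies to; inactive elements are left unchanged:
//   simd<float> __v = /* ... */;
//   where(__v < 0, __v) = 0;   // clamp negative elements to zero
//   where(__v > 1, __v) *= 2;  // double only the elements greater than 1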
3667 // proposed mask iterations {{{1
3668 namespace __proposed {
3669 template <size_t _Np>
3670  class where_range
3671  {
3672  const bitset<_Np> __bits;
3673 
3674  public:
3675  where_range(bitset<_Np> __b) : __bits(__b) {}
3676 
3677  class iterator
3678  {
3679  size_t __mask;
3680  size_t __bit;
3681 
3682  _GLIBCXX_SIMD_INTRINSIC void
3683  __next_bit()
3684  { __bit = __mask == 0 ? 0 : __builtin_ctzl(__mask); } // ctzl(0) is UB; end() has __mask == 0
3685 
3686  _GLIBCXX_SIMD_INTRINSIC void
3687  __reset_lsb()
3688  {
3689  // clear the lowest set bit: 0b0110'0100 & (0b0110'0100 - 1) = 0b0110'0000
3690  __mask &= (__mask - 1);
3691  // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3692  }
3693 
3694  public:
3695  iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3696  iterator(const iterator&) = default;
3697  iterator(iterator&&) = default;
3698 
3699  _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3700  operator->() const
3701  { return __bit; }
3702 
3703  _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3704  operator*() const
3705  { return __bit; }
3706 
3707  _GLIBCXX_SIMD_ALWAYS_INLINE iterator&
3708  operator++()
3709  {
3710  __reset_lsb();
3711  __next_bit();
3712  return *this;
3713  }
3714 
3715  _GLIBCXX_SIMD_ALWAYS_INLINE iterator
3716  operator++(int)
3717  {
3718  iterator __tmp = *this;
3719  __reset_lsb();
3720  __next_bit();
3721  return __tmp;
3722  }
3723 
3724  _GLIBCXX_SIMD_ALWAYS_INLINE bool
3725  operator==(const iterator& __rhs) const
3726  { return __mask == __rhs.__mask; }
3727 
3728  _GLIBCXX_SIMD_ALWAYS_INLINE bool
3729  operator!=(const iterator& __rhs) const
3730  { return __mask != __rhs.__mask; }
3731  };
3732 
3733  iterator
3734  begin() const
3735  { return __bits.to_ullong(); }
3736 
3737  iterator
3738  end() const
3739  { return 0; }
3740  };
3741 
3742 template <typename _Tp, typename _Ap>
3743  where_range<simd_size_v<_Tp, _Ap>>
3744  where(const simd_mask<_Tp, _Ap>& __k)
3745  { return __k.__to_bitset(); }
3746 
3747 } // namespace __proposed
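// Usage sketch (illustrative): __proposed::where(__k) yields the indices of
// the set mask elements, from least to most significant:
//   for (size_t __i : __proposed::where(__k))
//     /* use __i */;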
3748 
3749 // }}}1
3750 // reductions [simd.reductions] {{{1
3751 template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
3752  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3753  reduce(const simd<_Tp, _Abi>& __v, _BinaryOperation __binary_op = _BinaryOperation())
3754  { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3755 
3756 template <typename _M, typename _V, typename _BinaryOperation = plus<>>
3757  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3758  reduce(const const_where_expression<_M, _V>& __x,
3759  typename _V::value_type __identity_element, _BinaryOperation __binary_op)
3760  {
3761  if (__builtin_expect(none_of(__get_mask(__x)), false))
3762  return __identity_element;
3763 
3764  _V __tmp = __identity_element;
3765  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3766  __data(__get_lvalue(__x)));
3767  return reduce(__tmp, __binary_op);
3768  }
3769 
3770 template <typename _M, typename _V>
3771  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3772  reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
3773  { return reduce(__x, 0, __binary_op); }
3774 
3775 template <typename _M, typename _V>
3776  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3777  reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
3778  { return reduce(__x, 1, __binary_op); }
3779 
3780 template <typename _M, typename _V>
3781  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3782  reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
3783  { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3784 
3785 template <typename _M, typename _V>
3786  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3787  reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
3788  { return reduce(__x, 0, __binary_op); }
3789 
3790 template <typename _M, typename _V>
3791  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3792  reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
3793  { return reduce(__x, 0, __binary_op); }
3794 
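// Usage sketch (illustrative): the masked overloads substitute the identity
// element of the operation for inactive elements, so an empty mask is fine:
//   simd<int> __v = /* ... */;
//   int __sum = reduce(where(__v > 0, __v)); // adds only the positive elements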
3795 template <typename _Tp, typename _Abi>
3796  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3797  hmin(const simd<_Tp, _Abi>& __v) noexcept
3798  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); }
3799 
3800 template <typename _Tp, typename _Abi>
3801  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3802  hmax(const simd<_Tp, _Abi>& __v) noexcept
3803  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); }
3804 
3805 template <typename _M, typename _V>
3806  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3807  typename _V::value_type
3808  hmin(const const_where_expression<_M, _V>& __x) noexcept
3809  {
3810  using _Tp = typename _V::value_type;
3811  constexpr _Tp __id_elem =
3812 #ifdef __FINITE_MATH_ONLY__
3813  __finite_max_v<_Tp>;
3814 #else
3815  __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
3816 #endif
3817  _V __tmp = __id_elem;
3818  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3819  __data(__get_lvalue(__x)));
3820  return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
3821  }
3822 
3823 template <typename _M, typename _V>
3824  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3825  typename _V::value_type
3826  hmax(const const_where_expression<_M, _V>& __x) noexcept
3827  {
3828  using _Tp = typename _V::value_type;
3829  constexpr _Tp __id_elem =
3830 #ifdef __FINITE_MATH_ONLY__
3831  __finite_min_v<_Tp>;
3832 #else
3833  [] {
3834  if constexpr (__value_exists_v<__infinity, _Tp>)
3835  return -__infinity_v<_Tp>;
3836  else
3837  return __finite_min_v<_Tp>;
3838  }();
3839 #endif
3840  _V __tmp = __id_elem;
3841  _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3842  __data(__get_lvalue(__x)));
3843  return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
3844  }
3845 
3846 // }}}1
3847 // algorithms [simd.alg] {{{
3848 template <typename _Tp, typename _Ap>
3849  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3850  min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3851  { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
3852 
3853 template <typename _Tp, typename _Ap>
3854  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3855  max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3856  { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3857 
3858 template <typename _Tp, typename _Ap>
3859  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3860  pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
3861  minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3862  {
3863  const auto pair_of_members
3864  = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
3865  return {simd<_Tp, _Ap>(__private_init, pair_of_members.first),
3866  simd<_Tp, _Ap>(__private_init, pair_of_members.second)};
3867  }
3868 
3869 template <typename _Tp, typename _Ap>
3870  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3871  clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap>& __hi)
3872  {
3873  using _Impl = typename _Ap::_SimdImpl;
3874  return {__private_init,
3875  _Impl::_S_min(__data(__hi),
3876  _Impl::_S_max(__data(__lo), __data(__v)))};
3877  }
3878 
3879 // }}}
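// Usage sketch (illustrative): these element-wise algorithms mirror their
// scalar counterparts:
//   simd<int> __v = /* ... */;
//   auto __c = clamp(__v, simd<int>(0), simd<int>(9)); // per element into [0, 9]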
3880 
3881 template <size_t... _Sizes, typename _Tp, typename _Ap,
3882  typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
3883  inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3884  split(const simd<_Tp, _Ap>&);
3885 
3886 // __extract_part {{{
3887 template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
3888  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr
3889  _SimdWrapper<_Tp, _Np / _Total * _Combine>
3890  __extract_part(const _SimdWrapper<_Tp, _Np> __x);
3891 
3892 template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As>
3893  _GLIBCXX_SIMD_INTRINSIC constexpr auto
3894  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3895 
3896 // }}}
3897 // _SizeList {{{
3898 template <size_t _V0, size_t... _Values>
3899  struct _SizeList
3900  {
3901  template <size_t _I>
3902  static constexpr size_t
3903  _S_at(_SizeConstant<_I> = {})
3904  {
3905  if constexpr (_I == 0)
3906  return _V0;
3907  else
3908  return _SizeList<_Values...>::template _S_at<_I - 1>();
3909  }
3910 
3911  template <size_t _I>
3912  static constexpr auto
3913  _S_before(_SizeConstant<_I> = {})
3914  {
3915  if constexpr (_I == 0)
3916  return _SizeConstant<0>();
3917  else
3918  return _SizeConstant<
3919  _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
3920  }
3921 
3922  template <size_t _Np>
3923  static constexpr auto
3924  _S_pop_front(_SizeConstant<_Np> = {})
3925  {
3926  if constexpr (_Np == 0)
3927  return _SizeList();
3928  else
3929  return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
3930  }
3931  };
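// Illustrative values, following the definitions above:
//   _SizeList<2, 4, 8>::_S_at<1>()        == 4 (element access)
//   _SizeList<2, 4, 8>::_S_before<2>()    is _SizeConstant<6> (2 + 4)
//   _SizeList<2, 4, 8>::_S_pop_front<1>() is _SizeList<4, 8>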
3932 
3933 // }}}
3934 // __extract_center {{{
3935 template <typename _Tp, size_t _Np>
3936  _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
3937  __extract_center(_SimdWrapper<_Tp, _Np> __x)
3938  {
3939  static_assert(_Np >= 4);
3940  static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
3941 #if _GLIBCXX_SIMD_X86INTRIN // {{{
3942  if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
3943  {
3944  const auto __intrin = __to_intrin(__x);
3945  if constexpr (is_integral_v<_Tp>)
3946  return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
3947  _mm512_shuffle_i32x4(__intrin, __intrin,
3948  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3949  else if constexpr (sizeof(_Tp) == 4)
3950  return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
3951  _mm512_shuffle_f32x4(__intrin, __intrin,
3952  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3953  else if constexpr (sizeof(_Tp) == 8)
3954  return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
3955  _mm512_shuffle_f64x2(__intrin, __intrin,
3956  1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3957  else
3958  __assert_unreachable<_Tp>();
3959  }
3960  else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
3961  return __vector_bitcast<_Tp>(
3962  _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
3963  __hi128(__vector_bitcast<double>(__x)), 1));
3964  else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
3965  return __vector_bitcast<_Tp>(
3966  _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
3967  __lo128(__vector_bitcast<_LLong>(__x)),
3968  sizeof(_Tp) * _Np / 4));
3969  else
3970 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
3971  {
3972  __vector_type_t<_Tp, _Np / 2> __r;
3973  __builtin_memcpy(&__r,
3974  reinterpret_cast<const char*>(&__x)
3975  + sizeof(_Tp) * _Np / 4,
3976  sizeof(_Tp) * _Np / 2);
3977  return __r;
3978  }
3979  }
3980 
3981 template <typename _Tp, typename _A0, typename... _As>
3982  _GLIBCXX_SIMD_INTRINSIC
3983  _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
3984  __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
3985  {
3986  if constexpr (sizeof...(_As) == 0)
3987  return __extract_center(__x.first);
3988  else
3989  return __extract_part<1, 4, 2>(__x);
3990  }
3991 
3992 // }}}
3993 // __split_wrapper {{{
3994 template <size_t... _Sizes, typename _Tp, typename... _As>
3995  auto
3996  __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
3997  {
3998  return split<_Sizes...>(
3999  fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
4000  __x));
4001  }
4002 
4003 // }}}
4004 
4005 // split<simd>(simd) {{{
4006 template <typename _V, typename _Ap,
4007  size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
4008  enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
4009  && is_simd_v<_V>, array<_V, _Parts>>
4010  split(const simd<typename _V::value_type, _Ap>& __x)
4011  {
4012  using _Tp = typename _V::value_type;
4013  if constexpr (_Parts == 1)
4014  {
4015  return {simd_cast<_V>(__x)};
4016  }
4017  else if (__x._M_is_constprop())
4018  {
4019  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4020  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4021  return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4022  { return __x[__i * _V::size() + __j]; });
4023  });
4024  }
4025  else if constexpr (
4026  __is_fixed_size_abi_v<_Ap>
4027  && (is_same_v<typename _V::abi_type, simd_abi::scalar>
4028  || (__is_fixed_size_abi_v<typename _V::abi_type>
4029  && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
4030  )))
4031  {
4032  // fixed_size -> fixed_size (w/o padding) or scalar
4033 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4034  const __may_alias<_Tp>* const __element_ptr
4035  = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
4036  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4037  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4038  { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
4039 #else
4040  const auto& __xx = __data(__x);
4041  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4042  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4043  [[maybe_unused]] constexpr size_t __offset
4044  = decltype(__i)::value * _V::size();
4045  return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4046  constexpr _SizeConstant<__j + __offset> __k;
4047  return __xx[__k];
4048  });
4049  });
4050 #endif
4051  }
4052  else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
4053  {
4054  // normally memcpy should work here as well
4055  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4056  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
4057  }
4058  else
4059  {
4060  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4061  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4062  if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
4063  return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4064  return __x[__i * _V::size() + __j];
4065  });
4066  else
4067  return _V(__private_init,
4068  __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
4069  });
4070  }
4071  }
4072 
4073 // }}}
4074 // split<simd_mask>(simd_mask) {{{
4075 template <typename _V, typename _Ap,
4076  size_t _Parts = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
4077  enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
4078  _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
4079  split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
4080  {
4081  if constexpr (is_same_v<_Ap, typename _V::abi_type>)
4082  return {__x};
4083  else if constexpr (_Parts == 1)
4084  return {__proposed::static_simd_cast<_V>(__x)};
4085  else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
4086  && __is_avx_abi<_Ap>())
4087  return {_V(__private_init, __lo128(__data(__x))),
4088  _V(__private_init, __hi128(__data(__x)))};
4089  else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
4090  {
4091  const bitset __bits = __x.__to_bitset();
4092  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4093  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4094  constexpr size_t __offset = __i * _V::size();
4095  return _V(__bitset_init, (__bits >> __offset).to_ullong());
4096  });
4097  }
4098  else
4099  {
4100  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4101  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4102  constexpr size_t __offset = __i * _V::size();
4103  return _V(__private_init,
4104  [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4105  return __x[__j + __offset];
4106  });
4107  });
4108  }
4109  }
4110 
4111 // }}}
4112 // split<_Sizes...>(simd) {{{
4113 template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
4114  _GLIBCXX_SIMD_ALWAYS_INLINE
4115  tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
4116  split(const simd<_Tp, _Ap>& __x)
4117  {
4118  using _SL = _SizeList<_Sizes...>;
4119  using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
4120  constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
4121  constexpr size_t _N0 = _SL::template _S_at<0>();
4122  using _V = __deduced_simd<_Tp, _N0>;
4123 
4124  if (__x._M_is_constprop())
4125  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4126  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4127  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4128  constexpr size_t __offset = _SL::_S_before(__i);
4129  return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4130  return __x[__offset + __j];
4131  });
4132  });
4133  else if constexpr (_Np == _N0)
4134  {
4135  static_assert(sizeof...(_Sizes) == 1);
4136  return {simd_cast<_V>(__x)};
4137  }
4138  else if constexpr // split from fixed_size, such that __x::first.size == _N0
4139  (__is_fixed_size_abi_v<
4140  _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
4141  {
4142  static_assert(
4143  !__is_fixed_size_abi_v<typename _V::abi_type>,
4144  "How can <_Tp, _Np> be a single _SimdTuple entry but a "
4145  "fixed_size_simd "
4146  "when deduced?");
4147  // extract first and recurse (__split_wrapper is needed to deduce a new
4148  // _Sizes pack)
4149  return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
4150  __split_wrapper(_SL::template _S_pop_front<1>(),
4151  __data(__x).second));
4152  }
4153  else if constexpr ((!is_same_v<simd_abi::scalar,
4154  simd_abi::deduce_t<_Tp, _Sizes>> && ...)
4155  && (!__is_fixed_size_abi_v<
4156  simd_abi::deduce_t<_Tp, _Sizes>> && ...))
4157  {
4158  if constexpr (((_Sizes * 2 == _Np) && ...))
4159  return {{__private_init, __extract_part<0, 2>(__data(__x))},
4160  {__private_init, __extract_part<1, 2>(__data(__x))}};
4161  else if constexpr (is_same_v<_SizeList<_Sizes...>,
4162  _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
4163  return {{__private_init, __extract_part<0, 3>(__data(__x))},
4164  {__private_init, __extract_part<1, 3>(__data(__x))},
4165  {__private_init, __extract_part<2, 3>(__data(__x))}};
4166  else if constexpr (is_same_v<_SizeList<_Sizes...>,
4167  _SizeList<2 * _Np / 3, _Np / 3>>)
4168  return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
4169  {__private_init, __extract_part<2, 3>(__data(__x))}};
4170  else if constexpr (is_same_v<_SizeList<_Sizes...>,
4171  _SizeList<_Np / 3, 2 * _Np / 3>>)
4172  return {{__private_init, __extract_part<0, 3>(__data(__x))},
4173  {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
4174  else if constexpr (is_same_v<_SizeList<_Sizes...>,
4175  _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
4176  return {{__private_init, __extract_part<0, 2>(__data(__x))},
4177  {__private_init, __extract_part<2, 4>(__data(__x))},
4178  {__private_init, __extract_part<3, 4>(__data(__x))}};
4179  else if constexpr (is_same_v<_SizeList<_Sizes...>,
4180  _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
4181  return {{__private_init, __extract_part<0, 4>(__data(__x))},
4182  {__private_init, __extract_part<1, 4>(__data(__x))},
4183  {__private_init, __extract_part<1, 2>(__data(__x))}};
4184  else if constexpr (is_same_v<_SizeList<_Sizes...>,
4185  _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
4186  return {{__private_init, __extract_part<0, 4>(__data(__x))},
4187  {__private_init, __extract_center(__data(__x))},
4188  {__private_init, __extract_part<3, 4>(__data(__x))}};
4189  else if constexpr (((_Sizes * 4 == _Np) && ...))
4190  return {{__private_init, __extract_part<0, 4>(__data(__x))},
4191  {__private_init, __extract_part<1, 4>(__data(__x))},
4192  {__private_init, __extract_part<2, 4>(__data(__x))},
4193  {__private_init, __extract_part<3, 4>(__data(__x))}};
4194  // else fall through
4195  }
4196 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4197  const __may_alias<_Tp>* const __element_ptr
4198  = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
4199  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4200  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4201  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4202  constexpr size_t __offset = _SL::_S_before(__i);
4203  constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
4204  constexpr size_t __a
4205  = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
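  // __b is the lowest set bit of __a ((__a - 1) & __a clears that bit; the
  // XOR recovers it), i.e. the largest power of 2 dividing __a and thus the
  // guaranteed alignment of __element_ptr + __offset.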
4206  constexpr size_t __b = ((__a - 1) & __a) ^ __a;
4207  constexpr size_t __alignment = __b == 0 ? __a : __b;
4208  return _Vi(__element_ptr + __offset, overaligned<__alignment>);
4209  });
4210 #else
4211  return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4212  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4213  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4214  const auto& __xx = __data(__x);
4215  using _Offset = decltype(_SL::_S_before(__i));
4216  return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4217  constexpr _SizeConstant<_Offset::value + __j> __k;
4218  return __xx[__k];
4219  });
4220  });
4221 #endif
4222  }
4223 
4224 // }}}
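// Usage sketch (illustrative): split either into equal-size parts, deduced
// from the requested destination type, or into explicitly given sizes:
//   fixed_size_simd<int, 8> __x = /* ... */;
//   array<fixed_size_simd<int, 4>, 2> __halves
//     = split<fixed_size_simd<int, 4>>(__x);
//   auto [__a, __b, __c] = split<2, 2, 4>(__x); // simds of size 2, 2, and 4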
4225 
4226 // __subscript_in_pack {{{
4227 template <size_t _I, typename _Tp, typename _Ap, typename... _As>
4228  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
4229  __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
4230  {
4231  if constexpr (_I < simd_size_v<_Tp, _Ap>)
4232  return __x[_I];
4233  else
4234  return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
4235  }
4236 
4237 // }}}
4238 // __store_pack_of_simd {{{
4239 template <typename _Tp, typename _A0, typename... _As>
4240  _GLIBCXX_SIMD_INTRINSIC void
4241  __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
4242  {
4243  constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
4244  __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
4245  if constexpr (sizeof...(__xs) > 0)
4246  __store_pack_of_simd(__mem + __n_bytes, __xs...);
4247  }
4248 
4249 // }}}
4250 // concat(simd...) {{{
4251 template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
4252  inline _GLIBCXX_SIMD_CONSTEXPR
4253  simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
4254  concat(const simd<_Tp, _As>&... __xs)
4255  {
4256  using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
4257  if constexpr (sizeof...(__xs) == 1)
4258  return simd_cast<_Rp>(__xs...);
4259  else if ((... && __xs._M_is_constprop()))
4260  return simd<_Tp,
4261  simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>(
4262  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4263  { return __subscript_in_pack<__i>(__xs...); });
4264  else
4265  {
4266  _Rp __r{};
4267  __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
4268  return __r;
4269  }
4270  }
4271 
4272 // }}}
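// Usage sketch (illustrative): concat is the inverse of split:
//   auto __whole = concat(__a, __b, __c); // 8 elements again, in order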
4273 // concat(array<simd>) {{{
4274 template <typename _Tp, typename _Abi, size_t _Np>
4275  _GLIBCXX_SIMD_ALWAYS_INLINE
4276  _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
4277  concat(const array<simd<_Tp, _Abi>, _Np>& __x)
4278  {
4279  return __call_with_subscripts<_Np>(
4280  __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4281  return concat(__xs...);
4282  });
4283  }
4284 
4285 // }}}
4286 
4287 /// @cond undocumented
4288 // _SmartReference {{{
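// _SmartReference implements the proxy-reference type returned from
// simd::operator[] and simd_mask::operator[]: reads go through _M_read(),
// writes through _Accessor::_S_set(), so elements stay accessible even when
// the wrapped object is not element-addressable. All mutating operators are
// rvalue-qualified (&&), which prevents accidentally retaining the proxy.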
4289 template <typename _Up, typename _Accessor = _Up,
4290  typename _ValueType = typename _Up::value_type>
4291  class _SmartReference
4292  {
4293  friend _Accessor;
4294  int _M_index;
4295  _Up& _M_obj;
4296 
4297  _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType
4298  _M_read() const noexcept
4299  {
4300  if constexpr (is_arithmetic_v<_Up>)
4301  return _M_obj;
4302  else
4303  return _M_obj[_M_index];
4304  }
4305 
4306  template <typename _Tp>
4307  _GLIBCXX_SIMD_INTRINSIC constexpr void
4308  _M_write(_Tp&& __x) const
4309  { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }
4310 
4311  public:
4312  _GLIBCXX_SIMD_INTRINSIC constexpr
4313  _SmartReference(_Up& __o, int __i) noexcept
4314  : _M_index(__i), _M_obj(__o) {}
4315 
4316  using value_type = _ValueType;
4317 
4318  _GLIBCXX_SIMD_INTRINSIC
4319  _SmartReference(const _SmartReference&) = delete;
4320 
4321  _GLIBCXX_SIMD_INTRINSIC constexpr
4322  operator value_type() const noexcept
4323  { return _M_read(); }
4324 
4325  template <typename _Tp, typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
4326  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4327  operator=(_Tp&& __x) &&
4328  {
4329  _M_write(static_cast<_Tp&&>(__x));
4330  return {_M_obj, _M_index};
4331  }
4332 
4333 #define _GLIBCXX_SIMD_OP_(__op) \
4334  template <typename _Tp, \
4335  typename _TT = decltype(declval<value_type>() __op declval<_Tp>()), \
4336  typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
4337  typename = _ValuePreservingOrInt<_TT, value_type>> \
4338  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \
4339  operator __op##=(_Tp&& __x) && \
4340  { \
4341  const value_type& __lhs = _M_read(); \
4342  _M_write(__lhs __op __x); \
4343  return {_M_obj, _M_index}; \
4344  }
4345  _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
4346  _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
4347  _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
4348 #undef _GLIBCXX_SIMD_OP_
4349 
4350  template <typename _Tp = void,
4351  typename = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
4352  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4353  operator++() &&
4354  {
4355  value_type __x = _M_read();
4356  _M_write(++__x);
4357  return {_M_obj, _M_index};
4358  }
4359 
4360  template <typename _Tp = void,
4361  typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
4362  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4363  operator++(int) &&
4364  {
4365  const value_type __r = _M_read();
4366  value_type __x = __r;
4367  _M_write(++__x);
4368  return __r;
4369  }
4370 
4371  template <typename _Tp = void,
4372  typename = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
4373  _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4374  operator--() &&
4375  {
4376  value_type __x = _M_read();
4377  _M_write(--__x);
4378  return {_M_obj, _M_index};
4379  }
4380 
4381  template <typename _Tp = void,
4382  typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
4383  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4384  operator--(int) &&
4385  {
4386  const value_type __r = _M_read();
4387  value_type __x = __r;
4388  _M_write(--__x);
4389  return __r;
4390  }
4391 
4392  _GLIBCXX_SIMD_INTRINSIC friend void
4393  swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
4394  conjunction<
4395  is_nothrow_constructible<value_type, _SmartReference&&>,
4396  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4397  {
4398  value_type __tmp = static_cast<_SmartReference&&>(__a);
4399  static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
4400  static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4401  }
4402 
4403  _GLIBCXX_SIMD_INTRINSIC friend void
4404  swap(value_type& __a, _SmartReference&& __b) noexcept(
4405  conjunction<
4406  is_nothrow_constructible<value_type, value_type&&>,
4407  is_nothrow_assignable<value_type&, value_type&&>,
4408  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4409  {
4410  value_type __tmp(std::move(__a));
4411  __a = static_cast<value_type>(__b);
4412  static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4413  }
4414 
4415  _GLIBCXX_SIMD_INTRINSIC friend void
4416  swap(_SmartReference&& __a, value_type& __b) noexcept(
4417  conjunction<
4418  is_nothrow_constructible<value_type, _SmartReference&&>,
4419  is_nothrow_assignable<value_type&, value_type&&>,
4420  is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4421  {
4422  value_type __tmp(__a);
4423  static_cast<_SmartReference&&>(__a) = std::move(__b);
4424  __b = std::move(__tmp);
4425  }
4426  };
4427 
4428 // }}}
4429 // __scalar_abi_wrapper {{{
4430 template <int _Bytes>
4431  struct __scalar_abi_wrapper
4432  {
4433  template <typename _Tp> static constexpr size_t _S_full_size = 1;
4434  template <typename _Tp> static constexpr size_t _S_size = 1;
4435  template <typename _Tp> static constexpr bool _S_is_partial = false;
4436 
4437  template <typename _Tp, typename _Abi = simd_abi::scalar>
4438  static constexpr bool _S_is_valid_v
4439  = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
4440  };
4441 
4442 // }}}
4443 // __decay_abi metafunction {{{
4444 template <typename _Tp>
4445  struct __decay_abi { using type = _Tp; };
4446 
4447 template <int _Bytes>
4448  struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
4449  { using type = simd_abi::scalar; };
4450 
4451 // }}}
4452 // __find_next_valid_abi metafunction {{{1
4453 // Given an ABI tag _Abi<_Bytes>, find the largest power-of-2 _N2 < _Bytes
4454 // such that _Abi<_N2>::_S_is_valid_v<_Tp> is true and
4455 // _Abi<_N2>::_S_is_partial<_Tp> is false. The recursion stops once the tag
4456 // would hold fewer than 2 elements; at that point type::_S_is_valid_v<_Tp>
// may be false.
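// E.g. (illustrative): for _Bytes = 24 the first candidate considered is
// _Abi<std::__bit_ceil(24) / 2>, i.e. _Abi<16>.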
4457 template <template <int> class _Abi, int _Bytes, typename _Tp>
4458  struct __find_next_valid_abi
4459  {
4460  static constexpr auto
4461  _S_choose()
4462  {
4463  constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
4464  using _NextAbi = _Abi<_NextBytes>;
4465  if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
4466  return _Abi<_Bytes>();
4467  else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
4468  && _NextAbi::template _S_is_valid_v<_Tp>)
4469  return _NextAbi();
4470  else
4471  return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
4472  }
4473 
4474  using type = decltype(_S_choose());
4475  };
4476 
4477 template <int _Bytes, typename _Tp>
4478  struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
4479  { using type = simd_abi::scalar; };
4480 
4481 // _AbiList {{{1
4482 template <template <int> class...>
4483  struct _AbiList
4484  {
4485  template <typename, int> static constexpr bool _S_has_valid_abi = false;
4486  template <typename, int> using _FirstValidAbi = void;
4487  template <typename, int> using _BestAbi = void;
4488  };
4489 
4490 template <template <int> class _A0, template <int> class... _Rest>
4491  struct _AbiList<_A0, _Rest...>
4492  {
4493  template <typename _Tp, int _Np>
4494  static constexpr bool _S_has_valid_abi
4495  = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
4496  _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;
4497 
4498  template <typename _Tp, int _Np>
4499  using _FirstValidAbi = conditional_t<
4500  _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
4501  typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
4502  typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;
4503 
4504  template <typename _Tp, int _Np>
4505  static constexpr auto
4506  _S_determine_best_abi()
4507  {
4508  static_assert(_Np >= 1);
4509  constexpr int _Bytes = sizeof(_Tp) * _Np;
4510  if constexpr (_Np == 1)
4511  return __make_dependent_t<_Tp, simd_abi::scalar>{};
4512  else
4513  {
4514  constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
4515  // _A0<_Bytes> is good if:
4516  // 1. The ABI tag is valid for _Tp
4517  // 2. The storage overhead is no more than padding to fill the next
4518  // power-of-2 number of bytes
4519  if constexpr (_A0<_Bytes>::template _S_is_valid_v<
4520  _Tp> && __fullsize / 2 < _Np)
4521  return typename __decay_abi<_A0<_Bytes>>::type{};
4522  else
4523  {
4524  using _Bp =
4525  typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
4526  if constexpr (_Bp::template _S_is_valid_v<
4527  _Tp> && _Bp::template _S_size<_Tp> <= _Np)
4528  return _Bp{};
4529  else
4530  return
4531  typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
4532  }
4533  }
4534  }
4535 
4536  template <typename _Tp, int _Np>
4537  using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
4538  };
4539 
4540 // }}}1
4541 
4542 // The following lists all native ABIs, which makes them accessible to
4543 // simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4544 // matters: whatever comes first has higher priority.
4545 using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
4546  __scalar_abi_wrapper>;
4547 
4548 // valid _SimdTraits specialization {{{1
4549 template <typename _Tp, typename _Abi>
4550  struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
4551  : _Abi::template __traits<_Tp> {};
4552 
4553 // __deduce_impl specializations {{{1
4554 // try all native ABIs (including scalar) first
4555 template <typename _Tp, size_t _Np>
4556  struct __deduce_impl<
4557  _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4558  { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4559 
4560 // fall back to fixed_size only if scalar and native ABIs don't match
4561 template <typename _Tp, size_t _Np, typename = void>
4562  struct __deduce_fixed_size_fallback {};
4563 
4564 template <typename _Tp, size_t _Np>
4565  struct __deduce_fixed_size_fallback<_Tp, _Np,
4566  enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
4567  { using type = simd_abi::fixed_size<_Np>; };
4568 
4569 template <typename _Tp, size_t _Np, typename>
4570  struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4571 
4572 //}}}1
4573 /// @endcond
4574 
4575 // simd_mask {{{
4576 template <typename _Tp, typename _Abi>
4577  class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4578  {
4579  // types, tags, and friends {{{
4580  using _Traits = _SimdTraits<_Tp, _Abi>;
4581  using _MemberType = typename _Traits::_MaskMember;
4582 
4583  // We map all masks with equal element sizeof to a single integer type, the
4584  // one given by __int_for_sizeof_t<_Tp>. This is the approach
4585  // [[gnu::vector_size(N)]] types take as well and it reduces the number of
4586  // template specializations in the implementation classes.
4587  using _Ip = __int_for_sizeof_t<_Tp>;
4588  static constexpr _Ip* _S_type_tag = nullptr;
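    // E.g. (illustrative): simd_mask<float> and simd_mask<int> consequently
    // share one mask implementation, because both element types map to the
    // same 4-byte __int_for_sizeof_t.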
4589 
4590  friend typename _Traits::_MaskBase;
4591  friend class simd<_Tp, _Abi>; // to construct masks on return
4592  friend typename _Traits::_SimdImpl; // to construct masks on return and
4593  // inspect data on masked operations
4594  public:
4595  using _Impl = typename _Traits::_MaskImpl;
4596  friend _Impl;
4597 
4598  // }}}
4599  // member types {{{
4600  using value_type = bool;
4601  using reference = _SmartReference<_MemberType, _Impl, value_type>;
4602  using simd_type = simd<_Tp, _Abi>;
4603  using abi_type = _Abi;
4604 
4605  // }}}
4606  static constexpr size_t size() // {{{
4607  { return __size_or_zero_v<_Tp, _Abi>; }
4608 
4609  // }}}
4610  // constructors & assignment {{{
4611  simd_mask() = default;
4612  simd_mask(const simd_mask&) = default;
4613  simd_mask(simd_mask&&) = default;
4614  simd_mask& operator=(const simd_mask&) = default;
4615  simd_mask& operator=(simd_mask&&) = default;
4616 
4617  // }}}
4618  // access to internal representation (optional feature) {{{
4619  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR explicit
4620  simd_mask(typename _Traits::_MaskCastType __init)
4621  : _M_data{__init} {}
4622  // conversions to internal type is done in _MaskBase
4623 
4624  // }}}
4625  // bitset interface (extension to be proposed) {{{
4626  // TS_FEEDBACK:
4627  // Conversion of simd_mask to and from bitset makes it much easier to
4628  // interface with other facilities. I suggest adding `static
4629  // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4630  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR static simd_mask
4631  __from_bitset(bitset<size()> bs)
4632  { return {__bitset_init, bs}; }
4633 
4634  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bitset<size()>
4635  __to_bitset() const
4636  { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
4637 
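    // Usage sketch (illustrative, extension): round-trip through bitset:
    //   simd_mask<float> __k = /* ... */;
    //   __k = simd_mask<float>::__from_bitset(__k.__to_bitset());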
4638  // }}}
4639  // explicit broadcast constructor {{{
4640  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4641  simd_mask(value_type __x)
4642  : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4643 
4644  // }}}
4645  // implicit type conversion constructor {{{
4646  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4647  // proposed improvement
4648  template <typename _Up, typename _A2,
4649  typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4650  _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4651  != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4652  simd_mask(const simd_mask<_Up, _A2>& __x)
4653  : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4654  #else
4655  // conforming to ISO/IEC 19570:2018
4656  template <typename _Up, typename = enable_if_t<conjunction<
4657  is_same<abi_type, simd_abi::fixed_size<size()>>,
4658  is_same<_Up, _Up>>::value>>
4659  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4660  simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4661  : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4662  #endif
4663 
4664  // }}}
4665  // load constructor {{{
4666  template <typename _Flags>
4667  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4668  simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>)
4669  : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem))) {}
4670 
4671  template <typename _Flags>
4672  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4673  simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>)
4674  : _M_data{}
4675  {
4676  _M_data = _Impl::_S_masked_load(_M_data, __k._M_data,
4677  _Flags::template _S_apply<simd_mask>(__mem));
4678  }
4679 
4680  // }}}
4681  // loads [simd_mask.load] {{{
4682  template <typename _Flags>
4683  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4684  copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>)
4685  { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem)); }
4686 
4687  // }}}
4688  // stores [simd_mask.store] {{{
4689  template <typename _Flags>
4690  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4691  copy_to(value_type* __mem, _IsSimdFlagType<_Flags>) const
4692  { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
4693 
4694  // }}}
4695  // scalar access {{{
4696  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
4697  operator[](size_t __i)
4698  {
4699  if (__i >= size())
4700  __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4701  return {_M_data, int(__i)};
4702  }
4703 
4704  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
4705  operator[](size_t __i) const
4706  {
4707  if (__i >= size())
4708  __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
4709  if constexpr (__is_scalar_abi<_Abi>())
4710  return _M_data;
4711  else
4712  return static_cast<bool>(_M_data[__i]);
4713  }
4714 
4715  // }}}
4716  // negation {{{
4717  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd_mask
4718  operator!() const
4719  { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4720 
4721  // }}}
4722  // simd_mask binary operators [simd_mask.binary] {{{
4723  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4724  // simd_mask<int> && simd_mask<uint> needs disambiguation
4725  template <typename _Up, typename _A2,
4726  typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4727  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4728  operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4729  {
4730  return {__private_init,
4731  _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
4732  }
4733 
4734  template <typename _Up, typename _A2,
4735  typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4736  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4737  operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4738  {
4739  return {__private_init,
4740  _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
4741  }
4742  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4743 
4744  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4745  operator&&(const simd_mask& __x, const simd_mask& __y)
4746  { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; }
4747 
4748  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4749  operator||(const simd_mask& __x, const simd_mask& __y)
4750  { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; }
4751 
4752  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4753  operator&(const simd_mask& __x, const simd_mask& __y)
4754  { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4755 
4756  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4757  operator|(const simd_mask& __x, const simd_mask& __y)
4758  { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4759 
4760  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4761  operator^(const simd_mask& __x, const simd_mask& __y)
4762  { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4763 
4764  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4765  operator&=(simd_mask& __x, const simd_mask& __y)
4766  {
4767  __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4768  return __x;
4769  }
4770 
4771  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4772  operator|=(simd_mask& __x, const simd_mask& __y)
4773  {
4774  __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4775  return __x;
4776  }
4777 
4778  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4779  operator^=(simd_mask& __x, const simd_mask& __y)
4780  {
4781  __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4782  return __x;
4783  }
4784 
4785  // }}}
4786  // simd_mask compares [simd_mask.comparison] {{{
4787  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4788  operator==(const simd_mask& __x, const simd_mask& __y)
4789  { return !operator!=(__x, __y); }
4790 
4791  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4792  operator!=(const simd_mask& __x, const simd_mask& __y)
4793  { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4794 
4795  // }}}
4796  // private_init ctor {{{
4797  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4798  simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4799  : _M_data(__init) {}
4800 
4801  // }}}
4802  // private_init generator ctor {{{
4803  template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
4804  _GLIBCXX_SIMD_INTRINSIC constexpr
4805  simd_mask(_PrivateInit, _Fp&& __gen)
4806  : _M_data()
4807  {
4808  __execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4809  _Impl::_S_set(_M_data, __i, __gen(__i));
4810  });
4811  }
4812 
4813  // }}}
4814  // bitset_init ctor {{{
4815  _GLIBCXX_SIMD_INTRINSIC constexpr
4816  simd_mask(_BitsetInit, bitset<size()> __init)
4817  : _M_data(_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
4818  {}
4819 
4820  // }}}
4821  // __cvt {{{
4822  // TS_FEEDBACK:
4823  // The conversion operator this implements should be a ctor on simd_mask.
4824  // Once .__cvt() is called on a simd_mask, the result converts implicitly.
4825  // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
4826  struct _CvtProxy
4827  {
4828  template <typename _Up, typename _A2,
4829  typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
4830  _GLIBCXX_SIMD_ALWAYS_INLINE
4831  operator simd_mask<_Up, _A2>() &&
4832  {
4833  using namespace std::experimental::__proposed;
4834  return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
4835  }
4836 
4837  const simd_mask<_Tp, _Abi>& _M_data;
4838  };
4839 
4840  _GLIBCXX_SIMD_INTRINSIC _CvtProxy
4841  __cvt() const
4842  { return {*this}; }
4843 
4844  // }}}
4845  // operator?: overloads (suggested extension) {{{
4846  #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
4847  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4848  operator?:(const simd_mask& __k, const simd_mask& __where_true,
4849  const simd_mask& __where_false)
4850  {
4851  auto __ret = __where_false;
4852  _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
4853  return __ret;
4854  }
4855 
4856  template <typename _U1, typename _U2,
4857  typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
4858  typename = enable_if_t<conjunction_v<
4859  is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
4860  is_convertible<simd_mask, typename _Rp::mask_type>>>>
4861  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
4862  operator?:(const simd_mask& __k, const _U1& __where_true,
4863  const _U2& __where_false)
4864  {
4865  _Rp __ret = __where_false;
4866  _Rp::_Impl::_S_masked_assign(
4867  __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
4868  __data(static_cast<_Rp>(__where_true)));
4869  return __ret;
4870  }
4871 
4872  #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4873  template <typename _Kp, typename _Ak, typename _Up, typename _Au,
4874  typename = enable_if_t<
4875  conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
4876  is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
4877  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4878  operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
4879  const simd_mask<_Up, _Au>& __where_false)
4880  {
4881  simd_mask __ret = __where_false;
4882  _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
4883  __where_true._M_data);
4884  return __ret;
4885  }
4886  #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4887  #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
4888 
4889  // }}}
4890  // _M_is_constprop {{{
4891  _GLIBCXX_SIMD_INTRINSIC constexpr bool
4892  _M_is_constprop() const
4893  {
4894  if constexpr (__is_scalar_abi<_Abi>())
4895  return __builtin_constant_p(_M_data);
4896  else
4897  return _M_data._M_is_constprop();
4898  }
4899 
4900  // }}}
4901 
4902  private:
4903  friend const auto& __data<_Tp, abi_type>(const simd_mask&);
4904  friend auto& __data<_Tp, abi_type>(simd_mask&);
4905  alignas(_Traits::_S_mask_align) _MemberType _M_data;
4906  };
4907 
4908 // }}}
4909 
4910 /// @cond undocumented
4911 // __data(simd_mask) {{{
4912 template <typename _Tp, typename _Ap>
4913  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
4914  __data(const simd_mask<_Tp, _Ap>& __x)
4915  { return __x._M_data; }
4916 
4917 template <typename _Tp, typename _Ap>
4918  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
4919  __data(simd_mask<_Tp, _Ap>& __x)
4920  { return __x._M_data; }
4921 
4922 // }}}
4923 /// @endcond
4924 
4925 // simd_mask reductions [simd_mask.reductions] {{{
4926 template <typename _Tp, typename _Abi>
4927  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4928  all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4929  {
4930  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4931  {
4932  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4933  if (!__k[__i])
4934  return false;
4935  return true;
4936  }
4937  else
4938  return _Abi::_MaskImpl::_S_all_of(__k);
4939  }
4940 
4941 template <typename _Tp, typename _Abi>
4942  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4943  any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4944  {
4945  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4946  {
4947  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4948  if (__k[__i])
4949  return true;
4950  return false;
4951  }
4952  else
4953  return _Abi::_MaskImpl::_S_any_of(__k);
4954  }
4955 
4956 template <typename _Tp, typename _Abi>
4957  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4958  none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4959  {
4960  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4961  {
4962  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4963  if (__k[__i])
4964  return false;
4965  return true;
4966  }
4967  else
4968  return _Abi::_MaskImpl::_S_none_of(__k);
4969  }
4970 
4971 template <typename _Tp, typename _Abi>
4972  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4973  some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4974  {
4975  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4976  {
4977  for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
4978  if (__k[__i] != __k[__i - 1])
4979  return true;
4980  return false;
4981  }
4982  else
4983  return _Abi::_MaskImpl::_S_some_of(__k);
4984  }
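
 // Example (illustrative): for a mask k the four boolean reductions relate as
 // follows; note that some_of(k) is false for both the all-true and the
 // all-false mask, which is why the constant-evaluated branch above only needs
 // to compare adjacent elements:
 //   all_of(k)  - every element of k is true
 //   any_of(k)  - at least one element of k is true
 //   none_of(k) - no element of k is true (equivalent to !any_of(k))
 //   some_of(k) - at least one, but not all, elements of k are true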
4985 
4986 template <typename _Tp, typename _Abi>
4987  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4988  popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
4989  {
4990  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4991  {
4992  const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
4993  __k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4994  return ((__elements != 0) + ...);
4995  });
4996  if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
4997  return __r;
4998  }
4999  return _Abi::_MaskImpl::_S_popcount(__k);
5000  }
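
 // Example (illustrative): popcount returns how many elements of the mask are
 // true, e.g. counting the elements that satisfy a predicate:
 //   simd<int> x([](auto i) { return int(i); }); // {0, 1, 2, ...}
 //   int n = popcount(x > 0);                    // == int(x.size()) - 1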
5001 
5002 template <typename _Tp, typename _Abi>
5003  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5004  find_first_set(const simd_mask<_Tp, _Abi>& __k)
5005  {
5006  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5007  {
5008  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5009  const size_t _Idx = __call_with_n_evaluations<_Np>(
5010  [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5011  return std::min({__indexes...});
5012  }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5013  return __k[__i] ? +__i : _Np;
5014  });
5015  if (_Idx >= _Np)
5016  __invoke_ub("find_first_set(empty mask) is UB");
5017  if (__builtin_constant_p(_Idx))
5018  return _Idx;
5019  }
5020  return _Abi::_MaskImpl::_S_find_first_set(__k);
5021  }
5022 
5023 template <typename _Tp, typename _Abi>
5024  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5025  find_last_set(const simd_mask<_Tp, _Abi>& __k)
5026  {
5027  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5028  {
5029  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5030  const int _Idx = __call_with_n_evaluations<_Np>(
5031  [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5032  return std::max({__indexes...});
5033  }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5034  return __k[__i] ? int(__i) : -1;
5035  });
5036  if (_Idx < 0)
5037  __invoke_ub("find_first_set(empty mask) is UB");
5038  if (__builtin_constant_p(_Idx))
5039  return _Idx;
5040  }
5041  return _Abi::_MaskImpl::_S_find_last_set(__k);
5042  }
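
 // Example (illustrative): both functions have the precondition that at least
 // one element is true; with an all-false mask the behavior is undefined
 // (diagnosed via __invoke_ub when _GLIBCXX_DEBUG_UB is defined):
 //   if (any_of(k))
 //     {
 //       const int first = find_first_set(k); // lowest i with k[i] == true
 //       const int last  = find_last_set(k);  // highest i with k[i] == true
 //     }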
5043 
5044 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5045 all_of(_ExactBool __x) noexcept
5046 { return __x; }
5047 
5048 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5049 any_of(_ExactBool __x) noexcept
5050 { return __x; }
5051 
5052 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5053 none_of(_ExactBool __x) noexcept
5054 { return !__x; }
5055 
5056 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5057 some_of(_ExactBool) noexcept
5058 { return false; }
5059 
5060 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5061 popcount(_ExactBool __x) noexcept
5062 { return __x; }
5063 
5064 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5065 find_first_set(_ExactBool)
5066 { return 0; }
5067 
5068 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5069 find_last_set(_ExactBool)
5070 { return 0; }
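
// Example (illustrative): the _ExactBool overloads accept exactly bool (no
// implicit conversions), so generic code that reduces either a simd_mask or a
// plain bool compiles with both:
//   template <typename M>
//     bool check(const M& k)
//     { return all_of(k); } // works for simd_mask<T, A> and for bool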
5071 
5072 // }}}
5073 
5074 /// @cond undocumented
5075 // _SimdIntOperators{{{1
5076 template <typename _V, typename _Tp, typename _Abi, bool>
5077  class _SimdIntOperators {};
5078 
5079 template <typename _V, typename _Tp, typename _Abi>
5080  class _SimdIntOperators<_V, _Tp, _Abi, true>
5081  {
5082  using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;
5083 
5084  _GLIBCXX_SIMD_INTRINSIC constexpr const _V&
5085  __derived() const
5086  { return *static_cast<const _V*>(this); }
5087 
5088  template <typename _Up>
5089  _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
5090  _S_make_derived(_Up&& __d)
5091  { return {__private_init, static_cast<_Up&&>(__d)}; }
5092 
5093  public:
5094  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5095  _V&
5096  operator%=(_V& __lhs, const _V& __x)
5097  { return __lhs = __lhs % __x; }
5098 
5099  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5100  _V&
5101  operator&=(_V& __lhs, const _V& __x)
5102  { return __lhs = __lhs & __x; }
5103 
5104  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5105  _V&
5106  operator|=(_V& __lhs, const _V& __x)
5107  { return __lhs = __lhs | __x; }
5108 
5109  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5110  _V&
5111  operator^=(_V& __lhs, const _V& __x)
5112  { return __lhs = __lhs ^ __x; }
5113 
5114  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5115  _V&
5116  operator<<=(_V& __lhs, const _V& __x)
5117  { return __lhs = __lhs << __x; }
5118 
5119  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5120  _V&
5121  operator>>=(_V& __lhs, const _V& __x)
5122  { return __lhs = __lhs >> __x; }
5123 
5124  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5125  _V&
5126  operator<<=(_V& __lhs, int __x)
5127  { return __lhs = __lhs << __x; }
5128 
5129  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5130  _V&
5131  operator>>=(_V& __lhs, int __x)
5132  { return __lhs = __lhs >> __x; }
5133 
5134  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5135  _V
5136  operator%(const _V& __x, const _V& __y)
5137  {
5138  return _SimdIntOperators::_S_make_derived(
5139  _Impl::_S_modulus(__data(__x), __data(__y)));
5140  }
5141 
5142  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5143  _V
5144  operator&(const _V& __x, const _V& __y)
5145  {
5146  return _SimdIntOperators::_S_make_derived(
5147  _Impl::_S_bit_and(__data(__x), __data(__y)));
5148  }
5149 
5150  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5151  _V
5152  operator|(const _V& __x, const _V& __y)
5153  {
5154  return _SimdIntOperators::_S_make_derived(
5155  _Impl::_S_bit_or(__data(__x), __data(__y)));
5156  }
5157 
5158  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5159  _V
5160  operator^(const _V& __x, const _V& __y)
5161  {
5162  return _SimdIntOperators::_S_make_derived(
5163  _Impl::_S_bit_xor(__data(__x), __data(__y)));
5164  }
5165 
5166  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5167  _V
5168  operator<<(const _V& __x, const _V& __y)
5169  {
5170  return _SimdIntOperators::_S_make_derived(
5171  _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
5172  }
5173 
5174  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5175  _V
5176  operator>>(const _V& __x, const _V& __y)
5177  {
5178  return _SimdIntOperators::_S_make_derived(
5179  _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
5180  }
5181 
5182  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5183  _V
5184  operator<<(const _V& __x, int __y)
5185  {
5186  if (__y < 0)
5187  __invoke_ub("The behavior is undefined if the right operand of a "
5188  "shift operation is negative. [expr.shift]\nA shift by "
5189  "%d was requested",
5190  __y);
5191  if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5192  __invoke_ub(
5193  "The behavior is undefined if the right operand of a "
5194  "shift operation is greater than or equal to the width of the "
5195  "promoted left operand. [expr.shift]\nA shift by %d was requested",
5196  __y);
5197  return _SimdIntOperators::_S_make_derived(
5198  _Impl::_S_bit_shift_left(__data(__x), __y));
5199  }
5200 
5201  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5202  _V
5203  operator>>(const _V& __x, int __y)
5204  {
5205  if (__y < 0)
5206  __invoke_ub(
5207  "The behavior is undefined if the right operand of a shift "
5208  "operation is negative. [expr.shift]\nA shift by %d was requested",
5209  __y);
5210  if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5211  __invoke_ub(
5212  "The behavior is undefined if the right operand of a shift "
5213  "operation is greater than or equal to the width of the promoted "
5214  "left operand. [expr.shift]\nA shift by %d was requested",
5215  __y);
5216  return _SimdIntOperators::_S_make_derived(
5217  _Impl::_S_bit_shift_right(__data(__x), __y));
5218  }
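
 // Example (illustrative): the scalar shift count is subject to the same
 // preconditions as a built-in shift ([expr.shift]); it must be non-negative
 // and less than the width of the promoted element type:
 //   simd<short> v = 1;
 //   v << 20; // OK: short promotes to int, and 20 < 32
 //   v << 40; // undefined (diagnosed when _GLIBCXX_DEBUG_UB is defined)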
5219 
5220  // unary operators (for integral _Tp)
5221  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5222  _V
5223  operator~() const
5224  { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
5225  };
5226 
5227 //}}}1
5228 /// @endcond
5229 
5230 // simd {{{
5231 template <typename _Tp, typename _Abi>
5232  class simd : public _SimdIntOperators<
5233  simd<_Tp, _Abi>, _Tp, _Abi,
5234  conjunction<is_integral<_Tp>,
5235  typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
5236  public _SimdTraits<_Tp, _Abi>::_SimdBase
5237  {
5238  using _Traits = _SimdTraits<_Tp, _Abi>;
5239  using _MemberType = typename _Traits::_SimdMember;
5240  using _CastType = typename _Traits::_SimdCastType;
5241  static constexpr _Tp* _S_type_tag = nullptr;
5242  friend typename _Traits::_SimdBase;
5243 
5244  public:
5245  using _Impl = typename _Traits::_SimdImpl;
5246  friend _Impl;
5247  friend _SimdIntOperators<simd, _Tp, _Abi, true>;
5248 
5249  using value_type = _Tp;
5250  using reference = _SmartReference<_MemberType, _Impl, value_type>;
5251  using mask_type = simd_mask<_Tp, _Abi>;
5252  using abi_type = _Abi;
5253 
5254  static constexpr size_t size()
5255  { return __size_or_zero_v<_Tp, _Abi>; }
5256 
5257  _GLIBCXX_SIMD_CONSTEXPR simd() = default;
5258  _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
5259  _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
5260  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
5261  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
5262 
5263  // implicit broadcast constructor
5264  template <typename _Up,
5265  typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
5266  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5267  simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
5268  : _M_data(
5269  _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
5270  {}
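
 // Example (illustrative): broadcasting accepts value-preserving conversions,
 // plus int with any value_type (and unsigned int with unsigned value_type):
 //   simd<double> a = 1.f; // OK: float -> double preserves all values
 //   simd<float>  b = 1;   // OK: int is always accepted
 //   simd<float>  c = 1.0; // ill-formed: double -> float may lose information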
5271 
5272  // implicit type conversion constructor (convert from fixed_size to
5273  // fixed_size)
5274  template <typename _Up>
5275  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5276  simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
5277  enable_if_t<
5278  conjunction<
5279  is_same<simd_abi::fixed_size<size()>, abi_type>,
5280  negation<__is_narrowing_conversion<_Up, value_type>>,
5281  __converts_to_higher_integer_rank<_Up, value_type>>::value,
5282  void*> = nullptr)
5283  : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
5284 
5285  // explicit type conversion constructor
5286 #ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5287  template <typename _Up, typename _A2,
5288  typename = decltype(static_simd_cast<simd>(
5289  declval<const simd<_Up, _A2>&>()))>
5290  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5291  simd(const simd<_Up, _A2>& __x)
5292  : simd(static_simd_cast<simd>(__x)) {}
5293 #endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5294 
5295  // generator constructor
5296  template <typename _Fp>
5297  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5298  simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
5299  declval<_SizeConstant<0>&>())),
5300  value_type>* = nullptr)
5301  : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
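
 // Example (illustrative): the generator is invoked with
 // integral_constant<size_t, i> for every element index i:
 //   simd<int> iota([](auto i) { return int(i); }); // {0, 1, 2, ...}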
5302 
5303  // load constructor
5304  template <typename _Up, typename _Flags>
5305  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5306  simd(const _Up* __mem, _IsSimdFlagType<_Flags>)
5307  : _M_data(
5308  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
5309  {}
5310 
5311  // loads [simd.load]
5312  template <typename _Up, typename _Flags>
5313  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5314  copy_from(const _Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>)
5315  {
5316  _M_data = static_cast<decltype(_M_data)>(
5317  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
5318  }
5319 
5320  // stores [simd.store]
5321  template <typename _Up, typename _Flags>
5322  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5323  copy_to(_Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) const
5324  {
5325  _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
5326  _S_type_tag);
5327  }
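
 // Example (illustrative, with stdx assumed to alias std::experimental): the
 // pointer must satisfy the alignment the flag promises; memory_alignment_v
 // yields the value vector_aligned requires:
 //   alignas(stdx::memory_alignment_v<stdx::native_simd<float>>)
 //     float buf[stdx::native_simd<float>::size()] = {};
 //   stdx::native_simd<float> v(buf, stdx::vector_aligned); // aligned load
 //   v += 1.f;
 //   v.copy_to(buf, stdx::element_aligned); // store, no alignment assumed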
5328 
5329  // scalar access
5330  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
5331  operator[](size_t __i)
5332  { return {_M_data, int(__i)}; }
5333 
5334  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
5335  operator[]([[maybe_unused]] size_t __i) const
5336  {
5337  if constexpr (__is_scalar_abi<_Abi>())
5338  {
5339  _GLIBCXX_DEBUG_ASSERT(__i == 0);
5340  return _M_data;
5341  }
5342  else
5343  return _M_data[__i];
5344  }
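
 // Example (illustrative): the non-const subscript returns a _SmartReference
 // that writes back into the simd object on assignment; the const subscript
 // returns the element by value:
 //   simd<int> v = 0;
 //   v[0] = 42;           // modifies element 0
 //   int x = v[0] + v[1]; // reads 42 + 0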
5345 
5346  // increment and decrement:
5347  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5348  operator++()
5349  {
5350  _Impl::_S_increment(_M_data);
5351  return *this;
5352  }
5353 
5354  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5355  operator++(int)
5356  {
5357  simd __r = *this;
5358  _Impl::_S_increment(_M_data);
5359  return __r;
5360  }
5361 
5362  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5363  operator--()
5364  {
5365  _Impl::_S_decrement(_M_data);
5366  return *this;
5367  }
5368 
5369  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5370  operator--(int)
5371  {
5372  simd __r = *this;
5373  _Impl::_S_decrement(_M_data);
5374  return __r;
5375  }
5376 
5377  // unary operators (for any _Tp)
5378  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
5379  operator!() const
5380  { return {__private_init, _Impl::_S_negate(_M_data)}; }
5381 
5382  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5383  operator+() const
5384  { return *this; }
5385 
5386  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5387  operator-() const
5388  { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
5389 
5390  // access to internal representation (suggested extension)
5391  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5392  simd(_CastType __init) : _M_data(__init) {}
5393 
5394  // compound assignment [simd.cassign]
5395  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5396  operator+=(simd& __lhs, const simd& __x)
5397  { return __lhs = __lhs + __x; }
5398 
5399  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5400  operator-=(simd& __lhs, const simd& __x)
5401  { return __lhs = __lhs - __x; }
5402 
5403  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5404  operator*=(simd& __lhs, const simd& __x)
5405  { return __lhs = __lhs * __x; }
5406 
5407  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5408  operator/=(simd& __lhs, const simd& __x)
5409  { return __lhs = __lhs / __x; }
5410 
5411  // binary operators [simd.binary]
5412  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5413  operator+(const simd& __x, const simd& __y)
5414  { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
5415 
5416  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5417  operator-(const simd& __x, const simd& __y)
5418  { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
5419 
5420  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5421  operator*(const simd& __x, const simd& __y)
5422  { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
5423 
5424  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5425  operator/(const simd& __x, const simd& __y)
5426  { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
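
 // Example (illustrative): all arithmetic is element-wise; scalars on either
 // side participate via the implicit broadcast constructor:
 //   simd<float> y = a * x + 1.f; // 1.f broadcasts to simd<float>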
5427 
5428  // compares [simd.comparison]
5429  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5430  operator==(const simd& __x, const simd& __y)
5431  { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }
5432 
5433  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5434  operator!=(const simd& __x, const simd& __y)
5435  {
5436  return simd::_S_make_mask(
5437  _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
5438  }
5439 
5440  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5441  operator<(const simd& __x, const simd& __y)
5442  { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }
5443 
5444  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5445  operator<=(const simd& __x, const simd& __y)
5446  {
5447  return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
5448  }
5449 
5450  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5451  operator>(const simd& __x, const simd& __y)
5452  { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }
5453 
5454  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5455  operator>=(const simd& __x, const simd& __y)
5456  {
5457  return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
5458  }
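
 // Example (illustrative): comparisons yield a mask_type, not bool; use the
 // [simd_mask.reductions] functions above to collapse it:
 //   if (all_of(a == b)) { /* every element compares equal */ }
 //   where(a > b, a) = b; // element-wise: a = min(a, b)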
5459 
5460  // operator?: overloads (suggested extension) {{{
5461 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
5462  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5463  operator?:(const mask_type& __k, const simd& __where_true,
5464  const simd& __where_false)
5465  {
5466  auto __ret = __where_false;
5467  _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
5468  return __ret;
5469  }
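
  // Example (illustrative; like the simd_mask overload, this requires a
  // compiler defining __GXX_CONDITIONAL_IS_OVERLOADABLE__):
  //   simd<float> m = (a < b) ? a : b; // element-wise minimum of a and b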
5470 
5471 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
5472  // }}}
5473 
5474  // "private" because of the first arguments's namespace
5475  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5476  simd(_PrivateInit, const _MemberType& __init)
5477  : _M_data(__init) {}
5478 
5479  // "private" because of the first arguments's namespace
5480  _GLIBCXX_SIMD_INTRINSIC
5481  simd(_BitsetInit, bitset<size()> __init) : _M_data()
5482  { where(mask_type(__bitset_init, __init), *this) = ~*this; }
5483 
5484  _GLIBCXX_SIMD_INTRINSIC constexpr bool
5485  _M_is_constprop() const
5486  {
5487  if constexpr (__is_scalar_abi<_Abi>())
5488  return __builtin_constant_p(_M_data);
5489  else
5490  return _M_data._M_is_constprop();
5491  }
5492 
5493  private:
5494  _GLIBCXX_SIMD_INTRINSIC static constexpr mask_type
5495  _S_make_mask(typename mask_type::_MemberType __k)
5496  { return {__private_init, __k}; }
5497 
5498  friend const auto& __data<value_type, abi_type>(const simd&);
5499  friend auto& __data<value_type, abi_type>(simd&);
5500  alignas(_Traits::_S_simd_align) _MemberType _M_data;
5501  };
5502 
5503 // }}}
5504 /// @cond undocumented
5505 // __data {{{
5506 template <typename _Tp, typename _Ap>
5507  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5508  __data(const simd<_Tp, _Ap>& __x)
5509  { return __x._M_data; }
5510 
5511 template <typename _Tp, typename _Ap>
5512  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5513  __data(simd<_Tp, _Ap>& __x)
5514  { return __x._M_data; }
5515 
5516 // }}}
5517 namespace __float_bitwise_operators { //{{{
5518 template <typename _Tp, typename _Ap>
5519  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5520  operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5521  { return {__private_init, _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))}; }
5522 
5523 template <typename _Tp, typename _Ap>
5524  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5525  operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5526  { return {__private_init, _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))}; }
5527 
5528 template <typename _Tp, typename _Ap>
5529  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5530  operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5531  { return {__private_init, _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))}; }
5532 
5533 template <typename _Tp, typename _Ap>
5534  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5535  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
5536  operator~(const simd<_Tp, _Ap>& __a)
5537  { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; }
5538 } // namespace __float_bitwise_operators }}}
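
// Example (illustrative): these operators act on the bit representation of
// floating-point elements, which enables sign-bit manipulation:
//   using namespace __float_bitwise_operators;
//   simd<float> negated = x ^ simd<float>(-0.f); // flip every sign bit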
5539 /// @endcond
5540 
5541 /// @}
5542 _GLIBCXX_SIMD_END_NAMESPACE
5543 
5544 #endif // __cplusplus >= 201703L
5545 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
5546 
5547 // vim: foldmethod=marker foldmarker={{{,}}}