LCOV - code coverage report
Current view: top level - /usr/lib/gcc/x86_64-linux-gnu/13/include - emmintrin.h (source / functions) Coverage Total Hit
Test: coverage.info Lines: 80.0 % 10 8
Test Date: 2025-10-15 21:43:52 Functions: - 0 0

            Line data    Source code
       1              : /* Copyright (C) 2003-2023 Free Software Foundation, Inc.
       2              : 
       3              :    This file is part of GCC.
       4              : 
       5              :    GCC is free software; you can redistribute it and/or modify
       6              :    it under the terms of the GNU General Public License as published by
       7              :    the Free Software Foundation; either version 3, or (at your option)
       8              :    any later version.
       9              : 
      10              :    GCC is distributed in the hope that it will be useful,
      11              :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      12              :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13              :    GNU General Public License for more details.
      14              : 
      15              :    Under Section 7 of GPL version 3, you are granted additional
      16              :    permissions described in the GCC Runtime Library Exception, version
      17              :    3.1, as published by the Free Software Foundation.
      18              : 
      19              :    You should have received a copy of the GNU General Public License and
      20              :    a copy of the GCC Runtime Library Exception along with this program;
      21              :    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      22              :    <http://www.gnu.org/licenses/>.  */
      23              : 
      24              : /* Implemented from the specification included in the Intel C++ Compiler
      25              :    User Guide and Reference, version 9.0.  */
      26              : 
      27              : #ifndef _EMMINTRIN_H_INCLUDED
      28              : #define _EMMINTRIN_H_INCLUDED
      29              : 
      30              : /* We need definitions from the SSE header files*/
      31              : #include <xmmintrin.h>
      32              : 
      33              : #ifndef __SSE2__
      34              : #pragma GCC push_options
      35              : #pragma GCC target("sse2")
      36              : #define __DISABLE_SSE2__
      37              : #endif /* __SSE2__ */
      38              : 
      39              : /* SSE2 */
      40              : typedef double __v2df __attribute__ ((__vector_size__ (16)));
      41              : typedef long long __v2di __attribute__ ((__vector_size__ (16)));
      42              : typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
      43              : typedef int __v4si __attribute__ ((__vector_size__ (16)));
      44              : typedef unsigned int __v4su __attribute__ ((__vector_size__ (16)));
      45              : typedef short __v8hi __attribute__ ((__vector_size__ (16)));
      46              : typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16)));
      47              : typedef char __v16qi __attribute__ ((__vector_size__ (16)));
      48              : typedef signed char __v16qs __attribute__ ((__vector_size__ (16)));
      49              : typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16)));
      50              : 
      51              : /* The Intel API is flexible enough that we must allow aliasing with other
      52              :    vector types, and their scalar components.  */
      53              : typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
      54              : typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
      55              : 
      56              : /* Unaligned version of the same types.  */
      57              : typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
      58              : typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
      59              : 
      60              : /* Create a selector for use with the SHUFPD instruction.  */
      61              : #define _MM_SHUFFLE2(fp1,fp0) \
      62              :  (((fp1) << 1) | (fp0))
      63              : 
      64              : /* Create a vector with element 0 as F and the rest zero.  */
      65              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      66              : _mm_set_sd (double __F)
      67              : {
      68              :   return __extension__ (__m128d){ __F, 0.0 };
      69              : }
      70              : 
      71              : /* Create a vector with both elements equal to F.  */
      72              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      73              : _mm_set1_pd (double __F)
      74              : {
      75              :   return __extension__ (__m128d){ __F, __F };
      76              : }
      77              : 
      78              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      79              : _mm_set_pd1 (double __F)
      80              : {
      81              :   return _mm_set1_pd (__F);
      82              : }
      83              : 
      84              : /* Create a vector with the lower value X and upper value W.  */
      85              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      86              : _mm_set_pd (double __W, double __X)
      87              : {
      88              :   return __extension__ (__m128d){ __X, __W };
      89              : }
      90              : 
      91              : /* Create a vector with the lower value W and upper value X.  */
      92              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      93              : _mm_setr_pd (double __W, double __X)
      94              : {
      95              :   return __extension__ (__m128d){ __W, __X };
      96              : }
      97              : 
      98              : /* Create an undefined vector.  */
      99              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     100              : _mm_undefined_pd (void)
     101              : {
     102              : #pragma GCC diagnostic push
     103              : #pragma GCC diagnostic ignored "-Winit-self"
     104              :   __m128d __Y = __Y;
     105              : #pragma GCC diagnostic pop
     106              :   return __Y;
     107              : }
     108              : 
     109              : /* Create a vector of zeros.  */
     110              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     111              : _mm_setzero_pd (void)
     112              : {
     113              :   return __extension__ (__m128d){ 0.0, 0.0 };
     114              : }
     115              : 
     116              : /* Sets the low DPFP value of A from the low value of B.  */
     117              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     118              : _mm_move_sd (__m128d __A, __m128d __B)
     119              : {
     120              :   return __extension__ (__m128d) __builtin_shuffle ((__v2df)__A, (__v2df)__B, (__v2di){2, 1});
     121              : }
     122              : 
     123              : /* Load two DPFP values from P.  The address must be 16-byte aligned.  */
     124              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     125              : _mm_load_pd (double const *__P)
     126              : {
     127              :   return *(__m128d *)__P;
     128              : }
     129              : 
     130              : /* Load two DPFP values from P.  The address need not be 16-byte aligned.  */
     131              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     132              : _mm_loadu_pd (double const *__P)
     133              : {
     134              :   return *(__m128d_u *)__P;
     135              : }
     136              : 
     137              : /* Create a vector with all two elements equal to *P.  */
     138              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     139              : _mm_load1_pd (double const *__P)
     140              : {
     141              :   return _mm_set1_pd (*__P);
     142              : }
     143              : 
     144              : /* Create a vector with element 0 as *P and the rest zero.  */
     145              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     146              : _mm_load_sd (double const *__P)
     147              : {
     148              :   return _mm_set_sd (*__P);
     149              : }
     150              : 
     151              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     152              : _mm_load_pd1 (double const *__P)
     153              : {
     154              :   return _mm_load1_pd (__P);
     155              : }
     156              : 
     157              : /* Load two DPFP values in reverse order.  The address must be aligned.  */
     158              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     159              : _mm_loadr_pd (double const *__P)
     160              : {
     161              :   __m128d __tmp = _mm_load_pd (__P);
     162              :   return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
     163              : }
     164              : 
     165              : /* Store two DPFP values.  The address must be 16-byte aligned.  */
     166              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     167              : _mm_store_pd (double *__P, __m128d __A)
     168              : {
     169              :   *(__m128d *)__P = __A;
     170              : }
     171              : 
     172              : /* Store two DPFP values.  The address need not be 16-byte aligned.  */
     173              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     174              : _mm_storeu_pd (double *__P, __m128d __A)
     175              : {
     176              :   *(__m128d_u *)__P = __A;
     177              : }
     178              : 
     179              : /* Stores the lower DPFP value.  */
     180              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     181              : _mm_store_sd (double *__P, __m128d __A)
     182              : {
     183              :   *__P = ((__v2df)__A)[0];
     184              : }
     185              : 
     186              : extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     187              : _mm_cvtsd_f64 (__m128d __A)
     188              : {
     189              :   return ((__v2df)__A)[0];
     190              : }
     191              : 
     192              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     193              : _mm_storel_pd (double *__P, __m128d __A)
     194              : {
     195              :   _mm_store_sd (__P, __A);
     196              : }
     197              : 
     198              : /* Stores the upper DPFP value.  */
     199              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     200              : _mm_storeh_pd (double *__P, __m128d __A)
     201              : {
     202              :   *__P = ((__v2df)__A)[1];
     203              : }
     204              : 
     205              : /* Store the lower DPFP value across two words.
     206              :    The address must be 16-byte aligned.  */
     207              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     208              : _mm_store1_pd (double *__P, __m128d __A)
     209              : {
     210              :   _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
     211              : }
     212              : 
     213              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     214              : _mm_store_pd1 (double *__P, __m128d __A)
     215              : {
     216              :   _mm_store1_pd (__P, __A);
     217              : }
     218              : 
     219              : /* Store two DPFP values in reverse order.  The address must be aligned.  */
     220              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     221              : _mm_storer_pd (double *__P, __m128d __A)
     222              : {
     223              :   _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
     224              : }
     225              : 
     226              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     227              : _mm_cvtsi128_si32 (__m128i __A)
     228              : {
     229              :   return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
     230              : }
     231              : 
     232              : #ifdef __x86_64__
     233              : /* Intel intrinsic.  */
     234              : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     235              : _mm_cvtsi128_si64 (__m128i __A)
     236              : {
     237              :   return ((__v2di)__A)[0];
     238              : }
     239              : 
     240              : /* Microsoft intrinsic.  */
     241              : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     242              : _mm_cvtsi128_si64x (__m128i __A)
     243              : {
     244              :   return ((__v2di)__A)[0];
     245              : }
     246              : #endif
     247              : 
     248              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     249              : _mm_add_pd (__m128d __A, __m128d __B)
     250              : {
     251              :   return (__m128d) ((__v2df)__A + (__v2df)__B);
     252              : }
     253              : 
     254              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     255              : _mm_add_sd (__m128d __A, __m128d __B)
     256              : {
     257              :   return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
     258              : }
     259              : 
     260              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     261              : _mm_sub_pd (__m128d __A, __m128d __B)
     262              : {
     263              :   return (__m128d) ((__v2df)__A - (__v2df)__B);
     264              : }
     265              : 
     266              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     267              : _mm_sub_sd (__m128d __A, __m128d __B)
     268              : {
     269              :   return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
     270              : }
     271              : 
     272              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     273              : _mm_mul_pd (__m128d __A, __m128d __B)
     274              : {
     275              :   return (__m128d) ((__v2df)__A * (__v2df)__B);
     276              : }
     277              : 
     278              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     279              : _mm_mul_sd (__m128d __A, __m128d __B)
     280              : {
     281              :   return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
     282              : }
     283              : 
     284              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     285              : _mm_div_pd (__m128d __A, __m128d __B)
     286              : {
     287              :   return (__m128d) ((__v2df)__A / (__v2df)__B);
     288              : }
     289              : 
     290              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     291              : _mm_div_sd (__m128d __A, __m128d __B)
     292              : {
     293              :   return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
     294              : }
     295              : 
     296              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     297              : _mm_sqrt_pd (__m128d __A)
     298              : {
     299              :   return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
     300              : }
     301              : 
     302              : /* Return pair {sqrt (B[0]), A[1]}.  */
     303              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     304              : _mm_sqrt_sd (__m128d __A, __m128d __B)
     305              : {
     306              :   __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
     307              :   return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
     308              : }
     309              : 
     310              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     311              : _mm_min_pd (__m128d __A, __m128d __B)
     312              : {
     313              :   return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
     314              : }
     315              : 
     316              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     317              : _mm_min_sd (__m128d __A, __m128d __B)
     318              : {
     319              :   return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
     320              : }
     321              : 
     322              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     323              : _mm_max_pd (__m128d __A, __m128d __B)
     324              : {
     325              :   return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
     326              : }
     327              : 
     328              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     329              : _mm_max_sd (__m128d __A, __m128d __B)
     330              : {
     331              :   return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
     332              : }
     333              : 
     334              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     335              : _mm_and_pd (__m128d __A, __m128d __B)
     336              : {
     337              :   return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
     338              : }
     339              : 
     340              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     341              : _mm_andnot_pd (__m128d __A, __m128d __B)
     342              : {
     343              :   return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
     344              : }
     345              : 
     346              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     347              : _mm_or_pd (__m128d __A, __m128d __B)
     348              : {
     349              :   return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
     350              : }
     351              : 
     352              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     353              : _mm_xor_pd (__m128d __A, __m128d __B)
     354              : {
     355              :   return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
     356              : }
     357              : 
     358              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     359              : _mm_cmpeq_pd (__m128d __A, __m128d __B)
     360              : {
     361              :   return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
     362              : }
     363              : 
     364              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     365              : _mm_cmplt_pd (__m128d __A, __m128d __B)
     366              : {
     367              :   return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
     368              : }
     369              : 
     370              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     371              : _mm_cmple_pd (__m128d __A, __m128d __B)
     372              : {
     373              :   return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
     374              : }
     375              : 
     376              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     377              : _mm_cmpgt_pd (__m128d __A, __m128d __B)
     378              : {
     379              :   return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
     380              : }
     381              : 
     382              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     383              : _mm_cmpge_pd (__m128d __A, __m128d __B)
     384              : {
     385              :   return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
     386              : }
     387              : 
     388              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     389              : _mm_cmpneq_pd (__m128d __A, __m128d __B)
     390              : {
     391              :   return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
     392              : }
     393              : 
     394              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     395              : _mm_cmpnlt_pd (__m128d __A, __m128d __B)
     396              : {
     397              :   return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
     398              : }
     399              : 
     400              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     401              : _mm_cmpnle_pd (__m128d __A, __m128d __B)
     402              : {
     403              :   return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
     404              : }
     405              : 
     406              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     407              : _mm_cmpngt_pd (__m128d __A, __m128d __B)
     408              : {
     409              :   return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
     410              : }
     411              : 
     412              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     413              : _mm_cmpnge_pd (__m128d __A, __m128d __B)
     414              : {
     415              :   return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
     416              : }
     417              : 
     418              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     419              : _mm_cmpord_pd (__m128d __A, __m128d __B)
     420              : {
     421              :   return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
     422              : }
     423              : 
     424              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     425              : _mm_cmpunord_pd (__m128d __A, __m128d __B)
     426              : {
     427              :   return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
     428              : }
     429              : 
     430              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     431              : _mm_cmpeq_sd (__m128d __A, __m128d __B)
     432              : {
     433              :   return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
     434              : }
     435              : 
     436              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     437              : _mm_cmplt_sd (__m128d __A, __m128d __B)
     438              : {
     439              :   return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
     440              : }
     441              : 
     442              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     443              : _mm_cmple_sd (__m128d __A, __m128d __B)
     444              : {
     445              :   return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
     446              : }
     447              : 
     448              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     449              : _mm_cmpgt_sd (__m128d __A, __m128d __B)
     450              : {
     451              :   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
     452              :                                          (__v2df)
     453              :                                          __builtin_ia32_cmpltsd ((__v2df) __B,
     454              :                                                                  (__v2df)
     455              :                                                                  __A));
     456              : }
     457              : 
     458              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     459              : _mm_cmpge_sd (__m128d __A, __m128d __B)
     460              : {
     461              :   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
     462              :                                          (__v2df)
     463              :                                          __builtin_ia32_cmplesd ((__v2df) __B,
     464              :                                                                  (__v2df)
     465              :                                                                  __A));
     466              : }
     467              : 
     468              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     469              : _mm_cmpneq_sd (__m128d __A, __m128d __B)
     470              : {
     471              :   return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
     472              : }
     473              : 
     474              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     475              : _mm_cmpnlt_sd (__m128d __A, __m128d __B)
     476              : {
     477              :   return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
     478              : }
     479              : 
     480              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     481              : _mm_cmpnle_sd (__m128d __A, __m128d __B)
     482              : {
     483              :   return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
     484              : }
     485              : 
     486              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     487              : _mm_cmpngt_sd (__m128d __A, __m128d __B)
     488              : {
     489              :   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
     490              :                                          (__v2df)
     491              :                                          __builtin_ia32_cmpnltsd ((__v2df) __B,
     492              :                                                                   (__v2df)
     493              :                                                                   __A));
     494              : }
     495              : 
     496              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     497              : _mm_cmpnge_sd (__m128d __A, __m128d __B)
     498              : {
     499              :   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
     500              :                                          (__v2df)
     501              :                                          __builtin_ia32_cmpnlesd ((__v2df) __B,
     502              :                                                                   (__v2df)
     503              :                                                                   __A));
     504              : }
     505              : 
     506              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     507              : _mm_cmpord_sd (__m128d __A, __m128d __B)
     508              : {
     509              :   return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
     510              : }
     511              : 
     512              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     513              : _mm_cmpunord_sd (__m128d __A, __m128d __B)
     514              : {
     515              :   return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
     516              : }
     517              : 
     518              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     519              : _mm_comieq_sd (__m128d __A, __m128d __B)
     520              : {
     521              :   return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
     522              : }
     523              : 
     524              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     525              : _mm_comilt_sd (__m128d __A, __m128d __B)
     526              : {
     527              :   return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
     528              : }
     529              : 
     530              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     531              : _mm_comile_sd (__m128d __A, __m128d __B)
     532              : {
     533              :   return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
     534              : }
     535              : 
     536              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     537              : _mm_comigt_sd (__m128d __A, __m128d __B)
     538              : {
     539              :   return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
     540              : }
     541              : 
     542              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     543              : _mm_comige_sd (__m128d __A, __m128d __B)
     544              : {
     545              :   return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
     546              : }
     547              : 
     548              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     549              : _mm_comineq_sd (__m128d __A, __m128d __B)
     550              : {
     551              :   return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
     552              : }
     553              : 
     554              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     555              : _mm_ucomieq_sd (__m128d __A, __m128d __B)
     556              : {
     557              :   return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
     558              : }
     559              : 
     560              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     561              : _mm_ucomilt_sd (__m128d __A, __m128d __B)
     562              : {
     563              :   return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
     564              : }
     565              : 
     566              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     567              : _mm_ucomile_sd (__m128d __A, __m128d __B)
     568              : {
     569              :   return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
     570              : }
     571              : 
     572              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     573              : _mm_ucomigt_sd (__m128d __A, __m128d __B)
     574              : {
     575              :   return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
     576              : }
     577              : 
     578              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     579              : _mm_ucomige_sd (__m128d __A, __m128d __B)
     580              : {
     581              :   return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
     582              : }
     583              : 
     584              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     585              : _mm_ucomineq_sd (__m128d __A, __m128d __B)
     586              : {
     587              :   return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
     588              : }
     589              : 
     590              : /* Create a vector of Qi, where i is the element number.  */
     591              : 
     592              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     593              : _mm_set_epi64x (long long __q1, long long __q0)
     594              : {
     595            0 :   return __extension__ (__m128i)(__v2di){ __q0, __q1 };
     596              : }
     597              : 
     598              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     599              : _mm_set_epi64 (__m64 __q1,  __m64 __q0)
     600              : {
     601              :   return _mm_set_epi64x ((long long)__q1, (long long)__q0);
     602              : }
     603              : 
     604              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     605              : _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
     606              : {
     607          364 :   return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
     608              : }
     609              : 
     610              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     611              : _mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
     612              :                short __q3, short __q2, short __q1, short __q0)
     613              : {
     614              :   return __extension__ (__m128i)(__v8hi){
     615              :     __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
     616              : }
     617              : 
     618              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     619              : _mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
     620              :               char __q11, char __q10, char __q09, char __q08,
     621              :               char __q07, char __q06, char __q05, char __q04,
     622              :               char __q03, char __q02, char __q01, char __q00)
     623              : {
     624              :   return __extension__ (__m128i)(__v16qi){
     625              :     __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
     626              :     __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
     627              :   };
     628              : }
     629              : 
     630              : /* Set all of the elements of the vector to A.  */
     631              : 
     632              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     633              : _mm_set1_epi64x (long long __A)
     634              : {
     635              :   return _mm_set_epi64x (__A, __A);
     636              : }
     637              : 
     638              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     639              : _mm_set1_epi64 (__m64 __A)
     640              : {
     641              :   return _mm_set_epi64 (__A, __A);
     642              : }
     643              : 
     644              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     645              : _mm_set1_epi32 (int __A)
     646              : {
     647          364 :   return _mm_set_epi32 (__A, __A, __A, __A);
     648              : }
     649              : 
     650              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     651              : _mm_set1_epi16 (short __A)
     652              : {
     653              :   return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
     654              : }
     655              : 
     656              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     657              : _mm_set1_epi8 (char __A)
     658              : {
     659              :   return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
     660              :                        __A, __A, __A, __A, __A, __A, __A, __A);
     661              : }
     662              : 
     663              : /* Create a vector of Qi, where i is the element number.
     664              :    The parameter order is reversed from the _mm_set_epi* functions.  */
     665              : 
     666              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     667              : _mm_setr_epi64 (__m64 __q0, __m64 __q1)
     668              : {
     669              :   return _mm_set_epi64 (__q1, __q0);
     670              : }
     671              : 
     672              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     673              : _mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
     674              : {
     675              :   return _mm_set_epi32 (__q3, __q2, __q1, __q0);
     676              : }
     677              : 
     678              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     679              : _mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
     680              :                 short __q4, short __q5, short __q6, short __q7)
     681              : {
     682              :   return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
     683              : }
     684              : 
     685              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     686              : _mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
     687              :                char __q04, char __q05, char __q06, char __q07,
     688              :                char __q08, char __q09, char __q10, char __q11,
     689              :                char __q12, char __q13, char __q14, char __q15)
     690              : {
     691              :   return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
     692              :                        __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
     693              : }
     694              : 
     695              : /* Create a vector with element 0 as *P and the rest zero.  */
     696              : 
     697              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     698              : _mm_load_si128 (__m128i const *__P)
     699              : {
     700            0 :   return *__P;
     701              : }
     702              : 
     703              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     704              : _mm_loadu_si128 (__m128i_u const *__P)
     705              : {
     706        57824 :   return *__P;
     707              : }
     708              : 
     709              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     710              : _mm_loadl_epi64 (__m128i_u const *__P)
     711              : {
     712              :   return _mm_set_epi64 ((__m64)0LL, *(__m64_u *)__P);
     713              : }
     714              : 
     715              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     716              : _mm_loadu_si64 (void const *__P)
     717              : {
     718              :   return _mm_loadl_epi64 ((__m128i_u *)__P);
     719              : }
     720              : 
     721              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     722              : _mm_loadu_si32 (void const *__P)
     723              : {
     724              :   return _mm_set_epi32 (0, 0, 0, (*(__m32_u *)__P)[0]);
     725              : }
     726              : 
     727              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     728              : _mm_loadu_si16 (void const *__P)
     729              : {
     730              :   return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, (*(__m16_u *)__P)[0]);
     731              : }
     732              : 
     733              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     734              : _mm_store_si128 (__m128i *__P, __m128i __B)
     735              : {
     736              :   *__P = __B;
     737              : }
     738              : 
     739              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     740              : _mm_storeu_si128 (__m128i_u *__P, __m128i __B)
     741              : {
     742              :   *__P = __B;
     743              : }
     744              : 
     745              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     746              : _mm_storel_epi64 (__m128i_u *__P, __m128i __B)
     747              : {
     748              :   *(__m64_u *)__P = (__m64) ((__v2di)__B)[0];
     749              : }
     750              : 
     751              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     752              : _mm_storeu_si64 (void *__P, __m128i __B)
     753              : {
     754              :   _mm_storel_epi64 ((__m128i_u *)__P, __B);
     755              : }
     756              : 
     757              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     758              : _mm_storeu_si32 (void *__P, __m128i __B)
     759              : {
     760              :   *(__m32_u *)__P = (__m32) ((__v4si)__B)[0];
     761              : }
     762              : 
     763              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     764              : _mm_storeu_si16 (void *__P, __m128i __B)
     765              : {
     766              :   *(__m16_u *)__P = (__m16) ((__v8hi)__B)[0];
     767              : }
     768              : 
     769              : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     770              : _mm_movepi64_pi64 (__m128i __B)
     771              : {
     772              :   return (__m64) ((__v2di)__B)[0];
     773              : }
     774              : 
     775              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     776              : _mm_movpi64_epi64 (__m64 __A)
     777              : {
     778              :   return _mm_set_epi64 ((__m64)0LL, __A);
     779              : }
     780              : 
     781              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     782              : _mm_move_epi64 (__m128i __A)
     783              : {
     784              :   return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
     785              : }
     786              : 
     787              : /* Create an undefined vector.  */
     788              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     789              : _mm_undefined_si128 (void)
     790              : {
     791              : #pragma GCC diagnostic push
     792              : #pragma GCC diagnostic ignored "-Winit-self"
     793              :   __m128i __Y = __Y;
     794              : #pragma GCC diagnostic pop
     795              :   return __Y;
     796              : }
     797              : 
     798              : /* Create a vector of zeros.  */
     799              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     800              : _mm_setzero_si128 (void)
     801              : {
     802              :   return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
     803              : }
     804              : 
     805              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     806              : _mm_cvtepi32_pd (__m128i __A)
     807              : {
     808              :   return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
     809              : }
     810              : 
     811              : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     812              : _mm_cvtepi32_ps (__m128i __A)
     813              : {
     814              :   return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
     815              : }
     816              : 
     817              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     818              : _mm_cvtpd_epi32 (__m128d __A)
     819              : {
     820              :   return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
     821              : }
     822              : 
     823              : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     824              : _mm_cvtpd_pi32 (__m128d __A)
     825              : {
     826              :   return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
     827              : }
     828              : 
     829              : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     830              : _mm_cvtpd_ps (__m128d __A)
     831              : {
     832              :   return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
     833              : }
     834              : 
     835              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     836              : _mm_cvttpd_epi32 (__m128d __A)
     837              : {
     838              :   return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
     839              : }
     840              : 
     841              : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     842              : _mm_cvttpd_pi32 (__m128d __A)
     843              : {
     844              :   return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
     845              : }
     846              : 
     847              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     848              : _mm_cvtpi32_pd (__m64 __A)
     849              : {
     850              :   return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
     851              : }
     852              : 
     853              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     854              : _mm_cvtps_epi32 (__m128 __A)
     855              : {
     856              :   return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
     857              : }
     858              : 
     859              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     860              : _mm_cvttps_epi32 (__m128 __A)
     861              : {
     862              :   return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
     863              : }
     864              : 
     865              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     866              : _mm_cvtps_pd (__m128 __A)
     867              : {
     868              :   return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
     869              : }
     870              : 
     871              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     872              : _mm_cvtsd_si32 (__m128d __A)
     873              : {
     874              :   return __builtin_ia32_cvtsd2si ((__v2df) __A);
     875              : }
     876              : 
     877              : #ifdef __x86_64__
     878              : /* Intel intrinsic.  */
     879              : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     880              : _mm_cvtsd_si64 (__m128d __A)
     881              : {
     882              :   return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
     883              : }
     884              : 
     885              : /* Microsoft intrinsic.  */
     886              : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     887              : _mm_cvtsd_si64x (__m128d __A)
     888              : {
     889              :   return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
     890              : }
     891              : #endif
     892              : 
     893              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     894              : _mm_cvttsd_si32 (__m128d __A)
     895              : {
     896              :   return __builtin_ia32_cvttsd2si ((__v2df) __A);
     897              : }
     898              : 
     899              : #ifdef __x86_64__
     900              : /* Intel intrinsic.  */
     901              : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     902              : _mm_cvttsd_si64 (__m128d __A)
     903              : {
     904              :   return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
     905              : }
     906              : 
     907              : /* Microsoft intrinsic.  */
     908              : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     909              : _mm_cvttsd_si64x (__m128d __A)
     910              : {
     911              :   return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
     912              : }
     913              : #endif
     914              : 
     915              : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     916              : _mm_cvtsd_ss (__m128 __A, __m128d __B)
     917              : {
     918              :   return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
     919              : }
     920              : 
     921              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     922              : _mm_cvtsi32_sd (__m128d __A, int __B)
     923              : {
     924              :   return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
     925              : }
     926              : 
     927              : #ifdef __x86_64__
     928              : /* Intel intrinsic.  */
     929              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     930              : _mm_cvtsi64_sd (__m128d __A, long long __B)
     931              : {
     932              :   return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
     933              : }
     934              : 
     935              : /* Microsoft intrinsic.  */
     936              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     937              : _mm_cvtsi64x_sd (__m128d __A, long long __B)
     938              : {
     939              :   return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
     940              : }
     941              : #endif
     942              : 
     943              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     944              : _mm_cvtss_sd (__m128d __A, __m128 __B)
     945              : {
     946              :   return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
     947              : }
     948              : 
     949              : #ifdef __OPTIMIZE__
     950              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     951              : _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
     952              : {
     953              :   return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
     954              : }
     955              : #else
     956              : #define _mm_shuffle_pd(A, B, N)                                         \
     957              :   ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A),                \
     958              :                                    (__v2df)(__m128d)(B), (int)(N)))
     959              : #endif
     960              : 
     961              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     962              : _mm_unpackhi_pd (__m128d __A, __m128d __B)
     963              : {
     964              :   return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
     965              : }
     966              : 
     967              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     968              : _mm_unpacklo_pd (__m128d __A, __m128d __B)
     969              : {
     970              :   return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
     971              : }
     972              : 
     973              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     974              : _mm_loadh_pd (__m128d __A, double const *__B)
     975              : {
     976              :   return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
     977              : }
     978              : 
     979              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     980              : _mm_loadl_pd (__m128d __A, double const *__B)
     981              : {
     982              :   return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
     983              : }
     984              : 
     985              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     986              : _mm_movemask_pd (__m128d __A)
     987              : {
     988              :   return __builtin_ia32_movmskpd ((__v2df)__A);
     989              : }
     990              : 
     991              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     992              : _mm_packs_epi16 (__m128i __A, __m128i __B)
     993              : {
     994              :   return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
     995              : }
     996              : 
     997              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     998              : _mm_packs_epi32 (__m128i __A, __m128i __B)
     999              : {
    1000              :   return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
    1001              : }
    1002              : 
    1003              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1004              : _mm_packus_epi16 (__m128i __A, __m128i __B)
    1005              : {
    1006              :   return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
    1007              : }
    1008              : 
    1009              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1010              : _mm_unpackhi_epi8 (__m128i __A, __m128i __B)
    1011              : {
    1012              :   return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
    1013              : }
    1014              : 
    1015              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1016              : _mm_unpackhi_epi16 (__m128i __A, __m128i __B)
    1017              : {
    1018              :   return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
    1019              : }
    1020              : 
    1021              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1022              : _mm_unpackhi_epi32 (__m128i __A, __m128i __B)
    1023              : {
    1024              :   return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
    1025              : }
    1026              : 
    1027              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1028              : _mm_unpackhi_epi64 (__m128i __A, __m128i __B)
    1029              : {
    1030              :   return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
    1031              : }
    1032              : 
    1033              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1034              : _mm_unpacklo_epi8 (__m128i __A, __m128i __B)
    1035              : {
    1036              :   return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
    1037              : }
    1038              : 
    1039              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1040              : _mm_unpacklo_epi16 (__m128i __A, __m128i __B)
    1041              : {
    1042              :   return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
    1043              : }
    1044              : 
    1045              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1046              : _mm_unpacklo_epi32 (__m128i __A, __m128i __B)
    1047              : {
    1048              :   return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
    1049              : }
    1050              : 
    1051              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1052              : _mm_unpacklo_epi64 (__m128i __A, __m128i __B)
    1053              : {
    1054              :   return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
    1055              : }
    1056              : 
    1057              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1058              : _mm_add_epi8 (__m128i __A, __m128i __B)
    1059              : {
    1060              :   return (__m128i) ((__v16qu)__A + (__v16qu)__B);
    1061              : }
    1062              : 
    1063              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1064              : _mm_add_epi16 (__m128i __A, __m128i __B)
    1065              : {
    1066              :   return (__m128i) ((__v8hu)__A + (__v8hu)__B);
    1067              : }
    1068              : 
    1069              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1070              : _mm_add_epi32 (__m128i __A, __m128i __B)
    1071              : {
    1072              :   return (__m128i) ((__v4su)__A + (__v4su)__B);
    1073              : }
    1074              : 
    1075              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1076              : _mm_add_epi64 (__m128i __A, __m128i __B)
    1077              : {
    1078        57824 :   return (__m128i) ((__v2du)__A + (__v2du)__B);
    1079              : }
    1080              : 
    1081              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1082              : _mm_adds_epi8 (__m128i __A, __m128i __B)
    1083              : {
    1084              :   return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
    1085              : }
    1086              : 
    1087              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1088              : _mm_adds_epi16 (__m128i __A, __m128i __B)
    1089              : {
    1090              :   return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
    1091              : }
    1092              : 
    1093              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1094              : _mm_adds_epu8 (__m128i __A, __m128i __B)
    1095              : {
    1096              :   return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
    1097              : }
    1098              : 
    1099              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1100              : _mm_adds_epu16 (__m128i __A, __m128i __B)
    1101              : {
    1102              :   return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
    1103              : }
    1104              : 
    1105              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1106              : _mm_sub_epi8 (__m128i __A, __m128i __B)
    1107              : {
    1108              :   return (__m128i) ((__v16qu)__A - (__v16qu)__B);
    1109              : }
    1110              : 
    1111              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1112              : _mm_sub_epi16 (__m128i __A, __m128i __B)
    1113              : {
    1114              :   return (__m128i) ((__v8hu)__A - (__v8hu)__B);
    1115              : }
    1116              : 
    1117              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1118              : _mm_sub_epi32 (__m128i __A, __m128i __B)
    1119              : {
    1120              :   return (__m128i) ((__v4su)__A - (__v4su)__B);
    1121              : }
    1122              : 
    1123              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1124              : _mm_sub_epi64 (__m128i __A, __m128i __B)
    1125              : {
    1126              :   return (__m128i) ((__v2du)__A - (__v2du)__B);
    1127              : }
    1128              : 
    1129              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1130              : _mm_subs_epi8 (__m128i __A, __m128i __B)
    1131              : {
    1132              :   return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
    1133              : }
    1134              : 
    1135              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1136              : _mm_subs_epi16 (__m128i __A, __m128i __B)
    1137              : {
    1138              :   return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
    1139              : }
    1140              : 
    1141              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1142              : _mm_subs_epu8 (__m128i __A, __m128i __B)
    1143              : {
    1144              :   return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
    1145              : }
    1146              : 
    1147              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1148              : _mm_subs_epu16 (__m128i __A, __m128i __B)
    1149              : {
    1150              :   return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
    1151              : }
    1152              : 
    1153              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1154              : _mm_madd_epi16 (__m128i __A, __m128i __B)
    1155              : {
    1156              :   return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
    1157              : }
    1158              : 
    1159              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1160              : _mm_mulhi_epi16 (__m128i __A, __m128i __B)
    1161              : {
    1162              :   return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
    1163              : }
    1164              : 
    1165              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1166              : _mm_mullo_epi16 (__m128i __A, __m128i __B)
    1167              : {
    1168              :   return (__m128i) ((__v8hu)__A * (__v8hu)__B);
    1169              : }
    1170              : 
    1171              : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1172              : _mm_mul_su32 (__m64 __A, __m64 __B)
    1173              : {
    1174              :   return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
    1175              : }
    1176              : 
    1177              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1178              : _mm_mul_epu32 (__m128i __A, __m128i __B)
    1179              : {
    1180        29640 :   return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
    1181              : }
    1182              : 
    1183              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1184              : _mm_slli_epi16 (__m128i __A, int __B)
    1185              : {
    1186              :   return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
    1187              : }
    1188              : 
    1189              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1190              : _mm_slli_epi32 (__m128i __A, int __B)
    1191              : {
    1192              :   return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
    1193              : }
    1194              : 
    1195              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1196              : _mm_slli_epi64 (__m128i __A, int __B)
    1197              : {
    1198         1456 :   return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
    1199              : }
    1200              : 
    1201              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1202              : _mm_srai_epi16 (__m128i __A, int __B)
    1203              : {
    1204              :   return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
    1205              : }
    1206              : 
    1207              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1208              : _mm_srai_epi32 (__m128i __A, int __B)
    1209              : {
    1210              :   return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
    1211              : }
    1212              : 
    1213              : #ifdef __OPTIMIZE__
    1214              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1215              : _mm_bsrli_si128 (__m128i __A, const int __N)
    1216              : {
    1217              :   return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
    1218              : }
    1219              : 
    1220              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1221              : _mm_bslli_si128 (__m128i __A, const int __N)
    1222              : {
    1223              :   return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
    1224              : }
    1225              : 
    1226              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1227              : _mm_srli_si128 (__m128i __A, const int __N)
    1228              : {
    1229              :   return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
    1230              : }
    1231              : 
    1232              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1233              : _mm_slli_si128 (__m128i __A, const int __N)
    1234              : {
    1235              :   return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
    1236              : }
    1237              : #else
    1238              : #define _mm_bsrli_si128(A, N) \
    1239              :   ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
    1240              : #define _mm_bslli_si128(A, N) \
    1241              :   ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
    1242              : #define _mm_srli_si128(A, N) \
    1243              :   ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
    1244              : #define _mm_slli_si128(A, N) \
    1245              :   ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
    1246              : #endif
    1247              : 
    1248              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1249              : _mm_srli_epi16 (__m128i __A, int __B)
    1250              : {
    1251              :   return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
    1252              : }
    1253              : 
    1254              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1255              : _mm_srli_epi32 (__m128i __A, int __B)
    1256              : {
    1257              :   return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
    1258              : }
    1259              : 
    1260              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1261              : _mm_srli_epi64 (__m128i __A, int __B)
    1262              : {
    1263         1456 :   return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
    1264              : }
    1265              : 
    1266              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1267              : _mm_sll_epi16 (__m128i __A, __m128i __B)
    1268              : {
    1269              :   return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
    1270              : }
    1271              : 
    1272              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1273              : _mm_sll_epi32 (__m128i __A, __m128i __B)
    1274              : {
    1275              :   return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
    1276              : }
    1277              : 
    1278              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1279              : _mm_sll_epi64 (__m128i __A, __m128i __B)
    1280              : {
    1281              :   return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
    1282              : }
    1283              : 
    1284              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1285              : _mm_sra_epi16 (__m128i __A, __m128i __B)
    1286              : {
    1287              :   return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
    1288              : }
    1289              : 
    1290              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1291              : _mm_sra_epi32 (__m128i __A, __m128i __B)
    1292              : {
    1293              :   return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
    1294              : }
    1295              : 
    1296              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1297              : _mm_srl_epi16 (__m128i __A, __m128i __B)
    1298              : {
    1299              :   return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
    1300              : }
    1301              : 
    1302              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1303              : _mm_srl_epi32 (__m128i __A, __m128i __B)
    1304              : {
    1305              :   return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
    1306              : }
    1307              : 
    1308              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1309              : _mm_srl_epi64 (__m128i __A, __m128i __B)
    1310              : {
    1311              :   return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
    1312              : }
    1313              : 
    1314              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1315              : _mm_and_si128 (__m128i __A, __m128i __B)
    1316              : {
    1317              :   return (__m128i) ((__v2du)__A & (__v2du)__B);
    1318              : }
    1319              : 
    1320              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1321              : _mm_andnot_si128 (__m128i __A, __m128i __B)
    1322              : {
    1323              :   return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
    1324              : }
    1325              : 
    1326              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1327              : _mm_or_si128 (__m128i __A, __m128i __B)
    1328              : {
    1329              :   return (__m128i) ((__v2du)__A | (__v2du)__B);
    1330              : }
    1331              : 
    1332              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1333              : _mm_xor_si128 (__m128i __A, __m128i __B)
    1334              : {
    1335        31096 :   return (__m128i) ((__v2du)__A ^ (__v2du)__B);
    1336              : }
    1337              : 
    1338              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1339              : _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
    1340              : {
    1341              :   return (__m128i) ((__v16qi)__A == (__v16qi)__B);
    1342              : }
    1343              : 
    1344              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1345              : _mm_cmpeq_epi16 (__m128i __A, __m128i __B)
    1346              : {
    1347              :   return (__m128i) ((__v8hi)__A == (__v8hi)__B);
    1348              : }
    1349              : 
    1350              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1351              : _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
    1352              : {
    1353              :   return (__m128i) ((__v4si)__A == (__v4si)__B);
    1354              : }
    1355              : 
    1356              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1357              : _mm_cmplt_epi8 (__m128i __A, __m128i __B)
    1358              : {
    1359              :   return (__m128i) ((__v16qs)__A < (__v16qs)__B);
    1360              : }
    1361              : 
    1362              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1363              : _mm_cmplt_epi16 (__m128i __A, __m128i __B)
    1364              : {
    1365              :   return (__m128i) ((__v8hi)__A < (__v8hi)__B);
    1366              : }
    1367              : 
    1368              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1369              : _mm_cmplt_epi32 (__m128i __A, __m128i __B)
    1370              : {
    1371              :   return (__m128i) ((__v4si)__A < (__v4si)__B);
    1372              : }
    1373              : 
    1374              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1375              : _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
    1376              : {
    1377              :   return (__m128i) ((__v16qs)__A > (__v16qs)__B);
    1378              : }
    1379              : 
    1380              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1381              : _mm_cmpgt_epi16 (__m128i __A, __m128i __B)
    1382              : {
    1383              :   return (__m128i) ((__v8hi)__A > (__v8hi)__B);
    1384              : }
    1385              : 
    1386              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1387              : _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
    1388              : {
    1389              :   return (__m128i) ((__v4si)__A > (__v4si)__B);
    1390              : }
    1391              : 
    1392              : #ifdef __OPTIMIZE__
    1393              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1394              : _mm_extract_epi16 (__m128i const __A, int const __N)
    1395              : {
    1396              :   return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
    1397              : }
    1398              : 
    1399              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1400              : _mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
    1401              : {
    1402              :   return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
    1403              : }
    1404              : #else
    1405              : #define _mm_extract_epi16(A, N) \
    1406              :   ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
    1407              : #define _mm_insert_epi16(A, D, N)                               \
    1408              :   ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
    1409              :                                           (int)(D), (int)(N)))
    1410              : #endif
    1411              : 
    1412              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1413              : _mm_max_epi16 (__m128i __A, __m128i __B)
    1414              : {
    1415              :   return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
    1416              : }
    1417              : 
    1418              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1419              : _mm_max_epu8 (__m128i __A, __m128i __B)
    1420              : {
    1421              :   return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
    1422              : }
    1423              : 
    1424              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1425              : _mm_min_epi16 (__m128i __A, __m128i __B)
    1426              : {
    1427              :   return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
    1428              : }
    1429              : 
    1430              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1431              : _mm_min_epu8 (__m128i __A, __m128i __B)
    1432              : {
    1433              :   return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
    1434              : }
    1435              : 
    1436              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1437              : _mm_movemask_epi8 (__m128i __A)
    1438              : {
    1439              :   return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
    1440              : }
    1441              : 
    1442              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1443              : _mm_mulhi_epu16 (__m128i __A, __m128i __B)
    1444              : {
    1445              :   return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
    1446              : }
    1447              : 
    1448              : #ifdef __OPTIMIZE__
    1449              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1450              : _mm_shufflehi_epi16 (__m128i __A, const int __mask)
    1451              : {
    1452              :   return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
    1453              : }
    1454              : 
    1455              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1456              : _mm_shufflelo_epi16 (__m128i __A, const int __mask)
    1457              : {
    1458              :   return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
    1459              : }
    1460              : 
    1461              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1462              : _mm_shuffle_epi32 (__m128i __A, const int __mask)
    1463              : {
    1464              :   return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
    1465              : }
    1466              : #else
    1467              : #define _mm_shufflehi_epi16(A, N) \
    1468              :   ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
    1469              : #define _mm_shufflelo_epi16(A, N) \
    1470              :   ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
    1471              : #define _mm_shuffle_epi32(A, N) \
    1472              :   ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
    1473              : #endif
    1474              : 
    1475              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1476              : _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
    1477              : {
    1478              :   __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
    1479              : }
    1480              : 
    1481              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1482              : _mm_avg_epu8 (__m128i __A, __m128i __B)
    1483              : {
    1484              :   return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
    1485              : }
    1486              : 
    1487              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1488              : _mm_avg_epu16 (__m128i __A, __m128i __B)
    1489              : {
    1490              :   return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
    1491              : }
    1492              : 
    1493              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1494              : _mm_sad_epu8 (__m128i __A, __m128i __B)
    1495              : {
    1496              :   return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
    1497              : }
    1498              : 
    1499              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1500              : _mm_stream_si32 (int *__A, int __B)
    1501              : {
    1502              :   __builtin_ia32_movnti (__A, __B);
    1503              : }
    1504              : 
    1505              : #ifdef __x86_64__
    1506              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1507              : _mm_stream_si64 (long long int *__A, long long int __B)
    1508              : {
    1509              :   __builtin_ia32_movnti64 (__A, __B);
    1510              : }
    1511              : #endif
    1512              : 
    1513              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1514              : _mm_stream_si128 (__m128i *__A, __m128i __B)
    1515              : {
    1516              :   __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
    1517              : }
    1518              : 
    1519              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1520              : _mm_stream_pd (double *__A, __m128d __B)
    1521              : {
    1522              :   __builtin_ia32_movntpd (__A, (__v2df)__B);
    1523              : }
    1524              : 
    1525              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1526              : _mm_clflush (void const *__A)
    1527              : {
    1528              :   __builtin_ia32_clflush (__A);
    1529              : }
    1530              : 
    1531              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1532              : _mm_lfence (void)
    1533              : {
    1534              :   __builtin_ia32_lfence ();
    1535              : }
    1536              : 
    1537              : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1538              : _mm_mfence (void)
    1539              : {
    1540              :   __builtin_ia32_mfence ();
    1541              : }
    1542              : 
    1543              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1544              : _mm_cvtsi32_si128 (int __A)
    1545              : {
    1546              :   return _mm_set_epi32 (0, 0, 0, __A);
    1547              : }
    1548              : 
    1549              : #ifdef __x86_64__
    1550              : /* Intel intrinsic.  */
    1551              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1552              : _mm_cvtsi64_si128 (long long __A)
    1553              : {
    1554              :   return _mm_set_epi64x (0, __A);
    1555              : }
    1556              : 
    1557              : /* Microsoft intrinsic.  */
    1558              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1559              : _mm_cvtsi64x_si128 (long long __A)
    1560              : {
    1561              :   return _mm_set_epi64x (0, __A);
    1562              : }
    1563              : #endif
    1564              : 
    1565              : /* Casts between various SP, DP, INT vector types.  Note that these do no
    1566              :    conversion of values, they just change the type.  */
    1567              : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1568              : _mm_castpd_ps(__m128d __A)
    1569              : {
    1570              :   return (__m128) __A;
    1571              : }
    1572              : 
    1573              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1574              : _mm_castpd_si128(__m128d __A)
    1575              : {
    1576              :   return (__m128i) __A;
    1577              : }
    1578              : 
    1579              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1580              : _mm_castps_pd(__m128 __A)
    1581              : {
    1582              :   return (__m128d) __A;
    1583              : }
    1584              : 
    1585              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1586              : _mm_castps_si128(__m128 __A)
    1587              : {
    1588              :   return (__m128i) __A;
    1589              : }
    1590              : 
    1591              : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1592              : _mm_castsi128_ps(__m128i __A)
    1593              : {
    1594              :   return (__m128) __A;
    1595              : }
    1596              : 
    1597              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1598              : _mm_castsi128_pd(__m128i __A)
    1599              : {
    1600              :   return (__m128d) __A;
    1601              : }
    1602              : 
    1603              : #ifdef __DISABLE_SSE2__
    1604              : #undef __DISABLE_SSE2__
    1605              : #pragma GCC pop_options
    1606              : #endif /* __DISABLE_SSE2__ */
    1607              : 
    1608              : #endif /* _EMMINTRIN_H_INCLUDED */
        

Generated by: LCOV version 2.0-1