/***************************************************************************
 *      _   _  ___  ____                                                   *
 *     | \ | |/ _ \/ ___|                     N.A.Software Ltd             *
 *     |  \| | |_| \___ \                     1 Prospect Road              *
 *     | |\  |  _  |___) |                    Birkenhead                   *
 *     |_| \_|_| |_|____/                     CH42 8LE                     *
 *                                            England                      *
 *                                                                         *
 *     Produced by NA Software Ltd 2009       +44 (0) 151 609 1911         *
 *     COPYRIGHT (c) Intel Corporation.                                    *
 *                                                                         *
 ***************************************************************************
 *                                                                         *
 *  altivec2avx.h                                                          *
 *                                                                         *
 *                                                                         *
 ***************************************************************************
 *                                                                         *
 *  CVS Information.                                                       *
 *                                                                         *
 *                                                                         *
 * Project      :  PowerPC to IA Conversion.                               *
 * Filename     :  $Source: /usr/local/cvs/INTEL/altivec2avx/src/altivec2avx.h,v $*
 * Originator   :  Dave Murray                                             *
 * Last editor  :  $Author: ian $                                        *
 * File version :  $Revision: 1.1 $                                        *
 * Last changed :  $Date: 2010/11/25 17:05:18 $                            *
 * Description  :  altivec2avx include file. This file is used instead     *
 *                 of altivec.h on the PowerPC. The file maps altivec      *
 *                 instructions onto AVX and SSE4.1 instructions.          *
 *                                                                         *
 *                                                                         *
 ***************************************************************************
 *                                                                         *
 *  History.                                                               *
 *                                                                         *
 *  Alpha release.   1/09/08.  First release version.                      *
 *  Gold release.    7/01/09.  Fully tested version.                       *
 *  Version 1.0.0.  17/03/09.  Additional testing. Code clean up.          *
 *  Version 1.1.0.   6/05/10.  Added AVX and SSE4.1 instructions.          *
 *                                                                         *
 ***************************************************************************
 *                                                                         *
 *  Description.                                                           *
 *                                                                         *
 * This include file is a partial translation of the file "altivec.h"      *
 * distributed with gcc.                                                   *
 *                                                                         *
 * There is further documentation in altivec2sse.pdf.                      *
 *                                                                         * 
 * This file is an aid to porting PowerPC/altivec code to run on           *
 * Intel/SSE2 processors. Replace the original altivec.h header file       *
 * with this one and recompile on an Intel machine with something like     *
 * "gcc -msse2 myfile.c".                                                  *
 *                                                                         *
 * In this modified version of the file, the instructions in the           *
 * Motorola Spec (ALTIVECPIM) target Intel assembler where possible.       *
 * Some instructions with no direct mapping are simulated with             *
 * inline functions, most of which are SIMD code, but a few process        *
 * each element of the vector serially.                                    *
 *                                                                         *
 * Coverage is about 75% complete and parts of your sources will           *
 * almost certainly need to be modified by hand. This includes code        *
 * for handling misaligned data. Other things that are missing include:    *
 *  * versions of functions that saturate the result on overflow           *
 *  * some functions do not have versions for all data types               *
 *  * function for which the position of individual elements matters       *
 *    (eg pack, perm).                                                     *
 *                                                                         *
 * Altivec is big-endian and SSE is little-endian. This means that the     *
 * elements of an xmm vector register are in reverse order, so any         *
 * instructions that depend on the position within a vector need to be     *
 * analysed to see whether the data are in register order or memory order. *
 * The splat and merge instructions have two forms (see implementations    *
 * for further comments). Define the macro MEMORY_ORDER to select between  *
 * them.                                                                   *
 *                                                                         *
 * Do not expect your highly tuned altivec code to be translated into      *
 * high-performance SSE. You will get a quick and easy first cut.          *
 * If you want high-performance SSE, you will need to adapt your           *
 * programming style: Intel has out-of-order execution and fewer           *
 * registers, so loop unrolling and other tricks that keep the altivec     *
 * pipeline full don't work.                                               *
 *                                                                         *
 ***************************************************************************/




/***************************************************************************
 * System Include Files.                                                   *
 ***************************************************************************/
#include "xmmintrin.h"  /* SSE instructions */
#include "emmintrin.h"  /* SSE2 instructions */
#include "immintrin.h"  /* AVX instructions */




/***************************************************************************
 * Definitions.                                                            *
 ***************************************************************************/

#ifndef _ALTIVEC_H
#define _ALTIVEC_H 1
#define __VEC__ 10206

#define __vector __attribute__((vector_size(16)))

/* You are allowed to undef this for C++ compatibility.  */
#define vector __vector

#define bool signed
#define pixel unsigned short
#define __pixel unsigned short

/* Dummy prototype.  */
extern int __ERROR_INVALID_ARGUMENT();

/* intel and altivec invert different arguments:
   _mm_andnot_*(x, y) computes (~x & y), so these wrappers swap the
   operands and compute (A & ~B). */
#define reversed_andnot_ps(A,B) _mm_andnot_ps((B),(A))
#define reversed_andnot_si128(A,B) _mm_andnot_si128((B),(A))
#ifdef MEMORY_ORDER
/* memory order: leave the vector untouched */
#define SWAP32(x) (x)
#else
/* Exchange the two 64-bit halves of the register, i.e. the 32-bit
   elements 0123 become 2301 */
#define SWAP32(x) _mm_shuffle_pd((__m128d)(x), (__m128d)(x), 1)
#endif

/* addresses in vec_ld and vec_st are aligned on 16-byte boundaries:
   byte offset A is added to base pointer B and the low four bits of the
   sum are cleared.  The mask is built in size_t so that it is as wide as
   the address on every target (an unsigned long mask is only 32 bits
   wide on LLP64 platforms and would zero the upper half of the address). */
#define V4SI_ADDRESS(A,B) ((__m128i *)( ((size_t) ((char*)(B)) + ((size_t)(A))) & ~(size_t)15))
#define V4SF_ADDRESS(A,B) ((float *)( ((size_t) ((char*)(B)) + ((size_t)(A))) & ~(size_t)15))

/* Helper macros.
   __un_args_eq / __bin_args_eq / __tern_args_eq evaluate to non-zero when
   the expressions x (, y, z) have types compatible with xtype (, ytype,
   ztype).  __ch selects y or z at compile time according to x.
   __typeof__ is used rather than typeof so that the header also compiles
   in strict ISO modes (-std=c99, -std=c11), where typeof is not a keyword. */

#define __un_args_eq(xtype, x)						\
	__builtin_types_compatible_p (xtype, __typeof__ (x))

#define __bin_args_eq(xtype, x, ytype, y)				\
	(__builtin_types_compatible_p (xtype, __typeof__ (x))		\
	 && __builtin_types_compatible_p (ytype, __typeof__ (y)))

#define __tern_args_eq(xtype, x, ytype, y, ztype, z)                    \
        (__builtin_types_compatible_p (xtype, __typeof__ (x))           \
         && __builtin_types_compatible_p (ytype, __typeof__ (y))	\
	 && __builtin_types_compatible_p (ztype, __typeof__ (z)))

#define __ch(x, y, z)	__builtin_choose_expr (x, y, z)

/* These are easy... Same exact arguments.
   Each instruction-specific name on the left is a plain textual alias for
   the generic operation on the right, so the arguments pass through
   unchanged. */

#define vec_vaddcuw vec_addc
#define vec_vand vec_and
#define vec_vandc vec_andc
#define vec_vrfip vec_ceil
#define vec_vcmpbfp vec_cmpb
#define vec_vcmpgefp vec_cmpge
#define vec_vctsxs vec_cts
#define vec_vctuxs vec_ctu
#define vec_vexptefp vec_expte
#define vec_vrfim vec_floor
#define vec_lvx vec_ld
#define vec_vlogefp vec_loge
#define vec_vmaddfp vec_madd
#define vec_vmhaddshs vec_madds
#define vec_vmladduhm vec_mladd
#define vec_vmhraddshs vec_mradds
#define vec_vnmsubfp vec_nmsub
#define vec_vnor vec_nor
#define vec_vor vec_or
#define vec_vpkpx vec_packpx
#define vec_vperm vec_perm
#define vec_vrefp vec_re
#define vec_vrfin vec_round
#define vec_vrsqrtefp vec_rsqrte
#define vec_vsel vec_sel
#define vec_vsldoi vec_sld
#define vec_vsl vec_sll
#define vec_vslo vec_slo
#define vec_vspltisb vec_splat_s8
#define vec_vspltish vec_splat_s16
#define vec_vspltisw vec_splat_s32
#define vec_vsr vec_srl
#define vec_vsro vec_sro
#define vec_stvx vec_st
#define vec_vsubcuw vec_subc
#define vec_vsum2sws vec_sum2s
#define vec_vsumsws vec_sums
#define vec_vrfiz vec_trunc
#define vec_vxor vec_xor




/**************************************************************************/
/* START OF ADDITIONS FOR INTEL                                           */
/**************************************************************************/
  
/* these are just dummy functions: each macro expands to nothing, so a
   call such as "vec_dss(0);" is reduced to an empty statement and the
   arguments are never evaluated */
#define vec_dss(a1)
#define vec_dssall()
#define vec_dst(a1,a2,a3) 
#define vec_dstst(a1,a2,a3)
#define vec_dstt(a1,a2,a3)
#define vec_dststt(a1,a2,a3)
#define vec_mtvscr(a1)
/* Dummy replacement for vec_mfvscr: always yields eight zero halfwords. */
static inline vector unsigned short vec_mfvscr(void) {
  const vector unsigned short cleared = {0, 0, 0, 0, 0, 0, 0, 0};
  return cleared;
}

/* ignore LRU hints on load/store: the "l" variants are plain aliases of
   the ordinary load/store names */
#define vec_ldl vec_ld
#define vec_stl vec_st
#define vec_lvxl vec_lvx
#define vec_stvxl vec_stvx




/**************************************************************************/
/* Unions for access to individual elements.                              */
/**************************************************************************/

/* Overlay of a 16-byte vector of unsigned char (v) with its sixteen
   individual elements (s[0..15]). */
typedef union {
    vector unsigned char v;
    unsigned char s[16];
} mix_u8;

/* Overlay of a 16-byte vector of unsigned short (v) with its eight
   individual elements (s[0..7]). */
typedef union {
    vector unsigned short v;
    unsigned short s[8];
} mix_u16;

/* Overlay of a 16-byte vector of unsigned int (v) with its four
   individual elements (s[0..3]). */
typedef union {
    vector unsigned int v;
    unsigned int s[4];
} mix_u32;

/* Overlay of a 16-byte vector of signed char (v) with its sixteen
   individual elements (s[0..15]). */
typedef union {
    vector signed char v;
    signed char s[16];
} mix_s8;

/* Overlay of a 16-byte vector of signed short (v) with its eight
   individual elements (s[0..7]). */
typedef union {
    vector signed short v;
    signed short s[8];
} mix_s16;

/* Overlay of a 16-byte vector of signed int (v) with its four
   individual elements (s[0..3]). */
typedef union {
    vector signed int v;
    signed int s[4];
} mix_s32;

/* Overlay of a 16-byte vector of float (v) with its four individual
   elements (s[0..3]). */
typedef union {
    vector float v;
    float s[4];
} mix_f32;




/**************************************************************************/
/* Emulations for some of the functions with no direct mapping.           */
/**************************************************************************/

/* Element-wise absolute value of sixteen signed bytes.  The negation
   wraps, so -128 is returned unchanged. */
static inline vector signed char __IA32_abs_v16qi(vector signed char a)
{
  vector signed char negated, keep_a;

  negated = _mm_sub_epi8(_mm_setzero_si128(), a);
  /* all-ones in the lanes where a is the larger of (a, -a) */
  keep_a = _mm_cmpgt_epi8(a, negated);
  return (vector signed char) _mm_or_si128(_mm_and_si128(keep_a, a),
					   _mm_andnot_si128(keep_a, negated));
}
    
/* Element-wise absolute value of eight signed shorts, taken as
   max(a, 0 - a) with a wrapping subtraction. */
static inline vector signed short __IA32_abs_v8hi(vector signed short a)
{
  vector signed short negated = _mm_sub_epi16(_mm_setzero_si128(), a);
  return (vector signed short) _mm_max_epi16(a, negated);
}

/* Element-wise absolute value of four signed ints.  The negation wraps,
   so 0x80000000 is returned unchanged. */
static inline vector signed int __IA32_abs_v4si(vector signed int a)
{
  vector signed int negated, keep_a;

  negated = _mm_sub_epi32(_mm_setzero_si128(), a);
  /* all-ones in the lanes where a is the larger of (a, -a) */
  keep_a = _mm_cmpgt_epi32(a, negated);
  return (vector signed int) _mm_or_si128(_mm_and_si128(keep_a, a),
					  _mm_andnot_si128(keep_a, negated));
}

/* Element-wise absolute value of four floats: clears bit 31 (the sign
   bit) of every element. */
static inline vector float __IA32_abs_v4sf(vector float a)
{
  const vector float sign_mask = (vector float) _mm_set1_epi32(0x80000000);
  return (vector float) _mm_andnot_ps(sign_mask, a);
}

/* Saturating element-wise absolute value of sixteen signed bytes.  The
   negation uses a saturating subtract, so -128 yields 127. */
static inline vector signed char __IA32_abss_v16qi(vector signed char a)
{
  vector signed char negated, keep_a;

  negated = _mm_subs_epi8(_mm_setzero_si128(), a);
  /* all-ones in the lanes where a is the larger of (a, -a) */
  keep_a = _mm_cmpgt_epi8(a, negated);
  return (vector signed char) _mm_or_si128(_mm_and_si128(keep_a, a),
					   _mm_andnot_si128(keep_a, negated));
}
    
/* Saturating element-wise absolute value of eight signed shorts, taken
   as max(a, 0 - a) with a saturating subtraction. */
static inline vector signed short __IA32_abss_v8hi(vector signed short a)
{
  vector signed short negated = _mm_subs_epi16(_mm_setzero_si128(), a);
  return (vector signed short) _mm_max_epi16(a, negated);
}

/* Saturating element-wise absolute value of four signed ints.  There is
   no saturating 32-bit subtract in use here, so the 0x80000000 case is
   patched by hand. */
static inline vector signed int __IA32_abss_v4si(vector signed int a)
{
  const vector signed int int_min = _mm_set1_epi32(0x80000000);
  vector signed int negated, is_min, keep_a;

  negated = _mm_sub_epi32(_mm_setzero_si128(), a);

  /* where a == 0x80000000 replace the wrapped negation by 0x7FFFFFFF
     (the all-ones compare result with its sign bit stripped) */
  is_min  = _mm_cmpeq_epi32(a, int_min);
  negated = _mm_or_si128(_mm_andnot_si128(int_min, is_min),
                         _mm_andnot_si128(is_min, negated));

  /* all-ones in the lanes where a is the larger of (a, -a) */
  keep_a = _mm_cmpgt_epi32(a, negated);
  return (vector signed int) _mm_or_si128(_mm_and_si128(keep_a, a),
				          _mm_andnot_si128(keep_a, negated));
}
/************************************************************************/
/* Rounded-up average of four unsigned ints.  Each operand is halved
   before the addition so the intermediate sum cannot overflow; the bit
   lost by halving is restored from (a1 | a2) & 1. */
static inline vector unsigned int __IA32_vavguw (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int rounding, total;

  rounding = _mm_and_si128(_mm_set1_epi32(1), _mm_or_si128(a1, a2));
  total = _mm_add_epi32(_mm_srli_epi32(a1, 1), rounding);
  total = _mm_add_epi32(_mm_srli_epi32(a2, 1), total);
  return (vector unsigned int) total;
}

/* Rounded-up average of sixteen signed bytes: (a1 >> 1) + (a2 >> 1) plus
   the rounding bit (a1 | a2) & 1, all in 8-bit wrapping arithmetic. */
static inline vector signed char __IA32_vavgsb (vector signed char a1, vector signed char a2)
{
  const vector signed char top_bit = _mm_set1_epi8(0x80);
  vector signed char half1, half2, total;

  /* There is no 8-bit arithmetic shift.  Shift the 16-bit lanes logically,
     drop bit 7 of every byte (it came from the neighbouring byte) and put
     the original sign bit back in its place. */
  half1 = (vector signed char) _mm_srli_epi16((vector short) a1, 1);
  half1 = _mm_andnot_si128(top_bit, half1);
  half1 = _mm_or_si128(_mm_and_si128(a1, top_bit), half1);

  half2 = (vector signed char) _mm_srli_epi16((vector short) a2, 1);
  half2 = _mm_andnot_si128(top_bit, half2);
  half2 = _mm_or_si128(_mm_and_si128(a2, top_bit), half2);

  total = _mm_and_si128(_mm_set1_epi8(1), _mm_or_si128(a1, a2));
  total = _mm_add_epi8(total, half1);
  total = _mm_add_epi8(total, half2);

  return (vector signed char) total;
}

/* Rounded-up average of eight signed shorts.
   Each operand is halved with an arithmetic shift before the addition so
   the intermediate sum cannot overflow; the bit lost by halving is
   restored from (a1 | a2) & 1.
   The additions must be 16-bit wide: a 32-bit add lets the carry out of
   an even element spill into the odd element above it (for example
   avg(-1,-1) next to avg(0,0) would turn the second result into 1). */
static inline vector signed short __IA32_vavgsh (vector signed short a1, vector signed short a2)
{
  const vector signed short one = _mm_set1_epi16(1);
  vector signed short Y;

  Y = _mm_and_si128(one, _mm_or_si128(a1, a2));
  Y = _mm_add_epi16(_mm_srai_epi16(a1, 1), Y);
  Y = _mm_add_epi16(_mm_srai_epi16(a2, 1), Y);
  return (vector signed short) Y;
}

/* Rounded-up average of four signed ints.  Each operand is halved with an
   arithmetic shift before the addition; the bit lost by halving is
   restored from (a1 | a2) & 1. */
static inline vector signed int __IA32_vavgsw (vector signed int a1, vector signed int a2)
{
  vector signed int rounding, total;

  rounding = _mm_and_si128(_mm_set1_epi32(1), _mm_or_si128(a1, a2));
  total = _mm_add_epi32(_mm_srai_epi32(a1, 1), rounding);
  total = _mm_add_epi32(_mm_srai_epi32(a2, 1), total);
  return (vector signed int) total;
}
/************************************************************************/
/*  Float/Integer Conversions                                           */
/************************************************************************/
/* Round four floats towards zero (vec_trunc).  Uses _mm_round_ps when the
   compiler advertises AVX / SSE5 / SSE4.1, otherwise an SSE2 emulation. */
static inline vector float
__IA32_vrfiz (vector float a1)
{
#if defined(__AVX__) || defined(__SSE5__) || defined(__SSE4_1__)
  return (vector float) _mm_round_ps(a1, _MM_FROUND_TO_ZERO);
#else
  /* 0x4AFFFFFF is 8388607.5f: anything larger in magnitude is already
     an integer and must bypass the int32 round trip */
  const vector float threshold = (vector float) _mm_set1_epi32(0x4AFFFFFF);
  const vector float sign_mask = (vector float) _mm_set1_epi32(0x80000000);

  vector float result, magnitude, is_big, is_nan;

  /* float -> int32 (truncating) -> float */
  result = _mm_cvtepi32_ps(_mm_cvttps_epi32(a1));

  /* large inputs are passed through unchanged */
  magnitude = _mm_andnot_ps(sign_mask, a1);
  is_big    = _mm_cmpgt_ps(magnitude, threshold);
  result    = _mm_or_ps(_mm_and_ps(is_big, a1), _mm_andnot_ps(is_big, result));

  /* NaN inputs produce an all-ones (NaN) element */
  is_nan = _mm_cmpneq_ps(a1, a1);
  result = _mm_or_ps(is_nan, result);

  return (vector float) result;
#endif
}

/* Round four floats up towards +infinity (vec_ceil).  Uses _mm_round_ps
   when the compiler advertises AVX / SSE5 / SSE4.1, otherwise an SSE2
   emulation. */
static inline vector float
__IA32_vrfip (vector float a1)
{
#if defined(__AVX__) || defined(__SSE5__) || defined(__SSE4_1__)
  return (vector float) _mm_round_ps(a1, _MM_FROUND_TO_POS_INF);
#else
  /* 0x4AFFFFFF is 8388607.5f: anything larger in magnitude is already
     an integer and must bypass the int32 round trip */
  const vector float threshold = (vector float) _mm_set1_epi32(0x4AFFFFFF);
  const vector float sign_mask = (vector float) _mm_set1_epi32(0x80000000);
  const vector float unit      = _mm_set1_ps(1.0);

  vector float result, step, magnitude, is_big, is_nan;

  /* float -> int32 (truncating) -> float */
  result = _mm_cvtepi32_ps(_mm_cvttps_epi32(a1));

  /* truncation landed below a1: move up by one */
  step   = _mm_and_ps(_mm_cmpgt_ps(a1, result), unit);
  result = _mm_add_ps(result, step);

  /* large inputs are passed through unchanged */
  magnitude = _mm_andnot_ps(sign_mask, a1);
  is_big    = _mm_cmpgt_ps(magnitude, threshold);
  result    = _mm_or_ps(_mm_and_ps(is_big, a1), _mm_andnot_ps(is_big, result));

  /* NaN inputs produce an all-ones (NaN) element */
  is_nan = _mm_cmpneq_ps(a1, a1);
  result = _mm_or_ps(is_nan, result);

  return (vector float) result;
#endif
}

/* Round four floats down towards -infinity (vec_floor).  Uses
   _mm_round_ps when the compiler advertises AVX / SSE5 / SSE4.1,
   otherwise an SSE2 emulation. */
static inline vector float
__IA32_vrfim (vector float a1)
{
#if defined(__AVX__) || defined(__SSE5__) || defined(__SSE4_1__)
  return (vector float) _mm_round_ps(a1, _MM_FROUND_TO_NEG_INF);
#else
  /* 0x4AFFFFFF is 8388607.5f: anything larger in magnitude is already
     an integer and must bypass the int32 round trip */
  const vector float threshold = (vector float) _mm_set1_epi32(0x4AFFFFFF);
  const vector float sign_mask = (vector float) _mm_set1_epi32(0x80000000);
  const vector float unit      = _mm_set1_ps(1.0);

  vector float result, step, magnitude, is_big, is_nan;

  /* float -> int32 (truncating) -> float */
  result = _mm_cvtepi32_ps(_mm_cvttps_epi32(a1));

  /* truncation landed above a1: move down by one */
  step   = _mm_and_ps(_mm_cmplt_ps(a1, result), unit);
  result = _mm_sub_ps(result, step);

  /* large inputs are passed through unchanged */
  magnitude = _mm_andnot_ps(sign_mask, a1);
  is_big    = _mm_cmpgt_ps(magnitude, threshold);
  result    = _mm_or_ps(_mm_and_ps(is_big, a1), _mm_andnot_ps(is_big, result));

  /* NaN inputs produce an all-ones (NaN) element */
  is_nan = _mm_cmpneq_ps(a1, a1);
  result = _mm_or_ps(is_nan, result);

  return (vector float) result;
#endif
}

/* Round four floats to the nearest integer, ties to even (vec_round).
   Uses _mm_round_ps when the compiler advertises AVX / SSE5 / SSE4.1,
   otherwise an SSE2 emulation that works on the magnitude and re-applies
   the sign afterwards. */
static inline vector float
__IA32_vrfin (vector float a1)
{
#if defined(__AVX__) || defined(__SSE5__) || defined(__SSE4_1__)
  return (vector float) _mm_round_ps(a1, _MM_FROUND_TO_NEAREST_INT);
#else
  /* 0x4AFFFFFF is 8388607.5f: anything larger in magnitude is already
     an integer and must bypass the int32 round trip */
  const vector float threshold = (vector float) _mm_set1_epi32(0x4AFFFFFF);
  const vector float sign_mask = (vector float) _mm_set1_epi32(0x80000000);
  const vector float one_half  = _mm_set1_ps(0.5);
  const vector signed int lsb  = _mm_set1_epi32(1);

  vector float result, magnitude, fraction, sign_of_a1;
  vector float is_tie, is_above, is_big, is_nan;
  vector signed int whole, tie_up, round_up;

  sign_of_a1 = _mm_and_ps(sign_mask, a1);
  magnitude  = _mm_andnot_ps(sign_mask, a1);
  whole      = _mm_cvttps_epi32(magnitude);

  /* fractional part that truncation discarded */
  fraction = _mm_sub_ps(magnitude, _mm_cvtepi32_ps(whole));

  /* exact tie: step up only when the integer part is odd */
  is_tie = _mm_cmpeq_ps(fraction, one_half);
  tie_up = _mm_and_si128((vector signed int) is_tie, _mm_and_si128(whole, lsb));

  /* more than half: always step up */
  is_above = _mm_cmpgt_ps(fraction, one_half);
  round_up = _mm_and_si128((vector signed int) is_above, lsb);

  whole  = _mm_add_epi32(whole, _mm_or_si128(tie_up, round_up));
  result = _mm_or_ps(_mm_cvtepi32_ps(whole), sign_of_a1);

  /* large inputs are passed through unchanged */
  is_big = _mm_cmpgt_ps(magnitude, threshold);
  result = _mm_or_ps(_mm_and_ps(is_big, a1), _mm_andnot_ps(is_big, result));

  /* NaN inputs produce an all-ones (NaN) element */
  is_nan = _mm_cmpneq_ps(a1, a1);
  result = _mm_or_ps(is_nan, result);

  return (vector float) result;
#endif
}
/************************************************************************/
/* Convert four signed ints to float and divide by 2^b.
   X : integers to convert.
   b : scale exponent; intended range 0..31 (values up to 126 still yield
       a normal scale factor).
   The scale factor 2^-b is assembled directly from its IEEE-754 bit
   pattern (biased exponent 127 - b, zero mantissa) and applied with a
   multiply.  Editing the exponent field of the converted value in place
   is not safe: an element equal to 0 has an all-zero bit pattern, and
   subtracting b << 23 from it produces a negative/infinite value instead
   of 0.0. */
static inline vector float __IA32_vcfsx (vector signed int X, unsigned char b)
{
  vector float Y;
  vector signed int x;
  
  Y = _mm_cvtepi32_ps(X);
  
  if (b > 0) {
    /* multiply by 2^-b (exact: the factor is a power of two) */
    x = _mm_set1_epi32(127 - (int) b);
    x = _mm_slli_epi32(x, 23);
    Y = _mm_mul_ps(Y, (vector float) x);
  }

  return (vector float) Y;
}

/* Multiply four floats by 2^b and convert to signed int with truncation
   and saturation.
   X : values to convert.
   b : scale exponent; intended range 0..31.
   Returns 0x7FFFFFFF for results >= 2^31, 0x80000000 for results below
   -2^31, and 0 for NaN.
   The scaling is done with a real multiply by 2^b (built from its bit
   pattern, biased exponent 127 + b).  Adding b << 23 to the input's
   exponent field instead would carry into the sign bit for very large or
   infinite inputs and turn them into small negative numbers. */
static inline vector signed int __IA32_vctsxs (vector float X, unsigned char b)
{
  /* 2^31: the smallest float that no longer fits a signed 32-bit int */
  const vector float int_limit_4f = _mm_set1_ps(2147483648.0f);
  vector signed int x, Y, mask;
  
  if (b > 0) {
    /* multiply by 2^b */
    x = _mm_set1_epi32(127 + (int) b);
    x = _mm_slli_epi32(x, 23);
    X = _mm_mul_ps(X, (vector float) x);
  }

  Y = _mm_cvttps_epi32((vector float) X);

  /* out-of-range floats are converted to 0x80000000, which is already the
     right answer on the negative side; on the positive side (including
     exactly 2^31) flip every bit to obtain 0x7FFFFFFF. */
  mask = (vector signed int) _mm_cmpge_ps(X, int_limit_4f);
  Y = _mm_xor_si128(mask, Y);

  /* NaN maps to zero */
  mask = (vector signed int) _mm_cmpeq_ps(X, X);
  Y = _mm_and_si128(mask, Y);
  
  return (vector signed int) Y;
}

/* Byte-serial permute.  For each of the 16 result bytes the matching
   control byte in vc chooses the source: bit 0x10 selects vb (set) or va
   (clear) and the low four bits give the byte index within that source. */
static inline vector char
myvec_perm(vector char va, vector char vb, vector char vc)
{
  mix_u8 src_a, src_b, control, out;
  int k;

  src_a.v = va;
  src_b.v = vb;
  control.v = vc;
  for (k = 0; k < 16; k++) {
    const int selector = control.s[k];
    const int lane = selector & 0xf;

    if (selector & 0x10)
      out.s[k] = src_b.s[lane];
    else
      out.s[k] = src_a.s[lane];
  }
  return out.v;
}


/* vec_perm on two int vectors: reinterprets A and B as bytes and hands
   them to the byte-serial myvec_perm.

   An AVX variant built from _mm_permutevar_ps and _mm_blendv_ps was tried
   and abandoned because it gave incorrect results:

     vec_perm FAILED at line 770
     A = 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175
     B = 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191
     C =   2,   3,   6,   7,  10,  11,  14,  15,  18,  19,  22,  23,  26,  27,  30,  31
     gave
     160, 161, 162, 163, 168, 169, 170, 171, 176, 177, 178, 179, 184, 185, 186, 187
     should be
     162, 163, 166, 167, 170, 171, 174, 175, 178, 179, 182, 183, 186, 187, 190, 191
*/
static inline vector signed int __IA32_vperm_4si (vector signed int A, 
						  vector signed int B,
						  vector char C) 
{
  const vector char bytes_a = (vector char) A;
  const vector char bytes_b = (vector char) B;

  return (vector signed int) myvec_perm(bytes_a, bytes_b, C);
}

/* Build the lvsr control vector for address (a + b): a fixed byte
   pattern minus a value derived from the low four address bits.  The
   pattern and the adjustment of those bits depend on MEMORY_ORDER. */
static inline vector unsigned char __IA32_lvsr(int a, void *b)
{
 unsigned char offset = ((unsigned char)a + (size_t)b)&0x0f;
#ifdef MEMORY_ORDER
 const vector unsigned char pattern =
   { 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17};

 /* 8..15 become -8..-1 (modulo 256) */
 if (offset >= 8) offset -= 0x10;
#else
 const vector unsigned char pattern =
   { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f };

 /* 0..7 -> 8..15 and 8..15 -> 0..7 */
 offset ^= 8;
#endif

 return (vector unsigned char)_mm_sub_epi8(pattern,
          (vector unsigned char)_mm_set1_epi8(offset));
}

/* Build the lvsl control vector for address (a + b): a fixed byte
   pattern plus a value derived from the low four address bits.  The
   pattern and the adjustment of those bits depend on MEMORY_ORDER. */
static inline vector unsigned char __IA32_lvsl(int a, void *b)
{
 unsigned char offset = ((unsigned char)a + (size_t)b)&0x0f;
#ifdef MEMORY_ORDER
 const vector unsigned char pattern =
   { 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17};

 /* 8..15 become -8..-1 (modulo 256) */
 if (offset >= 8) offset -= 0x10;
#else
 const vector unsigned char pattern =
   { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f };

 /* 0..7 -> 8..15 and 8..15 -> 0..7 */
 offset ^= 8;
#endif

 return (vector unsigned char)_mm_add_epi8(pattern,
          (vector unsigned char)_mm_set1_epi8(offset));
}

/************************************************************************/
/*  Predicates                                                          */
/************************************************************************/
/* Non-zero when at least one of the four elements has its top bit set. */
static inline int any_true_4f(vector float a1)
{
  const int top_bits = _mm_movemask_ps(a1);
  return (top_bits != 0x0000);
}

/* Non-zero when at least one of the four elements has its top bit clear. */
static inline int any_false_4f(vector float a1)
{
  const int top_bits = _mm_movemask_ps(a1);
  return (top_bits != 0x000F);
}

/* Non-zero when all four elements have their top bit set. */
static inline int all_true_4f(vector float a1)
{
  const int top_bits = _mm_movemask_ps(a1);
  return (0x000F == top_bits);
}

/* Non-zero when none of the four elements has its top bit set. */
static inline int all_false_4f(vector float a1)
{
  const int top_bits = _mm_movemask_ps(a1);
  return !top_bits;
}
/************************************************************************/
/* Non-zero when at least one of the sixteen bytes has its top bit set. */
static inline int any_true_16i(vector unsigned char a1)
{
  const int top_bits = _mm_movemask_epi8(a1);
  return (top_bits != 0x0000);
}

/* Non-zero when at least one of the sixteen bytes has its top bit clear. */
static inline int any_false_16i(vector unsigned char a1)
{
  const int top_bits = _mm_movemask_epi8(a1);
  return (top_bits != 0xFFFF);
}

/* Non-zero when all sixteen bytes have their top bit set. */
static inline int all_true_16i(vector unsigned char a1)
{
  const int top_bits = _mm_movemask_epi8(a1);
  return (0xFFFF == top_bits);
}

/* Non-zero when none of the sixteen bytes has its top bit set. */
static inline int all_false_16i(vector unsigned char a1)
{
  const int top_bits = _mm_movemask_epi8(a1);
  return !top_bits;
}
/************************************************************************/
/* Non-zero when at least one of the eight halfwords has its top bit set.
   0xAAAA keeps only the byte-mask bits of each halfword's upper byte. */
static inline int any_true_8i(vector unsigned short a1)
{
  const int top_bits = _mm_movemask_epi8(a1) & 0xAAAA;
  return (top_bits != 0x0000);
}

/* Non-zero when at least one of the eight halfwords has its top bit
   clear.  0xAAAA keeps only the byte-mask bits of each halfword's upper
   byte. */
static inline int any_false_8i(vector unsigned short a1)
{
  const int top_bits = _mm_movemask_epi8(a1) & 0xAAAA;
  return (top_bits != 0xAAAA);
}

/* Non-zero when all eight halfwords have their top bit set.  0xAAAA keeps
   only the byte-mask bits of each halfword's upper byte. */
static inline int all_true_8i(vector unsigned short a1)
{
  const int top_bits = _mm_movemask_epi8(a1) & 0xAAAA;
  return (0xAAAA == top_bits);
}

/* Non-zero when none of the eight halfwords has its top bit set.  0xAAAA
   keeps only the byte-mask bits of each halfword's upper byte. */
static inline int all_false_8i(vector unsigned short a1)
{
  const int top_bits = _mm_movemask_epi8(a1) & 0xAAAA;
  return !top_bits;
}
/************************************************************************/
/* Non-zero when at least one of the four words has its top bit set.
   0x8888 keeps only the byte-mask bit of each word's highest byte. */
static inline int any_true_4i(vector unsigned int a1)
{
  const int top_bits = _mm_movemask_epi8(a1) & 0x8888;
  return (top_bits != 0x0000);
}

/* Non-zero when at least one of the four words has its top bit clear.
   0x8888 keeps only the byte-mask bit of each word's highest byte. */
static inline int any_false_4i(vector unsigned int a1)
{
  const int top_bits = _mm_movemask_epi8(a1) & 0x8888;
  return (top_bits != 0x8888);
}

/* Non-zero when all four words have their top bit set.  0x8888 keeps only
   the byte-mask bit of each word's highest byte. */
static inline int all_true_4i(vector unsigned int a1)
{
  const int top_bits = _mm_movemask_epi8(a1) & 0x8888;
  return (0x8888 == top_bits);
}

/* Non-zero when none of the four words has its top bit set.  0x8888 keeps
   only the byte-mask bit of each word's highest byte. */
static inline int all_false_4i(vector unsigned int a1)
{
  const int top_bits = _mm_movemask_epi8(a1) & 0x8888;
  return !top_bits;
}
/************************************************************************/
/*  Unsigned Comparison                                                 */
/************************************************************************/
/* Unsigned a1 > a2 on sixteen bytes.  Adding 0x80 to both operands maps
   unsigned order onto signed order, so the signed compare can be used. */
static inline vector signed char __IA32_cmpgt_epu8 (vector signed char a1, vector signed char a2)
{
  const vector signed char bias = _mm_set1_epi8(0x80);
  vector signed char lhs, rhs;

  lhs = _mm_add_epi8(a1, bias);
  rhs = _mm_add_epi8(a2, bias);
  return (vector signed char) _mm_cmpgt_epi8(lhs, rhs);
}

/* Unsigned a1 > a2 on eight halfwords.  Adding 0x8000 to both operands
   maps unsigned order onto signed order, so the signed compare can be
   used. */
static inline vector signed short __IA32_cmpgt_epu16 (vector signed short a1, vector signed short a2)
{
  const vector signed short bias = _mm_set1_epi16(0x8000);
  vector signed short lhs, rhs;

  lhs = _mm_add_epi16(a1, bias);
  rhs = _mm_add_epi16(a2, bias);
  return (vector signed short) _mm_cmpgt_epi16(lhs, rhs);
}

/* Unsigned a1 > a2 on four words.  Adding 0x80000000 to both operands
   maps unsigned order onto signed order, so the signed compare can be
   used. */
static inline vector signed int __IA32_cmpgt_epu32 (vector signed int a1, vector signed int a2)
{
  const vector signed int bias = _mm_set1_epi32(0x80000000);
  vector signed int lhs, rhs;

  lhs = _mm_add_epi32(a1, bias);
  rhs = _mm_add_epi32(a2, bias);
  return (vector signed int) _mm_cmpgt_epi32(lhs, rhs);
}

/* Unsigned less-than: A < B is evaluated as B > A by swapping the
   operands of the matching greater-than helper. */
#define __IA32_cmplt_epu8(A,B) __IA32_cmpgt_epu8((B),(A))
#define __IA32_cmplt_epu16(A,B) __IA32_cmpgt_epu16((B),(A))
#define __IA32_cmplt_epu32(A,B) __IA32_cmpgt_epu32((B),(A))

/************************************************************************/
/* Bounds compare of a against [-b, b].  For every element the result has
   bit 31 set when a > b, bit 30 set when a < -b, and both bits set when
   either operand is NaN; all other bits are zero. */
static inline vector signed int __IA32_vcmpbfp (vector float a, vector float b)
{
  const vector signed int both_bits  = _mm_set1_epi32(0xC0000000);
  const vector signed int above_bit  = _mm_set1_epi32(0x80000000);
  const vector signed int below_bit  = _mm_set1_epi32(0x40000000);
  vector float is_nan, too_high, too_low, minus_b;
  vector signed int result;

  /* unordered operands flag both bounds */
  is_nan = _mm_or_ps(_mm_cmpunord_ps(a, a), _mm_cmpunord_ps(b, b));
  result = _mm_and_si128((vector int) is_nan, both_bits);

  /* upper bound violated */
  too_high = _mm_cmpgt_ps(a, b);
  result = _mm_or_si128(result, _mm_and_si128((vector int) too_high, above_bit));

  /* lower bound violated; -b is obtained by flipping b's sign bit */
  minus_b = _mm_xor_ps((vector float) above_bit, b);
  too_low = _mm_cmplt_ps(a, minus_b);
  result = _mm_or_si128(result, _mm_and_si128((vector int) too_low, below_bit));

  return (vector signed int) result;
}
/************************************************************************/
/* Bitwise NOR: ~(a1 | a2), with the complement done as XOR against
   all-ones. */
static inline vector signed int __IA32_vnor (vector signed int a1, vector signed int a2)
{
  const vector signed int all_ones = _mm_set1_epi32(0xFFFFFFFF);
  vector signed int either = _mm_or_si128(a1, a2);

  return (vector signed int) _mm_xor_si128(either, all_ones);
}
/************************************************************************/
/*  Maximum, Minimum, Select                                            */
/************************************************************************/
/* Element-wise maximum of sixteen signed bytes, built from a compare and
   a bitwise select. */
static inline vector signed char __IA32_vmaxsb (vector signed char a1, vector signed char a2)
{
  vector signed char a1_wins, from_a1, from_a2;

  a1_wins = _mm_cmpgt_epi8(a1, a2);
  from_a1 = _mm_and_si128(a1_wins, a1);
  from_a2 = _mm_andnot_si128(a1_wins, a2);
  return (vector signed char) _mm_or_si128(from_a1, from_a2);
}

/* Element-wise maximum of eight halfwords compared as unsigned values,
   built from the unsigned compare helper and a bitwise select. */
static inline vector unsigned short __IA32_vmaxuh (vector signed short a1, vector signed short a2)
{
  vector unsigned short a1_wins, from_a1, from_a2;

  a1_wins = __IA32_cmpgt_epu16(a1, a2);
  from_a1 = _mm_and_si128(a1_wins, a1);
  from_a2 = _mm_andnot_si128(a1_wins, a2);
  return (vector unsigned short) _mm_or_si128(from_a1, from_a2);
}

/* Element-wise maximum of four words compared as unsigned values, built
   from the unsigned compare helper and a bitwise select. */
static inline vector unsigned int __IA32_vmaxuw (vector signed int a1, vector signed int a2)
{
  vector unsigned int a1_wins, from_a1, from_a2;

  a1_wins = __IA32_cmpgt_epu32(a1, a2);
  from_a1 = _mm_and_si128(a1_wins, a1);
  from_a2 = _mm_andnot_si128(a1_wins, a2);
  return (vector unsigned int) _mm_or_si128(from_a1, from_a2);
}

/* Element-wise maximum of four signed ints, built from a compare and a
   bitwise select. */
static inline vector signed int __IA32_vmaxsw (vector signed int a1, vector signed int a2)
{
  vector signed int a1_wins, from_a1, from_a2;

  a1_wins = _mm_cmpgt_epi32(a1, a2);
  from_a1 = _mm_and_si128(a1_wins, a1);
  from_a2 = _mm_andnot_si128(a1_wins, a2);
  return (vector signed int) _mm_or_si128(from_a1, from_a2);
}

/* Element-wise maximum of four floats.  Elements where either operand is
   NaN are forced to an all-ones (NaN) pattern. */
static inline vector float __IA32_vmaxfp (vector float a1, vector float a2)
{
  vector float unordered = _mm_cmpunord_ps(a1, a2);
  vector float larger    = _mm_max_ps(a1, a2);

  return (vector float) _mm_or_ps(unordered, larger);
}
/************************************************************************/
/* Element-wise minimum of sixteen signed bytes, built from a compare and
   a bitwise select. */
static inline vector signed char __IA32_vminsb (vector signed char a1, vector signed char a2)
{
  vector signed char a1_wins, from_a1, from_a2;

  a1_wins = _mm_cmplt_epi8(a1, a2);
  from_a1 = _mm_and_si128(a1_wins, a1);
  from_a2 = _mm_andnot_si128(a1_wins, a2);
  return (vector signed char) _mm_or_si128(from_a1, from_a2);
}

/* Element-wise minimum of eight halfwords compared as unsigned values,
   built from the unsigned compare helper and a bitwise select. */
static inline vector unsigned short __IA32_vminuh (vector signed short a1, vector signed short a2)
{
  vector unsigned short a1_wins, from_a1, from_a2;

  a1_wins = __IA32_cmpgt_epu16(a2, a1);
  from_a1 = _mm_and_si128(a1_wins, a1);
  from_a2 = _mm_andnot_si128(a1_wins, a2);
  return (vector unsigned short) _mm_or_si128(from_a1, from_a2);
}

/* Element-wise minimum of four words compared as unsigned values, built
   from the unsigned compare helper and a bitwise select. */
static inline vector unsigned int __IA32_vminuw (vector signed int a1, vector signed int a2)
{
  vector unsigned int a1_wins, from_a1, from_a2;

  a1_wins = __IA32_cmpgt_epu32(a2, a1);
  from_a1 = _mm_and_si128(a1_wins, a1);
  from_a2 = _mm_andnot_si128(a1_wins, a2);
  return (vector unsigned int) _mm_or_si128(from_a1, from_a2);
}

/* Element-wise minimum of four signed ints, built from a compare and a
   bitwise select. */
static inline vector signed int __IA32_vminsw (vector signed int a1, vector signed int a2)
{
  vector signed int a1_wins, from_a1, from_a2;

  a1_wins = _mm_cmplt_epi32(a1, a2);
  from_a1 = _mm_and_si128(a1_wins, a1);
  from_a2 = _mm_andnot_si128(a1_wins, a2);
  return (vector signed int) _mm_or_si128(from_a1, from_a2);
}

/* Element-wise minimum of four floats.  Elements where either operand is
   NaN are forced to an all-ones (NaN) pattern. */
static inline vector float __IA32_vminfp (vector float a1, vector float a2)
{
  vector float unordered = _mm_cmpunord_ps(a1, a2);
  vector float smaller   = _mm_min_ps(a1, a2);

  return (vector float) _mm_or_ps(unordered, smaller);
}
/************************************************************************/
/* Bitwise select: each result bit comes from a2 where the matching bit of
   a3 is 1 and from a1 where it is 0. */
static inline vector signed int __IA32_vsel_4si (vector signed int a1,
					  vector signed int a2, vector signed int a3)
{
  vector signed int from_a2 = _mm_and_si128(a3, a2);
  vector signed int from_a1 = _mm_andnot_si128(a3, a1);

  return (vector signed int) _mm_or_si128(from_a2, from_a1);
}

/* Bitwise select on float vectors: every result bit comes from a2
   where the matching bit of a3 is 1, and from a1 where it is 0. */
static inline vector float __IA32_vsel_4sf (vector float a1, vector float a2, vector float a3)
{
  vector float picked_a2 = _mm_and_ps(a3, a2);
  vector float picked_a1 = _mm_andnot_ps(a3, a1);

  return (vector float) _mm_or_ps(picked_a2, picked_a1);
}
/************************************************************************/
/*  Shifts (SSE does not have per-element shifts so do scalar ops)      */
/*  Use gcc/AT&T assembler to access shift and rotate instructions      */
/************************************************************************/

/* left logical shift */
/* Per-element logical left shift of sixteen bytes.
   Each byte of a1 is shifted by the low three bits of the matching
   byte of a2; the work is done one element at a time in scalar code. */
static inline vector unsigned char __IA32_vslb (vector unsigned char a1, vector unsigned char a2)
{
  mix_u8 value, amount, result;
  int lane = 0;

  value.v = a1;
  amount.v = _mm_and_si128(_mm_set1_epi8(0x07), a2);

  while (lane < 16) {
    result.s[lane] = value.s[lane] << amount.s[lane];
    lane++;
  }

  return result.v;
}

/* Per-element logical left shift of eight halfwords.
   Each halfword of a1 is shifted by the low four bits of the matching
   halfword of a2; the work is done one element at a time in scalar code. */
static inline vector unsigned short __IA32_vslh (vector unsigned short a1, vector unsigned short a2)
{
  mix_u16 value, amount, result;
  int lane = 0;

  value.v = a1;
  amount.v = _mm_and_si128(_mm_set1_epi16(0x000F), a2);

  while (lane < 8) {
    result.s[lane] = value.s[lane] << amount.s[lane];
    lane++;
  }

  return result.v;
}

/* Per-element logical left shift of four words.
   Each word of a1 is shifted by the low five bits of the matching
   word of a2; the work is done one element at a time in scalar code. */
static inline vector unsigned int __IA32_vslw (vector unsigned int a1, vector unsigned int a2)
{
  mix_u32 value, amount, result;
  int lane = 0;

  value.v = a1;
  amount.v = _mm_and_si128(_mm_set1_epi32(0x0000001F), a2);

  while (lane < 4) {
    result.s[lane] = value.s[lane] << amount.s[lane];
    lane++;
  }

  return result.v;
}

/* 6.3.7 in the C standard defines right shift operator as follows:
   unsigned or positive values feed zeros in the left end (logical shift),
   signed negative values may feed zeros or ones (implementation dependent).
*/

/* right logical shift */
/* Per-element logical right shift of sixteen bytes.
   Each byte of a1 is shifted by the low three bits of the matching
   byte of a2; the work is done one element at a time in scalar code. */
static inline vector unsigned char __IA32_vsrb (vector unsigned char a1, vector unsigned char a2)
{
  mix_u8 value, amount, result;
  int lane = 0;

  value.v = a1;
  amount.v = _mm_and_si128(_mm_set1_epi8(0x07), a2);

  while (lane < 16) {
    result.s[lane] = value.s[lane] >> amount.s[lane];
    lane++;
  }

  return result.v;
}

/* Per-element logical right shift of eight halfwords.
   Each halfword of a1 is shifted by the low four bits of the matching
   halfword of a2; the work is done one element at a time in scalar code. */
static inline vector unsigned short __IA32_vsrh (vector unsigned short a1, vector unsigned short a2)
{
  mix_u16 value, amount, result;
  int lane = 0;

  value.v = a1;
  amount.v = _mm_and_si128(_mm_set1_epi16(0x000F), a2);

  while (lane < 8) {
    result.s[lane] = value.s[lane] >> amount.s[lane];
    lane++;
  }

  return result.v;
}

/* Per-element logical right shift of four words.
   Each word of a1 is shifted by the low five bits of the matching
   word of a2; the work is done one element at a time in scalar code. */
static inline vector unsigned int __IA32_vsrw (vector unsigned int a1, vector unsigned int a2)
{
  mix_u32 value, amount, result;
  int lane = 0;

  value.v = a1;
  amount.v = _mm_and_si128(_mm_set1_epi32(0x0000001F), a2);

  while (lane < 4) {
    result.s[lane] = value.s[lane] >> amount.s[lane];
    lane++;
  }

  return result.v;
}

/* right arithmetic shift */
/* Per-element arithmetic right shift of sixteen bytes.
   Each byte of a1 is shifted by the low three bits of the matching byte
   of a2 using the x86 SAR instruction, one element per loop iteration. */
static inline vector unsigned char __IA32_vsrab (vector unsigned char a1, vector unsigned char a2)
{
  mix_u8 a, b, r;
  int i;

  a.v = a1;
  /* keep only the low 3 bits of every shift count */
  b.v = _mm_and_si128(_mm_set1_epi8(0x07), a2);

  for (i = 0; i < 16; i++) {
    /* copy the count into CL, then shift operand 0 in place; the "0"
       constraint ties the input value to the output operand and CL is
       declared as clobbered */
    __asm__ ("movb\t%2,%%cl\n\t"
	     "sarb\t%%cl,%0"
	     : "=q"(r.s[i])
	     : "0"(a.s[i]), "q"(b.s[i])
	     : "%cl");
  }  
  
  return r.v;
}

/* Per-element arithmetic right shift of eight halfwords.
   Each halfword of a1 is shifted by the low four bits of the matching
   halfword of a2 using the x86 SAR instruction, one element per loop
   iteration. */
static inline vector unsigned short __IA32_vsrah (vector unsigned short a1, vector unsigned short a2)
{
  mix_u16 a, b, r;
  int i;

  a.v = a1;
  /* keep only the low 4 bits of every shift count */
  b.v = _mm_and_si128(_mm_set1_epi16(0x000F), a2);

  for (i = 0; i < 8; i++) {
    /* the count is narrowed to a byte so it can be moved into CL; the
       "0" constraint ties the input value to the output operand */
    __asm__ ("movb\t%2,%%cl\n\t"
	     "sarw\t%%cl,%0"
	     : "=q"(r.s[i])
	     : "0"(a.s[i]), "q"((unsigned char)b.s[i])
	     : "%cl");
  }
  
  return r.v;
}

/* Per-element arithmetic right shift of four words.
   Each word of a1 is shifted by the low five bits of the matching word
   of a2 using the x86 SAR instruction, one element per loop iteration. */
static inline vector unsigned int __IA32_vsraw (vector unsigned int a1, vector unsigned int a2)
{
  mix_u32 a, b, r;
  int i;

  a.v = a1;
  /* keep only the low 5 bits of every shift count */
  b.v = _mm_and_si128(_mm_set1_epi32(0x0000001F), a2);

  for (i = 0; i < 4; i++) {
    /* the count is narrowed to a byte so it can be moved into CL; the
       "0" constraint ties the input value to the output operand */
    __asm__ ("movb\t%2,%%cl\n\t"
	     "sarl\t%%cl,%0"
	     : "=q"(r.s[i])
	     : "0"(a.s[i]), "q"((unsigned char)b.s[i])
	     : "%cl");
  }
  
  return r.v;
}

/* left rotate */
/* Per-element left rotate of sixteen bytes.
   Each byte of a1 is rotated by the matching byte of a2 using the x86
   ROL instruction, one element per loop iteration.  The counts are
   passed to the instruction without being masked here. */
static inline vector unsigned char __IA32_vrlb (vector unsigned char a1, vector unsigned char a2)
{
  mix_u8 a, b, r;
  int i;

  a.v = a1;
  b.v = a2;

  for (i = 0; i < 16; i++) {
    /* copy the count into CL, then rotate operand 0 in place; the "0"
       constraint ties the input value to the output operand */
    __asm__ ("movb\t%2,%%cl\n\t"
	     "rolb\t%%cl,%0"
	     : "=q"(r.s[i])
	     : "0"(a.s[i]), "q"(b.s[i])
	     : "%cl");
  }
  
  return r.v;
}

/* Per-element left rotate of eight halfwords.
   Each halfword of a1 is rotated by the matching halfword of a2 using
   the x86 ROL instruction, one element per loop iteration.  Only the
   low byte of each count reaches the instruction (see the cast below). */
static inline vector unsigned short __IA32_vrlh (vector unsigned short a1, vector unsigned short a2)
{
  mix_u16 a, b, r;
  int i;

  a.v = a1;
  b.v = a2;

  for (i = 0; i < 8; i++) {
    /* the count is narrowed to a byte so it can be moved into CL; the
       "0" constraint ties the input value to the output operand */
    __asm__ ("movb\t%2,%%cl\n\t"
	     "rolw\t%%cl,%0"
	     : "=q"(r.s[i])
	     : "0"(a.s[i]), "q"((unsigned char)b.s[i])
	     : "%cl");
  }
  
  return r.v;
}

/* Per-element left rotate of four words.
   Each word of a1 is rotated by the matching word of a2 using the x86
   ROL instruction, one element per loop iteration.  Only the low byte
   of each count reaches the instruction (see the cast below). */
static inline vector unsigned int __IA32_vrlw (vector unsigned int a1, vector unsigned int a2)
{
  mix_u32 a, b, r;
  int i;

  a.v = a1;
  b.v = a2;

  for (i = 0; i < 4; i++) {
    /* the count is narrowed to a byte so it can be moved into CL; the
       "0" constraint ties the input value to the output operand */
    __asm__ ("movb\t%2,%%cl\n\t"
	     "roll\t%%cl,%0"
	     : "=q"(r.s[i])
	     : "0"(a.s[i]), "q"((unsigned char)b.s[i])
	     : "%cl");
  }
  
  return r.v;
}
/************************************************************************/
/* Shift entire vector by n bytes                                       */
/* If data is loaded from memory then left and right are interchanged.  */
/************************************************************************/
/* Bind vslo/vsro (shift left/right by octet) to the byte-shift helpers.
   When MEMORY_ORDER is defined the two directions are swapped. */
#ifdef MEMORY_ORDER
#define __IA32_vslo __BYTES_RIGHT
#define __IA32_vsro __BYTES_LEFT
#else
#define __IA32_vslo __BYTES_LEFT
#define __IA32_vsro __BYTES_RIGHT
#endif
/* Shift the whole 128-bit value `a` left by a run-time number of bytes.
   The byte count is taken from element 15 of `n`, shifted right by three
   bits; only its low four bits are acted on (0..15 bytes).
   _mm_slli_si128 needs an immediate operand, so the count is applied as
   a sequence of conditional shifts by 1, 2, 4 and 8 bytes. */
static inline vector unsigned char __BYTES_LEFT(vector signed int a, vector signed int n)
{
  mix_u8 control;
  vector signed int shifted = a;
  int nbytes;

  /* shift value is in bits 3-6 of LSB (right end) of n */
  control.v = (vector unsigned char) n;
  nbytes = control.s[15] >> 3;

  if (nbytes & 1) {
    shifted = _mm_slli_si128(shifted, 1);
  }
  if (nbytes & 2) {
    shifted = _mm_slli_si128(shifted, 2);
  }
  if (nbytes & 4) {
    shifted = _mm_slli_si128(shifted, 4);
  }
  if (nbytes & 8) {
    shifted = _mm_slli_si128(shifted, 8);
  }

  return (vector unsigned char) shifted;
}

/* Shift the whole 128-bit value `a` right by a run-time number of bytes.
   The byte count is taken from element 15 of `n`, shifted right by three
   bits; only its low four bits are acted on (0..15 bytes).
   _mm_srli_si128 needs an immediate operand, so the count is applied as
   a sequence of conditional shifts by 1, 2, 4 and 8 bytes. */
static inline vector unsigned char __BYTES_RIGHT(vector signed int a, vector signed int n)
{
  mix_u8 control;
  vector signed int shifted = a;
  int nbytes;

  /* shift value is in bits 3-6 of LSB (right end) of n */
  control.v = (vector unsigned char) n;
  nbytes = control.s[15] >> 3;

  if (nbytes & 1) {
    shifted = _mm_srli_si128(shifted, 1);
  }
  if (nbytes & 2) {
    shifted = _mm_srli_si128(shifted, 2);
  }
  if (nbytes & 4) {
    shifted = _mm_srli_si128(shifted, 4);
  }
  if (nbytes & 8) {
    shifted = _mm_srli_si128(shifted, 8);
  }

  return (vector unsigned char) shifted;
}
/************************************************************************/

/* Shift-left-double by `c` bytes: extract 16 bytes from the
   concatenation of va and vb, starting `c` bytes in.

   Without MEMORY_ORDER, the word-aligned offsets 0, 4, 8, 12 and 16 are
   handled by whole-word moves or a single shuffle.  Every other offset
   (and every offset when MEMORY_ORDER is defined) uses the generic
   byte loop, in which each byte index is mirrored inside its 32-bit
   word (0<->3, 1<->2) on both the source and the destination side. */
static inline vector char __IA32_vsldoi_4si (vector char va, vector char vb, int c)
{
#ifndef MEMORY_ORDER
  if (c == 0) {
    return va;
  }
  if (c == 16) {
    return vb;
  }
  if (c == 8) {
    return (vector char)_mm_shuffle_ps((__m128)va, (__m128)vb, _MM_SHUFFLE(1,0,3,2));
  }
  if (c == 4 || c == 12) {
    mix_u32 wa, wb, wr;

    wa.v = va;
    wb.v = vb;
    if (c == 4) {
      wr.s[0] = wa.s[1];
      wr.s[1] = wa.s[2];
      wr.s[2] = wa.s[3];
      wr.s[3] = wb.s[0];
    } else {
      wr.s[0] = wa.s[3];
      wr.s[1] = wb.s[0];
      wr.s[2] = wb.s[1];
      wr.s[3] = wb.s[2];
    }
    return wr.v;
  }
#endif
  {
    mix_u8 src_a, src_b, dst;
    int pos;

    src_a.v = va;
    src_b.v = vb;

    for (pos = 0; pos < 16; pos++) {
      int moved = pos + c;
      int dst_idx = (pos & 0xfc) + 3 - (pos & 3);
      int src_idx = (moved & 0xfc) + 3 - (moved & 3);

      /* indices 0..15 address va, 16..31 address vb */
      if (src_idx < 16) {
        dst.s[dst_idx] = src_a.s[src_idx];
      } else {
        dst.s[dst_idx] = src_b.s[src_idx - 16];
      }
    }

    return dst.v;
  }
}



/************************************************************************/

/* Merge-high / merge-low (interleave) operations mapped onto the SSE
   unpack intrinsics.  The choice of unpacklo vs. unpackhi and the
   operand order both depend on MEMORY_ORDER. */
#ifdef MEMORY_ORDER
/* data in registers have been loaded from memory
   and are therefore in reverse order */
#define __IA32_vmrghb(A,B) _mm_unpacklo_epi8((A),(B))
#define __IA32_vmrghh(A,B) _mm_unpacklo_epi16((A),(B))
#define __IA32_vmrghw(A,B) _mm_unpacklo_epi32((A),(B))
#define __IA32_vmrghf(A,B) _mm_unpacklo_ps((A),(B))
#define __IA32_vmrglb(A,B) _mm_unpackhi_epi8((A),(B))
#define __IA32_vmrglh(A,B) _mm_unpackhi_epi16((A),(B))
#define __IA32_vmrglw(A,B) _mm_unpackhi_epi32((A),(B))
#define __IA32_vmrglf(A,B) _mm_unpackhi_ps((A),(B))
#else
/* data should be viewed as-is in the registers */
/* note: the operands are passed to the intrinsic as (B, A) here */
#define __IA32_vmrghb(A,B) _mm_unpackhi_epi8((B),(A))
#define __IA32_vmrghh(A,B) _mm_unpackhi_epi16((B),(A))
#define __IA32_vmrghw(A,B) _mm_unpackhi_epi32((B),(A))
#define __IA32_vmrghf(A,B) _mm_unpackhi_ps((B),(A))
#define __IA32_vmrglb(A,B) _mm_unpacklo_epi8((B),(A))
#define __IA32_vmrglh(A,B) _mm_unpacklo_epi16((B),(A))
#define __IA32_vmrglw(A,B) _mm_unpacklo_epi32((B),(A))
#define __IA32_vmrglf(A,B) _mm_unpacklo_ps((B),(A))
#endif

/************************************************************************/
/* splat: a = vector with N elements.
          n = integer 0 ... N-1.
	  return a vector of N elements filled with a[n].
   On altivec n counts from left to right (0 = MSB, N-1 = LSB).
   Intel counts from right to left. If the values in the vector register
   have been loaded from memory then a[n] will give the correct answer.
   If they have been created in the register then the order needs to be
   reversed - ie use a[N-1-n].
*/

/* Splat one byte: return a vector whose sixteen bytes all equal the
   selected element of a1.  n is reduced to 0..15; without MEMORY_ORDER
   the element index is mirrored (15 - n). */
static inline vector signed char __IA32_vspltb (vector signed char a1, const char n)
{
  mix_s8 lanes;
  int idx = n & 0x0F;

#ifndef MEMORY_ORDER
  idx = 15 - idx;
#endif
  lanes.v = a1;
  return (vector signed char) _mm_set1_epi8(lanes.s[idx]);
}

/* Splat one halfword: return a vector whose eight halfwords all equal
   the selected element of a1.  n is reduced to 0..7; without
   MEMORY_ORDER the element index is mirrored (7 - n). */
static inline vector signed short __IA32_vsplth (vector signed short a1, const char n)
{
  mix_s16 lanes;
  int idx = n & 0x07;

#ifndef MEMORY_ORDER
  idx = 7 - idx;
#endif
  lanes.v = a1;
  return (vector signed short) _mm_set1_epi16(lanes.s[idx]);
}

/* Splat one word: return a vector whose four words all equal the
   selected element of a1.  n is reduced to 0..3; without MEMORY_ORDER
   the element index is mirrored (3 - n). */
static inline vector signed int __IA32_vspltw (vector signed int a1, const char n)
{
  mix_s32 lanes;
  int idx = n & 0x03;

#ifndef MEMORY_ORDER
  idx = 3 - idx;
#endif
  lanes.v = a1;
  return (vector signed int) _mm_set1_epi32(lanes.s[idx]);
}

/* Splat one float: return a vector whose four floats all equal the
   selected element of a1.  n is reduced to 0..3; without MEMORY_ORDER
   the element index is mirrored (3 - n). */
static inline vector float __IA32_vspltf (vector float a1, const char n)
{
  mix_f32 lanes;
  int idx = n & 0x03;

#ifndef MEMORY_ORDER
  idx = 3 - idx;
#endif
  lanes.v = a1;
  return (vector float) _mm_set1_ps(lanes.s[idx]);
}

/************************************************************************/
/* Load a single unsigned byte from address b + a (byte offset) into the
   vector element selected by the low four bits of that address.
   No other element of the returned vector is written. */
static inline vector unsigned char __IA32_lveub (int a, unsigned char *b)
{
  mix_u8 result;
  unsigned char *src = (unsigned char *)((char *)b + a);
  size_t lane = (size_t)src & 0x0000000F;

  result.s[lane] = *src;
  return result.v;
}

/* Load a single signed byte from address b + a (byte offset) into the
   vector element selected by the low four bits of that address.
   No other element of the returned vector is written. */
static inline vector signed char __IA32_lvesb (int a, signed char *b)
{
  mix_s8 result;
  signed char *src = (signed char *)((char *)b + a);
  size_t lane = (size_t)src & 0x0000000F;

  result.s[lane] = *src;
  return result.v;
}

/* Load a single unsigned halfword from address b + a (byte offset) into
   the vector element selected by bits 1-3 of that address.
   No other element of the returned vector is written. */
static inline vector unsigned short __IA32_lveuh (int a, unsigned short *b)
{
  mix_u16 result;
  unsigned short *src = (unsigned short *)((char *)b + a);
  size_t lane = ((size_t)src & 0x0000000F) >> 1;

  result.s[lane] = *src;
  return result.v;
}

/* Load a single signed halfword from address b + a (byte offset) into
   the vector element selected by bits 1-3 of that address.
   No other element of the returned vector is written. */
static inline vector signed short __IA32_lvesh (int a, signed short *b)
{
  mix_s16 result;
  signed short *src = (signed short *)((char *)b + a);
  size_t lane = ((size_t)src & 0x0000000F) >> 1;

  result.s[lane] = *src;
  return result.v;
}

/* Load a single unsigned word from address b + a (byte offset) into the
   vector element selected by bits 2-3 of that address.
   No other element of the returned vector is written. */
static inline vector unsigned int __IA32_lveuw (int a, unsigned int *b)
{
  mix_u32 result;
  unsigned int *src = (unsigned int *)((char *)b + a);
  size_t lane = ((size_t)src & 0x0000000F) >> 2;

  result.s[lane] = *src;
  return result.v;
}

/* Load a single signed word from address b + a (byte offset) into the
   vector element selected by bits 2-3 of that address.
   No other element of the returned vector is written. */
static inline vector signed int __IA32_lvesw (int a, signed int *b)
{
  mix_s32 result;
  signed int *src = (signed int *)((char *)b + a);
  size_t lane = ((size_t)src & 0x0000000F) >> 2;

  result.s[lane] = *src;
  return result.v;
}

/* Load a single float from address b + a (byte offset) into the vector
   element selected by bits 2-3 of that address.
   No other element of the returned vector is written. */
static inline vector float __IA32_lvef (int a, float *b)
{
  mix_f32 result;
  float *src = (float *)((char *)b + a);
  size_t lane = ((size_t)src & 0x0000000F) >> 2;

  result.s[lane] = *src;
  return result.v;
}
/************************************************************************/
/* Store one unsigned byte of x to address b + a (byte offset).
   The element written is the one selected by the low four bits of
   that address. */
static inline void __IA32_stveub (vector unsigned char x, int a, unsigned char *b)
{
  mix_u8 lanes;
  unsigned char *dst = (unsigned char *)((char *)b + a);
  size_t lane = (size_t)dst & 0x0000000F;

  lanes.v = x;
  *dst = lanes.s[lane];
}

/* Store one signed byte of x to address b + a (byte offset).
   The element written is the one selected by the low four bits of
   that address.

   Fix: the assignment used to run in the load direction (memory into
   the local copy of x), so nothing was ever stored.  It now writes the
   selected element to memory, like the other stve* functions. */
static inline void __IA32_stvesb (vector signed char x, int a, signed char *b)
{
  mix_s8 r;
  char *address;
  size_t i;
  
  r.v = x;
  address = (char *)b + a;
  i = (size_t)address & 0x0000000F;
  *((signed char *)address) = r.s[i];
  return;
}

/* Store one unsigned halfword of x to address b + a (byte offset).
   The element written is the one selected by bits 1-3 of that address. */
static inline void __IA32_stveuh (vector unsigned short x, int a, unsigned short *b)
{
  mix_u16 lanes;
  unsigned short *dst = (unsigned short *)((char *)b + a);
  size_t lane = ((size_t)dst & 0x0000000F) >> 1;

  lanes.v = x;
  *dst = lanes.s[lane];
}

/* Store one signed halfword of x to address b + a (byte offset).
   The element written is the one selected by bits 1-3 of that address. */
static inline void __IA32_stvesh (vector signed short x, int a, signed short *b)
{
  mix_s16 lanes;
  signed short *dst = (signed short *)((char *)b + a);
  size_t lane = ((size_t)dst & 0x0000000F) >> 1;

  lanes.v = x;
  *dst = lanes.s[lane];
}

/* Store one unsigned word of x to address b + a (byte offset).
   The element written is the one selected by bits 2-3 of that address. */
static inline void __IA32_stveuw (vector unsigned int x, int a, unsigned int *b)
{
  mix_u32 lanes;
  unsigned int *dst = (unsigned int *)((char *)b + a);
  size_t lane = ((size_t)dst & 0x0000000F) >> 2;

  lanes.v = x;
  *dst = lanes.s[lane];
}

/* Store one signed word of x to address b + a (byte offset).
   The element written is the one selected by bits 2-3 of that address. */
static inline void __IA32_stvesw (vector signed int x, int a, signed int *b)
{
  mix_s32 lanes;
  signed int *dst = (signed int *)((char *)b + a);
  size_t lane = ((size_t)dst & 0x0000000F) >> 2;

  lanes.v = x;
  *dst = lanes.s[lane];
}

/* Store one float of x to address b + a (byte offset).
   The element written is the one selected by bits 2-3 of that address. */
static inline void __IA32_stvef (vector float x, int a, float *b)
{
  mix_f32 lanes;
  float *dst = (float *)((char *)b + a);
  size_t lane = ((size_t)dst & 0x0000000F) >> 2;

  lanes.v = x;
  *dst = lanes.s[lane];
}
/************************************************************************/
/* multiply-sum */
/* Multiply-sum of unsigned bytes.
   For each of the four words: multiply the four byte pairs that lie in
   that word of a1 and a2, add the four products, then add the matching
   word of a3.  The accumulation is done in unsigned int. */
static inline vector unsigned int __IA32_vmsumubm(vector unsigned char a1,
					   vector unsigned char a2, vector unsigned int a3)
{
  mix_u8 lhs, rhs;
  mix_u32 addend, out;
  int word;

  lhs.v = a1;
  rhs.v = a2;
  addend.v = a3;

  for (word = 0; word < 4; word++) {
    int first = 4 * word;
    int k;
    unsigned int acc = 0;

    for (k = first; k < first + 4; k++) {
      acc += lhs.s[k] * rhs.s[k];
    }
    out.s[word] = acc + addend.s[word];
  }

  return out.v;
}

/* Multiply-sum of mixed-sign bytes.
   For each of the four words: multiply the four byte pairs that lie in
   that word of a1 and a2, add the four products, then add the matching
   word of a3.  a1 is reinterpreted as unsigned bytes before the
   multiply; the original author notes this cast is required to obtain
   the same answer as the PowerPC. */
static inline vector signed int __IA32_vmsummbm(vector signed char a1,
					 vector unsigned char a2, vector signed int a3)
{
  mix_u8 lhs, rhs;
  mix_s32 addend, out;
  int word;

  lhs.v = (vector unsigned char) a1; /* have to cast to get same answer as PPC */
  rhs.v = a2;
  addend.v = a3;

  for (word = 0; word < 4; word++) {
    int first = 4 * word;
    int k;
    int acc = 0;

    for (k = first; k < first + 4; k++) {
      acc += lhs.s[k] * rhs.s[k];
    }
    out.s[word] = acc + addend.s[word];
  }

  return out.v;
}

/* Multiply-sum of unsigned halfwords (modulo arithmetic).
   For each of the four words: multiply the two halfword pairs that lie
   in that word of a1 and a2, add both products and the matching word of
   a3.

   Fix: the halfword operands are widened to unsigned int before the
   multiply.  Without the cast they are promoted to (signed) int, and a
   product of two large halfwords (e.g. 0xFFFF * 0xFFFF) overflows int,
   which is undefined behaviour.  Unsigned arithmetic wraps modulo 2^32
   as intended. */
static inline vector unsigned int __IA32_vmsumuhm(vector unsigned short a1,
					   vector unsigned short a2, vector unsigned int a3)
{
  mix_u16 a, b;
  mix_u32 c, r;
  int i;
  
  a.v = a1;
  b.v = a2;
  c.v = a3;
  
  for (i = 0; i < 4; i++) {
    unsigned int even = (unsigned int)a.s[2*i] * (unsigned int)b.s[2*i];
    unsigned int odd = (unsigned int)a.s[2*i + 1] * (unsigned int)b.s[2*i + 1];

    r.s[i] = even + odd + c.s[i];
  }
  
  return r.v;
}

/* Multiply-sum of signed halfwords.
   For each of the four words: multiply the two halfword pairs that lie
   in that word of a1 and a2, add both products and the matching word of
   a3. */
static inline vector signed int __IA32_vmsumshm(vector signed short a1,
					 vector signed short a2, vector signed int a3)
{
  mix_s16 lhs, rhs;
  mix_s32 addend, out;
  int word;

  lhs.v = a1;
  rhs.v = a2;
  addend.v = a3;

  for (word = 0; word < 4; word++) {
    int lo = 2 * word;
    int hi = lo + 1;

    out.s[word] = lhs.s[lo] * rhs.s[lo] + lhs.s[hi] * rhs.s[hi] + addend.s[word];
  }

  return out.v;
}
/************************************************************************/
/* elements of even index in altivec = high-order half (count from the left)
   when loaded from memory on intel they end up in the low-order half
   (which is still even index as intel counts from the right)
*/
/* Bind the even (vmule*) and odd (vmulo*) multiplies to the helpers
   below.  __IA32_MUL_HIGH_* multiply the elements at index 2*i and
   __IA32_MUL_LOW_* the elements at index 2*i+1; which of the two serves
   as "even" depends on MEMORY_ORDER. */
#ifdef MEMORY_ORDER
#define __IA32_vmuleub __IA32_MUL_HIGH_UB
#define __IA32_vmulesb __IA32_MUL_HIGH_SB
#define __IA32_vmuleuh __IA32_MUL_HIGH_UH
#define __IA32_vmulesh __IA32_MUL_HIGH_SH
#define __IA32_vmuloub __IA32_MUL_LOW_UB
#define __IA32_vmulosb __IA32_MUL_LOW_SB
#define __IA32_vmulouh __IA32_MUL_LOW_UH
#define __IA32_vmulosh __IA32_MUL_LOW_SH
#else
#define __IA32_vmuleub __IA32_MUL_LOW_UB
#define __IA32_vmulesb __IA32_MUL_LOW_SB
#define __IA32_vmuleuh __IA32_MUL_LOW_UH
#define __IA32_vmulesh __IA32_MUL_LOW_SH
#define __IA32_vmuloub __IA32_MUL_HIGH_UB
#define __IA32_vmulosb __IA32_MUL_HIGH_SB
#define __IA32_vmulouh __IA32_MUL_HIGH_UH
#define __IA32_vmulosh __IA32_MUL_HIGH_SH
#endif

/* Widening multiply of unsigned bytes at indices 0, 2, 4, ... 14.
   Result element i is a1[2*i] * a2[2*i], stored as a halfword. */
static inline vector unsigned short __IA32_MUL_HIGH_UB(vector unsigned char a1, vector unsigned char a2)
{
  mix_u8 lhs, rhs;
  mix_u16 prod;
  int out, in;

  lhs.v = a1;
  rhs.v = a2;

  for (out = 0, in = 0; out < 8; out++, in += 2) {
    prod.s[out] = lhs.s[in] * rhs.s[in];
  }

  return prod.v;
}

/* Widening multiply of signed bytes at indices 0, 2, 4, ... 14.
   Result element i is a1[2*i] * a2[2*i], stored as a halfword. */
static inline vector signed short __IA32_MUL_HIGH_SB(vector signed char a1, vector signed char a2)
{
  mix_s8 lhs, rhs;
  mix_s16 prod;
  int out, in;

  lhs.v = a1;
  rhs.v = a2;

  for (out = 0, in = 0; out < 8; out++, in += 2) {
    prod.s[out] = lhs.s[in] * rhs.s[in];
  }

  return prod.v;
}

/* Widening multiply of unsigned halfwords at indices 0, 2, 4, 6.
   Result element i is a1[2*i] * a2[2*i], stored as a word.

   Fix: the operands are widened to unsigned int before the multiply.
   Without the cast they are promoted to (signed) int, and the product
   of two large halfwords (e.g. 0xFFFF * 0xFFFF) overflows int, which is
   undefined behaviour. */
static inline vector unsigned int __IA32_MUL_HIGH_UH(vector unsigned short a1, vector unsigned short a2)
{
  mix_u16 a, b;
  mix_u32 r;
  int i;
  
  a.v = a1;
  b.v = a2;
  
  for (i = 0; i < 4; i++) {
    r.s[i] = (unsigned int)a.s[2*i] * (unsigned int)b.s[2*i];
  }
  
  return r.v;
}

/* Widening multiply of signed halfwords at indices 0, 2, 4, 6.
   Result element i is a1[2*i] * a2[2*i], stored as a word. */
static inline vector signed int __IA32_MUL_HIGH_SH(vector signed short a1, vector signed short a2)
{
  mix_s16 lhs, rhs;
  mix_s32 prod;
  int out, in;

  lhs.v = a1;
  rhs.v = a2;

  for (out = 0, in = 0; out < 4; out++, in += 2) {
    prod.s[out] = lhs.s[in] * rhs.s[in];
  }

  return prod.v;
}
/************************************************************************/
/* Widening multiply of unsigned bytes at indices 1, 3, 5, ... 15.
   Result element i is a1[2*i+1] * a2[2*i+1], stored as a halfword. */
static inline vector unsigned short __IA32_MUL_LOW_UB(vector unsigned char a1, vector unsigned char a2)
{
  mix_u8 lhs, rhs;
  mix_u16 prod;
  int out, in;

  lhs.v = a1;
  rhs.v = a2;

  for (out = 0, in = 1; out < 8; out++, in += 2) {
    prod.s[out] = lhs.s[in] * rhs.s[in];
  }

  return prod.v;
}

/* Widening multiply of signed bytes at indices 1, 3, 5, ... 15.
   Result element i is a1[2*i+1] * a2[2*i+1], stored as a halfword. */
static inline vector signed short __IA32_MUL_LOW_SB(vector signed char a1, vector signed char a2)
{
  mix_s8 lhs, rhs;
  mix_s16 prod;
  int out, in;

  lhs.v = a1;
  rhs.v = a2;

  for (out = 0, in = 1; out < 8; out++, in += 2) {
    prod.s[out] = lhs.s[in] * rhs.s[in];
  }

  return prod.v;
}

/* Widening multiply of unsigned halfwords at indices 1, 3, 5, 7.
   Result element i is a1[2*i+1] * a2[2*i+1], stored as a word.

   Fix: the operands are widened to unsigned int before the multiply.
   Without the cast they are promoted to (signed) int, and the product
   of two large halfwords (e.g. 0xFFFF * 0xFFFF) overflows int, which is
   undefined behaviour. */
static inline vector unsigned int __IA32_MUL_LOW_UH(vector unsigned short a1, vector unsigned short a2)
{
  mix_u16 a, b;
  mix_u32 r;
  int i;
  
  a.v = a1;
  b.v = a2;
  
  for (i = 0; i < 4; i++) {
    r.s[i] = (unsigned int)a.s[2*i+1] * (unsigned int)b.s[2*i+1];
  }
  
  return r.v;
}

/* Widening multiply of signed halfwords at indices 1, 3, 5, 7.
   Result element i is a1[2*i+1] * a2[2*i+1], stored as a word. */
static inline vector signed int __IA32_MUL_LOW_SH(vector signed short a1, vector signed short a2)
{
  mix_s16 lhs, rhs;
  mix_s32 prod;
  int out, in;

  lhs.v = a1;
  rhs.v = a2;

  for (out = 0, in = 1; out < 4; out++, in += 2) {
    prod.s[out] = lhs.s[in] * rhs.s[in];
  }

  return prod.v;
}
/************************************************************************/
/* Shift entire vector as 128-bit int by n bits */
/* Shift the 16 bytes of a1 left by 0..7 bits as one long value.
   The bit count is the low three bits of element 15 of a2.  Bytes are
   processed from index 15 down to 0; the bits shifted out of the top of
   each byte are carried into the bottom of the next lower index.
   A count of zero returns a1 unchanged. */
static inline vector unsigned int __IA32_vsl(vector unsigned int a1, vector signed int a2)
{
  mix_u8 src, ctl, dst;
  int idx, nbits;
  unsigned char carry_in = 0;

  src.v = (vector unsigned char) a1;
  ctl.v = (vector unsigned char) a2;

  /* shift value is in bits 0-2 of LSB (right end) of a2 */
  nbits = ctl.s[15] & 7;
  if (nbits == 0) {
    return a1;
  }

  for (idx = 15; idx >= 0; idx--) {
    dst.s[idx] = (src.s[idx] << nbits) | carry_in;
    carry_in = src.s[idx] >> (8 - nbits);
  }

  return dst.v;
}

/* Shift the 16 bytes of a1 right by 0..7 bits as one long value.
   The bit count is the low three bits of element 15 of a2.  Bytes are
   processed from index 0 up to 15; the bits shifted out of the bottom of
   each byte are carried into the top of the next higher index.
   A count of zero returns a1 unchanged. */
static inline vector unsigned int __IA32_vsr(vector unsigned int a1, vector signed int a2)
{
  mix_u8 src, ctl, dst;
  int idx, nbits;
  unsigned char carry_in = 0;

  src.v = (vector unsigned char) a1;
  ctl.v = (vector unsigned char) a2;

  /* shift value is in bits 0-2 of LSB (right end) of a2 */
  nbits = ctl.s[15] & 7;
  if (nbits == 0) {
    return a1;
  }

  idx = 0;
  while (idx < 16) {
    dst.s[idx] = (src.s[idx] >> nbits) | carry_in;
    carry_in = src.s[idx] << (8 - nbits);
    idx++;
  }

  return dst.v;
}
/************************************************************************/
/* pack */
/* Pack halfwords to bytes without saturation (modulo).
   Result bytes 0..7 are the low bytes of a1's halfwords, result bytes
   8..15 the low bytes of a2's halfwords. */
static inline vector unsigned char __IA32_vpkuhum(vector unsigned short a1, vector unsigned short a2)
{
  mix_u16 first, second;
  mix_u8 packed;
  int lane;

  first.v = a1;
  second.v = a2;

  for (lane = 0; lane < 8; lane++) {
    packed.s[lane] = (first.s[lane] & 0x00FF);
    packed.s[lane + 8] = (second.s[lane] & 0x00FF);
  }

  return packed.v;
}

/* Pack words to halfwords without saturation (modulo).
   Result halfwords 0..3 are the low halves of a1's words, result
   halfwords 4..7 the low halves of a2's words. */
static inline vector unsigned short __IA32_vpkuwum(vector unsigned int a1, vector unsigned int a2)
{
  mix_u32 first, second;
  mix_u16 packed;
  int lane;

  first.v = a1;
  second.v = a2;

  for (lane = 0; lane < 4; lane++) {
    packed.s[lane] = (first.s[lane] & 0x0000FFFF);
    packed.s[lane + 4] = (second.s[lane] & 0x0000FFFF);
  }

  return packed.v;
}

/* saturated versions exist for the following types */
#define __IA32_vpkshss _mm_packs_epi16
#define __IA32_vpkswss _mm_packs_epi32
#define __IA32_vpkuhus _mm_packus_epi16

/************************************************************************/
/* unpack */

/* Bind the unpack-high (vupkh*) and unpack-low (vupkl*) operations to
   the sign-extending helpers below.  With MEMORY_ORDER defined the high
   and low halves are swapped. */
#ifdef MEMORY_ORDER
#define __IA32_vupkhsb __IA32_UNPACK_LOW_SB
#define __IA32_vupkhsh __IA32_UNPACK_LOW_SH
#define __IA32_vupklsb __IA32_UNPACK_HIGH_SB
#define __IA32_vupklsh __IA32_UNPACK_HIGH_SH
#else
#define __IA32_vupkhsb __IA32_UNPACK_HIGH_SB
#define __IA32_vupkhsh __IA32_UNPACK_HIGH_SH
#define __IA32_vupklsb __IA32_UNPACK_LOW_SB
#define __IA32_vupklsh __IA32_UNPACK_LOW_SH
#endif

/* the second "a" in the following calls is a dummy argument that gets overwritten */
/* Sign-extend the upper eight bytes of a to halfwords.
   Each byte is first interleaved with itself so that it occupies the
   top byte of a 16-bit lane; an arithmetic right shift by 8 then
   sign-extends it. */
static inline vector signed short __IA32_UNPACK_HIGH_SB(vector signed char a)
{
  vector signed short doubled = _mm_unpackhi_epi8(a, a);

  return (vector signed short) _mm_srai_epi16(doubled, 8);
}

/* Sign-extend the upper four halfwords of a to words.
   Each halfword is first interleaved with itself so that it occupies
   the top half of a 32-bit lane; an arithmetic right shift by 16 then
   sign-extends it. */
static inline vector signed int __IA32_UNPACK_HIGH_SH(vector signed short a)
{
  vector signed int doubled = _mm_unpackhi_epi16(a, a);

  return (vector signed int) _mm_srai_epi32(doubled, 16);
}

/* Sign-extend the lower eight bytes of a to halfwords.
   Each byte is first interleaved with itself so that it occupies the
   top byte of a 16-bit lane; an arithmetic right shift by 8 then
   sign-extends it. */
static inline vector signed short __IA32_UNPACK_LOW_SB(vector signed char a)
{
  vector signed short doubled = _mm_unpacklo_epi8(a, a);

  return (vector signed short) _mm_srai_epi16(doubled, 8);
}

/* Sign-extend the lower four halfwords of a to words.
   Each halfword is first interleaved with itself so that it occupies
   the top half of a 32-bit lane; an arithmetic right shift by 16 then
   sign-extends it. */
static inline vector signed int __IA32_UNPACK_LOW_SH(vector signed short a)
{
  vector signed int doubled = _mm_unpacklo_epi16(a, a);

  return (vector signed int) _mm_srai_epi32(doubled, 16);
}


/************************************************************************/

/* vec_expte() Returns a vector containing estimates of 2 raised to the
   value of the corresponding elements of the given vector. */
/* From
http://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html */

/* exp2*/


/* Degree of the polynomial used by __IA32_vexptefp (2..5 supported). */
#define EXP_POLY_DEGREE 5

/* Vector polynomial evaluation in Horner form.
   EXPPOLYn(x, c0, ..., cn) computes c0 + x*(c1 + x*(c2 + ...)) on four
   floats at once; each macro is built from the next lower one.
   Note that x is expanded once per degree and is not parenthesised, so
   it should be a plain variable. */
#define EXPPOLY0(x, c0) _mm_set1_ps(c0)
#define EXPPOLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(EXPPOLY0(x, c1), x), _mm_set1_ps(c0))
#define EXPPOLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(EXPPOLY1(x, c1, c2), x), _mm_set1_ps(c0))
#define EXPPOLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(EXPPOLY2(x, c1, c2, c3), x), _mm_set1_ps(c0))
#define EXPPOLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(EXPPOLY3(x, c1, c2, c3, c4), x), _mm_set1_ps(c0))
#define EXPPOLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(EXPPOLY4(x, c1, c2, c3, c4, c5), x), _mm_set1_ps(c0))

/* Estimate 2**x for four floats.
   The input is clamped to [-126.99999, 129], then split into an integer
   part and a fractional part.  2**integer is built directly in the
   float exponent field; 2**fraction comes from a polynomial whose
   degree is chosen by EXP_POLY_DEGREE.  The result is their product. */
static inline vector float __IA32_vexptefp(vector float x)
{
  vector int whole, biased_exp;
  vector float clamped, frac, pow2_whole, pow2_frac;

  clamped = _mm_min_ps(x, _mm_set1_ps( 129.00000f));
  clamped = _mm_max_ps(clamped, _mm_set1_ps(-126.99999f));

  /* whole = int(clamped - 0.5) */
  whole = _mm_cvtps_epi32(_mm_sub_ps(clamped, _mm_set1_ps(0.5f)));

  /* frac = clamped - whole */
  frac = _mm_sub_ps(clamped, _mm_cvtepi32_ps(whole));

  /* pow2_whole = (float) (1 << whole): add the exponent bias of 127
     and move the sum into the exponent field (bit 23 upwards) */
  biased_exp = _mm_add_epi32(whole, _mm_set1_epi32(127));
  pow2_whole = _mm_castsi128_ps(_mm_slli_epi32(biased_exp, 23));

  /* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */
#if EXP_POLY_DEGREE == 5
  pow2_frac = EXPPOLY5(frac, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f);
#elif EXP_POLY_DEGREE == 4
  pow2_frac = EXPPOLY4(frac, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f);
#elif EXP_POLY_DEGREE == 3
  pow2_frac = EXPPOLY3(frac, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f);
#elif EXP_POLY_DEGREE == 2
  pow2_frac = EXPPOLY2(frac, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f);
#else
#error
#endif

  return _mm_mul_ps(pow2_whole, pow2_frac);
}

/* log2 */

/* Selects the polynomial used by __IA32_vlogefp (3..6 supported). */
#define LOG_POLY_DEGREE 3

/* Estimate log2(x) for four floats.
   The float bit pattern is split into its exponent field, which is
   unbiased by 127 and converted to float, and its mantissa field, which
   is OR-ed with the bits of 1.0 to give a value m in [1, 2[.  A
   polynomial in m, multiplied by (m - 1), approximates log2(m); the
   result is that value plus the exponent. */
static inline vector float __IA32_vlogefp(vector float x)
{
  vector int exp_mask = _mm_set1_epi32(0x7F800000);
  vector int mant_mask = _mm_set1_epi32(0x007FFFFF);
  vector float unity = _mm_set1_ps( 1.0f);
  vector int raw = _mm_castps_si128(x);
  vector int exp_field = _mm_srli_epi32(_mm_and_si128(raw, exp_mask), 23);
  vector float exponent = _mm_cvtepi32_ps(_mm_sub_epi32(exp_field, _mm_set1_epi32(127)));
  vector float mantissa = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(raw, mant_mask)), unity);
  vector float poly;

  /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ */
#if LOG_POLY_DEGREE == 6
  poly = EXPPOLY5( mantissa, 3.1157899f, -3.3241990f, 2.5988452f, -1.2315303f,  3.1821337e-1f, -3.4436006e-2f);
#elif LOG_POLY_DEGREE == 5
  poly = EXPPOLY4(mantissa, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f);
#elif LOG_POLY_DEGREE == 4
  poly = EXPPOLY3(mantissa, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
#elif LOG_POLY_DEGREE == 3
  poly = EXPPOLY2(mantissa, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f);
#else
#error
#endif

  /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0 */
  poly = _mm_mul_ps(poly, _mm_sub_ps(mantissa, unity));

  return _mm_add_ps(poly, exponent);
}


/*
  From
 http://developer.apple.com/documentation/performance/Conceptual/Accelerate_sse_migration/migration_sse_translation/chapter_4_section_5.html
*/

/* 2**16 in every lane; used to scale the upper 16 bits in _mm_ctf_epu32 */
static const vector float two16 = (const vector float) {65536.0f, 65536.0f, 65536.0f, 65536.0f};

/* Convert four unsigned 32-bit integers to float according to the
   current rounding mode.
   Each value is split into its upper and lower 16 bits.  Both halves
   are small enough to convert exactly with the signed conversion; the
   upper half is then scaled by 2**16 and the two are added, so rounding
   happens only once, in that final addition.  AltiVec always rounds to
   nearest; this uses the current mode, which is round to nearest by
   default. */
static inline vector float _mm_ctf_epu32(vector unsigned int v)
{
  vector signed int bits = (vector signed int)v;
  vector signed int upper16 = _mm_srli_epi32(bits, 16);
  vector signed int lower16 = _mm_srli_epi32(_mm_slli_epi32(bits, 16), 16);
  vector float upper_f = _mm_mul_ps(_mm_cvtepi32_ps(upper16), two16);
  vector float lower_f = _mm_cvtepi32_ps(lower16);

  return _mm_add_ps(upper_f, lower_f);
}


/* Convert four unsigned 32-bit integers to float and divide each by
   2**a2 (fixed-point to floating-point conversion).

   a1  values to convert
   a2  scale exponent; only its low five bits are used (0..31)

   Fix: the power of two is now formed with an unsigned shift.  The
   previous signed `1<<a2` overflowed for a2 == 31 and produced a
   negative scale factor; values outside 0..31 were undefined. */
static inline vector float __IA32_vcfux(vector unsigned int a1, int a2)
{
  vector float v = _mm_ctf_epu32(a1);
  /* 2**k is exactly representable in float for k = 0..31 */
  float scale = 1.0f / (float)(1u << (a2 & 0x1F));
  vector float vscale = {scale, scale, scale, scale};
  return _mm_mul_ps(v, vscale);
}


/************************************************************************/

/* Remaining translations: either supplied by altivec_extras.h or, when
   that header is not available, declared here without a definition. */
#ifdef HAVE_ALTIVEC_EXTRAS

#include "altivec_extras.h"

#else
/* Prototypes for functions without Intel translations.
   Having these makes the error messages more meaningful:
   instead of "can't convert between vector values of different size"
   you get "undefined reference to __IA32_<function_name>"
   when you try to build.
*/

vector unsigned char __IA32_lvsl(int, void *);
vector unsigned char __IA32_lvsr(int, void *);
vector unsigned int __IA32_vaddcuw(vector unsigned int, vector unsigned int);
vector signed int __IA32_vaddsws(vector signed int, vector signed int);
vector unsigned int __IA32_vadduws(vector unsigned int, vector unsigned int);
/* NOTE(review): __IA32_vcfux also has a static inline definition earlier
   in this file, so this prototype looks redundant - confirm. */
vector float __IA32_vcfux(vector unsigned int, int);
vector unsigned int __IA32_vctuxs(vector float, int);
vector signed short __IA32_vmhaddshs(vector signed short, vector signed short, vector signed short);
vector signed short __IA32_vmhraddshs(vector signed short, vector signed short, vector signed short);
vector signed short __IA32_vmladduhm(vector signed short, vector signed short, vector signed short);
vector signed int __IA32_vmsumshs(vector signed short, vector signed short, vector signed int);
vector unsigned int __IA32_vmsumuhs(vector unsigned short, vector unsigned short, vector unsigned int);
vector pixel __IA32_vpkpx(vector unsigned int, vector unsigned int);
vector unsigned char __IA32_vpkshus(vector signed short, vector signed short);
vector unsigned short __IA32_vpkswus(vector signed int, vector signed int);
vector unsigned short __IA32_vpkuwus(vector unsigned int, vector unsigned int);
vector unsigned int __IA32_vsubcuw(vector unsigned int, vector unsigned int);
vector signed int __IA32_vsubsws(vector signed int, vector signed int);
vector unsigned int __IA32_vsubuws(vector unsigned int, vector unsigned int);
vector signed int __IA32_vsum2sws(vector signed int, vector signed int);
vector signed int __IA32_vsum4sbs(vector signed char, vector signed int);
vector signed int __IA32_vsum4shs(vector signed short, vector signed int);
vector unsigned int __IA32_vsum4ubs(vector unsigned char, vector unsigned int);
vector signed int __IA32_vsumsws(vector signed int, vector signed int);
vector unsigned int __IA32_vupkhpx(vector pixel);
vector unsigned int __IA32_vupklpx(vector pixel);

#endif

/**************************************************************************/
/* END OF ADDITIONS FOR INTEL                                             */
/**************************************************************************/

#ifdef __cplusplus

/* Prototypes for builtins that take literals and must always be inlined.
   In every prototype below the trailing `const char' parameter is the
   literal operand.  Each function is declared here with the always_inline
   attribute ahead of its definition; presumably this keeps the literal
   visible as a compile-time constant where the wrapper is expanded --
   confirm against the helpers these wrappers call.  */
inline vector float vec_ctf (vector unsigned int, const char) __attribute__ ((always_inline));
inline vector float vec_ctf (vector signed int, const char) __attribute__ ((always_inline));
inline vector float vec_vcfsx (vector signed int a1, const char a2) __attribute__ ((always_inline));
inline vector float vec_vcfux (vector unsigned int a1, const char a2) __attribute__ ((always_inline));
inline vector signed int vec_cts (vector float, const char) __attribute__ ((always_inline));
inline vector unsigned int vec_ctu (vector float, const char) __attribute__ ((always_inline));

/* vec_sld overloads: two vectors of the same type plus a literal.  */
inline vector float vec_sld (vector float, vector float, const char) __attribute__ ((always_inline));
inline vector signed int vec_sld (vector signed int, vector signed int, const char) __attribute__ ((always_inline));
inline vector unsigned int vec_sld (vector unsigned int, vector unsigned int, const char) __attribute__ ((always_inline));
inline vector signed short vec_sld (vector signed short, vector signed short, const char) __attribute__ ((always_inline));
inline vector unsigned short vec_sld (vector unsigned short, vector unsigned short, const char) __attribute__ ((always_inline));
inline vector signed char vec_sld (vector signed char, vector signed char, const char) __attribute__ ((always_inline));
inline vector unsigned char vec_sld (vector unsigned char, vector unsigned char, const char) __attribute__ ((always_inline));
/* vec_splat overloads: one vector plus a literal.  */
inline vector signed char vec_splat (vector signed char, const char) __attribute__ ((always_inline));
inline vector unsigned char vec_splat (vector unsigned char, const char) __attribute__ ((always_inline));
inline vector signed short vec_splat (vector signed short, const char) __attribute__ ((always_inline));
inline vector unsigned short vec_splat (vector unsigned short, const char) __attribute__ ((always_inline));
inline vector float vec_splat (vector float, const char) __attribute__ ((always_inline));
inline vector signed int vec_splat (vector signed int, const char) __attribute__ ((always_inline));
inline vector unsigned int vec_splat (vector unsigned int, const char) __attribute__ ((always_inline));
/* vec_splat_{s,u}{8,16,32}: the literal is the only argument.  */
inline vector signed char vec_splat_s8 (const char) __attribute__ ((always_inline));
inline vector signed short vec_splat_s16 (const char) __attribute__ ((always_inline));
inline vector signed int vec_splat_s32 (const char) __attribute__ ((always_inline));
inline vector unsigned char vec_splat_u8 (const char) __attribute__ ((always_inline));
inline vector unsigned short vec_splat_u16 (const char) __attribute__ ((always_inline));
inline vector unsigned int vec_splat_u32 (const char) __attribute__ ((always_inline));
/* Width-specific splat names (word, halfword, byte).  */
inline vector float vec_vspltw (vector float a1, const char a2) __attribute__ ((always_inline));
inline vector signed int vec_vspltw (vector signed int a1, const char a2) __attribute__ ((always_inline));
inline vector unsigned int vec_vspltw (vector unsigned int a1, const char a2) __attribute__ ((always_inline));
inline vector signed short vec_vsplth (vector signed short a1, const char a2) __attribute__ ((always_inline));
inline vector unsigned short vec_vsplth (vector unsigned short a1, const char a2) __attribute__ ((always_inline));
inline vector signed char vec_vspltb (vector signed char a1, const char a2) __attribute__ ((always_inline));
inline vector unsigned char vec_vspltb (vector unsigned char a1, const char a2) __attribute__ ((always_inline));

/* vec_abs
   One overload per signed integer element type (char, short, int) plus
   float.  Each passes its argument to the matching __IA32_abs_* helper and
   returns that helper's result.  */

inline vector signed char vec_abs (vector signed char a1)
{
  vector signed char result = __IA32_abs_v16qi (a1);
  return result;
}

inline vector signed short vec_abs (vector signed short a1)
{
  vector signed short result = __IA32_abs_v8hi (a1);
  return result;
}

inline vector signed int vec_abs (vector signed int a1)
{
  vector signed int result = __IA32_abs_v4si (a1);
  return result;
}

inline vector float vec_abs (vector float a1)
{
  vector float result = __IA32_abs_v4sf (a1);
  return result;
}

/* vec_abss
   Signed char, short and int overloads, each delegating to the matching
   __IA32_abss_* helper (presumably the saturating form of vec_abs --
   confirm against the helper definitions).  */

inline vector signed char vec_abss (vector signed char a1)
{
  vector signed char result = __IA32_abss_v16qi (a1);
  return result;
}

inline vector signed short vec_abss (vector signed short a1)
{
  vector signed short result = __IA32_abss_v8hi (a1);
  return result;
}

inline vector signed int vec_abss (vector signed int a1)
{
  vector signed int result = __IA32_abss_v4si (a1);
  return result;
}

/* vec_add
   Element-wise addition.  Integer overloads forward to the SSE2 add
   intrinsic for their element width (_mm_add_epi8, _mm_add_epi16,
   _mm_add_epi32); the float overload forwards to _mm_add_ps.  When the two
   operands differ in signedness the result is the unsigned vector type.  */

inline vector signed char vec_add (vector signed char a1, vector signed char a2)
{
  vector signed char sum =
    (vector signed char) _mm_add_epi8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

inline vector unsigned char vec_add (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char sum =
    (vector unsigned char) _mm_add_epi8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

inline vector unsigned char vec_add (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char sum =
    (vector unsigned char) _mm_add_epi8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

inline vector unsigned char vec_add (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char sum =
    (vector unsigned char) _mm_add_epi8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

inline vector signed short vec_add (vector signed short a1, vector signed short a2)
{
  vector signed short sum =
    (vector signed short) _mm_add_epi16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

inline vector unsigned short vec_add (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short sum =
    (vector unsigned short) _mm_add_epi16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

inline vector unsigned short vec_add (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short sum =
    (vector unsigned short) _mm_add_epi16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

inline vector unsigned short vec_add (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short sum =
    (vector unsigned short) _mm_add_epi16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

inline vector signed int vec_add (vector signed int a1, vector signed int a2)
{
  vector signed int sum =
    (vector signed int) _mm_add_epi32 ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

inline vector unsigned int vec_add (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int sum =
    (vector unsigned int) _mm_add_epi32 ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

inline vector unsigned int vec_add (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int sum =
    (vector unsigned int) _mm_add_epi32 ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

inline vector unsigned int vec_add (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int sum =
    (vector unsigned int) _mm_add_epi32 ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

inline vector float vec_add (vector float a1, vector float a2)
{
  vector float sum =
    (vector float) _mm_add_ps ((vector float) a1, (vector float) a2);
  return sum;
}

/* vec_vaddfp
   Float addition through _mm_add_ps; same body as the float overload of
   vec_add.  */

inline vector float vec_vaddfp (vector float a1, vector float a2)
{
  vector float sum =
    (vector float) _mm_add_ps ((vector float) a1, (vector float) a2);
  return sum;
}

/* vec_vadduwm
   32-bit element addition through _mm_add_epi32.  Only the all-signed
   overload returns a signed vector; any unsigned operand makes the result
   unsigned.  */

inline vector signed int vec_vadduwm (vector signed int a1, vector signed int a2)
{
  vector signed int sum =
    (vector signed int) _mm_add_epi32 ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

inline vector unsigned int vec_vadduwm (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int sum =
    (vector unsigned int) _mm_add_epi32 ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

inline vector unsigned int vec_vadduwm (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int sum =
    (vector unsigned int) _mm_add_epi32 ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

inline vector unsigned int vec_vadduwm (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int sum =
    (vector unsigned int) _mm_add_epi32 ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

/* vec_vadduhm
   16-bit element addition through _mm_add_epi16.  Only the all-signed
   overload returns a signed vector; any unsigned operand makes the result
   unsigned.  */

inline vector signed short vec_vadduhm (vector signed short a1, vector signed short a2)
{
  vector signed short sum =
    (vector signed short) _mm_add_epi16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

inline vector unsigned short vec_vadduhm (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short sum =
    (vector unsigned short) _mm_add_epi16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

inline vector unsigned short vec_vadduhm (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short sum =
    (vector unsigned short) _mm_add_epi16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

inline vector unsigned short vec_vadduhm (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short sum =
    (vector unsigned short) _mm_add_epi16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

/* vec_vaddubm
   8-bit element addition through _mm_add_epi8.  Only the all-signed
   overload returns a signed vector; any unsigned operand makes the result
   unsigned.  */

inline vector signed char vec_vaddubm (vector signed char a1, vector signed char a2)
{
  vector signed char sum =
    (vector signed char) _mm_add_epi8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

inline vector unsigned char vec_vaddubm (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char sum =
    (vector unsigned char) _mm_add_epi8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

inline vector unsigned char vec_vaddubm (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char sum =
    (vector unsigned char) _mm_add_epi8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

inline vector unsigned char vec_vaddubm (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char sum =
    (vector unsigned char) _mm_add_epi8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

/* vec_addc
   Delegates to the __IA32_vaddcuw helper with both operands cast to
   vector signed int (presumably the per-word carry-out of an unsigned add,
   as in AltiVec vaddcuw -- confirm against the helper).  */

inline vector unsigned int vec_addc (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int result =
    (vector unsigned int) __IA32_vaddcuw ((vector signed int) a1, (vector signed int) a2);
  return result;
}

/* vec_adds
   Saturating addition.  8- and 16-bit overloads use the SSE2 saturating
   intrinsics: _mm_adds_epu8 / _mm_adds_epu16 whenever either operand is
   unsigned, _mm_adds_epi8 / _mm_adds_epi16 when both are signed.  The
   32-bit overloads go through the __IA32_vadduws / __IA32_vaddsws
   helpers.  */

inline vector unsigned char vec_adds (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char sum =
    (vector unsigned char) _mm_adds_epu8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

inline vector unsigned char vec_adds (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char sum =
    (vector unsigned char) _mm_adds_epu8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

inline vector unsigned char vec_adds (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char sum =
    (vector unsigned char) _mm_adds_epu8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

inline vector signed char vec_adds (vector signed char a1, vector signed char a2)
{
  vector signed char sum =
    (vector signed char) _mm_adds_epi8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

inline vector unsigned short vec_adds (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short sum =
    (vector unsigned short) _mm_adds_epu16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

inline vector unsigned short vec_adds (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short sum =
    (vector unsigned short) _mm_adds_epu16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

inline vector unsigned short vec_adds (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short sum =
    (vector unsigned short) _mm_adds_epu16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

inline vector signed short vec_adds (vector signed short a1, vector signed short a2)
{
  vector signed short sum =
    (vector signed short) _mm_adds_epi16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

inline vector unsigned int vec_adds (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int sum =
    (vector unsigned int) __IA32_vadduws ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

inline vector unsigned int vec_adds (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int sum =
    (vector unsigned int) __IA32_vadduws ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

inline vector unsigned int vec_adds (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int sum =
    (vector unsigned int) __IA32_vadduws ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

inline vector signed int vec_adds (vector signed int a1, vector signed int a2)
{
  vector signed int sum =
    (vector signed int) __IA32_vaddsws ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

/* vec_vaddsws
   Signed 32-bit add routed through the __IA32_vaddsws helper; same body as
   the all-signed int overload of vec_adds.  */

inline vector signed int vec_vaddsws (vector signed int a1, vector signed int a2)
{
  vector signed int sum =
    (vector signed int) __IA32_vaddsws ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

/* vec_vadduws
   32-bit add routed through the __IA32_vadduws helper for every operand
   pair that contains at least one unsigned vector.  */

inline vector unsigned int vec_vadduws (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int sum =
    (vector unsigned int) __IA32_vadduws ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

inline vector unsigned int vec_vadduws (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int sum =
    (vector unsigned int) __IA32_vadduws ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

inline vector unsigned int vec_vadduws (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int sum =
    (vector unsigned int) __IA32_vadduws ((vector signed int) a1, (vector signed int) a2);
  return sum;
}

/* vec_vaddshs
   Signed saturating 16-bit add through _mm_adds_epi16.  */

inline vector signed short vec_vaddshs (vector signed short a1, vector signed short a2)
{
  vector signed short sum =
    (vector signed short) _mm_adds_epi16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

/* vec_vadduhs
   Unsigned saturating 16-bit add through _mm_adds_epu16 for every operand
   pair that contains at least one unsigned vector.  */

inline vector unsigned short vec_vadduhs (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short sum =
    (vector unsigned short) _mm_adds_epu16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

inline vector unsigned short vec_vadduhs (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short sum =
    (vector unsigned short) _mm_adds_epu16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

inline vector unsigned short vec_vadduhs (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short sum =
    (vector unsigned short) _mm_adds_epu16 ((vector signed short) a1, (vector signed short) a2);
  return sum;
}

/* vec_vaddsbs
   Signed saturating 8-bit add through _mm_adds_epi8.  */

inline vector signed char vec_vaddsbs (vector signed char a1, vector signed char a2)
{
  vector signed char sum =
    (vector signed char) _mm_adds_epi8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

/* vec_vaddubs
   Unsigned saturating 8-bit add through _mm_adds_epu8 for every operand
   pair that contains at least one unsigned vector.  */

inline vector unsigned char vec_vaddubs (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char sum =
    (vector unsigned char) _mm_adds_epu8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

inline vector unsigned char vec_vaddubs (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char sum =
    (vector unsigned char) _mm_adds_epu8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

inline vector unsigned char vec_vaddubs (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char sum =
    (vector unsigned char) _mm_adds_epu8 ((vector signed char) a1, (vector signed char) a2);
  return sum;
}

/* vec_and
   Bitwise AND.  The float/float overload uses _mm_and_ps; every other
   overload casts both operands to vector signed int, applies
   _mm_and_si128 and casts the result to the return type.  Mixed
   float/int pairs return float; mixed signed/unsigned pairs return the
   unsigned type.  */

inline vector float vec_and (vector float a1, vector float a2)
{
  vector float bits =
    (vector float) _mm_and_ps ((vector float) a1, (vector float) a2);
  return bits;
}

inline vector float vec_and (vector float a1, vector signed int a2)
{
  vector float bits =
    (vector float) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector float vec_and (vector signed int a1, vector float a2)
{
  vector float bits =
    (vector float) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector signed int vec_and (vector signed int a1, vector signed int a2)
{
  vector signed int bits =
    (vector signed int) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned int vec_and (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int bits =
    (vector unsigned int) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned int vec_and (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int bits =
    (vector unsigned int) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned int vec_and (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int bits =
    (vector unsigned int) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector signed short vec_and (vector signed short a1, vector signed short a2)
{
  vector signed short bits =
    (vector signed short) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned short vec_and (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short bits =
    (vector unsigned short) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned short vec_and (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short bits =
    (vector unsigned short) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned short vec_and (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short bits =
    (vector unsigned short) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector signed char vec_and (vector signed char a1, vector signed char a2)
{
  vector signed char bits =
    (vector signed char) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned char vec_and (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char bits =
    (vector unsigned char) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned char vec_and (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char bits =
    (vector unsigned char) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned char vec_and (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char bits =
    (vector unsigned char) _mm_and_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

/* vec_andc
   AND-with-complement.  The float/float overload calls reversed_andnot_ps;
   every other overload casts both operands to vector signed int and calls
   reversed_andnot_si128.  Both are defined elsewhere; presumably they
   compute a1 & ~a2, i.e. the SSE andnot with its operands swapped --
   confirm against their definitions.  */

inline vector float vec_andc (vector float a1, vector float a2)
{
  vector float bits =
    (vector float) reversed_andnot_ps ((vector float) a1, (vector float) a2);
  return bits;
}

inline vector float vec_andc (vector float a1, vector signed int a2)
{
  vector float bits =
    (vector float) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector float vec_andc (vector signed int a1, vector float a2)
{
  vector float bits =
    (vector float) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector signed int vec_andc (vector signed int a1, vector signed int a2)
{
  vector signed int bits =
    (vector signed int) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned int vec_andc (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int bits =
    (vector unsigned int) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned int vec_andc (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int bits =
    (vector unsigned int) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned int vec_andc (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int bits =
    (vector unsigned int) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector signed short vec_andc (vector signed short a1, vector signed short a2)
{
  vector signed short bits =
    (vector signed short) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned short vec_andc (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short bits =
    (vector unsigned short) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned short vec_andc (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short bits =
    (vector unsigned short) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned short vec_andc (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short bits =
    (vector unsigned short) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector signed char vec_andc (vector signed char a1, vector signed char a2)
{
  vector signed char bits =
    (vector signed char) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned char vec_andc (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char bits =
    (vector unsigned char) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned char vec_andc (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char bits =
    (vector unsigned char) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned char vec_andc (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char bits =
    (vector unsigned char) reversed_andnot_si128 ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

/* vec_avg
   Element-wise average.  Unsigned 8- and 16-bit overloads use the SSE2
   intrinsics _mm_avg_epu8 / _mm_avg_epu16; the signed overloads and both
   32-bit overloads go through the __IA32_vavg* helpers.  */

inline vector unsigned char vec_avg (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char mean =
    (vector unsigned char) _mm_avg_epu8 ((vector signed char) a1, (vector signed char) a2);
  return mean;
}

inline vector signed char vec_avg (vector signed char a1, vector signed char a2)
{
  vector signed char mean =
    (vector signed char) __IA32_vavgsb ((vector signed char) a1, (vector signed char) a2);
  return mean;
}

inline vector unsigned short vec_avg (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short mean =
    (vector unsigned short) _mm_avg_epu16 ((vector signed short) a1, (vector signed short) a2);
  return mean;
}

inline vector signed short vec_avg (vector signed short a1, vector signed short a2)
{
  vector signed short mean =
    (vector signed short) __IA32_vavgsh ((vector signed short) a1, (vector signed short) a2);
  return mean;
}

inline vector unsigned int vec_avg (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int mean =
    (vector unsigned int) __IA32_vavguw ((vector signed int) a1, (vector signed int) a2);
  return mean;
}

inline vector signed int vec_avg (vector signed int a1, vector signed int a2)
{
  vector signed int mean =
    (vector signed int) __IA32_vavgsw ((vector signed int) a1, (vector signed int) a2);
  return mean;
}

/* vec_vavgsw
   Signed 32-bit average via the __IA32_vavgsw helper.  */

inline vector signed int vec_vavgsw (vector signed int a1, vector signed int a2)
{
  vector signed int mean =
    (vector signed int) __IA32_vavgsw ((vector signed int) a1, (vector signed int) a2);
  return mean;
}

/* vec_vavguw
   Unsigned 32-bit average via the __IA32_vavguw helper; operands are cast
   to vector signed int for the call.  */

inline vector unsigned int vec_vavguw (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int mean =
    (vector unsigned int) __IA32_vavguw ((vector signed int) a1, (vector signed int) a2);
  return mean;
}

/* vec_vavgsh
   Signed 16-bit average via the __IA32_vavgsh helper.  */

inline vector signed short vec_vavgsh (vector signed short a1, vector signed short a2)
{
  vector signed short mean =
    (vector signed short) __IA32_vavgsh ((vector signed short) a1, (vector signed short) a2);
  return mean;
}

/* vec_vavguh
   Unsigned 16-bit average via the SSE2 intrinsic _mm_avg_epu16.  */

inline vector unsigned short vec_vavguh (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short mean =
    (vector unsigned short) _mm_avg_epu16 ((vector signed short) a1, (vector signed short) a2);
  return mean;
}

/* vec_vavgsb
   Signed 8-bit average via the __IA32_vavgsb helper.  */

inline vector signed char vec_vavgsb (vector signed char a1, vector signed char a2)
{
  vector signed char mean =
    (vector signed char) __IA32_vavgsb ((vector signed char) a1, (vector signed char) a2);
  return mean;
}

/* vec_vavgub
   Unsigned 8-bit average via the SSE2 intrinsic _mm_avg_epu8.  */

inline vector unsigned char vec_vavgub (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char mean =
    (vector unsigned char) _mm_avg_epu8 ((vector signed char) a1, (vector signed char) a2);
  return mean;
}

/* vec_ceil
   Delegates to the __IA32_vrfip helper (presumably round toward +infinity,
   as in AltiVec vrfip -- confirm against the helper).  */

inline vector float vec_ceil (vector float a1)
{
  vector float rounded = (vector float) __IA32_vrfip ((vector float) a1);
  return rounded;
}

/* vec_cmpb
   Delegates to the __IA32_vcmpbfp helper and returns its result as a
   vector signed int (presumably the AltiVec bounds-compare encoding --
   confirm against the helper).  */

inline vector signed int vec_cmpb (vector float a1, vector float a2)
{
  vector signed int flags =
    (vector signed int) __IA32_vcmpbfp ((vector float) a1, (vector float) a2);
  return flags;
}

/* vec_cmpeq
   Element-wise equality.  Integer overloads use _mm_cmpeq_epi8 /
   _mm_cmpeq_epi16 / _mm_cmpeq_epi32 according to element width; the float
   overload uses _mm_cmpeq_ps.  The mask is always returned as the signed
   vector type of the same element width.  */

inline vector signed char vec_cmpeq (vector signed char a1, vector signed char a2)
{
  vector signed char mask =
    (vector signed char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return mask;
}

inline vector signed char vec_cmpeq (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char mask =
    (vector signed char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return mask;
}

inline vector signed short vec_cmpeq (vector signed short a1, vector signed short a2)
{
  vector signed short mask =
    (vector signed short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return mask;
}

inline vector signed short vec_cmpeq (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short mask =
    (vector signed short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return mask;
}

inline vector signed int vec_cmpeq (vector signed int a1, vector signed int a2)
{
  vector signed int mask =
    (vector signed int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return mask;
}

inline vector signed int vec_cmpeq (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int mask =
    (vector signed int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return mask;
}

inline vector signed int vec_cmpeq (vector float a1, vector float a2)
{
  vector signed int mask =
    (vector signed int) _mm_cmpeq_ps ((vector float) a1, (vector float) a2);
  return mask;
}

/* vec_vcmpeqfp
   Float equality mask via _mm_cmpeq_ps, returned as vector signed int.  */

inline vector signed int vec_vcmpeqfp (vector float a1, vector float a2)
{
  vector signed int mask =
    (vector signed int) _mm_cmpeq_ps ((vector float) a1, (vector float) a2);
  return mask;
}

/* vec_vcmpequw
   32-bit equality mask via _mm_cmpeq_epi32 for signed and unsigned
   operands; the mask is a vector signed int in both cases.  */

inline vector signed int vec_vcmpequw (vector signed int a1, vector signed int a2)
{
  vector signed int mask =
    (vector signed int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return mask;
}

inline vector signed int vec_vcmpequw (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int mask =
    (vector signed int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return mask;
}

/* vec_vcmpequh
   16-bit equality mask via _mm_cmpeq_epi16 for signed and unsigned
   operands; the mask is a vector signed short in both cases.  */

inline vector signed short vec_vcmpequh (vector signed short a1, vector signed short a2)
{
  vector signed short mask =
    (vector signed short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return mask;
}

inline vector signed short vec_vcmpequh (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short mask =
    (vector signed short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return mask;
}

/* vec_vcmpequb
   8-bit equality mask via _mm_cmpeq_epi8 for signed and unsigned operands;
   the mask is a vector signed char in both cases.  */

inline vector signed char vec_vcmpequb (vector signed char a1, vector signed char a2)
{
  vector signed char mask =
    (vector signed char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return mask;
}

inline vector signed char vec_vcmpequb (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char mask =
    (vector signed char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return mask;
}

/* vec_cmpge
   Float a1 >= a2 mask via _mm_cmpge_ps, returned as vector signed int.  */

inline vector signed int vec_cmpge (vector float a1, vector float a2)
{
  vector signed int mask =
    (vector signed int) _mm_cmpge_ps ((vector float) a1, (vector float) a2);
  return mask;
}

/* vec_cmpgt
   Element-wise a1 > a2.  Signed integer overloads use _mm_cmpgt_epi8 /
   _mm_cmpgt_epi16 / _mm_cmpgt_epi32 and the float overload uses
   _mm_cmpgt_ps.  Unsigned overloads go through the __IA32_cmpgt_epu*
   helpers.  The mask is always returned as a signed vector type.  */

inline vector signed char vec_cmpgt (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char mask =
    (vector signed char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return mask;
}

inline vector signed char vec_cmpgt (vector signed char a1, vector signed char a2)
{
  vector signed char mask =
    (vector signed char) _mm_cmpgt_epi8 ((vector signed char) a1, (vector signed char) a2);
  return mask;
}

inline vector signed short vec_cmpgt (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short mask =
    (vector signed short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return mask;
}

inline vector signed short vec_cmpgt (vector signed short a1, vector signed short a2)
{
  vector signed short mask =
    (vector signed short) _mm_cmpgt_epi16 ((vector signed short) a1, (vector signed short) a2);
  return mask;
}

inline vector signed int vec_cmpgt (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int mask =
    (vector signed int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return mask;
}

inline vector signed int vec_cmpgt (vector signed int a1, vector signed int a2)
{
  vector signed int mask =
    (vector signed int) _mm_cmpgt_epi32 ((vector signed int) a1, (vector signed int) a2);
  return mask;
}

inline vector signed int vec_cmpgt (vector float a1, vector float a2)
{
  vector signed int mask =
    (vector signed int) _mm_cmpgt_ps ((vector float) a1, (vector float) a2);
  return mask;
}

/* vec_vcmpgtfp
   Float a1 > a2 mask via _mm_cmpgt_ps, returned as vector signed int.  */

inline vector signed int vec_vcmpgtfp (vector float a1, vector float a2)
{
  vector signed int mask =
    (vector signed int) _mm_cmpgt_ps ((vector float) a1, (vector float) a2);
  return mask;
}

/* vec_vcmpgtsw
   Signed 32-bit a1 > a2 mask via _mm_cmpgt_epi32.  */

inline vector signed int vec_vcmpgtsw (vector signed int a1, vector signed int a2)
{
  vector signed int mask =
    (vector signed int) _mm_cmpgt_epi32 ((vector signed int) a1, (vector signed int) a2);
  return mask;
}

/* vec_vcmpgtuw
   Unsigned 32-bit a1 > a2 mask via the __IA32_cmpgt_epu32 helper.  */

inline vector signed int vec_vcmpgtuw (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int mask =
    (vector signed int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return mask;
}

/* vec_vcmpgtsh
   Signed 16-bit a1 > a2 mask via _mm_cmpgt_epi16.  */

inline vector signed short vec_vcmpgtsh (vector signed short a1, vector signed short a2)
{
  vector signed short mask =
    (vector signed short) _mm_cmpgt_epi16 ((vector signed short) a1, (vector signed short) a2);
  return mask;
}

/* vec_vcmpgtuh
   Unsigned 16-bit a1 > a2 mask via the __IA32_cmpgt_epu16 helper.  */

inline vector signed short vec_vcmpgtuh (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short mask =
    (vector signed short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return mask;
}

/* vec_vcmpgtsb
   Signed 8-bit a1 > a2 mask via _mm_cmpgt_epi8.  */

inline vector signed char vec_vcmpgtsb (vector signed char a1, vector signed char a2)
{
  vector signed char mask =
    (vector signed char) _mm_cmpgt_epi8 ((vector signed char) a1, (vector signed char) a2);
  return mask;
}

/* vec_vcmpgtub
   Unsigned 8-bit a1 > a2 mask via the __IA32_cmpgt_epu8 helper.  */

inline vector signed char vec_vcmpgtub (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char mask =
    (vector signed char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return mask;
}

/* vec_cmple
   Float a1 <= a2 mask, returned as vector signed int.

   The comparison is evaluated as a2 >= a1 by handing the operands to
   _mm_cmpge_ps in swapped order, the same way vec_cmplt swaps the operands
   of the greater-than compare.  Passing them unswapped would compute
   a1 >= a2, i.e. the vec_cmpge result.  */

inline vector signed int
vec_cmple (vector float a1, vector float a2)
{
  return (vector signed int) _mm_cmpge_ps ((vector float) a2, (vector float) a1);
}

/* vec_cmplt
   Element-wise a1 < a2, evaluated as a2 > a1: every overload calls the same
   greater-than primitive as the matching vec_cmpgt overload, with the two
   operands swapped.  The mask is always returned as a signed vector
   type.  */

inline vector signed char vec_cmplt (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char mask =
    (vector signed char) __IA32_cmpgt_epu8 ((vector signed char) a2, (vector signed char) a1);
  return mask;
}

inline vector signed char vec_cmplt (vector signed char a1, vector signed char a2)
{
  vector signed char mask =
    (vector signed char) _mm_cmpgt_epi8 ((vector signed char) a2, (vector signed char) a1);
  return mask;
}

inline vector signed short vec_cmplt (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short mask =
    (vector signed short) __IA32_cmpgt_epu16 ((vector signed short) a2, (vector signed short) a1);
  return mask;
}

inline vector signed short vec_cmplt (vector signed short a1, vector signed short a2)
{
  vector signed short mask =
    (vector signed short) _mm_cmpgt_epi16 ((vector signed short) a2, (vector signed short) a1);
  return mask;
}

inline vector signed int vec_cmplt (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int mask =
    (vector signed int) __IA32_cmpgt_epu32 ((vector signed int) a2, (vector signed int) a1);
  return mask;
}

inline vector signed int vec_cmplt (vector signed int a1, vector signed int a2)
{
  vector signed int mask =
    (vector signed int) _mm_cmpgt_epi32 ((vector signed int) a2, (vector signed int) a1);
  return mask;
}

inline vector signed int vec_cmplt (vector float a1, vector float a2)
{
  vector signed int mask =
    (vector signed int) _mm_cmpgt_ps ((vector float) a2, (vector float) a1);
  return mask;
}

/* vec_ctf
   Integer-to-float conversion taking a literal second operand a2, which is
   passed through untouched.  The unsigned overload calls __IA32_vcfux and
   the signed overload calls __IA32_vcfsx; both cast a1 to vector signed int
   for the call.  */

inline vector float vec_ctf (vector unsigned int a1, const char a2)
{
  vector float converted = (vector float) __IA32_vcfux ((vector signed int) a1, a2);
  return converted;
}

inline vector float vec_ctf (vector signed int a1, const char a2)
{
  vector float converted = (vector float) __IA32_vcfsx ((vector signed int) a1, a2);
  return converted;
}

/* vec_vcfsx
   Signed int to float via the __IA32_vcfsx helper; a2 is the literal
   operand and is passed through untouched.  */

inline vector float vec_vcfsx (vector signed int a1, const char a2)
{
  vector float converted = (vector float) __IA32_vcfsx ((vector signed int) a1, a2);
  return converted;
}

/* vec_vcfux
   Unsigned int to float via the __IA32_vcfux helper; a1 is cast to vector
   signed int for the call and a2 is passed through untouched.  */

inline vector float vec_vcfux (vector unsigned int a1, const char a2)
{
  vector float converted = (vector float) __IA32_vcfux ((vector signed int) a1, a2);
  return converted;
}

/* vec_cts
   Float to signed int via the __IA32_vctsxs helper; a2 is the literal
   operand and is passed through untouched.  */

inline vector signed int vec_cts (vector float a1, const char a2)
{
  vector signed int converted = (vector signed int) __IA32_vctsxs ((vector float) a1, a2);
  return converted;
}

/* vec_ctu
   Float to unsigned int via the __IA32_vctuxs helper; a2 is the literal
   operand and is passed through untouched.  */

inline vector unsigned int vec_ctu (vector float a1, const char a2)
{
  vector unsigned int converted = (vector unsigned int) __IA32_vctuxs ((vector float) a1, a2);
  return converted;
}

/* vec_expte
   Delegates to the __IA32_vexptefp helper (presumably a 2**x estimate, as
   in AltiVec vexptefp -- confirm against the helper).  */

inline vector float vec_expte (vector float a1)
{
  vector float estimate = (vector float) __IA32_vexptefp ((vector float) a1);
  return estimate;
}

/* vec_floor
   Delegates to the __IA32_vrfim helper (presumably round toward -infinity,
   as in AltiVec vrfim -- confirm against the helper).  */

inline vector float vec_floor (vector float a1)
{
  vector float rounded = (vector float) __IA32_vrfim ((vector float) a1);
  return rounded;
}

/* vec_ld
   Vector load from a1 and a2.  Float overloads build the address with
   V4SF_ADDRESS and load with _mm_load_ps; all integer overloads build it
   with V4SI_ADDRESS and load with _mm_load_si128.  The pointer is cast to
   void * before it reaches the address macro.  Both address macros are
   defined elsewhere; presumably they combine the offset a1 with the base
   a2 -- confirm against their definitions.  */

inline vector float vec_ld (int a1, vector float *a2)
{
  vector float loaded =
    (vector float) _mm_load_ps (V4SF_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector float vec_ld (int a1, float *a2)
{
  vector float loaded =
    (vector float) _mm_load_ps (V4SF_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector signed int vec_ld (int a1, vector signed int *a2)
{
  vector signed int loaded =
    (vector signed int) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector signed int vec_ld (int a1, signed int *a2)
{
  vector signed int loaded =
    (vector signed int) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector signed int vec_ld (int a1, signed long *a2)
{
  vector signed int loaded =
    (vector signed int) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector unsigned int vec_ld (int a1, vector unsigned int *a2)
{
  vector unsigned int loaded =
    (vector unsigned int) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector unsigned int vec_ld (int a1, unsigned int *a2)
{
  vector unsigned int loaded =
    (vector unsigned int) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector unsigned int vec_ld (int a1, unsigned long *a2)
{
  vector unsigned int loaded =
    (vector unsigned int) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector signed short vec_ld (int a1, vector signed short *a2)
{
  vector signed short loaded =
    (vector signed short) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector signed short vec_ld (int a1, signed short *a2)
{
  vector signed short loaded =
    (vector signed short) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector unsigned short vec_ld (int a1, vector unsigned short *a2)
{
  vector unsigned short loaded =
    (vector unsigned short) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector unsigned short vec_ld (int a1, unsigned short *a2)
{
  vector unsigned short loaded =
    (vector unsigned short) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector signed char vec_ld (int a1, vector signed char *a2)
{
  vector signed char loaded =
    (vector signed char) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector signed char vec_ld (int a1, signed char *a2)
{
  vector signed char loaded =
    (vector signed char) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector unsigned char vec_ld (int a1, vector unsigned char *a2)
{
  vector unsigned char loaded =
    (vector unsigned char) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

inline vector unsigned char vec_ld (int a1, unsigned char *a2)
{
  vector unsigned char loaded =
    (vector unsigned char) _mm_load_si128 (V4SI_ADDRESS (a1, (void *) a2));
  return loaded;
}

/* vec_lde */

/* vec_lde overloads.  Each one forwards a1 and a2 to the __IA32_lve* helper
   that matches the element type (lvesb/lveub, lvesh/lveuh, lvef,
   lvesw/lveuw).  The `long' and vector-pointer overloads first cast a2 to
   the scalar element pointer the helper is called with elsewhere.  */

/* Scalar element pointers: a2 is forwarded untouched.  */

inline vector signed char
vec_lde (int a1, signed char *a2)
{
  vector signed char element = (vector signed char) __IA32_lvesb (a1, a2);
  return element;
}

inline vector unsigned char
vec_lde (int a1, unsigned char *a2)
{
  vector unsigned char element = (vector unsigned char) __IA32_lveub (a1, a2);
  return element;
}

inline vector signed short
vec_lde (int a1, signed short *a2)
{
  vector signed short element = (vector signed short) __IA32_lvesh (a1, a2);
  return element;
}

inline vector unsigned short
vec_lde (int a1, unsigned short *a2)
{
  vector unsigned short element = (vector unsigned short) __IA32_lveuh (a1, a2);
  return element;
}

inline vector float
vec_lde (int a1, float *a2)
{
  vector float element = (vector float) __IA32_lvef (a1, a2);
  return element;
}

inline vector signed int
vec_lde (int a1, signed int *a2)
{
  vector signed int element = (vector signed int) __IA32_lvesw (a1, a2);
  return element;
}

inline vector unsigned int
vec_lde (int a1, unsigned int *a2)
{
  vector unsigned int element = (vector unsigned int) __IA32_lveuw (a1, a2);
  return element;
}

/* `long' pointers are handed to the word helpers as int pointers.  */

inline vector signed int
vec_lde (int a1, signed long *a2)
{
  vector signed int element =
    (vector signed int) __IA32_lvesw (a1, (signed int *) a2);
  return element;
}

inline vector unsigned int
vec_lde (int a1, unsigned long *a2)
{
  vector unsigned int element =
    (vector unsigned int) __IA32_lveuw (a1, (unsigned int *) a2);
  return element;
}


/* Vector pointers: a2 is cast to the scalar element pointer first.  */

inline vector signed char
vec_lde (int a1, vector signed char *a2)
{
  vector signed char element =
    (vector signed char) __IA32_lvesb (a1, (signed char *) a2);
  return element;
}

inline vector unsigned char
vec_lde (int a1, vector unsigned char *a2)
{
  vector unsigned char element =
    (vector unsigned char) __IA32_lveub (a1, (unsigned char *) a2);
  return element;
}

inline vector signed short
vec_lde (int a1, vector signed short *a2)
{
  vector signed short element =
    (vector signed short) __IA32_lvesh (a1, (signed short *) a2);
  return element;
}

inline vector unsigned short
vec_lde (int a1, vector unsigned short *a2)
{
  vector unsigned short element =
    (vector unsigned short) __IA32_lveuh (a1, (unsigned short *) a2);
  return element;
}

inline vector float
vec_lde (int a1, vector float *a2)
{
  vector float element = (vector float) __IA32_lvef (a1, (float *) a2);
  return element;
}

inline vector signed int
vec_lde (int a1, vector signed int *a2)
{
  vector signed int element =
    (vector signed int) __IA32_lvesw (a1, (signed int *) a2);
  return element;
}

inline vector unsigned int
vec_lde (int a1, vector unsigned int *a2)
{
  vector unsigned int element =
    (vector unsigned int) __IA32_lveuw (a1, (unsigned int *) a2);
  return element;
}

inline vector signed int
vec_lde (int a1, vector signed long *a2)
{
  vector signed int element =
    (vector signed int) __IA32_lvesw (a1, (signed int *) a2);
  return element;
}

inline vector unsigned int
vec_lde (int a1, vector unsigned long *a2)
{
  vector unsigned int element =
    (vector unsigned int) __IA32_lveuw (a1, (unsigned int *) a2);
  return element;
}


/* vec_lvewx */

/* vec_lvewx overloads.  Each forwards a1 and a2 to __IA32_lvef (float),
   __IA32_lvesw (signed) or __IA32_lveuw (unsigned); `long' and
   vector-pointer arguments are cast to the scalar element pointer first.  */

inline vector float
vec_lvewx (int a1, float *a2)
{
  vector float word = (vector float) __IA32_lvef (a1, a2);
  return word;
}

inline vector signed int
vec_lvewx (int a1, signed int *a2)
{
  vector signed int word = (vector signed int) __IA32_lvesw (a1, a2);
  return word;
}

inline vector unsigned int
vec_lvewx (int a1, unsigned int *a2)
{
  vector unsigned int word = (vector unsigned int) __IA32_lveuw (a1, a2);
  return word;
}

inline vector signed int
vec_lvewx (int a1, signed long *a2)
{
  vector signed int word =
    (vector signed int) __IA32_lvesw (a1, (signed int *) a2);
  return word;
}

inline vector unsigned int
vec_lvewx (int a1, unsigned long *a2)
{
  vector unsigned int word =
    (vector unsigned int) __IA32_lveuw (a1, (unsigned int *) a2);
  return word;
}

inline vector float
vec_lvewx (int a1, vector float *a2)
{
  vector float word = (vector float) __IA32_lvef (a1, (float *) a2);
  return word;
}

inline vector signed int
vec_lvewx (int a1, vector signed int *a2)
{
  vector signed int word =
    (vector signed int) __IA32_lvesw (a1, (signed int *) a2);
  return word;
}

inline vector unsigned int
vec_lvewx (int a1, vector unsigned int *a2)
{
  vector unsigned int word =
    (vector unsigned int) __IA32_lveuw (a1, (unsigned int *) a2);
  return word;
}

inline vector signed int
vec_lvewx (int a1, vector signed long *a2)
{
  vector signed int word =
    (vector signed int) __IA32_lvesw (a1, (signed int *) a2);
  return word;
}

inline vector unsigned int
vec_lvewx (int a1, vector unsigned long *a2)
{
  vector unsigned int word =
    (vector unsigned int) __IA32_lveuw (a1, (unsigned int *) a2);
  return word;
}

/* vec_lvehx */

/* vec_lvehx overloads.  Signed variants call __IA32_lvesh, unsigned ones
   __IA32_lveuh; vector pointers are cast to the scalar short pointer.  */

inline vector signed short
vec_lvehx (int a1, signed short *a2)
{
  vector signed short half = (vector signed short) __IA32_lvesh (a1, a2);
  return half;
}

inline vector unsigned short
vec_lvehx (int a1, unsigned short *a2)
{
  vector unsigned short half = (vector unsigned short) __IA32_lveuh (a1, a2);
  return half;
}

inline vector signed short
vec_lvehx (int a1, vector signed short *a2)
{
  vector signed short half =
    (vector signed short) __IA32_lvesh (a1, (signed short *) a2);
  return half;
}

inline vector unsigned short
vec_lvehx (int a1, vector unsigned short *a2)
{
  vector unsigned short half =
    (vector unsigned short) __IA32_lveuh (a1, (unsigned short *) a2);
  return half;
}

/* vec_lvebx */

/* vec_lvebx overloads.  Signed variants call __IA32_lvesb, unsigned ones
   __IA32_lveub; vector pointers are cast to the scalar char pointer.  */

inline vector signed char
vec_lvebx (int a1, signed char *a2)
{
  vector signed char byte = (vector signed char) __IA32_lvesb (a1, a2);
  return byte;
}

inline vector unsigned char
vec_lvebx (int a1, unsigned char *a2)
{
  vector unsigned char byte = (vector unsigned char) __IA32_lveub (a1, a2);
  return byte;
}

inline vector signed char
vec_lvebx (int a1, vector signed char *a2)
{
  vector signed char byte =
    (vector signed char) __IA32_lvesb (a1, (signed char *) a2);
  return byte;
}

inline vector unsigned char
vec_lvebx (int a1, vector unsigned char *a2)
{
  vector unsigned char byte =
    (vector unsigned char) __IA32_lveub (a1, (unsigned char *) a2);
  return byte;
}

/* vec_loge */

/* Thin wrapper: passes a1 to __IA32_vlogefp and returns the outcome as
   vector float.  */
inline vector float
vec_loge (vector float a1)
{
  vector float result = (vector float) __IA32_vlogefp (a1);
  return result;
}

/* vec_lvsl */

/* vec_lvsl overloads.  The pointer's element type is irrelevant to the
   call: every variant passes a1 and a2 (as void *) to __IA32_lvsl and
   returns the outcome as vector unsigned char.  */

inline vector unsigned char
vec_lvsl (int a1, unsigned char *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsl (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsl (int a1, signed char *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsl (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsl (int a1, unsigned short *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsl (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsl (int a1, signed short *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsl (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsl (int a1, unsigned int *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsl (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsl (int a1, signed int *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsl (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsl (int a1, unsigned long *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsl (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsl (int a1, signed long *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsl (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsl (int a1, float *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsl (a1, (void *) a2);
  return mask;
}

/* vec_lvsr */

/* vec_lvsr overloads.  The pointer's element type is irrelevant to the
   call: every variant passes a1 and a2 (as void *) to __IA32_lvsr and
   returns the outcome as vector unsigned char.  */

inline vector unsigned char
vec_lvsr (int a1, unsigned char *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsr (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsr (int a1, signed char *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsr (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsr (int a1, unsigned short *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsr (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsr (int a1, signed short *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsr (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsr (int a1, unsigned int *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsr (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsr (int a1, signed int *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsr (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsr (int a1, unsigned long *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsr (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsr (int a1, signed long *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsr (a1, (void *) a2);
  return mask;
}

inline vector unsigned char
vec_lvsr (int a1, float *a2)
{
  vector unsigned char mask = (vector unsigned char) __IA32_lvsr (a1, (void *) a2);
  return mask;
}

/* vec_madd */

/* Multiply-add built from two SSE steps: _mm_mul_ps (a1, a2) followed by
   _mm_add_ps with a3.  */
inline vector float
vec_madd (vector float a1, vector float a2, vector float a3)
{
  vector float sum = (vector float) _mm_add_ps (_mm_mul_ps (a1, a2), a3);
  return sum;
}


/* vec_madds */

/* Thin wrapper: forwards all three operands to __IA32_vmhaddshs.  */
inline vector signed short
vec_madds (vector signed short a1, vector signed short a2, vector signed short a3)
{
  vector signed short result = (vector signed short) __IA32_vmhaddshs (a1, a2, a3);
  return result;
}

/* vec_max */

/* vec_max overloads.  Dispatch by element type:
     unsigned/mixed char  -> _mm_max_epu8      signed char  -> __IA32_vmaxsb
     unsigned/mixed short -> __IA32_vmaxuh     signed short -> _mm_max_epi16
     unsigned/mixed int   -> __IA32_vmaxuw     signed int   -> __IA32_vmaxsw
     float                -> __IA32_vmaxfp
   Operands are handed over as the signed vector type of the same width;
   only operands whose declared type differs are cast.  */

inline vector unsigned char
vec_max (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char larger =
    (vector unsigned char) _mm_max_epu8 (a1, (vector signed char) a2);
  return larger;
}

inline vector unsigned char
vec_max (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char larger =
    (vector unsigned char) _mm_max_epu8 ((vector signed char) a1, a2);
  return larger;
}

inline vector unsigned char
vec_max (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char larger =
    (vector unsigned char) _mm_max_epu8 ((vector signed char) a1,
                                         (vector signed char) a2);
  return larger;
}

inline vector signed char
vec_max (vector signed char a1, vector signed char a2)
{
  vector signed char larger = (vector signed char) __IA32_vmaxsb (a1, a2);
  return larger;
}

inline vector unsigned short
vec_max (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short larger =
    (vector unsigned short) __IA32_vmaxuh (a1, (vector signed short) a2);
  return larger;
}

inline vector unsigned short
vec_max (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short larger =
    (vector unsigned short) __IA32_vmaxuh ((vector signed short) a1, a2);
  return larger;
}

inline vector unsigned short
vec_max (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short larger =
    (vector unsigned short) __IA32_vmaxuh ((vector signed short) a1,
                                           (vector signed short) a2);
  return larger;
}

inline vector signed short
vec_max (vector signed short a1, vector signed short a2)
{
  vector signed short larger = (vector signed short) _mm_max_epi16 (a1, a2);
  return larger;
}

inline vector unsigned int
vec_max (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int larger =
    (vector unsigned int) __IA32_vmaxuw (a1, (vector signed int) a2);
  return larger;
}

inline vector unsigned int
vec_max (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int larger =
    (vector unsigned int) __IA32_vmaxuw ((vector signed int) a1, a2);
  return larger;
}

inline vector unsigned int
vec_max (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int larger =
    (vector unsigned int) __IA32_vmaxuw ((vector signed int) a1,
                                         (vector signed int) a2);
  return larger;
}

inline vector signed int
vec_max (vector signed int a1, vector signed int a2)
{
  vector signed int larger = (vector signed int) __IA32_vmaxsw (a1, a2);
  return larger;
}

inline vector float
vec_max (vector float a1, vector float a2)
{
  vector float larger = (vector float) __IA32_vmaxfp (a1, a2);
  return larger;
}

/* vec_vmaxfp */

/* Thin wrapper around __IA32_vmaxfp for two float vectors.  */
inline vector float
vec_vmaxfp (vector float a1, vector float a2)
{
  vector float larger = (vector float) __IA32_vmaxfp (a1, a2);
  return larger;
}

/* vec_vmaxsw */

/* Thin wrapper around __IA32_vmaxsw for two signed int vectors.  */
inline vector signed int
vec_vmaxsw (vector signed int a1, vector signed int a2)
{
  vector signed int larger = (vector signed int) __IA32_vmaxsw (a1, a2);
  return larger;
}

/* vec_vmaxuw */

/* vec_vmaxuw overloads: all call __IA32_vmaxuw with both operands viewed as
   vector signed int and return the outcome as vector unsigned int.  */

inline vector unsigned int
vec_vmaxuw (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int larger =
    (vector unsigned int) __IA32_vmaxuw (a1, (vector signed int) a2);
  return larger;
}

inline vector unsigned int
vec_vmaxuw (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int larger =
    (vector unsigned int) __IA32_vmaxuw ((vector signed int) a1, a2);
  return larger;
}

inline vector unsigned int
vec_vmaxuw (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int larger =
    (vector unsigned int) __IA32_vmaxuw ((vector signed int) a1,
                                         (vector signed int) a2);
  return larger;
}

/* vec_vmaxsh */

/* Signed short maximum, mapped directly onto _mm_max_epi16.  */
inline vector signed short
vec_vmaxsh (vector signed short a1, vector signed short a2)
{
  vector signed short larger = (vector signed short) _mm_max_epi16 (a1, a2);
  return larger;
}

/* vec_vmaxuh */

/* vec_vmaxuh overloads: all call __IA32_vmaxuh with both operands viewed as
   vector signed short and return the outcome as vector unsigned short.  */

inline vector unsigned short
vec_vmaxuh (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short larger =
    (vector unsigned short) __IA32_vmaxuh (a1, (vector signed short) a2);
  return larger;
}

inline vector unsigned short
vec_vmaxuh (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short larger =
    (vector unsigned short) __IA32_vmaxuh ((vector signed short) a1, a2);
  return larger;
}

inline vector unsigned short
vec_vmaxuh (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short larger =
    (vector unsigned short) __IA32_vmaxuh ((vector signed short) a1,
                                           (vector signed short) a2);
  return larger;
}

/* vec_vmaxsb */

/* Thin wrapper around __IA32_vmaxsb for two signed char vectors.  */
inline vector signed char
vec_vmaxsb (vector signed char a1, vector signed char a2)
{
  vector signed char larger = (vector signed char) __IA32_vmaxsb (a1, a2);
  return larger;
}

/* vec_vmaxub */

/* vec_vmaxub overloads: all map onto _mm_max_epu8 with both operands viewed
   as vector signed char and return the outcome as vector unsigned char.  */

inline vector unsigned char
vec_vmaxub (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char larger =
    (vector unsigned char) _mm_max_epu8 (a1, (vector signed char) a2);
  return larger;
}

inline vector unsigned char
vec_vmaxub (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char larger =
    (vector unsigned char) _mm_max_epu8 ((vector signed char) a1, a2);
  return larger;
}

inline vector unsigned char
vec_vmaxub (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char larger =
    (vector unsigned char) _mm_max_epu8 ((vector signed char) a1,
                                         (vector signed char) a2);
  return larger;
}

/* vec_mergeh */

/* vec_mergeh overloads.  Char vectors go to __IA32_vmrghb, short vectors to
   __IA32_vmrghh, int vectors to __IA32_vmrghw and float vectors to
   __IA32_vmrghf.  Unsigned operands are cast to the signed vector type of
   the same width before the call.  */

inline vector signed char
vec_mergeh (vector signed char a1, vector signed char a2)
{
  vector signed char merged = (vector signed char) __IA32_vmrghb (a1, a2);
  return merged;
}

inline vector unsigned char
vec_mergeh (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char merged =
    (vector unsigned char) __IA32_vmrghb ((vector signed char) a1,
                                          (vector signed char) a2);
  return merged;
}

inline vector signed short
vec_mergeh (vector signed short a1, vector signed short a2)
{
  vector signed short merged = (vector signed short) __IA32_vmrghh (a1, a2);
  return merged;
}

inline vector unsigned short
vec_mergeh (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short merged =
    (vector unsigned short) __IA32_vmrghh ((vector signed short) a1,
                                           (vector signed short) a2);
  return merged;
}

inline vector float
vec_mergeh (vector float a1, vector float a2)
{
  vector float merged = (vector float) __IA32_vmrghf (a1, a2);
  return merged;
}

inline vector signed int
vec_mergeh (vector signed int a1, vector signed int a2)
{
  vector signed int merged = (vector signed int) __IA32_vmrghw (a1, a2);
  return merged;
}

inline vector unsigned int
vec_mergeh (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int merged =
    (vector unsigned int) __IA32_vmrghw ((vector signed int) a1,
                                         (vector signed int) a2);
  return merged;
}

/* vec_vmrghw */

/* vec_vmrghw overloads: float vectors use __IA32_vmrghf, int vectors use
   __IA32_vmrghw (unsigned operands are cast to vector signed int).  */

inline vector float
vec_vmrghw (vector float a1, vector float a2)
{
  vector float merged = (vector float) __IA32_vmrghf (a1, a2);
  return merged;
}

inline vector signed int
vec_vmrghw (vector signed int a1, vector signed int a2)
{
  vector signed int merged = (vector signed int) __IA32_vmrghw (a1, a2);
  return merged;
}

inline vector unsigned int
vec_vmrghw (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int merged =
    (vector unsigned int) __IA32_vmrghw ((vector signed int) a1,
                                         (vector signed int) a2);
  return merged;
}

/* vec_vmrghh */

/* vec_vmrghh overloads: both call __IA32_vmrghh; the unsigned variant casts
   its operands to vector signed short first.  */

inline vector signed short
vec_vmrghh (vector signed short a1, vector signed short a2)
{
  vector signed short merged = (vector signed short) __IA32_vmrghh (a1, a2);
  return merged;
}

inline vector unsigned short
vec_vmrghh (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short merged =
    (vector unsigned short) __IA32_vmrghh ((vector signed short) a1,
                                           (vector signed short) a2);
  return merged;
}

/* vec_vmrghb */

/* vec_vmrghb overloads: both call __IA32_vmrghb; the unsigned variant casts
   its operands to vector signed char first.  */

inline vector signed char
vec_vmrghb (vector signed char a1, vector signed char a2)
{
  vector signed char merged = (vector signed char) __IA32_vmrghb (a1, a2);
  return merged;
}

inline vector unsigned char
vec_vmrghb (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char merged =
    (vector unsigned char) __IA32_vmrghb ((vector signed char) a1,
                                          (vector signed char) a2);
  return merged;
}

/* vec_mergel */

/* vec_mergel overloads.  Char vectors go to __IA32_vmrglb, short vectors to
   __IA32_vmrglh, int vectors to __IA32_vmrglw and float vectors to
   __IA32_vmrglf.  Unsigned operands are cast to the signed vector type of
   the same width before the call.  */

inline vector signed char
vec_mergel (vector signed char a1, vector signed char a2)
{
  vector signed char merged = (vector signed char) __IA32_vmrglb (a1, a2);
  return merged;
}

inline vector unsigned char
vec_mergel (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char merged =
    (vector unsigned char) __IA32_vmrglb ((vector signed char) a1,
                                          (vector signed char) a2);
  return merged;
}

inline vector signed short
vec_mergel (vector signed short a1, vector signed short a2)
{
  vector signed short merged = (vector signed short) __IA32_vmrglh (a1, a2);
  return merged;
}

inline vector unsigned short
vec_mergel (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short merged =
    (vector unsigned short) __IA32_vmrglh ((vector signed short) a1,
                                           (vector signed short) a2);
  return merged;
}

inline vector float
vec_mergel (vector float a1, vector float a2)
{
  vector float merged = (vector float) __IA32_vmrglf (a1, a2);
  return merged;
}

inline vector signed int
vec_mergel (vector signed int a1, vector signed int a2)
{
  vector signed int merged = (vector signed int) __IA32_vmrglw (a1, a2);
  return merged;
}

inline vector unsigned int
vec_mergel (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int merged =
    (vector unsigned int) __IA32_vmrglw ((vector signed int) a1,
                                         (vector signed int) a2);
  return merged;
}

/* vec_vmrglw */

/* vec_vmrglw overloads: float vectors use __IA32_vmrglf, int vectors use
   __IA32_vmrglw (unsigned operands are cast to vector signed int).  */

inline vector float
vec_vmrglw (vector float a1, vector float a2)
{
  vector float merged = (vector float) __IA32_vmrglf (a1, a2);
  return merged;
}

inline vector signed int
vec_vmrglw (vector signed int a1, vector signed int a2)
{
  vector signed int merged = (vector signed int) __IA32_vmrglw (a1, a2);
  return merged;
}

inline vector unsigned int
vec_vmrglw (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int merged =
    (vector unsigned int) __IA32_vmrglw ((vector signed int) a1,
                                         (vector signed int) a2);
  return merged;
}

/* vec_vmrglh */

/* vec_vmrglh overloads: both call __IA32_vmrglh; the unsigned variant casts
   its operands to vector signed short first.  */

inline vector signed short
vec_vmrglh (vector signed short a1, vector signed short a2)
{
  vector signed short merged = (vector signed short) __IA32_vmrglh (a1, a2);
  return merged;
}

inline vector unsigned short
vec_vmrglh (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short merged =
    (vector unsigned short) __IA32_vmrglh ((vector signed short) a1,
                                           (vector signed short) a2);
  return merged;
}

/* vec_vmrglb */

/* vec_vmrglb overloads: both call __IA32_vmrglb; the unsigned variant casts
   its operands to vector signed char first.  */

inline vector signed char
vec_vmrglb (vector signed char a1, vector signed char a2)
{
  vector signed char merged = (vector signed char) __IA32_vmrglb (a1, a2);
  return merged;
}

inline vector unsigned char
vec_vmrglb (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char merged =
    (vector unsigned char) __IA32_vmrglb ((vector signed char) a1,
                                          (vector signed char) a2);
  return merged;
}

/* vec_min */

/* vec_min overloads.  Dispatch by element type:
     unsigned/mixed char  -> _mm_min_epu8      signed char  -> __IA32_vminsb
     unsigned/mixed short -> __IA32_vminuh     signed short -> _mm_min_epi16
     unsigned/mixed int   -> __IA32_vminuw     signed int   -> __IA32_vminsw
     float                -> __IA32_vminfp
   Operands are handed over as the signed vector type of the same width;
   only operands whose declared type differs are cast.  */

inline vector unsigned char
vec_min (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char smaller =
    (vector unsigned char) _mm_min_epu8 (a1, (vector signed char) a2);
  return smaller;
}

inline vector unsigned char
vec_min (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char smaller =
    (vector unsigned char) _mm_min_epu8 ((vector signed char) a1, a2);
  return smaller;
}

inline vector unsigned char
vec_min (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char smaller =
    (vector unsigned char) _mm_min_epu8 ((vector signed char) a1,
                                         (vector signed char) a2);
  return smaller;
}

inline vector signed char
vec_min (vector signed char a1, vector signed char a2)
{
  vector signed char smaller = (vector signed char) __IA32_vminsb (a1, a2);
  return smaller;
}

inline vector unsigned short
vec_min (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short smaller =
    (vector unsigned short) __IA32_vminuh (a1, (vector signed short) a2);
  return smaller;
}

inline vector unsigned short
vec_min (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short smaller =
    (vector unsigned short) __IA32_vminuh ((vector signed short) a1, a2);
  return smaller;
}

inline vector unsigned short
vec_min (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short smaller =
    (vector unsigned short) __IA32_vminuh ((vector signed short) a1,
                                           (vector signed short) a2);
  return smaller;
}

inline vector signed short
vec_min (vector signed short a1, vector signed short a2)
{
  vector signed short smaller = (vector signed short) _mm_min_epi16 (a1, a2);
  return smaller;
}

inline vector unsigned int
vec_min (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int smaller =
    (vector unsigned int) __IA32_vminuw (a1, (vector signed int) a2);
  return smaller;
}

inline vector unsigned int
vec_min (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int smaller =
    (vector unsigned int) __IA32_vminuw ((vector signed int) a1, a2);
  return smaller;
}

inline vector unsigned int
vec_min (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int smaller =
    (vector unsigned int) __IA32_vminuw ((vector signed int) a1,
                                         (vector signed int) a2);
  return smaller;
}

inline vector signed int
vec_min (vector signed int a1, vector signed int a2)
{
  vector signed int smaller = (vector signed int) __IA32_vminsw (a1, a2);
  return smaller;
}

inline vector float
vec_min (vector float a1, vector float a2)
{
  vector float smaller = (vector float) __IA32_vminfp (a1, a2);
  return smaller;
}

/* vec_vminfp */

/* Thin wrapper around __IA32_vminfp for two float vectors.  */
inline vector float
vec_vminfp (vector float a1, vector float a2)
{
  vector float smaller = (vector float) __IA32_vminfp (a1, a2);
  return smaller;
}

/* vec_vminsw */

/* Thin wrapper around __IA32_vminsw for two signed int vectors.  */
inline vector signed int
vec_vminsw (vector signed int a1, vector signed int a2)
{
  vector signed int smaller = (vector signed int) __IA32_vminsw (a1, a2);
  return smaller;
}

/* vec_vminuw */

/* vec_vminuw overloads: all call __IA32_vminuw with both operands viewed as
   vector signed int and return the outcome as vector unsigned int.  */

inline vector unsigned int
vec_vminuw (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int smaller =
    (vector unsigned int) __IA32_vminuw (a1, (vector signed int) a2);
  return smaller;
}

inline vector unsigned int
vec_vminuw (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int smaller =
    (vector unsigned int) __IA32_vminuw ((vector signed int) a1, a2);
  return smaller;
}

inline vector unsigned int
vec_vminuw (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int smaller =
    (vector unsigned int) __IA32_vminuw ((vector signed int) a1,
                                         (vector signed int) a2);
  return smaller;
}

/* vec_vminsh */

/* Signed short minimum, mapped directly onto _mm_min_epi16.  */
inline vector signed short
vec_vminsh (vector signed short a1, vector signed short a2)
{
  vector signed short smaller = (vector signed short) _mm_min_epi16 (a1, a2);
  return smaller;
}

/* vec_vminuh */

/* vec_vminuh overloads: all call __IA32_vminuh with both operands viewed as
   vector signed short and return the outcome as vector unsigned short.  */

inline vector unsigned short
vec_vminuh (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short smaller =
    (vector unsigned short) __IA32_vminuh (a1, (vector signed short) a2);
  return smaller;
}

inline vector unsigned short
vec_vminuh (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short smaller =
    (vector unsigned short) __IA32_vminuh ((vector signed short) a1, a2);
  return smaller;
}

inline vector unsigned short
vec_vminuh (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short smaller =
    (vector unsigned short) __IA32_vminuh ((vector signed short) a1,
                                           (vector signed short) a2);
  return smaller;
}

/* vec_vminsb */

/* Thin wrapper around __IA32_vminsb for two signed char vectors.  */
inline vector signed char
vec_vminsb (vector signed char a1, vector signed char a2)
{
  vector signed char smaller = (vector signed char) __IA32_vminsb (a1, a2);
  return smaller;
}

/* vec_vminub */

/* vec_vminub overloads: all map onto _mm_min_epu8 with both operands viewed
   as vector signed char and return the outcome as vector unsigned char.  */

inline vector unsigned char
vec_vminub (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char smaller =
    (vector unsigned char) _mm_min_epu8 (a1, (vector signed char) a2);
  return smaller;
}

inline vector unsigned char
vec_vminub (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char smaller =
    (vector unsigned char) _mm_min_epu8 ((vector signed char) a1, a2);
  return smaller;
}

inline vector unsigned char
vec_vminub (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char smaller =
    (vector unsigned char) _mm_min_epu8 ((vector signed char) a1,
                                         (vector signed char) a2);
  return smaller;
}

/* vec_mladd */

/* vec_mladd overloads: every signedness combination is routed to
   __IA32_vmladduhm with all three operands viewed as vector signed short.
   Only the all-unsigned overload returns vector unsigned short.  */

inline vector signed short
vec_mladd (vector signed short a1, vector signed short a2, vector signed short a3)
{
  vector signed short result = (vector signed short) __IA32_vmladduhm (a1, a2, a3);
  return result;
}

inline vector signed short
vec_mladd (vector signed short a1, vector unsigned short a2, vector unsigned short a3)
{
  vector signed short result =
    (vector signed short) __IA32_vmladduhm (a1,
                                            (vector signed short) a2,
                                            (vector signed short) a3);
  return result;
}

inline vector signed short
vec_mladd (vector unsigned short a1, vector signed short a2, vector signed short a3)
{
  vector signed short result =
    (vector signed short) __IA32_vmladduhm ((vector signed short) a1, a2, a3);
  return result;
}

inline vector unsigned short
vec_mladd (vector unsigned short a1, vector unsigned short a2, vector unsigned short a3)
{
  vector unsigned short result =
    (vector unsigned short) __IA32_vmladduhm ((vector signed short) a1,
                                              (vector signed short) a2,
                                              (vector signed short) a3);
  return result;
}

/* vec_mradds */

/* Thin wrapper: forwards all three operands to __IA32_vmhraddshs.  */
inline vector signed short
vec_mradds (vector signed short a1, vector signed short a2, vector signed short a3)
{
  vector signed short result = (vector signed short) __IA32_vmhraddshs (a1, a2, a3);
  return result;
}

/* vec_msum */

/* vec_msum overloads, selected by operand types:
     unsigned char  x unsigned char  + unsigned int -> __IA32_vmsumubm
     signed char    x unsigned char  + signed int   -> __IA32_vmsummbm
     unsigned short x unsigned short + unsigned int -> __IA32_vmsumuhm
     signed short   x signed short   + signed int   -> __IA32_vmsumshm
   Unsigned operands are cast to the signed vector type of the same width
   before the call.  */

inline vector unsigned int
vec_msum (vector unsigned char a1, vector unsigned char a2, vector unsigned int a3)
{
  vector unsigned int total =
    (vector unsigned int) __IA32_vmsumubm ((vector signed char) a1,
                                           (vector signed char) a2,
                                           (vector signed int) a3);
  return total;
}

inline vector signed int
vec_msum (vector signed char a1, vector unsigned char a2, vector signed int a3)
{
  vector signed int total =
    (vector signed int) __IA32_vmsummbm (a1, (vector signed char) a2, a3);
  return total;
}

inline vector unsigned int
vec_msum (vector unsigned short a1, vector unsigned short a2, vector unsigned int a3)
{
  vector unsigned int total =
    (vector unsigned int) __IA32_vmsumuhm ((vector signed short) a1,
                                           (vector signed short) a2,
                                           (vector signed int) a3);
  return total;
}

inline vector signed int
vec_msum (vector signed short a1, vector signed short a2, vector signed int a3)
{
  vector signed int total = (vector signed int) __IA32_vmsumshm (a1, a2, a3);
  return total;
}

/* vec_vmsumshm */

/* Thin wrapper: forwards all three operands to __IA32_vmsumshm.  */
inline vector signed int
vec_vmsumshm (vector signed short a1, vector signed short a2, vector signed int a3)
{
  vector signed int total = (vector signed int) __IA32_vmsumshm (a1, a2, a3);
  return total;
}

/* vec_vmsumuhm */

/* Calls __IA32_vmsumuhm with the unsigned operands cast to the signed
   vector types of the same width; the outcome is returned as unsigned.  */
inline vector unsigned int
vec_vmsumuhm (vector unsigned short a1, vector unsigned short a2, vector unsigned int a3)
{
  vector unsigned int total =
    (vector unsigned int) __IA32_vmsumuhm ((vector signed short) a1,
                                           (vector signed short) a2,
                                           (vector signed int) a3);
  return total;
}

/* vec_vmsummbm */

/* Calls __IA32_vmsummbm; only the unsigned char operand a2 needs a cast to
   vector signed char.  */
inline vector signed int
vec_vmsummbm (vector signed char a1, vector unsigned char a2, vector signed int a3)
{
  vector signed int total =
    (vector signed int) __IA32_vmsummbm (a1, (vector signed char) a2, a3);
  return total;
}

/* vec_vmsumubm */

/* Calls __IA32_vmsumubm with the unsigned operands cast to the signed
   vector types of the same width; the outcome is returned as unsigned.  */
inline vector unsigned int
vec_vmsumubm (vector unsigned char a1, vector unsigned char a2, vector unsigned int a3)
{
  vector unsigned int total =
    (vector unsigned int) __IA32_vmsumubm ((vector signed char) a1,
                                           (vector signed char) a2,
                                           (vector signed int) a3);
  return total;
}

/* vec_msums */

/* vec_msums overloads: unsigned operands go to __IA32_vmsumuhs (after a
   cast to the signed vector types), signed operands to __IA32_vmsumshs.  */

inline vector unsigned int
vec_msums (vector unsigned short a1, vector unsigned short a2, vector unsigned int a3)
{
  vector unsigned int total =
    (vector unsigned int) __IA32_vmsumuhs ((vector signed short) a1,
                                           (vector signed short) a2,
                                           (vector signed int) a3);
  return total;
}

inline vector signed int
vec_msums (vector signed short a1, vector signed short a2, vector signed int a3)
{
  vector signed int total = (vector signed int) __IA32_vmsumshs (a1, a2, a3);
  return total;
}

/* vec_vmsumshs */

/* Thin wrapper: forwards all three operands to __IA32_vmsumshs.  */
inline vector signed int
vec_vmsumshs (vector signed short a1, vector signed short a2, vector signed int a3)
{
  vector signed int total = (vector signed int) __IA32_vmsumshs (a1, a2, a3);
  return total;
}

/* vec_vmsumuhs */

/* Calls __IA32_vmsumuhs with the unsigned operands cast to the signed
   vector types of the same width; the outcome is returned as unsigned.  */
inline vector unsigned int
vec_vmsumuhs (vector unsigned short a1, vector unsigned short a2, vector unsigned int a3)
{
  vector unsigned int total =
    (vector unsigned int) __IA32_vmsumuhs ((vector signed short) a1,
                                           (vector signed short) a2,
                                           (vector signed int) a3);
  return total;
}

/* vec_mule */

/* vec_mule overloads, selected by operand type; the result vector has
   elements of the next wider integer type:
     unsigned char  -> __IA32_vmuleub -> vector unsigned short
     signed char    -> __IA32_vmulesb -> vector signed short
     unsigned short -> __IA32_vmuleuh -> vector unsigned int
     signed short   -> __IA32_vmulesh -> vector signed int
   Unsigned operands are cast to the signed vector type before the call.  */

inline vector unsigned short
vec_mule (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned short product =
    (vector unsigned short) __IA32_vmuleub ((vector signed char) a1,
                                            (vector signed char) a2);
  return product;
}

inline vector signed short
vec_mule (vector signed char a1, vector signed char a2)
{
  vector signed short product = (vector signed short) __IA32_vmulesb (a1, a2);
  return product;
}

inline vector unsigned int
vec_mule (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned int product =
    (vector unsigned int) __IA32_vmuleuh ((vector signed short) a1,
                                          (vector signed short) a2);
  return product;
}

inline vector signed int
vec_mule (vector signed short a1, vector signed short a2)
{
  vector signed int product = (vector signed int) __IA32_vmulesh (a1, a2);
  return product;
}

/* vec_vmulesh */

/* Thin wrapper around __IA32_vmulesh: signed short operands in, vector
   signed int out.  */
inline vector signed int
vec_vmulesh (vector signed short a1, vector signed short a2)
{
  vector signed int product = (vector signed int) __IA32_vmulesh (a1, a2);
  return product;
}

/* vec_vmulesb */

/* vec_vmulesb: wrapper around __IA32_vmulesb, the helper that
   vec_mule (vector signed char, vector signed char) is built on.

   Parameters: a1, a2 - signed char operand vectors.
   Returns: the helper's outcome as vector signed short, matching the
   signed char overload of vec_mule and the signature of vec_vmulosb.  */
inline vector signed short
vec_vmulesb (vector signed char a1, vector signed char a2)
{
  return (vector signed short) __IA32_vmulesb (a1, a2);
}

/* Legacy overload, retained unchanged so existing callers keep compiling.
   Its halfword/word signature mirrors vec_vmulesh rather than the
   byte-element helper it calls; new code should use the signed char
   overload above.  */
inline vector signed int
vec_vmulesb (vector signed short a1, vector signed short a2)
{
  return (vector signed int) __IA32_vmulesb ((vector signed short) a1, (vector signed short) a2);
}

/* vec_vmuleuh */

/* Calls __IA32_vmuleuh with the unsigned short operands cast to vector
   signed short; the outcome is returned as vector unsigned int.  */
inline vector unsigned int
vec_vmuleuh (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned int product =
    (vector unsigned int) __IA32_vmuleuh ((vector signed short) a1,
                                          (vector signed short) a2);
  return product;
}

/* vec_vmuleub */
/* Calls __IA32_vmuleub with the unsigned char operands cast to vector
   signed char; the outcome is returned as vector unsigned short.  */
inline vector unsigned short
vec_vmuleub (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned short product =
    (vector unsigned short) __IA32_vmuleub ((vector signed char) a1,
                                            (vector signed char) a2);
  return product;
}

/* vec_mulo */

/* vec_mulo overloads, selected by operand type; the result vector has
   elements of the next wider integer type:
     unsigned char  -> __IA32_vmuloub -> vector unsigned short
     signed char    -> __IA32_vmulosb -> vector signed short
     unsigned short -> __IA32_vmulouh -> vector unsigned int
     signed short   -> __IA32_vmulosh -> vector signed int
   Unsigned operands are cast to the signed vector type before the call.  */

inline vector unsigned short
vec_mulo (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned short product =
    (vector unsigned short) __IA32_vmuloub ((vector signed char) a1,
                                            (vector signed char) a2);
  return product;
}

inline vector signed short
vec_mulo (vector signed char a1, vector signed char a2)
{
  vector signed short product = (vector signed short) __IA32_vmulosb (a1, a2);
  return product;
}

inline vector unsigned int
vec_mulo (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned int product =
    (vector unsigned int) __IA32_vmulouh ((vector signed short) a1,
                                          (vector signed short) a2);
  return product;
}

inline vector signed int
vec_mulo (vector signed short a1, vector signed short a2)
{
  vector signed int product = (vector signed int) __IA32_vmulosh (a1, a2);
  return product;
}

/* vec_vmulosh */

/* Thin wrapper around __IA32_vmulosh: signed short operands in, vector
   signed int out.  */
inline vector signed int
vec_vmulosh (vector signed short a1, vector signed short a2)
{
  vector signed int product = (vector signed int) __IA32_vmulosh (a1, a2);
  return product;
}

/* vec_vmulouh */

/* Calls __IA32_vmulouh with the unsigned short operands cast to vector
   signed short; the outcome is returned as vector unsigned int.  */
inline vector unsigned int
vec_vmulouh (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned int product =
    (vector unsigned int) __IA32_vmulouh ((vector signed short) a1,
                                          (vector signed short) a2);
  return product;
}

/* vec_vmulosb */

/* Thin wrapper around __IA32_vmulosb: signed char operands in, vector
   signed short out.  */
inline vector signed short
vec_vmulosb (vector signed char a1, vector signed char a2)
{
  vector signed short product = (vector signed short) __IA32_vmulosb (a1, a2);
  return product;
}

/* vec_vmuloub */

/* Calls __IA32_vmuloub with the unsigned char operands cast to vector
   signed char; the outcome is returned as vector unsigned short.  */
inline vector unsigned short
vec_vmuloub (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned short product =
    (vector unsigned short) __IA32_vmuloub ((vector signed char) a1,
                                            (vector signed char) a2);
  return product;
}

/* vec_nmsub */

/* Computes a3 - (a1 * a2) with two SSE steps: _mm_mul_ps for the product,
   then _mm_sub_ps with a3 as the minuend.  */
inline vector float
vec_nmsub (vector float a1, vector float a2, vector float a3)
{
  vector float difference = (vector float) _mm_sub_ps (a3, _mm_mul_ps (a1, a2));
  return difference;
}

/* vec_nor */

/* vec_nor overloads.  Whatever the element type, both operands are handed
   to __IA32_vnor as vector signed int and the outcome is cast back to the
   operand type.  */

inline vector float
vec_nor (vector float a1, vector float a2)
{
  vector float bits =
    (vector float) __IA32_vnor ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector signed int
vec_nor (vector signed int a1, vector signed int a2)
{
  vector signed int bits = (vector signed int) __IA32_vnor (a1, a2);
  return bits;
}

inline vector unsigned int
vec_nor (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int bits =
    (vector unsigned int) __IA32_vnor ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector signed short
vec_nor (vector signed short a1, vector signed short a2)
{
  vector signed short bits =
    (vector signed short) __IA32_vnor ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned short
vec_nor (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short bits =
    (vector unsigned short) __IA32_vnor ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector signed char
vec_nor (vector signed char a1, vector signed char a2)
{
  vector signed char bits =
    (vector signed char) __IA32_vnor ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

inline vector unsigned char
vec_nor (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char bits =
    (vector unsigned char) __IA32_vnor ((vector signed int) a1, (vector signed int) a2);
  return bits;
}

/* vec_or */

/* vec_or overload set.  Every overload casts both operands to
   vector signed int, combines them with _mm_or_si128, and casts the
   result to the overload's declared return type.  */

/* float | float -> float */
inline vector float
vec_or (vector float a1, vector float a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector float) _mm_or_si128 (lhs, rhs);
}

/* float | signed int -> float */
inline vector float
vec_or (vector float a1, vector signed int a2)
{
  vector signed int lhs = (vector signed int) a1;

  return (vector float) _mm_or_si128 (lhs, a2);
}

/* signed int | float -> float */
inline vector float
vec_or (vector signed int a1, vector float a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector float) _mm_or_si128 (a1, rhs);
}

/* signed int | signed int -> signed int */
inline vector signed int
vec_or (vector signed int a1, vector signed int a2)
{
  return (vector signed int) _mm_or_si128 (a1, a2);
}

/* signed int | unsigned int -> unsigned int */
inline vector unsigned int
vec_or (vector signed int a1, vector unsigned int a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) _mm_or_si128 (a1, rhs);
}

/* unsigned int | signed int -> unsigned int */
inline vector unsigned int
vec_or (vector unsigned int a1, vector signed int a2)
{
  vector signed int lhs = (vector signed int) a1;

  return (vector unsigned int) _mm_or_si128 (lhs, a2);
}

/* unsigned int | unsigned int -> unsigned int */
inline vector unsigned int
vec_or (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) _mm_or_si128 (lhs, rhs);
}

/* signed short | signed short -> signed short */
inline vector signed short
vec_or (vector signed short a1, vector signed short a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed short) _mm_or_si128 (lhs, rhs);
}

/* signed short | unsigned short -> unsigned short */
inline vector unsigned short
vec_or (vector signed short a1, vector unsigned short a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) _mm_or_si128 (lhs, rhs);
}

/* unsigned short | signed short -> unsigned short */
inline vector unsigned short
vec_or (vector unsigned short a1, vector signed short a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) _mm_or_si128 (lhs, rhs);
}

/* unsigned short | unsigned short -> unsigned short */
inline vector unsigned short
vec_or (vector unsigned short a1, vector unsigned short a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) _mm_or_si128 (lhs, rhs);
}

/* signed char | signed char -> signed char */
inline vector signed char
vec_or (vector signed char a1, vector signed char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed char) _mm_or_si128 (lhs, rhs);
}

/* signed char | unsigned char -> unsigned char */
inline vector unsigned char
vec_or (vector signed char a1, vector unsigned char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned char) _mm_or_si128 (lhs, rhs);
}

/* unsigned char | signed char -> unsigned char */
inline vector unsigned char
vec_or (vector unsigned char a1, vector signed char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned char) _mm_or_si128 (lhs, rhs);
}

/* unsigned char | unsigned char -> unsigned char */
inline vector unsigned char
vec_or (vector unsigned char a1, vector unsigned char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned char) _mm_or_si128 (lhs, rhs);
}

/* vec_pack */

/* vec_pack overload set.  The short-element overloads call
   __IA32_vpkuhum with vector signed short operands; the int-element
   overloads call __IA32_vpkuwum with vector signed int operands.  */

/* signed short pair -> signed char */
inline vector signed char
vec_pack (vector signed short a1, vector signed short a2)
{
  return (vector signed char) __IA32_vpkuhum (a1, a2);
}

/* unsigned short pair -> unsigned char */
inline vector unsigned char
vec_pack (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short lhs = (vector signed short) a1;
  vector signed short rhs = (vector signed short) a2;

  return (vector unsigned char) __IA32_vpkuhum (lhs, rhs);
}

/* signed int pair -> signed short */
inline vector signed short
vec_pack (vector signed int a1, vector signed int a2)
{
  return (vector signed short) __IA32_vpkuwum (a1, a2);
}

/* unsigned int pair -> unsigned short */
inline vector unsigned short
vec_pack (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) __IA32_vpkuwum (lhs, rhs);
}

/* vec_vpkuwum */

/* Instruction-named wrappers around __IA32_vpkuwum.  */

/* signed int pair -> signed short */
inline vector signed short
vec_vpkuwum (vector signed int a1, vector signed int a2)
{
  return (vector signed short) __IA32_vpkuwum (a1, a2);
}

/* unsigned int pair -> unsigned short; operands are cast to
   vector signed int for the helper call */
inline vector unsigned short
vec_vpkuwum (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) __IA32_vpkuwum (lhs, rhs);
}

/* vec_vpkuhum */

/* Instruction-named wrappers around __IA32_vpkuhum.  */

/* signed short pair -> signed char */
inline vector signed char
vec_vpkuhum (vector signed short a1, vector signed short a2)
{
  return (vector signed char) __IA32_vpkuhum (a1, a2);
}

/* unsigned short pair -> unsigned char; operands are cast to
   vector signed short for the helper call */
inline vector unsigned char
vec_vpkuhum (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short lhs = (vector signed short) a1;
  vector signed short rhs = (vector signed short) a2;

  return (vector unsigned char) __IA32_vpkuhum (lhs, rhs);
}

/* vec_packpx */

/* Casts both unsigned int vectors to vector signed int, calls
   __IA32_vpkpx, and returns the result as vector unsigned short.  */
inline vector unsigned short
vec_packpx (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) __IA32_vpkpx (lhs, rhs);
}

/* vec_packs */

/* vec_packs overload set.  Each overload picks the helper matching
   its operand type: __IA32_vpkuhus, __IA32_vpkshss, __IA32_vpkuwus or
   __IA32_vpkswss.  */

/* unsigned short pair -> unsigned char, via __IA32_vpkuhus */
inline vector unsigned char
vec_packs (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short lhs = (vector signed short) a1;
  vector signed short rhs = (vector signed short) a2;

  return (vector unsigned char) __IA32_vpkuhus (lhs, rhs);
}

/* signed short pair -> signed char, via __IA32_vpkshss */
inline vector signed char
vec_packs (vector signed short a1, vector signed short a2)
{
  return (vector signed char) __IA32_vpkshss (a1, a2);
}

/* unsigned int pair -> unsigned short, via __IA32_vpkuwus */
inline vector unsigned short
vec_packs (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) __IA32_vpkuwus (lhs, rhs);
}

/* signed int pair -> signed short, via __IA32_vpkswss */
inline vector signed short
vec_packs (vector signed int a1, vector signed int a2)
{
  return (vector signed short) __IA32_vpkswss (a1, a2);
}

/* vec_vpkswss */

/* Instruction-named wrapper: hands both signed int vectors to
   __IA32_vpkswss and returns its result as vector signed short.  */
inline vector signed short
vec_vpkswss (vector signed int a1, vector signed int a2)
{
  vector signed short packed = (vector signed short) __IA32_vpkswss (a1, a2);

  return packed;
}

/* vec_vpkuwus */

/* Instruction-named wrapper: casts both unsigned int vectors to
   vector signed int, calls __IA32_vpkuwus, and returns the result as
   vector unsigned short.  */
inline vector unsigned short
vec_vpkuwus (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) __IA32_vpkuwus (lhs, rhs);
}

/* vec_vpkshss */

/* Instruction-named wrapper: hands both signed short vectors to
   __IA32_vpkshss and returns its result as vector signed char.  */
inline vector signed char
vec_vpkshss (vector signed short a1, vector signed short a2)
{
  vector signed char packed = (vector signed char) __IA32_vpkshss (a1, a2);

  return packed;
}

/* vec_vpkuhus */

/* Instruction-named wrapper: casts both unsigned short vectors to
   vector signed short, calls __IA32_vpkuhus, and returns the result
   as vector unsigned char.  */
inline vector unsigned char
vec_vpkuhus (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short lhs = (vector signed short) a1;
  vector signed short rhs = (vector signed short) a2;

  return (vector unsigned char) __IA32_vpkuhus (lhs, rhs);
}

/* vec_packsu */

/* vec_packsu overload set.  All overloads return an unsigned vector;
   the helper is chosen by operand type: __IA32_vpkuhus,
   __IA32_vpkshus, __IA32_vpkuwus or __IA32_vpkswus.  */

/* unsigned short pair -> unsigned char, via __IA32_vpkuhus */
inline vector unsigned char
vec_packsu (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short lhs = (vector signed short) a1;
  vector signed short rhs = (vector signed short) a2;

  return (vector unsigned char) __IA32_vpkuhus (lhs, rhs);
}

/* signed short pair -> unsigned char, via __IA32_vpkshus */
inline vector unsigned char
vec_packsu (vector signed short a1, vector signed short a2)
{
  return (vector unsigned char) __IA32_vpkshus (a1, a2);
}

/* unsigned int pair -> unsigned short, via __IA32_vpkuwus */
inline vector unsigned short
vec_packsu (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) __IA32_vpkuwus (lhs, rhs);
}

/* signed int pair -> unsigned short, via __IA32_vpkswus */
inline vector unsigned short
vec_packsu (vector signed int a1, vector signed int a2)
{
  return (vector unsigned short) __IA32_vpkswus (a1, a2);
}

/* vec_vpkswus */

/* Instruction-named wrapper: hands both signed int vectors to
   __IA32_vpkswus and returns its result as vector unsigned short.  */
inline vector unsigned short
vec_vpkswus (vector signed int a1, vector signed int a2)
{
  vector unsigned short packed = (vector unsigned short) __IA32_vpkswus (a1, a2);

  return packed;
}

/* vec_vpkshus */

/* Instruction-named wrapper: hands both signed short vectors to
   __IA32_vpkshus and returns its result as vector unsigned char.  */
inline vector unsigned char
vec_vpkshus (vector signed short a1, vector signed short a2)
{
  vector unsigned char packed = (vector unsigned char) __IA32_vpkshus (a1, a2);

  return packed;
}

/* vec_perm */

/* vec_perm overload set.  Every overload casts the two data operands
   to vector signed int and the control operand a3 to
   vector signed char, calls __IA32_vperm_4si, and casts the result
   back to the data operands' type.  */

/* float data */
inline vector float
vec_perm (vector float a1, vector float a2, vector unsigned char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed char ctl = (vector signed char) a3;

  return (vector float) __IA32_vperm_4si (lhs, rhs, ctl);
}

/* signed int data: only the control operand needs a cast */
inline vector signed int
vec_perm (vector signed int a1, vector signed int a2, vector unsigned char a3)
{
  vector signed char ctl = (vector signed char) a3;

  return (vector signed int) __IA32_vperm_4si (a1, a2, ctl);
}

/* unsigned int data */
inline vector unsigned int
vec_perm (vector unsigned int a1, vector unsigned int a2, vector unsigned char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed char ctl = (vector signed char) a3;

  return (vector unsigned int) __IA32_vperm_4si (lhs, rhs, ctl);
}

/* signed short data */
inline vector signed short
vec_perm (vector signed short a1, vector signed short a2, vector unsigned char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed char ctl = (vector signed char) a3;

  return (vector signed short) __IA32_vperm_4si (lhs, rhs, ctl);
}

/* unsigned short data */
inline vector unsigned short
vec_perm (vector unsigned short a1, vector unsigned short a2, vector unsigned char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed char ctl = (vector signed char) a3;

  return (vector unsigned short) __IA32_vperm_4si (lhs, rhs, ctl);
}

/* signed char data */
inline vector signed char
vec_perm (vector signed char a1, vector signed char a2, vector unsigned char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed char ctl = (vector signed char) a3;

  return (vector signed char) __IA32_vperm_4si (lhs, rhs, ctl);
}

/* unsigned char data */
inline vector unsigned char
vec_perm (vector unsigned char a1, vector unsigned char a2, vector unsigned char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed char ctl = (vector signed char) a3;

  return (vector unsigned char) __IA32_vperm_4si (lhs, rhs, ctl);
}

/* vec_re */

/* Returns the result of the SSE reciprocal intrinsic _mm_rcp_ps
   applied to a1.  */
inline vector float
vec_re (vector float a1)
{
  vector float estimate = (vector float) _mm_rcp_ps (a1);

  return estimate;
}

/* vec_rl */

/* vec_rl overload set.  Char, short and int element overloads call
   __IA32_vrlb, __IA32_vrlh and __IA32_vrlw respectively, with both
   operands cast to the signed vector type of that element width.  */

/* signed char, via __IA32_vrlb */
inline vector signed char
vec_rl (vector signed char a1, vector unsigned char a2)
{
  vector signed char rhs = (vector signed char) a2;

  return (vector signed char) __IA32_vrlb (a1, rhs);
}

/* unsigned char, via __IA32_vrlb */
inline vector unsigned char
vec_rl (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char lhs = (vector signed char) a1;
  vector signed char rhs = (vector signed char) a2;

  return (vector unsigned char) __IA32_vrlb (lhs, rhs);
}

/* signed short, via __IA32_vrlh */
inline vector signed short
vec_rl (vector signed short a1, vector unsigned short a2)
{
  vector signed short rhs = (vector signed short) a2;

  return (vector signed short) __IA32_vrlh (a1, rhs);
}

/* unsigned short, via __IA32_vrlh */
inline vector unsigned short
vec_rl (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short lhs = (vector signed short) a1;
  vector signed short rhs = (vector signed short) a2;

  return (vector unsigned short) __IA32_vrlh (lhs, rhs);
}

/* signed int, via __IA32_vrlw */
inline vector signed int
vec_rl (vector signed int a1, vector unsigned int a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector signed int) __IA32_vrlw (a1, rhs);
}

/* unsigned int, via __IA32_vrlw */
inline vector unsigned int
vec_rl (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vrlw (lhs, rhs);
}

/* vec_vrlw */

/* Instruction-named wrappers around __IA32_vrlw; operands are passed
   as vector signed int.  */

/* signed int data */
inline vector signed int
vec_vrlw (vector signed int a1, vector unsigned int a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector signed int) __IA32_vrlw (a1, rhs);
}

/* unsigned int data */
inline vector unsigned int
vec_vrlw (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vrlw (lhs, rhs);
}

/* vec_vrlh */

/* Instruction-named wrappers around __IA32_vrlh; operands are passed
   as vector signed short.  */

/* signed short data */
inline vector signed short
vec_vrlh (vector signed short a1, vector unsigned short a2)
{
  vector signed short rhs = (vector signed short) a2;

  return (vector signed short) __IA32_vrlh (a1, rhs);
}

/* unsigned short data */
inline vector unsigned short
vec_vrlh (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short lhs = (vector signed short) a1;
  vector signed short rhs = (vector signed short) a2;

  return (vector unsigned short) __IA32_vrlh (lhs, rhs);
}

/* vec_vrlb */

/* Instruction-named wrappers around __IA32_vrlb; operands are passed
   as vector signed char.  */

/* signed char data */
inline vector signed char
vec_vrlb (vector signed char a1, vector unsigned char a2)
{
  vector signed char rhs = (vector signed char) a2;

  return (vector signed char) __IA32_vrlb (a1, rhs);
}

/* unsigned char data */
inline vector unsigned char
vec_vrlb (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char lhs = (vector signed char) a1;
  vector signed char rhs = (vector signed char) a2;

  return (vector unsigned char) __IA32_vrlb (lhs, rhs);
}

/* vec_round */

/* Delegates to the __IA32_vrfin helper and returns its result as
   vector float.  */
inline vector float
vec_round (vector float a1)
{
  vector float rounded = (vector float) __IA32_vrfin (a1);

  return rounded;
}

/* vec_rsqrte */

/* Returns the result of the SSE reciprocal-square-root intrinsic
   _mm_rsqrt_ps applied to a1.  */
inline vector float
vec_rsqrte (vector float a1)
{
  vector float estimate = (vector float) _mm_rsqrt_ps (a1);

  return estimate;
}

/* vec_sel */

/* vec_sel overload set.  The float overloads cast the selector a3 to
   vector float and call __IA32_vsel_4sf.  All integer overloads cast
   every operand to vector signed int and call __IA32_vsel_4si.  The
   result is cast to the type of the data operands a1/a2.  */

/* float data, signed int selector */
inline vector float
vec_sel (vector float a1, vector float a2, vector signed int a3)
{
  vector float sel = (vector float) a3;

  return (vector float) __IA32_vsel_4sf (a1, a2, sel);
}

/* float data, unsigned int selector */
inline vector float
vec_sel (vector float a1, vector float a2, vector unsigned int a3)
{
  vector float sel = (vector float) a3;

  return (vector float) __IA32_vsel_4sf (a1, a2, sel);
}

/* signed int data, signed int selector */
inline vector signed int
vec_sel (vector signed int a1, vector signed int a2, vector signed int a3)
{
  return (vector signed int) __IA32_vsel_4si (a1, a2, a3);
}

/* signed int data, unsigned int selector */
inline vector signed int
vec_sel (vector signed int a1, vector signed int a2, vector unsigned int a3)
{
  vector signed int sel = (vector signed int) a3;

  return (vector signed int) __IA32_vsel_4si (a1, a2, sel);
}

/* unsigned int data, signed int selector */
inline vector unsigned int
vec_sel (vector unsigned int a1, vector unsigned int a2, vector signed int a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsel_4si (lhs, rhs, a3);
}

/* unsigned int data, unsigned int selector */
inline vector unsigned int
vec_sel (vector unsigned int a1, vector unsigned int a2, vector unsigned int a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed int sel = (vector signed int) a3;

  return (vector unsigned int) __IA32_vsel_4si (lhs, rhs, sel);
}

/* signed short data, signed short selector */
inline vector signed short
vec_sel (vector signed short a1, vector signed short a2, vector signed short a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed int sel = (vector signed int) a3;

  return (vector signed short) __IA32_vsel_4si (lhs, rhs, sel);
}

/* signed short data, unsigned short selector */
inline vector signed short
vec_sel (vector signed short a1, vector signed short a2, vector unsigned short a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed int sel = (vector signed int) a3;

  return (vector signed short) __IA32_vsel_4si (lhs, rhs, sel);
}

/* unsigned short data, signed short selector */
inline vector unsigned short
vec_sel (vector unsigned short a1, vector unsigned short a2, vector signed short a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed int sel = (vector signed int) a3;

  return (vector unsigned short) __IA32_vsel_4si (lhs, rhs, sel);
}

/* unsigned short data, unsigned short selector */
inline vector unsigned short
vec_sel (vector unsigned short a1, vector unsigned short a2, vector unsigned short a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed int sel = (vector signed int) a3;

  return (vector unsigned short) __IA32_vsel_4si (lhs, rhs, sel);
}

/* signed char data, signed char selector */
inline vector signed char
vec_sel (vector signed char a1, vector signed char a2, vector signed char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed int sel = (vector signed int) a3;

  return (vector signed char) __IA32_vsel_4si (lhs, rhs, sel);
}

/* signed char data, unsigned char selector */
inline vector signed char
vec_sel (vector signed char a1, vector signed char a2, vector unsigned char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed int sel = (vector signed int) a3;

  return (vector signed char) __IA32_vsel_4si (lhs, rhs, sel);
}

/* unsigned char data, signed char selector */
inline vector unsigned char
vec_sel (vector unsigned char a1, vector unsigned char a2, vector signed char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed int sel = (vector signed int) a3;

  return (vector unsigned char) __IA32_vsel_4si (lhs, rhs, sel);
}

/* unsigned char data, unsigned char selector */
inline vector unsigned char
vec_sel (vector unsigned char a1, vector unsigned char a2, vector unsigned char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;
  vector signed int sel = (vector signed int) a3;

  return (vector unsigned char) __IA32_vsel_4si (lhs, rhs, sel);
}

/* vec_sl */

/* vec_sl overload set.  Char, short and int element overloads call
   __IA32_vslb, __IA32_vslh and __IA32_vslw respectively, with both
   operands cast to the signed vector type of that element width.  */

/* signed char, via __IA32_vslb */
inline vector signed char
vec_sl (vector signed char a1, vector unsigned char a2)
{
  vector signed char rhs = (vector signed char) a2;

  return (vector signed char) __IA32_vslb (a1, rhs);
}

/* unsigned char, via __IA32_vslb */
inline vector unsigned char
vec_sl (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char lhs = (vector signed char) a1;
  vector signed char rhs = (vector signed char) a2;

  return (vector unsigned char) __IA32_vslb (lhs, rhs);
}

/* signed short, via __IA32_vslh */
inline vector signed short
vec_sl (vector signed short a1, vector unsigned short a2)
{
  vector signed short rhs = (vector signed short) a2;

  return (vector signed short) __IA32_vslh (a1, rhs);
}

/* unsigned short, via __IA32_vslh */
inline vector unsigned short
vec_sl (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short lhs = (vector signed short) a1;
  vector signed short rhs = (vector signed short) a2;

  return (vector unsigned short) __IA32_vslh (lhs, rhs);
}

/* signed int, via __IA32_vslw */
inline vector signed int
vec_sl (vector signed int a1, vector unsigned int a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector signed int) __IA32_vslw (a1, rhs);
}

/* unsigned int, via __IA32_vslw */
inline vector unsigned int
vec_sl (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vslw (lhs, rhs);
}

/* vec_vslw */

/* Instruction-named wrappers around __IA32_vslw; operands are passed
   as vector signed int.  */

/* signed int data */
inline vector signed int
vec_vslw (vector signed int a1, vector unsigned int a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector signed int) __IA32_vslw (a1, rhs);
}

/* unsigned int data */
inline vector unsigned int
vec_vslw (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vslw (lhs, rhs);
}

/* vec_vslh */

/* Instruction-named wrappers around __IA32_vslh; operands are passed
   as vector signed short.  */

/* signed short data */
inline vector signed short
vec_vslh (vector signed short a1, vector unsigned short a2)
{
  vector signed short rhs = (vector signed short) a2;

  return (vector signed short) __IA32_vslh (a1, rhs);
}

/* unsigned short data */
inline vector unsigned short
vec_vslh (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short lhs = (vector signed short) a1;
  vector signed short rhs = (vector signed short) a2;

  return (vector unsigned short) __IA32_vslh (lhs, rhs);
}

/* vec_vslb */

/* Instruction-named wrappers around __IA32_vslb; operands are passed
   as vector signed char.  */

/* signed char data */
inline vector signed char
vec_vslb (vector signed char a1, vector unsigned char a2)
{
  vector signed char rhs = (vector signed char) a2;

  return (vector signed char) __IA32_vslb (a1, rhs);
}

/* unsigned char data */
inline vector unsigned char
vec_vslb (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char lhs = (vector signed char) a1;
  vector signed char rhs = (vector signed char) a2;

  return (vector unsigned char) __IA32_vslb (lhs, rhs);
}

/* vec_sld */

/* vec_sld overload set.  Every overload casts the two vector operands
   to vector signed int and calls __IA32_vsldoi_4si, casting the
   result back to the operands' type.  The scalar a3 is handed to the
   helper untouched, exactly as received.  */

/* float data */
inline vector float
vec_sld (vector float a1, vector float a2, const char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector float) __IA32_vsldoi_4si (lhs, rhs, a3);
}

/* signed int data: no vector cast is needed */
inline vector signed int
vec_sld (vector signed int a1, vector signed int a2, const char a3)
{
  return (vector signed int) __IA32_vsldoi_4si (a1, a2, a3);
}

/* unsigned int data */
inline vector unsigned int
vec_sld (vector unsigned int a1, vector unsigned int a2, const char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsldoi_4si (lhs, rhs, a3);
}

/* signed short data */
inline vector signed short
vec_sld (vector signed short a1, vector signed short a2, const char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed short) __IA32_vsldoi_4si (lhs, rhs, a3);
}

/* unsigned short data */
inline vector unsigned short
vec_sld (vector unsigned short a1, vector unsigned short a2, const char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) __IA32_vsldoi_4si (lhs, rhs, a3);
}

/* signed char data */
inline vector signed char
vec_sld (vector signed char a1, vector signed char a2, const char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed char) __IA32_vsldoi_4si (lhs, rhs, a3);
}

/* unsigned char data */
inline vector unsigned char
vec_sld (vector unsigned char a1, vector unsigned char a2, const char a3)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned char) __IA32_vsldoi_4si (lhs, rhs, a3);
}

/* vec_sll */

/* vec_sll overload set.  Every overload casts both operands to
   vector signed int, calls __IA32_vsl, and casts the result to the
   type of a1.  The second operand may be an unsigned int, unsigned
   short or unsigned char vector.  */

/* signed int data, unsigned int second operand */
inline vector signed int
vec_sll (vector signed int a1, vector unsigned int a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector signed int) __IA32_vsl (a1, rhs);
}

/* signed int data, unsigned short second operand */
inline vector signed int
vec_sll (vector signed int a1, vector unsigned short a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector signed int) __IA32_vsl (a1, rhs);
}

/* signed int data, unsigned char second operand */
inline vector signed int
vec_sll (vector signed int a1, vector unsigned char a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector signed int) __IA32_vsl (a1, rhs);
}

/* unsigned int data, unsigned int second operand */
inline vector unsigned int
vec_sll (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsl (lhs, rhs);
}

/* unsigned int data, unsigned short second operand */
inline vector unsigned int
vec_sll (vector unsigned int a1, vector unsigned short a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsl (lhs, rhs);
}

/* unsigned int data, unsigned char second operand */
inline vector unsigned int
vec_sll (vector unsigned int a1, vector unsigned char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsl (lhs, rhs);
}

/* signed short data, unsigned int second operand */
inline vector signed short
vec_sll (vector signed short a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed short) __IA32_vsl (lhs, rhs);
}

/* signed short data, unsigned short second operand */
inline vector signed short
vec_sll (vector signed short a1, vector unsigned short a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed short) __IA32_vsl (lhs, rhs);
}

/* signed short data, unsigned char second operand */
inline vector signed short
vec_sll (vector signed short a1, vector unsigned char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed short) __IA32_vsl (lhs, rhs);
}

/* unsigned short data, unsigned int second operand */
inline vector unsigned short
vec_sll (vector unsigned short a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) __IA32_vsl (lhs, rhs);
}

/* unsigned short data, unsigned short second operand */
inline vector unsigned short
vec_sll (vector unsigned short a1, vector unsigned short a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) __IA32_vsl (lhs, rhs);
}

/* unsigned short data, unsigned char second operand */
inline vector unsigned short
vec_sll (vector unsigned short a1, vector unsigned char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) __IA32_vsl (lhs, rhs);
}

/* signed char data, unsigned int second operand */
inline vector signed char
vec_sll (vector signed char a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed char) __IA32_vsl (lhs, rhs);
}

/* signed char data, unsigned short second operand */
inline vector signed char
vec_sll (vector signed char a1, vector unsigned short a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed char) __IA32_vsl (lhs, rhs);
}

/* signed char data, unsigned char second operand */
inline vector signed char
vec_sll (vector signed char a1, vector unsigned char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed char) __IA32_vsl (lhs, rhs);
}

/* unsigned char data, unsigned int second operand */
inline vector unsigned char
vec_sll (vector unsigned char a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned char) __IA32_vsl (lhs, rhs);
}

/* unsigned char data, unsigned short second operand */
inline vector unsigned char
vec_sll (vector unsigned char a1, vector unsigned short a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned char) __IA32_vsl (lhs, rhs);
}

/* unsigned char data, unsigned char second operand */
inline vector unsigned char
vec_sll (vector unsigned char a1, vector unsigned char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned char) __IA32_vsl (lhs, rhs);
}

/* vec_slo */

/* vec_slo overload set.  Every overload casts both operands to
   vector signed int, calls __IA32_vslo, and casts the result to the
   type of a1.  The second operand may be a signed or unsigned char
   vector.  */

/* float data, signed char second operand */
inline vector float
vec_slo (vector float a1, vector signed char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector float) __IA32_vslo (lhs, rhs);
}

/* float data, unsigned char second operand */
inline vector float
vec_slo (vector float a1, vector unsigned char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector float) __IA32_vslo (lhs, rhs);
}

/* signed int data, signed char second operand */
inline vector signed int
vec_slo (vector signed int a1, vector signed char a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector signed int) __IA32_vslo (a1, rhs);
}

/* signed int data, unsigned char second operand */
inline vector signed int
vec_slo (vector signed int a1, vector unsigned char a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector signed int) __IA32_vslo (a1, rhs);
}

/* unsigned int data, signed char second operand */
inline vector unsigned int
vec_slo (vector unsigned int a1, vector signed char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vslo (lhs, rhs);
}

/* unsigned int data, unsigned char second operand */
inline vector unsigned int
vec_slo (vector unsigned int a1, vector unsigned char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vslo (lhs, rhs);
}

/* signed short data, signed char second operand */
inline vector signed short
vec_slo (vector signed short a1, vector signed char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed short) __IA32_vslo (lhs, rhs);
}

/* signed short data, unsigned char second operand */
inline vector signed short
vec_slo (vector signed short a1, vector unsigned char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed short) __IA32_vslo (lhs, rhs);
}

/* unsigned short data, signed char second operand */
inline vector unsigned short
vec_slo (vector unsigned short a1, vector signed char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) __IA32_vslo (lhs, rhs);
}

/* unsigned short data, unsigned char second operand */
inline vector unsigned short
vec_slo (vector unsigned short a1, vector unsigned char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned short) __IA32_vslo (lhs, rhs);
}

/* signed char data, signed char second operand */
inline vector signed char
vec_slo (vector signed char a1, vector signed char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed char) __IA32_vslo (lhs, rhs);
}

/* signed char data, unsigned char second operand */
inline vector signed char
vec_slo (vector signed char a1, vector unsigned char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector signed char) __IA32_vslo (lhs, rhs);
}

/* unsigned char data, signed char second operand */
inline vector unsigned char
vec_slo (vector unsigned char a1, vector signed char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned char) __IA32_vslo (lhs, rhs);
}

/* unsigned char data, unsigned char second operand */
inline vector unsigned char
vec_slo (vector unsigned char a1, vector unsigned char a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned char) __IA32_vslo (lhs, rhs);
}

/* vec_splat */

/* vec_splat overload set.  The helper is chosen by element type:
   __IA32_vspltb (char), __IA32_vsplth (short), __IA32_vspltf (float)
   and __IA32_vspltw (int).  The scalar a2 is passed to the helper
   untouched.  */

/* signed char, via __IA32_vspltb */
inline vector signed char
vec_splat (vector signed char a1, const char a2)
{
  return (vector signed char) __IA32_vspltb (a1, a2);
}

/* unsigned char, via __IA32_vspltb */
inline vector unsigned char
vec_splat (vector unsigned char a1, const char a2)
{
  vector signed char src = (vector signed char) a1;

  return (vector unsigned char) __IA32_vspltb (src, a2);
}

/* signed short, via __IA32_vsplth */
inline vector signed short
vec_splat (vector signed short a1, const char a2)
{
  return (vector signed short) __IA32_vsplth (a1, a2);
}

/* unsigned short, via __IA32_vsplth */
inline vector unsigned short
vec_splat (vector unsigned short a1, const char a2)
{
  vector signed short src = (vector signed short) a1;

  return (vector unsigned short) __IA32_vsplth (src, a2);
}

/* float, via __IA32_vspltf */
inline vector float
vec_splat (vector float a1, const char a2)
{
  return (vector float) __IA32_vspltf (a1, a2);
}

/* signed int, via __IA32_vspltw */
inline vector signed int
vec_splat (vector signed int a1, const char a2)
{
  return (vector signed int) __IA32_vspltw (a1, a2);
}

/* unsigned int, via __IA32_vspltw */
inline vector unsigned int
vec_splat (vector unsigned int a1, const char a2)
{
  vector signed int src = (vector signed int) a1;

  return (vector unsigned int) __IA32_vspltw (src, a2);
}

/* vec_vspltw */

/* Instruction-named wrappers for 32-bit elements.  The float overload
   calls __IA32_vspltf; the integer overloads call __IA32_vspltw.  The
   scalar a2 is passed to the helper untouched.  */

/* float, via __IA32_vspltf */
inline vector float
vec_vspltw (vector float a1, const char a2)
{
  return (vector float) __IA32_vspltf (a1, a2);
}

/* signed int, via __IA32_vspltw */
inline vector signed int
vec_vspltw (vector signed int a1, const char a2)
{
  return (vector signed int) __IA32_vspltw (a1, a2);
}

/* unsigned int, via __IA32_vspltw */
inline vector unsigned int
vec_vspltw (vector unsigned int a1, const char a2)
{
  vector signed int src = (vector signed int) a1;

  return (vector unsigned int) __IA32_vspltw (src, a2);
}

/* vec_vsplth */

/* Instruction-named wrappers around __IA32_vsplth.  The scalar a2 is
   passed to the helper untouched.  */

/* signed short data */
inline vector signed short
vec_vsplth (vector signed short a1, const char a2)
{
  return (vector signed short) __IA32_vsplth (a1, a2);
}

/* unsigned short data, cast to vector signed short for the call */
inline vector unsigned short
vec_vsplth (vector unsigned short a1, const char a2)
{
  vector signed short src = (vector signed short) a1;

  return (vector unsigned short) __IA32_vsplth (src, a2);
}

/* vec_vspltb */

/* Instruction-named wrappers around __IA32_vspltb.  The scalar a2 is
   passed to the helper untouched.  */

/* signed char data */
inline vector signed char
vec_vspltb (vector signed char a1, const char a2)
{
  return (vector signed char) __IA32_vspltb (a1, a2);
}

/* unsigned char data, cast to vector signed char for the call */
inline vector unsigned char
vec_vspltb (vector unsigned char a1, const char a2)
{
  vector signed char src = (vector signed char) a1;

  return (vector unsigned char) __IA32_vspltb (src, a2);
}

/* vec_splat_s8 */

/* Broadcasts a1 into every 8-bit element with _mm_set1_epi8 and
   returns the result as vector signed char.  */
inline vector signed char
vec_splat_s8 (const char a1)
{
  vector signed char filled = (vector signed char) _mm_set1_epi8 (a1);

  return filled;
}

/* vec_splat_s16 */

/* Broadcasts a1 into every 16-bit element with _mm_set1_epi16 and
   returns the result as vector signed short.  */
inline vector signed short
vec_splat_s16 (const char a1)
{
  vector signed short filled = (vector signed short) _mm_set1_epi16 (a1);

  return filled;
}

/* vec_splat_s32 */

/* Broadcasts a1 into every 32-bit element with _mm_set1_epi32 and
   returns the result as vector signed int.  */
inline vector signed int
vec_splat_s32 (const char a1)
{
  vector signed int filled = (vector signed int) _mm_set1_epi32 (a1);

  return filled;
}

/* vec_splat_u8 */

/* Broadcasts a1 into every 8-bit element and returns the result as
   vector unsigned char.

   AltiVec defines vec_splat_u8 as the signed splat reinterpreted as
   unsigned: the literal (-16..15) is sign-extended, so
   vec_splat_u8 (-1) must give 0xFF in every element.  The previous
   `a1 & 0x1F` mask turned negative literals into 16..31 (e.g. -1
   became 0x1F).  The mask is dropped so this agrees with
   vec_splat_s8; results for 0..31 are unchanged.  */
inline vector unsigned char
vec_splat_u8 (const char a1)
{
  return (vector unsigned char) _mm_set1_epi8 (a1);
}

/* vec_splat_u16 */

/* Broadcasts a1 into every 16-bit element and returns the result as
   vector unsigned short.

   AltiVec defines vec_splat_u16 as the signed splat reinterpreted as
   unsigned: the literal (-16..15) is sign-extended, so
   vec_splat_u16 (-1) must give 0xFFFF in every element.  The previous
   `a1 & 0x1F` mask turned negative literals into 16..31 (e.g. -1
   became 0x001F).  The mask is dropped so this agrees with
   vec_splat_s16; results for 0..31 are unchanged.  */
inline vector unsigned short
vec_splat_u16 (const char a1)
{
  return (vector unsigned short) _mm_set1_epi16 (a1);
}

/* vec_splat_u32 */

/* Broadcasts a1 into every 32-bit element and returns the result as
   vector unsigned int.

   AltiVec defines vec_splat_u32 as the signed splat reinterpreted as
   unsigned: the literal (-16..15) is sign-extended, so
   vec_splat_u32 (-1) must give 0xFFFFFFFF in every element.  The
   previous `a1 & 0x1F` mask turned negative literals into 16..31
   (e.g. -1 became 0x0000001F).  The mask is dropped so this agrees
   with vec_splat_s32; results for 0..31 are unchanged.  */
inline vector unsigned int
vec_splat_u32 (const char a1)
{
  return (vector unsigned int) _mm_set1_epi32 (a1);
}

/* vec_sr */

/* vec_sr overload set.  Char, short and int element overloads call
   __IA32_vsrb, __IA32_vsrh and __IA32_vsrw respectively, with both
   operands cast to the signed vector type of that element width.  */

/* signed char, via __IA32_vsrb */
inline vector signed char
vec_sr (vector signed char a1, vector unsigned char a2)
{
  vector signed char rhs = (vector signed char) a2;

  return (vector signed char) __IA32_vsrb (a1, rhs);
}

/* unsigned char, via __IA32_vsrb */
inline vector unsigned char
vec_sr (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char lhs = (vector signed char) a1;
  vector signed char rhs = (vector signed char) a2;

  return (vector unsigned char) __IA32_vsrb (lhs, rhs);
}

/* signed short, via __IA32_vsrh */
inline vector signed short
vec_sr (vector signed short a1, vector unsigned short a2)
{
  vector signed short rhs = (vector signed short) a2;

  return (vector signed short) __IA32_vsrh (a1, rhs);
}

/* unsigned short, via __IA32_vsrh */
inline vector unsigned short
vec_sr (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short lhs = (vector signed short) a1;
  vector signed short rhs = (vector signed short) a2;

  return (vector unsigned short) __IA32_vsrh (lhs, rhs);
}

/* signed int, via __IA32_vsrw */
inline vector signed int
vec_sr (vector signed int a1, vector unsigned int a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector signed int) __IA32_vsrw (a1, rhs);
}

/* unsigned int, via __IA32_vsrw */
inline vector unsigned int
vec_sr (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsrw (lhs, rhs);
}

/* vec_vsrw */

/* Instruction-named wrappers around __IA32_vsrw; operands are passed
   as vector signed int.  */

/* signed int data */
inline vector signed int
vec_vsrw (vector signed int a1, vector unsigned int a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector signed int) __IA32_vsrw (a1, rhs);
}

/* unsigned int data */
inline vector unsigned int
vec_vsrw (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsrw (lhs, rhs);
}

/* vec_vsrh */

/* Instruction-named wrappers around __IA32_vsrh; operands are passed
   as vector signed short.  */

/* signed short data */
inline vector signed short
vec_vsrh (vector signed short a1, vector unsigned short a2)
{
  vector signed short rhs = (vector signed short) a2;

  return (vector signed short) __IA32_vsrh (a1, rhs);
}

/* unsigned short data */
inline vector unsigned short
vec_vsrh (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short lhs = (vector signed short) a1;
  vector signed short rhs = (vector signed short) a2;

  return (vector unsigned short) __IA32_vsrh (lhs, rhs);
}

/* vec_vsrb */

/* Instruction-named wrappers around __IA32_vsrb; operands are passed
   as vector signed char.  */

/* signed char data */
inline vector signed char
vec_vsrb (vector signed char a1, vector unsigned char a2)
{
  vector signed char rhs = (vector signed char) a2;

  return (vector signed char) __IA32_vsrb (a1, rhs);
}

/* unsigned char data */
inline vector unsigned char
vec_vsrb (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char lhs = (vector signed char) a1;
  vector signed char rhs = (vector signed char) a2;

  return (vector unsigned char) __IA32_vsrb (lhs, rhs);
}

/* vec_sra */

/* vec_sra overload set.  Char, short and int element overloads call
   __IA32_vsrab, __IA32_vsrah and __IA32_vsraw respectively, with both
   operands cast to the signed vector type of that element width.  */

/* signed char, via __IA32_vsrab */
inline vector signed char
vec_sra (vector signed char a1, vector unsigned char a2)
{
  vector signed char rhs = (vector signed char) a2;

  return (vector signed char) __IA32_vsrab (a1, rhs);
}

/* unsigned char, via __IA32_vsrab */
inline vector unsigned char
vec_sra (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char lhs = (vector signed char) a1;
  vector signed char rhs = (vector signed char) a2;

  return (vector unsigned char) __IA32_vsrab (lhs, rhs);
}

/* signed short, via __IA32_vsrah */
inline vector signed short
vec_sra (vector signed short a1, vector unsigned short a2)
{
  vector signed short rhs = (vector signed short) a2;

  return (vector signed short) __IA32_vsrah (a1, rhs);
}

/* unsigned short, via __IA32_vsrah */
inline vector unsigned short
vec_sra (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short lhs = (vector signed short) a1;
  vector signed short rhs = (vector signed short) a2;

  return (vector unsigned short) __IA32_vsrah (lhs, rhs);
}

/* signed int, via __IA32_vsraw */
inline vector signed int
vec_sra (vector signed int a1, vector unsigned int a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector signed int) __IA32_vsraw (a1, rhs);
}

/* unsigned int, via __IA32_vsraw */
inline vector unsigned int
vec_sra (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsraw (lhs, rhs);
}

/* vec_vsraw */

/* Instruction-named wrappers around __IA32_vsraw; operands are passed
   as vector signed int.  */

/* signed int data */
inline vector signed int
vec_vsraw (vector signed int a1, vector unsigned int a2)
{
  vector signed int rhs = (vector signed int) a2;

  return (vector signed int) __IA32_vsraw (a1, rhs);
}

/* unsigned int data */
inline vector unsigned int
vec_vsraw (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int lhs = (vector signed int) a1;
  vector signed int rhs = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsraw (lhs, rhs);
}

/* vec_vsrah */

/* Instruction-named wrappers around __IA32_vsrah; operands are passed
   as vector signed short.  */

/* signed short data */
inline vector signed short
vec_vsrah (vector signed short a1, vector unsigned short a2)
{
  vector signed short rhs = (vector signed short) a2;

  return (vector signed short) __IA32_vsrah (a1, rhs);
}

/* unsigned short data */
inline vector unsigned short
vec_vsrah (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short lhs = (vector signed short) a1;
  vector signed short rhs = (vector signed short) a2;

  return (vector unsigned short) __IA32_vsrah (lhs, rhs);
}

/* vec_vsrab */

/* Byte-lane form: both overloads call __IA32_vsrab on operands viewed as
   vector signed char and cast the result to the type of a1.  */
inline vector signed char
vec_vsrab (vector signed char a1, vector unsigned char a2)
{
  vector signed char bytes = (vector signed char) a1;
  vector signed char counts = (vector signed char) a2;

  return (vector signed char) __IA32_vsrab (bytes, counts);
}

inline vector unsigned char
vec_vsrab (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char bytes = (vector signed char) a1;
  vector signed char counts = (vector signed char) a2;

  return (vector unsigned char) __IA32_vsrab (bytes, counts);
}

/* vec_srl */

/* vec_srl overload set.  Every overload reinterprets both operands as
   vector signed int, calls __IA32_vsr on them, and casts the result back
   to the type of a1.  The element type of a2 (int, short or char) only
   selects the overload; it does not change the call that is made.  */

/* a1: vector signed int */
inline vector signed int
vec_srl (vector signed int a1, vector unsigned int a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector signed int) __IA32_vsr (bits, shift);
}

inline vector signed int
vec_srl (vector signed int a1, vector unsigned short a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector signed int) __IA32_vsr (bits, shift);
}

inline vector signed int
vec_srl (vector signed int a1, vector unsigned char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector signed int) __IA32_vsr (bits, shift);
}

/* a1: vector unsigned int */
inline vector unsigned int
vec_srl (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsr (bits, shift);
}

inline vector unsigned int
vec_srl (vector unsigned int a1, vector unsigned short a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsr (bits, shift);
}

inline vector unsigned int
vec_srl (vector unsigned int a1, vector unsigned char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsr (bits, shift);
}

/* a1: vector signed short */
inline vector signed short
vec_srl (vector signed short a1, vector unsigned int a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector signed short) __IA32_vsr (bits, shift);
}

inline vector signed short
vec_srl (vector signed short a1, vector unsigned short a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector signed short) __IA32_vsr (bits, shift);
}

inline vector signed short
vec_srl (vector signed short a1, vector unsigned char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector signed short) __IA32_vsr (bits, shift);
}

/* a1: vector unsigned short */
inline vector unsigned short
vec_srl (vector unsigned short a1, vector unsigned int a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector unsigned short) __IA32_vsr (bits, shift);
}

inline vector unsigned short
vec_srl (vector unsigned short a1, vector unsigned short a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector unsigned short) __IA32_vsr (bits, shift);
}

inline vector unsigned short
vec_srl (vector unsigned short a1, vector unsigned char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector unsigned short) __IA32_vsr (bits, shift);
}

/* a1: vector signed char */
inline vector signed char
vec_srl (vector signed char a1, vector unsigned int a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector signed char) __IA32_vsr (bits, shift);
}

inline vector signed char
vec_srl (vector signed char a1, vector unsigned short a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector signed char) __IA32_vsr (bits, shift);
}

inline vector signed char
vec_srl (vector signed char a1, vector unsigned char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector signed char) __IA32_vsr (bits, shift);
}

/* a1: vector unsigned char */
inline vector unsigned char
vec_srl (vector unsigned char a1, vector unsigned int a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector unsigned char) __IA32_vsr (bits, shift);
}

inline vector unsigned char
vec_srl (vector unsigned char a1, vector unsigned short a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector unsigned char) __IA32_vsr (bits, shift);
}

inline vector unsigned char
vec_srl (vector unsigned char a1, vector unsigned char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int shift = (vector signed int) a2;

  return (vector unsigned char) __IA32_vsr (bits, shift);
}

/* vec_sro */

/* vec_sro overload set.  Every overload reinterprets both operands as
   vector signed int, calls __IA32_vsro on them, and casts the result back
   to the type of a1.  a2 may be a signed or unsigned char vector; the
   call made is the same either way.  */

/* a1: vector float */
inline vector float
vec_sro (vector float a1, vector signed char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector float) __IA32_vsro (bits, control);
}

inline vector float
vec_sro (vector float a1, vector unsigned char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector float) __IA32_vsro (bits, control);
}

/* a1: vector signed int */
inline vector signed int
vec_sro (vector signed int a1, vector signed char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector signed int) __IA32_vsro (bits, control);
}

inline vector signed int
vec_sro (vector signed int a1, vector unsigned char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector signed int) __IA32_vsro (bits, control);
}

/* a1: vector unsigned int */
inline vector unsigned int
vec_sro (vector unsigned int a1, vector signed char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsro (bits, control);
}

inline vector unsigned int
vec_sro (vector unsigned int a1, vector unsigned char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsro (bits, control);
}

/* a1: vector signed short */
inline vector signed short
vec_sro (vector signed short a1, vector signed char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector signed short) __IA32_vsro (bits, control);
}

inline vector signed short
vec_sro (vector signed short a1, vector unsigned char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector signed short) __IA32_vsro (bits, control);
}

/* a1: vector unsigned short */
inline vector unsigned short
vec_sro (vector unsigned short a1, vector signed char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector unsigned short) __IA32_vsro (bits, control);
}

inline vector unsigned short
vec_sro (vector unsigned short a1, vector unsigned char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector unsigned short) __IA32_vsro (bits, control);
}

/* a1: vector signed char */
inline vector signed char
vec_sro (vector signed char a1, vector signed char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector signed char) __IA32_vsro (bits, control);
}

inline vector signed char
vec_sro (vector signed char a1, vector unsigned char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector signed char) __IA32_vsro (bits, control);
}

/* a1: vector unsigned char */
inline vector unsigned char
vec_sro (vector unsigned char a1, vector signed char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector unsigned char) __IA32_vsro (bits, control);
}

inline vector unsigned char
vec_sro (vector unsigned char a1, vector unsigned char a2)
{
  vector signed int bits = (vector signed int) a1;
  vector signed int control = (vector signed int) a2;

  return (vector unsigned char) __IA32_vsro (bits, control);
}

/* vec_st */

/* vec_st overload set.  The float overload stores through _mm_store_ps at
   the address produced by V4SF_ADDRESS (a2, a3); every integer overload
   views a1 as vector signed int and stores through _mm_store_si128 at the
   address produced by V4SI_ADDRESS (a2, a3).  Both intrinsics are the
   aligned-store forms, so the resulting address must be 16-byte aligned.
   NOTE(review): how the address macros combine a2 and a3 is defined
   elsewhere in this header -- confirm there.  */
inline void
vec_st (vector float a1, int a2, void *a3)
{
  vector float payload = (vector float) a1;

  _mm_store_ps(V4SF_ADDRESS(a2, (void *)a3), payload);
}

inline void
vec_st (vector signed int a1, int a2, void *a3)
{
  vector signed int payload = (vector signed int) a1;

  _mm_store_si128(V4SI_ADDRESS(a2, (void *)a3), payload);
}

inline void
vec_st (vector unsigned int a1, int a2, void *a3)
{
  vector signed int payload = (vector signed int) a1;

  _mm_store_si128(V4SI_ADDRESS(a2, (void *)a3), payload);
}

inline void
vec_st (vector signed short a1, int a2, void *a3)
{
  vector signed int payload = (vector signed int) a1;

  _mm_store_si128(V4SI_ADDRESS(a2, (void *)a3), payload);
}

inline void
vec_st (vector unsigned short a1, int a2, void *a3)
{
  vector signed int payload = (vector signed int) a1;

  _mm_store_si128(V4SI_ADDRESS(a2, (void *)a3), payload);
}

inline void
vec_st (vector signed char a1, int a2, void *a3)
{
  vector signed int payload = (vector signed int) a1;

  _mm_store_si128(V4SI_ADDRESS(a2, (void *)a3), payload);
}

inline void
vec_st (vector unsigned char a1, int a2, void *a3)
{
  vector signed int payload = (vector signed int) a1;

  _mm_store_si128(V4SI_ADDRESS(a2, (void *)a3), payload);
}

/* vec_ste */

/* vec_ste overload set.  Each overload picks the __IA32_stve* helper that
   matches the element type of a1 and passes it the vector, the int a2
   unchanged, and a3 cast to a pointer to that element type.  */
inline void
vec_ste (vector signed char a1, int a2, void *a3)
{
  signed char *dest = (signed char *) a3;

  __IA32_stvesb ((vector signed char) a1, a2, dest);
}

inline void
vec_ste (vector unsigned char a1, int a2, void *a3)
{
  unsigned char *dest = (unsigned char *) a3;

  __IA32_stveub ((vector unsigned char) a1, a2, dest);
}

inline void
vec_ste (vector signed short a1, int a2, void *a3)
{
  signed short *dest = (signed short *) a3;

  __IA32_stvesh ((vector signed short) a1, a2, dest);
}

inline void
vec_ste (vector unsigned short a1, int a2, void *a3)
{
  unsigned short *dest = (unsigned short *) a3;

  __IA32_stveuh ((vector unsigned short) a1, a2, dest);
}

inline void
vec_ste (vector float a1, int a2, void *a3)
{
  float *dest = (float *) a3;

  __IA32_stvef ((vector float) a1, a2, dest);
}

inline void
vec_ste (vector signed int a1, int a2, void *a3)
{
  signed int *dest = (signed int *) a3;

  __IA32_stvesw ((vector signed int) a1, a2, dest);
}

inline void
vec_ste (vector unsigned int a1, int a2, void *a3)
{
  unsigned int *dest = (unsigned int *) a3;

  __IA32_stveuw ((vector unsigned int) a1, a2, dest);
}

/* vec_stvewx */

/* Word-element store forms.  They call the same helpers as the matching
   vec_ste overloads: __IA32_stvef, __IA32_stvesw and __IA32_stveuw, with
   a3 cast to a pointer to the element type.  */
inline void
vec_stvewx (vector float a1, int a2, void *a3)
{
  float *dest = (float *) a3;

  __IA32_stvef ((vector float) a1, a2, dest);
}

inline void
vec_stvewx (vector signed int a1, int a2, void *a3)
{
  signed int *dest = (signed int *) a3;

  __IA32_stvesw ((vector signed int) a1, a2, dest);
}

inline void
vec_stvewx (vector unsigned int a1, int a2, void *a3)
{
  unsigned int *dest = (unsigned int *) a3;

  __IA32_stveuw ((vector unsigned int) a1, a2, dest);
}

/* vec_stvehx */

/* Halfword-element store forms: forward to __IA32_stvesh / __IA32_stveuh
   with a3 cast to a pointer to the (un)signed short element type.  */
inline void
vec_stvehx (vector signed short a1, int a2, void *a3)
{
  signed short *dest = (signed short *) a3;

  __IA32_stvesh ((vector signed short) a1, a2, dest);
}

inline void
vec_stvehx (vector unsigned short a1, int a2, void *a3)
{
  unsigned short *dest = (unsigned short *) a3;

  __IA32_stveuh ((vector unsigned short) a1, a2, dest);
}

/* vec_stvebx */

/* Byte-element store forms: forward to __IA32_stvesb / __IA32_stveub with
   a3 cast to a pointer to the (un)signed char element type.  */
inline void
vec_stvebx (vector signed char a1, int a2, void *a3)
{
  signed char *dest = (signed char *) a3;

  __IA32_stvesb ((vector signed char) a1, a2, dest);
}

inline void
vec_stvebx (vector unsigned char a1, int a2, void *a3)
{
  unsigned char *dest = (unsigned char *) a3;

  __IA32_stveub ((vector unsigned char) a1, a2, dest);
}

/* vec_sub */

/* vec_sub overload set: lane-wise a1 - a2.  Integer overloads use the
   wrapping SSE2 subtracts (_mm_sub_epi8 / _mm_sub_epi16 / _mm_sub_epi32)
   on signed-typed views of the operands; the float overload uses
   _mm_sub_ps.  The result is signed only when both operands are signed;
   any unsigned operand makes the result unsigned.  */

/* 8-bit lanes */
inline vector signed char
vec_sub (vector signed char a1, vector signed char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector signed char) _mm_sub_epi8 (minuend, subtrahend);
}

inline vector unsigned char
vec_sub (vector signed char a1, vector unsigned char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector unsigned char) _mm_sub_epi8 (minuend, subtrahend);
}

inline vector unsigned char
vec_sub (vector unsigned char a1, vector signed char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector unsigned char) _mm_sub_epi8 (minuend, subtrahend);
}

inline vector unsigned char
vec_sub (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector unsigned char) _mm_sub_epi8 (minuend, subtrahend);
}

/* 16-bit lanes */
inline vector signed short
vec_sub (vector signed short a1, vector signed short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector signed short) _mm_sub_epi16 (minuend, subtrahend);
}

inline vector unsigned short
vec_sub (vector signed short a1, vector unsigned short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector unsigned short) _mm_sub_epi16 (minuend, subtrahend);
}

inline vector unsigned short
vec_sub (vector unsigned short a1, vector signed short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector unsigned short) _mm_sub_epi16 (minuend, subtrahend);
}

inline vector unsigned short
vec_sub (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector unsigned short) _mm_sub_epi16 (minuend, subtrahend);
}

/* 32-bit lanes */
inline vector signed int
vec_sub (vector signed int a1, vector signed int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector signed int) _mm_sub_epi32 (minuend, subtrahend);
}

inline vector unsigned int
vec_sub (vector signed int a1, vector unsigned int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector unsigned int) _mm_sub_epi32 (minuend, subtrahend);
}

inline vector unsigned int
vec_sub (vector unsigned int a1, vector signed int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector unsigned int) _mm_sub_epi32 (minuend, subtrahend);
}

inline vector unsigned int
vec_sub (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector unsigned int) _mm_sub_epi32 (minuend, subtrahend);
}

/* single-precision lanes */
inline vector float
vec_sub (vector float a1, vector float a2)
{
  vector float minuend = (vector float) a1;
  vector float subtrahend = (vector float) a2;

  return (vector float) _mm_sub_ps (minuend, subtrahend);
}

/* vec_vsubfp */

/* Single-precision lane-wise a1 - a2 via _mm_sub_ps.  */
inline vector float
vec_vsubfp (vector float a1, vector float a2)
{
  vector float minuend = (vector float) a1;
  vector float subtrahend = (vector float) a2;

  return (vector float) _mm_sub_ps (minuend, subtrahend);
}

/* vec_vsubuwm */

/* 32-bit wrapping subtract via _mm_sub_epi32 on signed-int views of the
   operands.  The result is signed only for the all-signed overload.  */
inline vector signed int
vec_vsubuwm (vector signed int a1, vector signed int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector signed int) _mm_sub_epi32 (minuend, subtrahend);
}

inline vector unsigned int
vec_vsubuwm (vector signed int a1, vector unsigned int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector unsigned int) _mm_sub_epi32 (minuend, subtrahend);
}

inline vector unsigned int
vec_vsubuwm (vector unsigned int a1, vector signed int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector unsigned int) _mm_sub_epi32 (minuend, subtrahend);
}

inline vector unsigned int
vec_vsubuwm (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector unsigned int) _mm_sub_epi32 (minuend, subtrahend);
}

/* vec_vsubuhm */

/* 16-bit wrapping subtract via _mm_sub_epi16 on signed-short views of the
   operands.  The result is signed only for the all-signed overload.  */
inline vector signed short
vec_vsubuhm (vector signed short a1, vector signed short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector signed short) _mm_sub_epi16 (minuend, subtrahend);
}

inline vector unsigned short
vec_vsubuhm (vector signed short a1, vector unsigned short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector unsigned short) _mm_sub_epi16 (minuend, subtrahend);
}

inline vector unsigned short
vec_vsubuhm (vector unsigned short a1, vector signed short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector unsigned short) _mm_sub_epi16 (minuend, subtrahend);
}

inline vector unsigned short
vec_vsubuhm (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector unsigned short) _mm_sub_epi16 (minuend, subtrahend);
}

/* vec_vsububm */

/* 8-bit wrapping subtract via _mm_sub_epi8 on signed-char views of the
   operands.  The result is signed only for the all-signed overload.  */
inline vector signed char
vec_vsububm (vector signed char a1, vector signed char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector signed char) _mm_sub_epi8 (minuend, subtrahend);
}

inline vector unsigned char
vec_vsububm (vector signed char a1, vector unsigned char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector unsigned char) _mm_sub_epi8 (minuend, subtrahend);
}

inline vector unsigned char
vec_vsububm (vector unsigned char a1, vector signed char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector unsigned char) _mm_sub_epi8 (minuend, subtrahend);
}

inline vector unsigned char
vec_vsububm (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector unsigned char) _mm_sub_epi8 (minuend, subtrahend);
}

/* vec_subc */

/* Forwards to __IA32_vsubcuw with both operands viewed as vector signed
   int; the helper's result is returned as vector unsigned int.  */
inline vector unsigned int
vec_subc (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsubcuw (minuend, subtrahend);
}

/* vec_subs */

/* vec_subs overload set: saturating lane-wise a1 - a2.
   - char/short lanes use the SSE2 saturating subtracts: the unsigned
     forms (_mm_subs_epu8 / _mm_subs_epu16) whenever either operand is
     unsigned, the signed forms (_mm_subs_epi8 / _mm_subs_epi16) when both
     are signed.
   - int lanes go through the __IA32_vsubuws / __IA32_vsubsws helpers with
     the same selection rule.
   Operands are always passed as signed-typed views; the result type is
   unsigned unless both operands are signed.  */

/* 8-bit lanes */
inline vector unsigned char
vec_subs (vector signed char a1, vector unsigned char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector unsigned char) _mm_subs_epu8 (minuend, subtrahend);
}

inline vector unsigned char
vec_subs (vector unsigned char a1, vector signed char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector unsigned char) _mm_subs_epu8 (minuend, subtrahend);
}

inline vector unsigned char
vec_subs (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector unsigned char) _mm_subs_epu8 (minuend, subtrahend);
}

inline vector signed char
vec_subs (vector signed char a1, vector signed char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector signed char) _mm_subs_epi8 (minuend, subtrahend);
}

/* 16-bit lanes */
inline vector unsigned short
vec_subs (vector signed short a1, vector unsigned short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector unsigned short) _mm_subs_epu16 (minuend, subtrahend);
}

inline vector unsigned short
vec_subs (vector unsigned short a1, vector signed short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector unsigned short) _mm_subs_epu16 (minuend, subtrahend);
}

inline vector unsigned short
vec_subs (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector unsigned short) _mm_subs_epu16 (minuend, subtrahend);
}

inline vector signed short
vec_subs (vector signed short a1, vector signed short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector signed short) _mm_subs_epi16 (minuend, subtrahend);
}

/* 32-bit lanes */
inline vector unsigned int
vec_subs (vector signed int a1, vector unsigned int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsubuws (minuend, subtrahend);
}

inline vector unsigned int
vec_subs (vector unsigned int a1, vector signed int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsubuws (minuend, subtrahend);
}

inline vector unsigned int
vec_subs (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsubuws (minuend, subtrahend);
}

inline vector signed int
vec_subs (vector signed int a1, vector signed int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector signed int) __IA32_vsubsws (minuend, subtrahend);
}

/* vec_vsubsws */

/* Signed-word form: forwards both operands to __IA32_vsubsws.  */
inline vector signed int
vec_vsubsws (vector signed int a1, vector signed int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector signed int) __IA32_vsubsws (minuend, subtrahend);
}

/* vec_vsubuws */

/* Unsigned-word form: every overload forwards signed-int views of the
   operands to __IA32_vsubuws and returns vector unsigned int.  */
inline vector unsigned int
vec_vsubuws (vector signed int a1, vector unsigned int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsubuws (minuend, subtrahend);
}

inline vector unsigned int
vec_vsubuws (vector unsigned int a1, vector signed int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsubuws (minuend, subtrahend);
}

inline vector unsigned int
vec_vsubuws (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int minuend = (vector signed int) a1;
  vector signed int subtrahend = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsubuws (minuend, subtrahend);
}

/* vec_vsubshs */

/* Signed 16-bit saturating subtract via _mm_subs_epi16.  */
inline vector signed short
vec_vsubshs (vector signed short a1, vector signed short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector signed short) _mm_subs_epi16 (minuend, subtrahend);
}

/* vec_vsubuhs */

/* Unsigned 16-bit saturating subtract via _mm_subs_epu16.  The operands
   are passed as signed-short views; the result is vector unsigned
   short for every overload.  */
inline vector unsigned short
vec_vsubuhs (vector signed short a1, vector unsigned short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector unsigned short) _mm_subs_epu16 (minuend, subtrahend);
}

inline vector unsigned short
vec_vsubuhs (vector unsigned short a1, vector signed short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector unsigned short) _mm_subs_epu16 (minuend, subtrahend);
}

inline vector unsigned short
vec_vsubuhs (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short minuend = (vector signed short) a1;
  vector signed short subtrahend = (vector signed short) a2;

  return (vector unsigned short) _mm_subs_epu16 (minuend, subtrahend);
}

/* vec_vsubsbs */

/* Signed 8-bit saturating subtract via _mm_subs_epi8.  */
inline vector signed char
vec_vsubsbs (vector signed char a1, vector signed char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector signed char) _mm_subs_epi8 (minuend, subtrahend);
}

/* vec_vsububs */

/* Unsigned 8-bit saturating subtract via _mm_subs_epu8.  The operands are
   passed as signed-char views; the result is vector unsigned char for
   every overload.  */
inline vector unsigned char
vec_vsububs (vector signed char a1, vector unsigned char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector unsigned char) _mm_subs_epu8 (minuend, subtrahend);
}

inline vector unsigned char
vec_vsububs (vector unsigned char a1, vector signed char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector unsigned char) _mm_subs_epu8 (minuend, subtrahend);
}

inline vector unsigned char
vec_vsububs (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char minuend = (vector signed char) a1;
  vector signed char subtrahend = (vector signed char) a2;

  return (vector unsigned char) _mm_subs_epu8 (minuend, subtrahend);
}

/* vec_sum4s */

/* vec_sum4s overload set.  The element type of a1 selects the helper:
   unsigned char -> __IA32_vsum4ubs, signed char -> __IA32_vsum4sbs,
   signed short -> __IA32_vsum4shs.  a2 is always passed as vector signed
   int; note the unsigned-char overload hands a1 over as vector signed
   char.  */
inline vector unsigned int
vec_sum4s (vector unsigned char a1, vector unsigned int a2)
{
  vector signed char elems = (vector signed char) a1;
  vector signed int addend = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsum4ubs (elems, addend);
}

inline vector signed int
vec_sum4s (vector signed char a1, vector signed int a2)
{
  vector signed char elems = (vector signed char) a1;
  vector signed int addend = (vector signed int) a2;

  return (vector signed int) __IA32_vsum4sbs (elems, addend);
}

inline vector signed int
vec_sum4s (vector signed short a1, vector signed int a2)
{
  vector signed short elems = (vector signed short) a1;
  vector signed int addend = (vector signed int) a2;

  return (vector signed int) __IA32_vsum4shs (elems, addend);
}

/* vec_vsum4shs */

/* Signed-short form: forwards to __IA32_vsum4shs.  */
inline vector signed int
vec_vsum4shs (vector signed short a1, vector signed int a2)
{
  vector signed short elems = (vector signed short) a1;
  vector signed int addend = (vector signed int) a2;

  return (vector signed int) __IA32_vsum4shs (elems, addend);
}

/* vec_vsum4sbs */

/* Signed-char form: forwards to __IA32_vsum4sbs.  */
inline vector signed int
vec_vsum4sbs (vector signed char a1, vector signed int a2)
{
  vector signed char elems = (vector signed char) a1;
  vector signed int addend = (vector signed int) a2;

  return (vector signed int) __IA32_vsum4sbs (elems, addend);
}

/* vec_vsum4ubs */

/* Unsigned-char form: forwards to __IA32_vsum4ubs, passing a1 as vector
   signed char and a2 as vector signed int.  */
inline vector unsigned int
vec_vsum4ubs (vector unsigned char a1, vector unsigned int a2)
{
  vector signed char elems = (vector signed char) a1;
  vector signed int addend = (vector signed int) a2;

  return (vector unsigned int) __IA32_vsum4ubs (elems, addend);
}

/* vec_sum2s */

/* Forwards both signed-int operands to __IA32_vsum2sws.  */
inline vector signed int
vec_sum2s (vector signed int a1, vector signed int a2)
{
  vector signed int elems = (vector signed int) a1;
  vector signed int addend = (vector signed int) a2;

  return (vector signed int) __IA32_vsum2sws (elems, addend);
}

/* vec_sums */

/* Forwards both signed-int operands to __IA32_vsumsws.  */
inline vector signed int
vec_sums (vector signed int a1, vector signed int a2)
{
  vector signed int elems = (vector signed int) a1;
  vector signed int addend = (vector signed int) a2;

  return (vector signed int) __IA32_vsumsws (elems, addend);
}

/* vec_trunc */

/* Forwards the float vector to __IA32_vrfiz and returns its result.  */
inline vector float
vec_trunc (vector float a1)
{
  vector float input = (vector float) a1;

  return (vector float) __IA32_vrfiz (input);
}

/* vec_unpackh */
/* vec_unpackh overload set.  The signed char and signed short overloads
   run a1 through SWAP32 before calling __IA32_vupkhsb / __IA32_vupkhsh;
   the unsigned short overload calls __IA32_vupkhpx on a1 directly.
   NOTE(review): vec_vupkhsb and vec_vupkhsh call the same helpers WITHOUT
   SWAP32, so the generic and specific forms may yield different results
   -- confirm against the definition of SWAP32 that this is intended.  */
inline vector signed short
vec_unpackh (vector signed char a1)
{
  vector signed char swapped = (vector signed char) SWAP32(a1);

  return (vector signed short) __IA32_vupkhsb (swapped);
}

inline vector unsigned int
vec_unpackh (vector unsigned short a1)
{
  vector signed short pixels = (vector signed short) a1;

  return (vector unsigned int) __IA32_vupkhpx (pixels);
}

inline vector signed int
vec_unpackh (vector signed short a1)
{
  vector signed short swapped = (vector signed short) SWAP32(a1);

  return (vector signed int) __IA32_vupkhsh (swapped);
}

/* vec_vupkhsh */

/* Forwards a1 unchanged to __IA32_vupkhsh; result is vector signed int.  */
inline vector signed int
vec_vupkhsh (vector signed short a1)
{
  vector signed short halves = (vector signed short) a1;

  return (vector signed int) __IA32_vupkhsh (halves);
}

/* vec_vupkhpx */

/* Forwards a1, viewed as vector signed short, to __IA32_vupkhpx; result is
   vector unsigned int.  */
inline vector unsigned int
vec_vupkhpx (vector unsigned short a1)
{
  vector signed short pixels = (vector signed short) a1;

  return (vector unsigned int) __IA32_vupkhpx (pixels);
}

/* vec_vupkhsb */

/* Forwards a1 unchanged to __IA32_vupkhsb; result is vector signed
   short.  */
inline vector signed short
vec_vupkhsb (vector signed char a1)
{
  vector signed char bytes = (vector signed char) a1;

  return (vector signed short) __IA32_vupkhsb (bytes);
}

/* vec_unpackl */

/* vec_unpackl overload set.  The signed char and signed short overloads
   run a1 through SWAP32 before calling __IA32_vupklsb / __IA32_vupklsh;
   the unsigned short overload calls __IA32_vupklpx on a1 directly.
   NOTE(review): vec_vupklsb and vec_vupklsh call the same helpers WITHOUT
   SWAP32, so the generic and specific forms may yield different results
   -- confirm against the definition of SWAP32 that this is intended.  */
inline vector signed short
vec_unpackl (vector signed char a1)
{
  vector signed char swapped = (vector signed char) SWAP32(a1);

  return (vector signed short) __IA32_vupklsb (swapped);
}

inline vector unsigned int
vec_unpackl (vector unsigned short a1)
{
  vector signed short pixels = (vector signed short) a1;

  return (vector unsigned int) __IA32_vupklpx (pixels);
}

inline vector signed int
vec_unpackl (vector signed short a1)
{
  vector signed short swapped = (vector signed short) SWAP32(a1);

  return (vector signed int) __IA32_vupklsh (swapped);
}

/* vec_vupklpx */

/* Forwards a1, viewed as vector signed short, to __IA32_vupklpx; result is
   vector unsigned int.  */
inline vector unsigned int
vec_vupklpx (vector unsigned short a1)
{
  vector signed short pixels = (vector signed short) a1;

  return (vector unsigned int) __IA32_vupklpx (pixels);
}

/* vec_vupklsh */

/* Forwards a1 unchanged to __IA32_vupklsh; result is vector signed int.  */
inline vector signed int
vec_vupklsh (vector signed short a1)
{
  vector signed short halves = (vector signed short) a1;

  return (vector signed int) __IA32_vupklsh (halves);
}

/* vec_vupklsb */

/* Forwards a1 unchanged to __IA32_vupklsb; result is vector signed
   short.  */
inline vector signed short
vec_vupklsb (vector signed char a1)
{
  vector signed char bytes = (vector signed char) a1;

  return (vector signed short) __IA32_vupklsb (bytes);
}

/* vec_xor */

/* vec_xor overload set: bitwise XOR of the full 128 bits.  Every overload
   views both operands as vector signed int, applies _mm_xor_si128, and
   casts the bits back to the result type.  Result type rules as written:
   float if either operand is float; otherwise unsigned if either operand
   is unsigned; otherwise signed.  */

/* float results */
inline vector float
vec_xor (vector float a1, vector float a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector float) _mm_xor_si128 (left, right);
}

inline vector float
vec_xor (vector float a1, vector signed int a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector float) _mm_xor_si128 (left, right);
}

inline vector float
vec_xor (vector signed int a1, vector float a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector float) _mm_xor_si128 (left, right);
}

/* 32-bit integer results */
inline vector signed int
vec_xor (vector signed int a1, vector signed int a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector signed int) _mm_xor_si128 (left, right);
}

inline vector unsigned int
vec_xor (vector signed int a1, vector unsigned int a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector unsigned int) _mm_xor_si128 (left, right);
}

inline vector unsigned int
vec_xor (vector unsigned int a1, vector signed int a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector unsigned int) _mm_xor_si128 (left, right);
}

inline vector unsigned int
vec_xor (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector unsigned int) _mm_xor_si128 (left, right);
}

/* 16-bit integer results */
inline vector signed short
vec_xor (vector signed short a1, vector signed short a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector signed short) _mm_xor_si128 (left, right);
}

inline vector unsigned short
vec_xor (vector signed short a1, vector unsigned short a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector unsigned short) _mm_xor_si128 (left, right);
}

inline vector unsigned short
vec_xor (vector unsigned short a1, vector signed short a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector unsigned short) _mm_xor_si128 (left, right);
}

inline vector unsigned short
vec_xor (vector unsigned short a1, vector unsigned short a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector unsigned short) _mm_xor_si128 (left, right);
}

/* 8-bit integer results */
inline vector signed char
vec_xor (vector signed char a1, vector signed char a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector signed char) _mm_xor_si128 (left, right);
}

inline vector unsigned char
vec_xor (vector signed char a1, vector unsigned char a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector unsigned char) _mm_xor_si128 (left, right);
}

inline vector unsigned char
vec_xor (vector unsigned char a1, vector signed char a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector unsigned char) _mm_xor_si128 (left, right);
}

inline vector unsigned char
vec_xor (vector unsigned char a1, vector unsigned char a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;

  return (vector unsigned char) _mm_xor_si128 (left, right);
}

/* vec_all_eq */

/* vec_all_eq overload set.  Each overload builds a per-lane equality mask
   with the SSE compare matching the lane width (_mm_cmpeq_epi8 / epi16 /
   epi32, or _mm_cmpeq_ps for float) and returns whatever the matching
   all_true_* helper reports for that mask.  Signedness of the operands
   does not affect the compare that is issued.
   NOTE(review): all_true_* are defined elsewhere in this header;
   presumably they return non-zero when every lane of the mask is set.  */

/* 8-bit lanes */
inline int
vec_all_eq (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char mask
    = (vector unsigned char) _mm_cmpeq_epi8 (a1, (vector signed char) a2);

  return all_true_16i(mask);
}

inline int
vec_all_eq (vector signed char a1, vector signed char a2)
{
  vector unsigned char mask = (vector unsigned char) _mm_cmpeq_epi8 (a1, a2);

  return all_true_16i(mask);
}

inline int
vec_all_eq (vector unsigned char a1, vector signed char a2)
{
  vector signed char left = (vector signed char) a1;
  vector signed char right = (vector signed char) a2;
  vector unsigned char mask
    = (vector unsigned char) _mm_cmpeq_epi8 (left, right);

  return all_true_16i(mask);
}

inline int
vec_all_eq (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char left = (vector signed char) a1;
  vector signed char right = (vector signed char) a2;
  vector unsigned char mask
    = (vector unsigned char) _mm_cmpeq_epi8 (left, right);

  return all_true_16i(mask);
}

/* 16-bit lanes */
inline int
vec_all_eq (vector signed short a1, vector unsigned short a2)
{
  vector signed short left = (vector signed short) a1;
  vector signed short right = (vector signed short) a2;
  vector unsigned short mask
    = (vector unsigned short) _mm_cmpeq_epi16 (left, right);

  return all_true_8i(mask);
}

inline int
vec_all_eq (vector signed short a1, vector signed short a2)
{
  vector signed short left = (vector signed short) a1;
  vector signed short right = (vector signed short) a2;
  vector unsigned short mask
    = (vector unsigned short) _mm_cmpeq_epi16 (left, right);

  return all_true_8i(mask);
}

inline int
vec_all_eq (vector unsigned short a1, vector signed short a2)
{
  vector signed short left = (vector signed short) a1;
  vector signed short right = (vector signed short) a2;
  vector unsigned short mask
    = (vector unsigned short) _mm_cmpeq_epi16 (left, right);

  return all_true_8i(mask);
}

inline int
vec_all_eq (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short left = (vector signed short) a1;
  vector signed short right = (vector signed short) a2;
  vector unsigned short mask
    = (vector unsigned short) _mm_cmpeq_epi16 (left, right);

  return all_true_8i(mask);
}

/* 32-bit lanes */
inline int
vec_all_eq (vector signed int a1, vector unsigned int a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;
  vector unsigned int mask
    = (vector unsigned int) _mm_cmpeq_epi32 (left, right);

  return all_true_4i(mask);
}

inline int
vec_all_eq (vector signed int a1, vector signed int a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;
  vector unsigned int mask
    = (vector unsigned int) _mm_cmpeq_epi32 (left, right);

  return all_true_4i(mask);
}

inline int
vec_all_eq (vector unsigned int a1, vector signed int a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;
  vector unsigned int mask
    = (vector unsigned int) _mm_cmpeq_epi32 (left, right);

  return all_true_4i(mask);
}

inline int
vec_all_eq (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;
  vector unsigned int mask
    = (vector unsigned int) _mm_cmpeq_epi32 (left, right);

  return all_true_4i(mask);
}

/* single-precision lanes */
inline int
vec_all_eq (vector float a1, vector float a2)
{
  vector float mask = (vector float) _mm_cmpeq_ps (a1, a2);

  return all_true_4f(mask);
}

/* vec_all_ge */

/* vec_all_ge overload set.  Integer overloads compute a "less than" mask
   and return the all_false_* helper's verdict on it (i.e. the test is
   phrased as "no lane has a1 < a2").  The unsigned compares
   (__IA32_cmplt_epu8 / epu16 / epu32) are used whenever either operand is
   unsigned; the signed SSE2 compares (_mm_cmplt_epi8 / epi16 / epi32) are
   used when both are signed.  The float overload instead feeds the
   _mm_cmpge_ps mask to all_true_4f.
   NOTE(review): all_false_* / all_true_* are defined elsewhere in this
   header -- confirm their exact contracts there.  */

/* 8-bit lanes */
inline int
vec_all_ge (vector signed char a1, vector unsigned char a2)
{
  vector signed char left = (vector signed char) a1;
  vector signed char right = (vector signed char) a2;
  vector unsigned char below
    = (vector unsigned char) __IA32_cmplt_epu8 (left, right);

  return all_false_16i(below);
}

inline int
vec_all_ge (vector unsigned char a1, vector signed char a2)
{
  vector signed char left = (vector signed char) a1;
  vector signed char right = (vector signed char) a2;
  vector unsigned char below
    = (vector unsigned char) __IA32_cmplt_epu8 (left, right);

  return all_false_16i(below);
}

inline int
vec_all_ge (vector unsigned char a1, vector unsigned char a2)
{
  vector signed char left = (vector signed char) a1;
  vector signed char right = (vector signed char) a2;
  vector unsigned char below
    = (vector unsigned char) __IA32_cmplt_epu8 (left, right);

  return all_false_16i(below);
}

inline int
vec_all_ge (vector signed char a1, vector signed char a2)
{
  vector signed char left = (vector signed char) a1;
  vector signed char right = (vector signed char) a2;
  vector unsigned char below
    = (vector unsigned char) _mm_cmplt_epi8 (left, right);

  return all_false_16i(below);
}

/* 16-bit lanes */
inline int
vec_all_ge (vector signed short a1, vector unsigned short a2)
{
  vector signed short left = (vector signed short) a1;
  vector signed short right = (vector signed short) a2;
  vector unsigned short below
    = (vector unsigned short) __IA32_cmplt_epu16 (left, right);

  return all_false_8i(below);
}

inline int
vec_all_ge (vector unsigned short a1, vector signed short a2)
{
  vector signed short left = (vector signed short) a1;
  vector signed short right = (vector signed short) a2;
  vector unsigned short below
    = (vector unsigned short) __IA32_cmplt_epu16 (left, right);

  return all_false_8i(below);
}

inline int
vec_all_ge (vector unsigned short a1, vector unsigned short a2)
{
  vector signed short left = (vector signed short) a1;
  vector signed short right = (vector signed short) a2;
  vector unsigned short below
    = (vector unsigned short) __IA32_cmplt_epu16 (left, right);

  return all_false_8i(below);
}

inline int
vec_all_ge (vector signed short a1, vector signed short a2)
{
  vector signed short left = (vector signed short) a1;
  vector signed short right = (vector signed short) a2;
  vector unsigned short below
    = (vector unsigned short) _mm_cmplt_epi16 (left, right);

  return all_false_8i(below);
}

/* 32-bit lanes */
inline int
vec_all_ge (vector signed int a1, vector unsigned int a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;
  vector unsigned int below
    = (vector unsigned int) __IA32_cmplt_epu32 (left, right);

  return all_false_4i(below);
}

inline int
vec_all_ge (vector unsigned int a1, vector signed int a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;
  vector unsigned int below
    = (vector unsigned int) __IA32_cmplt_epu32 (left, right);

  return all_false_4i(below);
}

inline int
vec_all_ge (vector unsigned int a1, vector unsigned int a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;
  vector unsigned int below
    = (vector unsigned int) __IA32_cmplt_epu32 (left, right);

  return all_false_4i(below);
}

inline int
vec_all_ge (vector signed int a1, vector signed int a2)
{
  vector signed int left = (vector signed int) a1;
  vector signed int right = (vector signed int) a2;
  vector unsigned int below
    = (vector unsigned int) _mm_cmplt_epi32 (left, right);

  return all_false_4i(below);
}

/* single-precision lanes */
inline int
vec_all_ge (vector float a1, vector float a2)
{
  vector float at_least = (vector float) _mm_cmpge_ps (a1, a2);

  return all_true_4f(at_least);
}

/* vec_all_gt */

inline int
vec_all_gt (vector signed char a1, vector unsigned char a2)
{
  return all_true_16i((vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2));
}

inline int
vec_all_gt (vector unsigned char a1, vector signed char a2)
{
  return all_true_16i((vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2));
}

inline int
vec_all_gt (vector unsigned char a1, vector unsigned char a2)
{
  return all_true_16i((vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2));
}

inline int
vec_all_gt (vector signed char a1, vector signed char a2)
{
  return all_true_16i((vector unsigned char) _mm_cmpgt_epi8 ((vector signed char) a1, (vector signed char) a2));
}

inline int
vec_all_gt (vector signed short a1, vector unsigned short a2)
{
  return all_true_8i((vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2));
}

inline int
vec_all_gt (vector unsigned short a1, vector signed short a2)
{
  return all_true_8i((vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2));
}

inline int
vec_all_gt (vector unsigned short a1, vector unsigned short a2)
{
  return all_true_8i((vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2));
}

inline int
vec_all_gt (vector signed short a1, vector signed short a2)
{
  return all_true_8i((vector unsigned short) _mm_cmpgt_epi16 ((vector signed short) a1, (vector signed short) a2));
}

/* vec_all_gt, (vector signed int, vector unsigned int) overload.
   Returns all_true_4i applied to the __IA32_cmpgt_epu32 (a1, a2) lane mask.  */
inline int
vec_all_gt (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return all_true_4i (gt_mask);
}

/* vec_all_gt, (vector unsigned int, vector signed int) overload.
   Returns all_true_4i applied to the __IA32_cmpgt_epu32 (a1, a2) lane mask.  */
inline int
vec_all_gt (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return all_true_4i (gt_mask);
}

/* vec_all_gt, (vector unsigned int, vector unsigned int) overload.
   Returns all_true_4i applied to the __IA32_cmpgt_epu32 (a1, a2) lane mask.  */
inline int
vec_all_gt (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return all_true_4i (gt_mask);
}

/* vec_all_gt, (vector signed int, vector signed int) overload.
   Returns all_true_4i applied to the _mm_cmpgt_epi32 (a1, a2) lane mask.  */
inline int
vec_all_gt (vector signed int a1, vector signed int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) _mm_cmpgt_epi32 ((vector signed int) a1, (vector signed int) a2);
  return all_true_4i (gt_mask);
}

/* vec_all_gt, (vector float, vector float) overload.
   Returns all_true_4f applied to the _mm_cmpgt_ps (a1, a2) lane mask.  */
inline int
vec_all_gt (vector float a1, vector float a2)
{
  vector float gt_mask = (vector float) _mm_cmpgt_ps (a1, a2);
  return all_true_4f (gt_mask);
}

/* vec_all_in */

/* vec_all_in, (vector float, vector float) overload: non-zero when no lane
   of a1 is reported out of bounds with respect to a2.

   The previous body applied all_true_4f to the __IA32_vcmpbfp mask, the
   same mask vec_any_out tests with any_true_4f; "all in bounds" has to be
   the negation of "any out of bounds", so the result of any_true_4f is
   inverted here instead.
   NOTE(review): assumes __IA32_vcmpbfp flags out-of-bounds lanes the way
   the AltiVec vcmpbfp instruction does -- confirm against its definition.  */
inline int
vec_all_in (vector float a1, vector float a2)
{
  vector float out_mask = (vector float) __IA32_vcmpbfp (a1, a2);
  return !any_true_4f (out_mask);
}

/* vec_all_le */

/* vec_all_le, (vector signed char, vector unsigned char) overload.
   Phrased through the complementary compare: returns all_false_16i applied
   to the __IA32_cmpgt_epu8 (a1, a2) lane mask.  */
inline int
vec_all_le (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char gt_mask =
    (vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return all_false_16i (gt_mask);
}

/* vec_all_le, (vector unsigned char, vector signed char) overload.
   Phrased through the complementary compare: returns all_false_16i applied
   to the __IA32_cmpgt_epu8 (a1, a2) lane mask.  */
inline int
vec_all_le (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char gt_mask =
    (vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return all_false_16i (gt_mask);
}

/* vec_all_le, (vector unsigned char, vector unsigned char) overload.
   Phrased through the complementary compare: returns all_false_16i applied
   to the __IA32_cmpgt_epu8 (a1, a2) lane mask.  */
inline int
vec_all_le (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char gt_mask =
    (vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return all_false_16i (gt_mask);
}

/* vec_all_le, (vector signed char, vector signed char) overload.
   Phrased through the complementary compare: returns all_false_16i applied
   to the _mm_cmpgt_epi8 (a1, a2) lane mask.  */
inline int
vec_all_le (vector signed char a1, vector signed char a2)
{
  vector unsigned char gt_mask =
    (vector unsigned char) _mm_cmpgt_epi8 ((vector signed char) a1, (vector signed char) a2);
  return all_false_16i (gt_mask);
}

/* vec_all_le, (vector signed short, vector unsigned short) overload.
   Phrased through the complementary compare: returns all_false_8i applied
   to the __IA32_cmpgt_epu16 (a1, a2) lane mask.  */
inline int
vec_all_le (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short gt_mask =
    (vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return all_false_8i (gt_mask);
}

/* vec_all_le, (vector unsigned short, vector signed short) overload.
   Phrased through the complementary compare: returns all_false_8i applied
   to the __IA32_cmpgt_epu16 (a1, a2) lane mask.  */
inline int
vec_all_le (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short gt_mask =
    (vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return all_false_8i (gt_mask);
}

/* vec_all_le, (vector unsigned short, vector unsigned short) overload.
   Phrased through the complementary compare: returns all_false_8i applied
   to the __IA32_cmpgt_epu16 (a1, a2) lane mask.  */
inline int
vec_all_le (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short gt_mask =
    (vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return all_false_8i (gt_mask);
}

/* vec_all_le, (vector signed short, vector signed short) overload.
   Phrased through the complementary compare: returns all_false_8i applied
   to the _mm_cmpgt_epi16 (a1, a2) lane mask.  */
inline int
vec_all_le (vector signed short a1, vector signed short a2)
{
  vector unsigned short gt_mask =
    (vector unsigned short) _mm_cmpgt_epi16 ((vector signed short) a1, (vector signed short) a2);
  return all_false_8i (gt_mask);
}

/* vec_all_le, (vector signed int, vector unsigned int) overload.
   Phrased through the complementary compare: returns all_false_4i applied
   to the __IA32_cmpgt_epu32 (a1, a2) lane mask.  */
inline int
vec_all_le (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return all_false_4i (gt_mask);
}

/* vec_all_le, (vector unsigned int, vector signed int) overload.
   Phrased through the complementary compare: returns all_false_4i applied
   to the __IA32_cmpgt_epu32 (a1, a2) lane mask.  */
inline int
vec_all_le (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return all_false_4i (gt_mask);
}

/* vec_all_le, (vector unsigned int, vector unsigned int) overload.
   Phrased through the complementary compare: returns all_false_4i applied
   to the __IA32_cmpgt_epu32 (a1, a2) lane mask.  */
inline int
vec_all_le (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return all_false_4i (gt_mask);
}

/* vec_all_le, (vector signed int, vector signed int) overload.
   Phrased through the complementary compare: returns all_false_4i applied
   to the _mm_cmpgt_epi32 (a1, a2) lane mask.  */
inline int
vec_all_le (vector signed int a1, vector signed int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) _mm_cmpgt_epi32 ((vector signed int) a1, (vector signed int) a2);
  return all_false_4i (gt_mask);
}

/* vec_all_le, (vector float, vector float) overload.
   Returns all_true_4f applied to the _mm_cmple_ps (a1, a2) lane mask.  */
inline int
vec_all_le (vector float a1, vector float a2)
{
  vector float le_mask = (vector float) _mm_cmple_ps (a1, a2);
  return all_true_4f (le_mask);
}

/* vec_all_lt */

/* vec_all_lt, (vector signed char, vector unsigned char) overload.
   Returns all_true_16i applied to the __IA32_cmplt_epu8 (a1, a2) lane mask.  */
inline int
vec_all_lt (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char lt_mask =
    (vector unsigned char) __IA32_cmplt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return all_true_16i (lt_mask);
}

/* vec_all_lt, (vector unsigned char, vector signed char) overload.
   Returns all_true_16i applied to the __IA32_cmplt_epu8 (a1, a2) lane mask.  */
inline int
vec_all_lt (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char lt_mask =
    (vector unsigned char) __IA32_cmplt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return all_true_16i (lt_mask);
}

/* vec_all_lt, (vector unsigned char, vector unsigned char) overload.
   Returns all_true_16i applied to the __IA32_cmplt_epu8 (a1, a2) lane mask.  */
inline int
vec_all_lt (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char lt_mask =
    (vector unsigned char) __IA32_cmplt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return all_true_16i (lt_mask);
}

/* vec_all_lt, (vector signed char, vector signed char) overload.
   Returns all_true_16i applied to the _mm_cmplt_epi8 (a1, a2) lane mask.  */
inline int
vec_all_lt (vector signed char a1, vector signed char a2)
{
  vector unsigned char lt_mask =
    (vector unsigned char) _mm_cmplt_epi8 ((vector signed char) a1, (vector signed char) a2);
  return all_true_16i (lt_mask);
}

/* vec_all_lt, (vector signed short, vector unsigned short) overload.
   Returns all_true_8i applied to the __IA32_cmplt_epu16 (a1, a2) lane mask.  */
inline int
vec_all_lt (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short lt_mask =
    (vector unsigned short) __IA32_cmplt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return all_true_8i (lt_mask);
}

/* vec_all_lt, (vector unsigned short, vector signed short) overload.
   Returns all_true_8i applied to the __IA32_cmplt_epu16 (a1, a2) lane mask.  */
inline int
vec_all_lt (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short lt_mask =
    (vector unsigned short) __IA32_cmplt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return all_true_8i (lt_mask);
}

/* vec_all_lt, (vector unsigned short, vector unsigned short) overload.
   Returns all_true_8i applied to the __IA32_cmplt_epu16 (a1, a2) lane mask.  */
inline int
vec_all_lt (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short lt_mask =
    (vector unsigned short) __IA32_cmplt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return all_true_8i (lt_mask);
}

/* vec_all_lt, (vector signed short, vector signed short) overload.
   Returns all_true_8i applied to the _mm_cmplt_epi16 (a1, a2) lane mask.  */
inline int
vec_all_lt (vector signed short a1, vector signed short a2)
{
  vector unsigned short lt_mask =
    (vector unsigned short) _mm_cmplt_epi16 ((vector signed short) a1, (vector signed short) a2);
  return all_true_8i (lt_mask);
}

/* vec_all_lt, (vector signed int, vector unsigned int) overload.
   Returns all_true_4i applied to the __IA32_cmplt_epu32 (a1, a2) lane mask.  */
inline int
vec_all_lt (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int lt_mask =
    (vector unsigned int) __IA32_cmplt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return all_true_4i (lt_mask);
}

/* vec_all_lt, (vector unsigned int, vector signed int) overload.
   Returns all_true_4i applied to the __IA32_cmplt_epu32 (a1, a2) lane mask.  */
inline int
vec_all_lt (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int lt_mask =
    (vector unsigned int) __IA32_cmplt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return all_true_4i (lt_mask);
}

/* vec_all_lt, (vector unsigned int, vector unsigned int) overload.
   Returns all_true_4i applied to the __IA32_cmplt_epu32 (a1, a2) lane mask.  */
inline int
vec_all_lt (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int lt_mask =
    (vector unsigned int) __IA32_cmplt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return all_true_4i (lt_mask);
}

/* vec_all_lt, (vector signed int, vector signed int) overload.
   Returns all_true_4i applied to the _mm_cmplt_epi32 (a1, a2) lane mask.  */
inline int
vec_all_lt (vector signed int a1, vector signed int a2)
{
  vector unsigned int lt_mask =
    (vector unsigned int) _mm_cmplt_epi32 ((vector signed int) a1, (vector signed int) a2);
  return all_true_4i (lt_mask);
}

/* vec_all_lt, (vector float, vector float) overload.
   Returns all_true_4f applied to the _mm_cmplt_ps (a1, a2) lane mask.  */
inline int
vec_all_lt (vector float a1, vector float a2)
{
  vector float lt_mask = (vector float) _mm_cmplt_ps (a1, a2);
  return all_true_4f (lt_mask);
}

/* vec_all_nan */

/* vec_all_nan: compares a1 against itself with _mm_cmpneq_ps (a lane is
   unequal to itself only when it holds a NaN) and returns all_true_4f
   applied to that mask.  */
inline int
vec_all_nan (vector float a1)
{
  vector float nan_mask = (vector float) _mm_cmpneq_ps (a1, a1);
  return all_true_4f (nan_mask);
}

/* vec_all_ne */

/* vec_all_ne, (vector signed char, vector unsigned char) overload.
   Phrased through the complementary compare: returns all_false_16i applied
   to the _mm_cmpeq_epi8 (a1, a2) lane mask.  The mask is cast to
   vector unsigned char before the call, as every other all_false_16i /
   any_false_16i caller in this header does.  */
inline int
vec_all_ne (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char eq_mask =
    (vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return all_false_16i (eq_mask);
}

/* vec_all_ne, (vector signed char, vector signed char) overload.
   Phrased through the complementary compare: returns all_false_16i applied
   to the _mm_cmpeq_epi8 (a1, a2) lane mask.  The mask is cast to
   vector unsigned char before the call, as every other all_false_16i /
   any_false_16i caller in this header does.  */
inline int
vec_all_ne (vector signed char a1, vector signed char a2)
{
  vector unsigned char eq_mask =
    (vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return all_false_16i (eq_mask);
}

/* vec_all_ne, (vector unsigned char, vector signed char) overload.
   Phrased through the complementary compare: returns all_false_16i applied
   to the _mm_cmpeq_epi8 (a1, a2) lane mask.  The mask is cast to
   vector unsigned char before the call, as every other all_false_16i /
   any_false_16i caller in this header does.  */
inline int
vec_all_ne (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char eq_mask =
    (vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return all_false_16i (eq_mask);
}

/* vec_all_ne, (vector unsigned char, vector unsigned char) overload.
   Phrased through the complementary compare: returns all_false_16i applied
   to the _mm_cmpeq_epi8 (a1, a2) lane mask.  The mask is cast to
   vector unsigned char before the call, as every other all_false_16i /
   any_false_16i caller in this header does.  */
inline int
vec_all_ne (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char eq_mask =
    (vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return all_false_16i (eq_mask);
}

/* vec_all_ne, (vector signed short, vector unsigned short) overload.
   Phrased through the complementary compare: returns all_false_8i applied
   to the _mm_cmpeq_epi16 (a1, a2) lane mask.  */
inline int
vec_all_ne (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short eq_mask =
    (vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return all_false_8i (eq_mask);
}

/* vec_all_ne, (vector signed short, vector signed short) overload.
   Phrased through the complementary compare: returns all_false_8i applied
   to the _mm_cmpeq_epi16 (a1, a2) lane mask.  */
inline int
vec_all_ne (vector signed short a1, vector signed short a2)
{
  vector unsigned short eq_mask =
    (vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return all_false_8i (eq_mask);
}

/* vec_all_ne, (vector unsigned short, vector signed short) overload.
   Phrased through the complementary compare: returns all_false_8i applied
   to the _mm_cmpeq_epi16 (a1, a2) lane mask.  */
inline int
vec_all_ne (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short eq_mask =
    (vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return all_false_8i (eq_mask);
}

/* vec_all_ne, (vector unsigned short, vector unsigned short) overload.
   Phrased through the complementary compare: returns all_false_8i applied
   to the _mm_cmpeq_epi16 (a1, a2) lane mask.  */
inline int
vec_all_ne (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short eq_mask =
    (vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return all_false_8i (eq_mask);
}

/* vec_all_ne, (vector signed int, vector unsigned int) overload.
   Phrased through the complementary compare: returns all_false_4i applied
   to the _mm_cmpeq_epi32 (a1, a2) lane mask.  */
inline int
vec_all_ne (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int eq_mask =
    (vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return all_false_4i (eq_mask);
}

/* vec_all_ne, (vector signed int, vector signed int) overload.
   Phrased through the complementary compare: returns all_false_4i applied
   to the _mm_cmpeq_epi32 (a1, a2) lane mask.  */
inline int
vec_all_ne (vector signed int a1, vector signed int a2)
{
  vector unsigned int eq_mask =
    (vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return all_false_4i (eq_mask);
}

/* vec_all_ne, (vector unsigned int, vector signed int) overload.
   Phrased through the complementary compare: returns all_false_4i applied
   to the _mm_cmpeq_epi32 (a1, a2) lane mask.  */
inline int
vec_all_ne (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int eq_mask =
    (vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return all_false_4i (eq_mask);
}

/* vec_all_ne, (vector unsigned int, vector unsigned int) overload.
   Phrased through the complementary compare: returns all_false_4i applied
   to the _mm_cmpeq_epi32 (a1, a2) lane mask.  */
inline int
vec_all_ne (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int eq_mask =
    (vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return all_false_4i (eq_mask);
}

/* vec_all_ne, (vector float, vector float) overload.
   Returns all_true_4f applied to the _mm_cmpneq_ps (a1, a2) lane mask.  */
inline int
vec_all_ne (vector float a1, vector float a2)
{
  vector float ne_mask = (vector float) _mm_cmpneq_ps (a1, a2);
  return all_true_4f (ne_mask);
}

/* vec_all_nge */

/* vec_all_nge: returns all_true_4f applied to the _mm_cmpnge_ps (a1, a2)
   ("not greater than or equal") lane mask.  */
inline int
vec_all_nge (vector float a1, vector float a2)
{
  vector float nge_mask = (vector float) _mm_cmpnge_ps (a1, a2);
  return all_true_4f (nge_mask);
}

/* vec_all_ngt */

/* vec_all_ngt: returns all_true_4f applied to the _mm_cmpngt_ps (a1, a2)
   ("not greater than") lane mask.  */
inline int
vec_all_ngt (vector float a1, vector float a2)
{
  vector float ngt_mask = (vector float) _mm_cmpngt_ps (a1, a2);
  return all_true_4f (ngt_mask);
}

/* vec_all_nle */

/* vec_all_nle: returns all_true_4f applied to the _mm_cmpnle_ps (a1, a2)
   ("not less than or equal") lane mask.  */
inline int
vec_all_nle (vector float a1, vector float a2)
{
  vector float nle_mask = (vector float) _mm_cmpnle_ps (a1, a2);
  return all_true_4f (nle_mask);
}

/* vec_all_nlt */

/* vec_all_nlt: returns all_true_4f applied to the _mm_cmpnlt_ps (a1, a2)
   ("a1 not less than a2") lane mask.

   The operands are passed in (a1, a2) order, like vec_any_nlt and the other
   vec_all_n* predicates.  The previous (a2, a1) order computed
   "a2 not less than a1", i.e. the vec_all_ngt test, instead.  */
inline int
vec_all_nlt (vector float a1, vector float a2)
{
  vector float nlt_mask = (vector float) _mm_cmpnlt_ps (a1, a2);
  return all_true_4f (nlt_mask);
}

/* vec_all_numeric */

/* vec_all_numeric: compares a1 against itself with _mm_cmpeq_ps (a NaN lane
   never equals itself) and returns all_true_4f applied to that mask.  */
inline int
vec_all_numeric (vector float a1)
{
  vector float numeric_mask = (vector float) _mm_cmpeq_ps (a1, a1);
  return all_true_4f (numeric_mask);
}

/* vec_any_eq */

/* vec_any_eq, (vector signed char, vector unsigned char) overload.
   Returns any_true_16i applied to the _mm_cmpeq_epi8 (a1, a2) lane mask.  */
inline int
vec_any_eq (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char eq_mask =
    (vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return any_true_16i (eq_mask);
}

/* vec_any_eq, (vector signed char, vector signed char) overload.
   Returns any_true_16i applied to the _mm_cmpeq_epi8 (a1, a2) lane mask.  */
inline int
vec_any_eq (vector signed char a1, vector signed char a2)
{
  vector unsigned char eq_mask =
    (vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return any_true_16i (eq_mask);
}

/* vec_any_eq, (vector unsigned char, vector signed char) overload.
   Returns any_true_16i applied to the _mm_cmpeq_epi8 (a1, a2) lane mask.  */
inline int
vec_any_eq (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char eq_mask =
    (vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return any_true_16i (eq_mask);
}

/* vec_any_eq, (vector unsigned char, vector unsigned char) overload.
   Returns any_true_16i applied to the _mm_cmpeq_epi8 (a1, a2) lane mask.  */
inline int
vec_any_eq (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char eq_mask =
    (vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return any_true_16i (eq_mask);
}

/* vec_any_eq, (vector signed short, vector unsigned short) overload.
   Returns any_true_8i applied to the _mm_cmpeq_epi16 (a1, a2) lane mask.  */
inline int
vec_any_eq (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short eq_mask =
    (vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return any_true_8i (eq_mask);
}

/* vec_any_eq, (vector signed short, vector signed short) overload.
   Returns any_true_8i applied to the _mm_cmpeq_epi16 (a1, a2) lane mask.  */
inline int
vec_any_eq (vector signed short a1, vector signed short a2)
{
  vector unsigned short eq_mask =
    (vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return any_true_8i (eq_mask);
}

/* vec_any_eq, (vector unsigned short, vector signed short) overload.
   Returns any_true_8i applied to the _mm_cmpeq_epi16 (a1, a2) lane mask.  */
inline int
vec_any_eq (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short eq_mask =
    (vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return any_true_8i (eq_mask);
}

/* vec_any_eq, (vector unsigned short, vector unsigned short) overload.
   Returns any_true_8i applied to the _mm_cmpeq_epi16 (a1, a2) lane mask.  */
inline int
vec_any_eq (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short eq_mask =
    (vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return any_true_8i (eq_mask);
}

/* vec_any_eq, (vector signed int, vector unsigned int) overload.
   Returns any_true_4i applied to the _mm_cmpeq_epi32 (a1, a2) lane mask.  */
inline int
vec_any_eq (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int eq_mask =
    (vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return any_true_4i (eq_mask);
}

/* vec_any_eq, (vector signed int, vector signed int) overload.
   Returns any_true_4i applied to the _mm_cmpeq_epi32 (a1, a2) lane mask.  */
inline int
vec_any_eq (vector signed int a1, vector signed int a2)
{
  vector unsigned int eq_mask =
    (vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return any_true_4i (eq_mask);
}

/* vec_any_eq, (vector unsigned int, vector signed int) overload.
   Returns any_true_4i applied to the _mm_cmpeq_epi32 (a1, a2) lane mask.  */
inline int
vec_any_eq (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int eq_mask =
    (vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return any_true_4i (eq_mask);
}

/* vec_any_eq, (vector unsigned int, vector unsigned int) overload.
   Returns any_true_4i applied to the _mm_cmpeq_epi32 (a1, a2) lane mask.  */
inline int
vec_any_eq (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int eq_mask =
    (vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return any_true_4i (eq_mask);
}

/* vec_any_eq, (vector float, vector float) overload.
   Returns any_true_4f applied to the _mm_cmpeq_ps (a1, a2) lane mask.  */
inline int
vec_any_eq (vector float a1, vector float a2)
{
  vector float eq_mask = (vector float) _mm_cmpeq_ps (a1, a2);
  return any_true_4f (eq_mask);
}

/* vec_any_ge */

/* vec_any_ge, (vector signed char, vector unsigned char) overload.
   Phrased through the complementary compare: returns any_false_16i applied
   to the __IA32_cmplt_epu8 (a1, a2) lane mask.  */
inline int
vec_any_ge (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char lt_mask =
    (vector unsigned char) __IA32_cmplt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return any_false_16i (lt_mask);
}

/* vec_any_ge, (vector unsigned char, vector signed char) overload.
   Phrased through the complementary compare: returns any_false_16i applied
   to the __IA32_cmplt_epu8 (a1, a2) lane mask.  */
inline int
vec_any_ge (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char lt_mask =
    (vector unsigned char) __IA32_cmplt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return any_false_16i (lt_mask);
}

/* vec_any_ge, (vector unsigned char, vector unsigned char) overload.
   Phrased through the complementary compare: returns any_false_16i applied
   to the __IA32_cmplt_epu8 (a1, a2) lane mask.  */
inline int
vec_any_ge (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char lt_mask =
    (vector unsigned char) __IA32_cmplt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return any_false_16i (lt_mask);
}

/* vec_any_ge, (vector signed char, vector signed char) overload.
   Phrased through the complementary compare: returns any_false_16i applied
   to the _mm_cmplt_epi8 (a1, a2) lane mask.  */
inline int
vec_any_ge (vector signed char a1, vector signed char a2)
{
  vector unsigned char lt_mask =
    (vector unsigned char) _mm_cmplt_epi8 ((vector signed char) a1, (vector signed char) a2);
  return any_false_16i (lt_mask);
}

/* vec_any_ge, (vector signed short, vector unsigned short) overload.
   Phrased through the complementary compare: returns any_false_8i applied
   to the __IA32_cmplt_epu16 (a1, a2) lane mask.  */
inline int
vec_any_ge (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short lt_mask =
    (vector unsigned short) __IA32_cmplt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return any_false_8i (lt_mask);
}

/* vec_any_ge, (vector unsigned short, vector signed short) overload.
   Phrased through the complementary compare: returns any_false_8i applied
   to the __IA32_cmplt_epu16 (a1, a2) lane mask.  */
inline int
vec_any_ge (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short lt_mask =
    (vector unsigned short) __IA32_cmplt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return any_false_8i (lt_mask);
}

/* vec_any_ge, (vector unsigned short, vector unsigned short) overload.
   Phrased through the complementary compare: returns any_false_8i applied
   to the __IA32_cmplt_epu16 (a1, a2) lane mask.  */
inline int
vec_any_ge (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short lt_mask =
    (vector unsigned short) __IA32_cmplt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return any_false_8i (lt_mask);
}

/* vec_any_ge, (vector signed short, vector signed short) overload.
   Phrased through the complementary compare: returns any_false_8i applied
   to the _mm_cmplt_epi16 (a1, a2) lane mask.  */
inline int
vec_any_ge (vector signed short a1, vector signed short a2)
{
  vector unsigned short lt_mask =
    (vector unsigned short) _mm_cmplt_epi16 ((vector signed short) a1, (vector signed short) a2);
  return any_false_8i (lt_mask);
}

/* vec_any_ge, (vector signed int, vector unsigned int) overload.
   Phrased through the complementary compare: returns any_false_4i applied
   to the __IA32_cmplt_epu32 (a1, a2) lane mask.  */
inline int
vec_any_ge (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int lt_mask =
    (vector unsigned int) __IA32_cmplt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return any_false_4i (lt_mask);
}

/* vec_any_ge, (vector unsigned int, vector signed int) overload.
   Phrased through the complementary compare: returns any_false_4i applied
   to the __IA32_cmplt_epu32 (a1, a2) lane mask.  */
inline int
vec_any_ge (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int lt_mask =
    (vector unsigned int) __IA32_cmplt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return any_false_4i (lt_mask);
}

/* vec_any_ge, (vector unsigned int, vector unsigned int) overload.
   Phrased through the complementary compare: returns any_false_4i applied
   to the __IA32_cmplt_epu32 (a1, a2) lane mask.  */
inline int
vec_any_ge (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int lt_mask =
    (vector unsigned int) __IA32_cmplt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return any_false_4i (lt_mask);
}

/* vec_any_ge, (vector signed int, vector signed int) overload.
   Phrased through the complementary compare: returns any_false_4i applied
   to the _mm_cmplt_epi32 (a1, a2) lane mask.  */
inline int
vec_any_ge (vector signed int a1, vector signed int a2)
{
  vector unsigned int lt_mask =
    (vector unsigned int) _mm_cmplt_epi32 ((vector signed int) a1, (vector signed int) a2);
  return any_false_4i (lt_mask);
}

/* vec_any_ge, (vector float, vector float) overload.
   Returns any_true_4f applied to the _mm_cmpge_ps (a1, a2) lane mask.  */
inline int
vec_any_ge (vector float a1, vector float a2)
{
  vector float ge_mask = (vector float) _mm_cmpge_ps (a1, a2);
  return any_true_4f (ge_mask);
}

/* vec_any_gt */

/* vec_any_gt, (vector signed char, vector unsigned char) overload.
   Returns any_true_16i applied to the __IA32_cmpgt_epu8 (a1, a2) lane mask.  */
inline int
vec_any_gt (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char gt_mask =
    (vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return any_true_16i (gt_mask);
}

/* vec_any_gt, (vector unsigned char, vector signed char) overload.
   Returns any_true_16i applied to the __IA32_cmpgt_epu8 (a1, a2) lane mask.  */
inline int
vec_any_gt (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char gt_mask =
    (vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return any_true_16i (gt_mask);
}

/* vec_any_gt, (vector unsigned char, vector unsigned char) overload.
   Returns any_true_16i applied to the __IA32_cmpgt_epu8 (a1, a2) lane mask.  */
inline int
vec_any_gt (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char gt_mask =
    (vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return any_true_16i (gt_mask);
}

/* vec_any_gt, (vector signed char, vector signed char) overload.
   Returns any_true_16i applied to the _mm_cmpgt_epi8 (a1, a2) lane mask.  */
inline int
vec_any_gt (vector signed char a1, vector signed char a2)
{
  vector unsigned char gt_mask =
    (vector unsigned char) _mm_cmpgt_epi8 ((vector signed char) a1, (vector signed char) a2);
  return any_true_16i (gt_mask);
}

/* vec_any_gt, (vector signed short, vector unsigned short) overload.
   Returns any_true_8i applied to the __IA32_cmpgt_epu16 (a1, a2) lane mask.  */
inline int
vec_any_gt (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short gt_mask =
    (vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return any_true_8i (gt_mask);
}

/* vec_any_gt, (vector unsigned short, vector signed short) overload.
   Returns any_true_8i applied to the __IA32_cmpgt_epu16 (a1, a2) lane mask.  */
inline int
vec_any_gt (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short gt_mask =
    (vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return any_true_8i (gt_mask);
}

/* vec_any_gt, (vector unsigned short, vector unsigned short) overload.
   Returns any_true_8i applied to the __IA32_cmpgt_epu16 (a1, a2) lane mask.  */
inline int
vec_any_gt (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short gt_mask =
    (vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return any_true_8i (gt_mask);
}

/* vec_any_gt, (vector signed short, vector signed short) overload.
   Returns any_true_8i applied to the _mm_cmpgt_epi16 (a1, a2) lane mask.  */
inline int
vec_any_gt (vector signed short a1, vector signed short a2)
{
  vector unsigned short gt_mask =
    (vector unsigned short) _mm_cmpgt_epi16 ((vector signed short) a1, (vector signed short) a2);
  return any_true_8i (gt_mask);
}

/* vec_any_gt, (vector signed int, vector unsigned int) overload.
   Returns any_true_4i applied to the __IA32_cmpgt_epu32 (a1, a2) lane mask.  */
inline int
vec_any_gt (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return any_true_4i (gt_mask);
}

/* vec_any_gt, (vector unsigned int, vector signed int) overload.
   Returns any_true_4i applied to the __IA32_cmpgt_epu32 (a1, a2) lane mask.  */
inline int
vec_any_gt (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return any_true_4i (gt_mask);
}

/* vec_any_gt, (vector unsigned int, vector unsigned int) overload.
   Returns any_true_4i applied to the __IA32_cmpgt_epu32 (a1, a2) lane mask.  */
inline int
vec_any_gt (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return any_true_4i (gt_mask);
}

/* vec_any_gt, (vector signed int, vector signed int) overload.
   Returns any_true_4i applied to the _mm_cmpgt_epi32 (a1, a2) lane mask.  */
inline int
vec_any_gt (vector signed int a1, vector signed int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) _mm_cmpgt_epi32 ((vector signed int) a1, (vector signed int) a2);
  return any_true_4i (gt_mask);
}

/* vec_any_gt, (vector float, vector float) overload.
   Returns any_true_4f applied to the _mm_cmpgt_ps (a1, a2) lane mask.  */
inline int
vec_any_gt (vector float a1, vector float a2)
{
  vector float gt_mask = (vector float) _mm_cmpgt_ps (a1, a2);
  return any_true_4f (gt_mask);
}

/* vec_any_le */

/* vec_any_le, (vector signed char, vector unsigned char) overload.
   Phrased through the complementary compare: returns any_false_16i applied
   to the __IA32_cmpgt_epu8 (a1, a2) lane mask.  */
inline int
vec_any_le (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char gt_mask =
    (vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return any_false_16i (gt_mask);
}

/* vec_any_le, (vector unsigned char, vector signed char) overload.
   Phrased through the complementary compare: returns any_false_16i applied
   to the __IA32_cmpgt_epu8 (a1, a2) lane mask.  */
inline int
vec_any_le (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char gt_mask =
    (vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return any_false_16i (gt_mask);
}

/* vec_any_le, (vector unsigned char, vector unsigned char) overload.
   Phrased through the complementary compare: returns any_false_16i applied
   to the __IA32_cmpgt_epu8 (a1, a2) lane mask.  */
inline int
vec_any_le (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char gt_mask =
    (vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return any_false_16i (gt_mask);
}

/* vec_any_le, (vector signed char, vector signed char) overload.
   Phrased through the complementary compare: returns any_false_16i applied
   to the _mm_cmpgt_epi8 (a1, a2) lane mask.  */
inline int
vec_any_le (vector signed char a1, vector signed char a2)
{
  vector unsigned char gt_mask =
    (vector unsigned char) _mm_cmpgt_epi8 ((vector signed char) a1, (vector signed char) a2);
  return any_false_16i (gt_mask);
}

/* vec_any_le, (vector signed short, vector unsigned short) overload.
   Phrased through the complementary compare: returns any_false_8i applied
   to the __IA32_cmpgt_epu16 (a1, a2) lane mask.  */
inline int
vec_any_le (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short gt_mask =
    (vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return any_false_8i (gt_mask);
}

/* vec_any_le, (vector unsigned short, vector signed short) overload.
   Phrased through the complementary compare: returns any_false_8i applied
   to the __IA32_cmpgt_epu16 (a1, a2) lane mask.  */
inline int
vec_any_le (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short gt_mask =
    (vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return any_false_8i (gt_mask);
}

/* vec_any_le, (vector unsigned short, vector unsigned short) overload.
   Phrased through the complementary compare: returns any_false_8i applied
   to the __IA32_cmpgt_epu16 (a1, a2) lane mask.  */
inline int
vec_any_le (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short gt_mask =
    (vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return any_false_8i (gt_mask);
}

/* vec_any_le, (vector signed short, vector signed short) overload.
   Phrased through the complementary compare: returns any_false_8i applied
   to the _mm_cmpgt_epi16 (a1, a2) lane mask.  */
inline int
vec_any_le (vector signed short a1, vector signed short a2)
{
  vector unsigned short gt_mask =
    (vector unsigned short) _mm_cmpgt_epi16 ((vector signed short) a1, (vector signed short) a2);
  return any_false_8i (gt_mask);
}

/* vec_any_le, (vector signed int, vector unsigned int) overload.
   Phrased through the complementary compare: returns any_false_4i applied
   to the __IA32_cmpgt_epu32 (a1, a2) lane mask.  */
inline int
vec_any_le (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return any_false_4i (gt_mask);
}

/* vec_any_le, (vector unsigned int, vector signed int) overload.
   Phrased through the complementary compare: returns any_false_4i applied
   to the __IA32_cmpgt_epu32 (a1, a2) lane mask.  */
inline int
vec_any_le (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return any_false_4i (gt_mask);
}

/* vec_any_le, (vector unsigned int, vector unsigned int) overload.
   Phrased through the complementary compare: returns any_false_4i applied
   to the __IA32_cmpgt_epu32 (a1, a2) lane mask.  */
inline int
vec_any_le (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return any_false_4i (gt_mask);
}

/* vec_any_le, (vector signed int, vector signed int) overload.
   Phrased through the complementary compare: returns any_false_4i applied
   to the _mm_cmpgt_epi32 (a1, a2) lane mask.  */
inline int
vec_any_le (vector signed int a1, vector signed int a2)
{
  vector unsigned int gt_mask =
    (vector unsigned int) _mm_cmpgt_epi32 ((vector signed int) a1, (vector signed int) a2);
  return any_false_4i (gt_mask);
}

/* vec_any_le, (vector float, vector float) overload.
   Returns any_true_4f applied to the _mm_cmple_ps (a1, a2) lane mask.  */
inline int
vec_any_le (vector float a1, vector float a2)
{
  vector float le_mask = (vector float) _mm_cmple_ps (a1, a2);
  return any_true_4f (le_mask);
}

/* vec_any_lt */

/* vec_any_lt, (vector signed char, vector unsigned char) overload.
   Returns any_true_16i applied to the __IA32_cmplt_epu8 (a1, a2) lane mask.  */
inline int
vec_any_lt (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char lt_mask =
    (vector unsigned char) __IA32_cmplt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return any_true_16i (lt_mask);
}

/* vec_any_lt, (vector unsigned char, vector signed char) overload.
   Returns any_true_16i applied to the __IA32_cmplt_epu8 (a1, a2) lane mask.  */
inline int
vec_any_lt (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char lt_mask =
    (vector unsigned char) __IA32_cmplt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return any_true_16i (lt_mask);
}

/* vec_any_lt, (vector unsigned char, vector unsigned char) overload.
   Returns any_true_16i applied to the __IA32_cmplt_epu8 (a1, a2) lane mask.  */
inline int
vec_any_lt (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char lt_mask =
    (vector unsigned char) __IA32_cmplt_epu8 ((vector signed char) a1, (vector signed char) a2);
  return any_true_16i (lt_mask);
}

/* vec_any_lt, (vector signed char, vector signed char) overload.
   Returns any_true_16i applied to the _mm_cmplt_epi8 (a1, a2) lane mask.  */
inline int
vec_any_lt (vector signed char a1, vector signed char a2)
{
  vector unsigned char lt_mask =
    (vector unsigned char) _mm_cmplt_epi8 ((vector signed char) a1, (vector signed char) a2);
  return any_true_16i (lt_mask);
}

/* vec_any_lt, (vector signed short, vector unsigned short) overload.
   Returns any_true_8i applied to the __IA32_cmplt_epu16 (a1, a2) lane mask.  */
inline int
vec_any_lt (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short lt_mask =
    (vector unsigned short) __IA32_cmplt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return any_true_8i (lt_mask);
}

/* vec_any_lt, (vector unsigned short, vector signed short) overload.
   Returns any_true_8i applied to the __IA32_cmplt_epu16 (a1, a2) lane mask.  */
inline int
vec_any_lt (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short lt_mask =
    (vector unsigned short) __IA32_cmplt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return any_true_8i (lt_mask);
}

/* vec_any_lt, (vector unsigned short, vector unsigned short) overload.
   Returns any_true_8i applied to the __IA32_cmplt_epu16 (a1, a2) lane mask.  */
inline int
vec_any_lt (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short lt_mask =
    (vector unsigned short) __IA32_cmplt_epu16 ((vector signed short) a1, (vector signed short) a2);
  return any_true_8i (lt_mask);
}

/* vec_any_lt, (vector signed short, vector signed short) overload.
   Returns any_true_8i applied to the _mm_cmplt_epi16 (a1, a2) lane mask.  */
inline int
vec_any_lt (vector signed short a1, vector signed short a2)
{
  vector unsigned short lt_mask =
    (vector unsigned short) _mm_cmplt_epi16 ((vector signed short) a1, (vector signed short) a2);
  return any_true_8i (lt_mask);
}

/* vec_any_lt, (vector signed int, vector unsigned int) overload.
   Returns any_true_4i applied to the __IA32_cmplt_epu32 (a1, a2) lane mask.  */
inline int
vec_any_lt (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int lt_mask =
    (vector unsigned int) __IA32_cmplt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return any_true_4i (lt_mask);
}

/* vec_any_lt, (vector unsigned int, vector signed int) overload.
   Returns any_true_4i applied to the __IA32_cmplt_epu32 (a1, a2) lane mask.  */
inline int
vec_any_lt (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int lt_mask =
    (vector unsigned int) __IA32_cmplt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return any_true_4i (lt_mask);
}

/* vec_any_lt, (vector unsigned int, vector unsigned int) overload.
   Returns any_true_4i applied to the __IA32_cmplt_epu32 (a1, a2) lane mask.  */
inline int
vec_any_lt (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int lt_mask =
    (vector unsigned int) __IA32_cmplt_epu32 ((vector signed int) a1, (vector signed int) a2);
  return any_true_4i (lt_mask);
}

/* vec_any_lt, (vector signed int, vector signed int) overload.
   Returns any_true_4i applied to the _mm_cmplt_epi32 (a1, a2) lane mask.  */
inline int
vec_any_lt (vector signed int a1, vector signed int a2)
{
  vector unsigned int lt_mask =
    (vector unsigned int) _mm_cmplt_epi32 ((vector signed int) a1, (vector signed int) a2);
  return any_true_4i (lt_mask);
}

/* vec_any_lt, (vector float, vector float) overload.
   Returns any_true_4f applied to the _mm_cmplt_ps (a1, a2) lane mask.  */
inline int
vec_any_lt (vector float a1, vector float a2)
{
  vector float lt_mask = (vector float) _mm_cmplt_ps (a1, a2);
  return any_true_4f (lt_mask);
}

/* vec_any_nan */

/* vec_any_nan: compares a1 against itself with _mm_cmpneq_ps (a lane is
   unequal to itself only when it holds a NaN) and returns any_true_4f
   applied to that mask.  */
inline int
vec_any_nan (vector float a1)
{
  vector float nan_mask = (vector float) _mm_cmpneq_ps (a1, a1);
  return any_true_4f (nan_mask);
}

/* vec_any_ne */

/* vec_any_ne, (vector signed char, vector unsigned char) overload.
   Phrased through the complementary compare: returns any_false_16i applied
   to the _mm_cmpeq_epi8 (a1, a2) lane mask.  */
inline int
vec_any_ne (vector signed char a1, vector unsigned char a2)
{
  vector unsigned char eq_mask =
    (vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return any_false_16i (eq_mask);
}

/* vec_any_ne, (vector signed char, vector signed char) overload.
   Phrased through the complementary compare: returns any_false_16i applied
   to the _mm_cmpeq_epi8 (a1, a2) lane mask.  */
inline int
vec_any_ne (vector signed char a1, vector signed char a2)
{
  vector unsigned char eq_mask =
    (vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return any_false_16i (eq_mask);
}

/* vec_any_ne, (vector unsigned char, vector signed char) overload.
   Phrased through the complementary compare: returns any_false_16i applied
   to the _mm_cmpeq_epi8 (a1, a2) lane mask.  */
inline int
vec_any_ne (vector unsigned char a1, vector signed char a2)
{
  vector unsigned char eq_mask =
    (vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return any_false_16i (eq_mask);
}

/* vec_any_ne, (vector unsigned char, vector unsigned char) overload.
   Phrased through the complementary compare: returns any_false_16i applied
   to the _mm_cmpeq_epi8 (a1, a2) lane mask.  */
inline int
vec_any_ne (vector unsigned char a1, vector unsigned char a2)
{
  vector unsigned char eq_mask =
    (vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) a1, (vector signed char) a2);
  return any_false_16i (eq_mask);
}

/* vec_any_ne, (vector signed short, vector unsigned short) overload.
   Phrased through the complementary compare: returns any_false_8i applied
   to the _mm_cmpeq_epi16 (a1, a2) lane mask.  */
inline int
vec_any_ne (vector signed short a1, vector unsigned short a2)
{
  vector unsigned short eq_mask =
    (vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return any_false_8i (eq_mask);
}

/* vec_any_ne, (vector signed short, vector signed short) overload.
   Phrased through the complementary compare: returns any_false_8i applied
   to the _mm_cmpeq_epi16 (a1, a2) lane mask.  */
inline int
vec_any_ne (vector signed short a1, vector signed short a2)
{
  vector unsigned short eq_mask =
    (vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return any_false_8i (eq_mask);
}

/* vec_any_ne, (vector unsigned short, vector signed short) overload.
   Phrased through the complementary compare: returns any_false_8i applied
   to the _mm_cmpeq_epi16 (a1, a2) lane mask.  */
inline int
vec_any_ne (vector unsigned short a1, vector signed short a2)
{
  vector unsigned short eq_mask =
    (vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return any_false_8i (eq_mask);
}

/* vec_any_ne, (vector unsigned short, vector unsigned short) overload.
   Phrased through the complementary compare: returns any_false_8i applied
   to the _mm_cmpeq_epi16 (a1, a2) lane mask.  */
inline int
vec_any_ne (vector unsigned short a1, vector unsigned short a2)
{
  vector unsigned short eq_mask =
    (vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) a1, (vector signed short) a2);
  return any_false_8i (eq_mask);
}

/* vec_any_ne, (vector signed int, vector unsigned int) overload.
   Phrased through the complementary compare: returns any_false_4i applied
   to the _mm_cmpeq_epi32 (a1, a2) lane mask.  */
inline int
vec_any_ne (vector signed int a1, vector unsigned int a2)
{
  vector unsigned int eq_mask =
    (vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return any_false_4i (eq_mask);
}

/* vec_any_ne, (vector signed int, vector signed int) overload.
   Phrased through the complementary compare: returns any_false_4i applied
   to the _mm_cmpeq_epi32 (a1, a2) lane mask.  */
inline int
vec_any_ne (vector signed int a1, vector signed int a2)
{
  vector unsigned int eq_mask =
    (vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return any_false_4i (eq_mask);
}

/* vec_any_ne, (vector unsigned int, vector signed int) overload.
   Phrased through the complementary compare: returns any_false_4i applied
   to the _mm_cmpeq_epi32 (a1, a2) lane mask.  */
inline int
vec_any_ne (vector unsigned int a1, vector signed int a2)
{
  vector unsigned int eq_mask =
    (vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return any_false_4i (eq_mask);
}

/* vec_any_ne, (vector unsigned int, vector unsigned int) overload.
   Phrased through the complementary compare: returns any_false_4i applied
   to the _mm_cmpeq_epi32 (a1, a2) lane mask.  */
inline int
vec_any_ne (vector unsigned int a1, vector unsigned int a2)
{
  vector unsigned int eq_mask =
    (vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) a1, (vector signed int) a2);
  return any_false_4i (eq_mask);
}

/* vec_any_ne, (vector float, vector float) overload.
   Returns any_true_4f applied to the _mm_cmpneq_ps (a1, a2) lane mask.  */
inline int
vec_any_ne (vector float a1, vector float a2)
{
  vector float ne_mask = (vector float) _mm_cmpneq_ps (a1, a2);
  return any_true_4f (ne_mask);
}

/* vec_any_nge */

/* vec_any_nge: returns any_true_4f applied to the _mm_cmpnge_ps (a1, a2)
   ("not greater than or equal") lane mask.  */
inline int
vec_any_nge (vector float a1, vector float a2)
{
  vector float nge_mask = (vector float) _mm_cmpnge_ps (a1, a2);
  return any_true_4f (nge_mask);
}

/* vec_any_ngt */

/* vec_any_ngt: returns any_true_4f applied to the _mm_cmpngt_ps (a1, a2)
   ("not greater than") lane mask.  */
inline int
vec_any_ngt (vector float a1, vector float a2)
{
  vector float ngt_mask = (vector float) _mm_cmpngt_ps (a1, a2);
  return any_true_4f (ngt_mask);
}

/* vec_any_nle */

/* vec_any_nle: returns any_true_4f applied to the _mm_cmpnle_ps (a1, a2)
   ("not less than or equal") lane mask.  */
inline int
vec_any_nle (vector float a1, vector float a2)
{
  vector float nle_mask = (vector float) _mm_cmpnle_ps (a1, a2);
  return any_true_4f (nle_mask);
}

/* vec_any_nlt */

/* vec_any_nlt: returns any_true_4f applied to the _mm_cmpnlt_ps (a1, a2)
   ("not less than") lane mask.  */
inline int
vec_any_nlt (vector float a1, vector float a2)
{
  vector float nlt_mask = (vector float) _mm_cmpnlt_ps (a1, a2);
  return any_true_4f (nlt_mask);
}

/* vec_any_numeric */

/* vec_any_numeric: compares a1 against itself with _mm_cmpeq_ps (a NaN lane
   never equals itself) and returns any_true_4f applied to that mask.  */
inline int
vec_any_numeric (vector float a1)
{
  vector float numeric_mask = (vector float) _mm_cmpeq_ps (a1, a1);
  return any_true_4f (numeric_mask);
}

/* vec_any_out */

/* vec_any_out: returns any_true_4f applied to the result of
   __IA32_vcmpbfp (a1, a2) (bounds-compare helper defined elsewhere in this
   header).  */
inline int
vec_any_out (vector float a1, vector float a2)
{
  vector float bounds_mask = (vector float) __IA32_vcmpbfp (a1, a2);
  return any_true_4f (bounds_mask);
}

/* vec_step */

/* Primary template behind the C++ vec_step macro.  It deliberately has no
   _S_elem member, so vec_step on a type without a specialization fails to
   compile.  */
template<typename _Tp>
struct __vec_step_help
{
  // All proper vector types will specialize _S_elem.
};

/* vec_step helper: records 8 as the element count of vector signed short.  */
template<>
struct __vec_step_help<vector signed short>
{
  static const int _S_elem = 8;
};

/* vec_step helper: records 8 as the element count of vector unsigned short.  */
template<>
struct __vec_step_help<vector unsigned short>
{
  static const int _S_elem = 8;
};

/* vec_step helper: records 4 as the element count of vector signed int.  */
template<>
struct __vec_step_help<vector signed int>
{
  static const int _S_elem = 4;
};

/* vec_step helper: records 4 as the element count of vector unsigned int.  */
template<>
struct __vec_step_help<vector unsigned int>
{
  static const int _S_elem = 4;
};

/* vec_step helper: records 16 as the element count of vector unsigned char.  */
template<>
struct __vec_step_help<vector unsigned char>
{
  static const int _S_elem = 16;
};

/* vec_step helper: records 16 as the element count of vector signed char.  */
template<>
struct __vec_step_help<vector signed char>
{
  static const int _S_elem = 16;
};

/* vec_step helper: records 4 as the element count of vector float.  */
template<>
struct __vec_step_help<vector float>
{
  static const int _S_elem = 4;
};

/* vec_step(t): element count of the vector type of expression t, looked up
   through the matching __vec_step_help specialization.  Spelled with
   __typeof__ rather than typeof so the header also works when the compiler
   runs in a strict ISO mode, where the bare typeof keyword is disabled.  */
#define vec_step(t)  __vec_step_help<__typeof__(t)>::_S_elem

/**************************************************************************/
#else /* not C++ */

/* vec_abs(a): type-dispatched absolute value.  The nested __ch chain tests
   (a) with __un_args_eq against vector signed char, signed short,
   signed int and float in turn; the first match expands to the
   corresponding __IA32_abs_* helper with its result cast back to that
   type.  If nothing matches the expansion is __ERROR_INVALID_ARGUMENT ().
   NOTE(review): __ch and __un_args_eq are defined elsewhere in this header;
   presumably a compile-time selection -- confirm.  */
#define vec_abs(a) \
  __ch (__un_args_eq (vector signed char, (a)), \
        ((vector signed char) __IA32_abs_v16qi ((vector signed char) (a))), \
  __ch (__un_args_eq (vector signed short, (a)), \
        ((vector signed short) __IA32_abs_v8hi ((vector signed short) (a))), \
  __ch (__un_args_eq (vector signed int, (a)), \
        ((vector signed int) __IA32_abs_v4si ((vector signed int) (a))), \
  __ch (__un_args_eq (vector float, (a)), \
        ((vector float) __IA32_abs_v4sf ((vector float) (a))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_abss(a): same dispatch shape as vec_abs, but routed to the
   __IA32_abss_* helpers and limited to the three signed integer vector
   types (signed char, signed short, signed int); there is no float branch.
   Any other argument type expands to __ERROR_INVALID_ARGUMENT ().  */
#define vec_abss(a) \
  __ch (__un_args_eq (vector signed char, (a)), \
        ((vector signed char) __IA32_abss_v16qi ((vector signed char) (a))), \
  __ch (__un_args_eq (vector signed short, (a)), \
        ((vector signed short) __IA32_abss_v8hi ((vector signed short) (a))), \
  __ch (__un_args_eq (vector signed int, (a)), \
        ((vector signed int) __IA32_abss_v4si ((vector signed int) (a))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_step(t): element count of the vector type of expression t -- 4 for
   the int and float vectors, 8 for the short vectors, 16 for the char
   vectors.  The type of t is matched with __builtin_types_compatible_p;
   an unlisted type expands to __ERROR_INVALID_ARGUMENT ().
   __typeof__ is used instead of typeof so the macro also works under
   strict ISO modes (-std=c99, -std=c11, -ansi), where the bare typeof
   keyword is not recognised.  */
#define vec_step(t) \
  __ch (__builtin_types_compatible_p (__typeof__ (t), vector signed int), 4,      \
  __ch (__builtin_types_compatible_p (__typeof__ (t), vector unsigned int), 4,    \
  __ch (__builtin_types_compatible_p (__typeof__ (t), vector signed short), 8,    \
  __ch (__builtin_types_compatible_p (__typeof__ (t), vector unsigned short), 8,  \
  __ch (__builtin_types_compatible_p (__typeof__ (t), vector signed char), 16,    \
  __ch (__builtin_types_compatible_p (__typeof__ (t), vector unsigned char), 16,  \
  __ch (__builtin_types_compatible_p (__typeof__ (t), vector float), 4,           \
  __ERROR_INVALID_ARGUMENT ())))))))

/* vec_vaddubm(a1, a2): byte-lane add via _mm_add_epi8 for every
   signed/unsigned char pairing.  The result is cast to vector signed char
   only when both operands are signed; any pairing involving an unsigned
   operand yields vector unsigned char.  Other argument types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vaddubm(a1, a2) \
__ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
      ((vector signed char) _mm_add_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
__ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
      ((vector unsigned char) _mm_add_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
__ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
      ((vector unsigned char) _mm_add_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
__ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
      ((vector unsigned char) _mm_add_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_vadduhm(a1, a2): 16-bit-lane add via _mm_add_epi16 for every
   signed/unsigned short pairing.  The result is cast to vector signed short
   only when both operands are signed; any pairing involving an unsigned
   operand yields vector unsigned short.  Other argument types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vadduhm(a1, a2) \
__ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
      ((vector signed short) _mm_add_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
__ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
      ((vector unsigned short) _mm_add_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
__ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
      ((vector unsigned short) _mm_add_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
__ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
      ((vector unsigned short) _mm_add_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_vadduwm(a1, a2): 32-bit-lane add via _mm_add_epi32 for every
   signed/unsigned int pairing.  The result is cast to vector signed int
   only when both operands are signed; any pairing involving an unsigned
   operand yields vector unsigned int.  Other argument types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vadduwm(a1, a2) \
__ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
      ((vector signed int) _mm_add_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
      ((vector unsigned int) _mm_add_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
      ((vector unsigned int) _mm_add_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
      ((vector unsigned int) _mm_add_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_vaddfp(a1, a2): single-precision lane add via _mm_add_ps; accepted
   only when both operands are vector float, otherwise the expansion is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vaddfp(a1, a2) \
__ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
      ((vector float) _mm_add_ps ((vector float) (a1), (vector float) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_add(a1, a2): generic lane-wise add.  The __ch chain covers, in order,
   the four char pairings (_mm_add_epi8), the four short pairings
   (_mm_add_epi16), the four int pairings (_mm_add_epi32) and
   float/float (_mm_add_ps).  For the integer branches the result is the
   signed vector type only when both operands are signed; pairings that
   involve an unsigned operand yield the unsigned type.  Any other argument
   combination expands to __ERROR_INVALID_ARGUMENT ().  */
#define vec_add(a1, a2) \
__ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
      ((vector signed char) _mm_add_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
__ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
      ((vector unsigned char) _mm_add_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
__ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
      ((vector unsigned char) _mm_add_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
__ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
      ((vector unsigned char) _mm_add_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
__ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
      ((vector signed short) _mm_add_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
__ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
      ((vector unsigned short) _mm_add_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
__ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
      ((vector unsigned short) _mm_add_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
__ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
      ((vector unsigned short) _mm_add_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
__ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
      ((vector signed int) _mm_add_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
      ((vector unsigned int) _mm_add_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
      ((vector unsigned int) _mm_add_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
      ((vector unsigned int) _mm_add_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
      ((vector float) _mm_add_ps ((vector float) (a1), (vector float) (a2))), \
    __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_addc: accepts only (vector unsigned int, vector unsigned int) and
   forwards both operands, viewed as vector signed int, to __IA32_vaddcuw.
   Any other operand pairing selects __ERROR_INVALID_ARGUMENT ().  */
#define vec_addc(a1, a2) \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vaddcuw ((vector signed int) (a1), (vector signed int) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_adds: type-dispatched add for 8-, 16- and 32-bit integer vectors.
   For each element width, the (signed, signed) pairing uses the signed
   routine (_mm_adds_epi8, _mm_adds_epi16, __IA32_vaddsws) and yields a
   signed vector; every pairing that involves an unsigned operand uses the
   unsigned routine (_mm_adds_epu8, _mm_adds_epu16, __IA32_vadduws) and
   yields an unsigned vector.  Operands are always passed as the signed
   vector type of the matching width.  Unsupported operand types select
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_adds(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) _mm_adds_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_adds_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        ((vector unsigned char) _mm_adds_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_adds_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) _mm_adds_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_adds_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        ((vector unsigned short) _mm_adds_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_adds_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) __IA32_vaddsws ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vadduws ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        ((vector unsigned int) __IA32_vadduws ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vadduws ((vector signed int) (a1), (vector signed int) (a2))), \
        __ERROR_INVALID_ARGUMENT ()))))))))))))


/* vec_vaddsws: word-specific form.  Only (vector signed int,
   vector signed int) is accepted; it is routed to __IA32_vaddsws.  */
#define vec_vaddsws(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) __IA32_vaddsws ((vector signed int) (a1), (vector signed int) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vadduws: word-specific unsigned form.  Accepts any int-vector pairing
   with at least one unsigned operand, routes it to __IA32_vadduws and yields
   vector unsigned int.  (signed, signed) is rejected.  */
#define vec_vadduws(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vadduws ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        ((vector unsigned int) __IA32_vadduws ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vadduws ((vector signed int) (a1), (vector signed int) (a2))), \
        __ERROR_INVALID_ARGUMENT ())))

/* vec_vaddshs: halfword-specific signed form.  Only (vector signed short,
   vector signed short) is accepted; it maps to _mm_adds_epi16.  */
#define vec_vaddshs(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) _mm_adds_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vadduhs: halfword-specific unsigned form.  Accepts any short-vector
   pairing with at least one unsigned operand, maps it to _mm_adds_epu16 and
   yields vector unsigned short.  (signed, signed) is rejected.  */
#define vec_vadduhs(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_adds_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        ((vector unsigned short) _mm_adds_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_adds_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
        __ERROR_INVALID_ARGUMENT ())))

/* vec_vaddsbs: byte-specific signed form.  Only (vector signed char,
   vector signed char) is accepted; it maps to _mm_adds_epi8.  */
#define vec_vaddsbs(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) _mm_adds_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vaddubs: byte-specific unsigned form.  Accepts any char-vector pairing
   with at least one unsigned operand, maps it to _mm_adds_epu8 and yields
   vector unsigned char.  (signed, signed) is rejected.  */
#define vec_vaddubs(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_adds_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        ((vector unsigned char) _mm_adds_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_adds_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
        __ERROR_INVALID_ARGUMENT ())))

/* vec_and: bitwise AND for every supported operand pairing.  The operation
   is always _mm_and_si128 on both operands viewed as vector signed int; the
   dispatch only decides the type the result is cast back to:
     - integer vectors: signed only when both operands are signed, otherwise
       unsigned of the same element width;
     - any pairing involving vector float (with float or signed int): float.
   Unsupported operand types select __ERROR_INVALID_ARGUMENT ().  */
#define vec_and(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        ((vector unsigned char) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        ((vector unsigned short) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        ((vector unsigned int) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        ((vector float) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector signed int, (a2)), \
        ((vector float) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector float, (a2)), \
        ((vector float) _mm_and_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
        __ERROR_INVALID_ARGUMENT ())))))))))))))))

/* vec_andc: AND-with-complement for every supported operand pairing.  The
   operation is always reversed_andnot_si128 (defined elsewhere in this
   header) on both operands viewed as vector signed int; the dispatch only
   decides the type the result is cast back to:
     - any pairing involving vector float (with float, signed int or
       unsigned int): vector float;
     - integer vectors: signed only when both operands are signed, otherwise
       unsigned of the same element width.
   Unsupported operand types select __ERROR_INVALID_ARGUMENT ().

   Each operand pairing appears exactly once.  Earlier revisions repeated
   several short and char pairings further down the chain, some with a
   different result type; because the first matching test wins those
   repeats could never be selected and have been removed.  */
#define vec_andc(a1, a2) \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        ((vector float) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector unsigned int, (a2)), \
        ((vector float) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector float, (a2)), \
        ((vector float) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector float, (a2)), \
        ((vector float) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector signed int, (a2)), \
        ((vector float) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        ((vector unsigned int) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        ((vector unsigned short) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        ((vector unsigned char) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) reversed_andnot_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
        __ERROR_INVALID_ARGUMENT ())))))))))))))))))

/* vec_avg: element-wise average; both operands must have the same type.
   Unsigned char and unsigned short map directly to _mm_avg_epu8 and
   _mm_avg_epu16; the signed variants and both 32-bit variants go through
   the __IA32_vavg* helpers.  The result has the operand type.  Anything
   else selects __ERROR_INVALID_ARGUMENT ().  */
#define vec_avg(a1, a2) \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_avg_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) __IA32_vavgsb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_avg_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) __IA32_vavgsh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vavguw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) __IA32_vavgsw ((vector signed int) (a1), (vector signed int) (a2))), \
        __ERROR_INVALID_ARGUMENT ()))))))

/* vec_vavgsw: signed-word average.  Only (vector signed int,
   vector signed int) is accepted; it is routed to __IA32_vavgsw.  */
#define vec_vavgsw(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) __IA32_vavgsw ((vector signed int) (a1), (vector signed int) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vavguw: unsigned-word average.  Only (vector unsigned int,
   vector unsigned int) is accepted; it is routed to __IA32_vavguw.  */
#define vec_vavguw(a1, a2) \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vavguw ((vector signed int) (a1), (vector signed int) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vavgsh: signed-halfword average.  Only (vector signed short,
   vector signed short) is accepted; it is routed to __IA32_vavgsh.  */
#define vec_vavgsh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) __IA32_vavgsh ((vector signed short) (a1), (vector signed short) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vavguh: unsigned-halfword average.  Only (vector unsigned short,
   vector unsigned short) is accepted; it maps to _mm_avg_epu16.  */
#define vec_vavguh(a1, a2) \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_avg_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vavgsb: signed-byte average.  Only (vector signed char,
   vector signed char) is accepted; it is routed to __IA32_vavgsb.  */
#define vec_vavgsb(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) __IA32_vavgsb ((vector signed char) (a1), (vector signed char) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vavgub: unsigned-byte average.  Only (vector unsigned char,
   vector unsigned char) is accepted; it maps to _mm_avg_epu8.  */
#define vec_vavgub(a1, a2) \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_avg_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_ceil: forwards its single operand, unchecked, to __IA32_vrfip
   (declared elsewhere in this header).  NOTE(review): presumably the
   round-toward-plus-infinity helper, going by the name -- confirm.  */
#define vec_ceil(a1) __IA32_vrfip ((a1))

/* vec_cmpb: forwards both operands, unchecked, to __IA32_vcmpbfp (declared
   elsewhere in this header).  No type dispatch is done here.  */
#define vec_cmpb(a1, a2) __IA32_vcmpbfp ((a1), (a2))

/* vec_cmpeq: element-wise equality; both operands must have the same type.
   Integer vectors use _mm_cmpeq_epi8/16/32 regardless of signedness and the
   mask is returned as the signed vector of the same element width.  Float
   vectors use _mm_cmpeq_ps and the mask is returned as vector signed int.
   Anything else selects __ERROR_INVALID_ARGUMENT ().  */
#define vec_cmpeq(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) _mm_cmpeq_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) _mm_cmpeq_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) _mm_cmpeq_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) _mm_cmpeq_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) _mm_cmpeq_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) _mm_cmpeq_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        ((vector signed int) _mm_cmpeq_ps ((vector float) (a1), (vector float) (a2))), \
        __ERROR_INVALID_ARGUMENT ())))))))

/* vec_vcmpeqfp: float-only equality.  Accepts (vector float, vector float),
   maps to _mm_cmpeq_ps and returns the mask as vector signed int.  */
#define vec_vcmpeqfp(a1, a2) \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        ((vector signed int) _mm_cmpeq_ps ((vector float) (a1), (vector float) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vcmpequw: word equality.  Accepts two signed or two unsigned int
   vectors, maps to _mm_cmpeq_epi32 and returns the mask as
   vector signed int.  */
#define vec_vcmpequw(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) _mm_cmpeq_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) _mm_cmpeq_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
        __ERROR_INVALID_ARGUMENT ()))

/* vec_vcmpequh: halfword equality.  Accepts two signed or two unsigned
   short vectors, maps to _mm_cmpeq_epi16 and returns the mask as
   vector signed short.  */
#define vec_vcmpequh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) _mm_cmpeq_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) _mm_cmpeq_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
        __ERROR_INVALID_ARGUMENT ()))

/* vec_vcmpequb: byte equality.  Accepts two signed or two unsigned char
   vectors, maps to _mm_cmpeq_epi8 and returns the mask as
   vector signed char.  */
#define vec_vcmpequb(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) _mm_cmpeq_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) _mm_cmpeq_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
        __ERROR_INVALID_ARGUMENT ()))

/* vec_cmpge: per-element a1 >= a2 via _mm_cmpge_ps, with the mask returned
   as vector signed int.  No type dispatch is done; the operands go straight
   to the intrinsic.  The whole expansion is parenthesized so that the cast
   stays attached to the call when the macro is used inside a larger
   expression (for example with a postfix operator).  */
#define vec_cmpge(a1, a2) ((vector signed int) _mm_cmpge_ps ((a1), (a2)))

/* vec_cmpgt: element-wise a1 > a2; both operands must have the same type.
   Signed integer vectors use _mm_cmpgt_epi8/16/32, unsigned ones use the
   __IA32_cmpgt_epu8/16/32 helpers, floats use _mm_cmpgt_ps.  The mask is
   returned as the signed integer vector of the same element width
   (vector signed int for float).  Anything else selects
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_cmpgt(a1, a2) \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_cmpgt_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) _mm_cmpgt_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) __IA32_cmpgt_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) _mm_cmpgt_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) __IA32_cmpgt_epu32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) _mm_cmpgt_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        ((vector signed int) _mm_cmpgt_ps ((vector float) (a1), (vector float) (a2))), \
        __ERROR_INVALID_ARGUMENT ())))))))

/* vec_vcmpgtfp: float-only greater-than.  Accepts (vector float,
   vector float), maps to _mm_cmpgt_ps and returns vector signed int.  */
#define vec_vcmpgtfp(a1, a2) \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        ((vector signed int) _mm_cmpgt_ps ((vector float) (a1), (vector float) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vcmpgtsw: signed-word greater-than.  Accepts (vector signed int,
   vector signed int) and maps to _mm_cmpgt_epi32.  */
#define vec_vcmpgtsw(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) _mm_cmpgt_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vcmpgtuw: unsigned-word greater-than.  Accepts (vector unsigned int,
   vector unsigned int), is routed to __IA32_cmpgt_epu32 and returns the
   mask as vector signed int.  */
#define vec_vcmpgtuw(a1, a2) \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) __IA32_cmpgt_epu32 ((vector signed int) (a1), (vector signed int) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vcmpgtsh: signed-halfword greater-than.  Accepts (vector signed short,
   vector signed short) and maps to _mm_cmpgt_epi16.  */
#define vec_vcmpgtsh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) _mm_cmpgt_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vcmpgtuh: unsigned-halfword greater-than.  Accepts
   (vector unsigned short, vector unsigned short), is routed to
   __IA32_cmpgt_epu16 and returns the mask as vector signed short.  */
#define vec_vcmpgtuh(a1, a2) \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) __IA32_cmpgt_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vcmpgtsb: signed-byte greater-than.  Accepts (vector signed char,
   vector signed char) and maps to _mm_cmpgt_epi8.  */
#define vec_vcmpgtsb(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) _mm_cmpgt_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vcmpgtub: unsigned-byte greater-than.  Accepts (vector unsigned char,
   vector unsigned char), is routed to __IA32_cmpgt_epu8 and returns the
   mask as vector signed char.  */
#define vec_vcmpgtub(a1, a2) \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_cmpgt_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_cmple: per-element a1 <= a2 on float vectors.  It is evaluated as
   a2 >= a1, i.e. _mm_cmpge_ps with the operands exchanged (previously the
   operands were passed in the original order, which computed a1 >= a2).
   The mask is returned as vector signed int, the same result type
   vec_cmpge produces, and the expansion is fully parenthesized.  */
#define vec_cmple(a1, a2) ((vector signed int) _mm_cmpge_ps ((a2), (a1)))

/* vec_cmplt: element-wise a1 < a2.  "Less than" is "greater than" with the
   operands exchanged, so this hands (a2, a1) to vec_cmpgt, which performs
   the per-type dispatch (same-typed char, short, int and float vectors)
   and selects __ERROR_INVALID_ARGUMENT () for anything else.  */
#define vec_cmplt(a1, a2) vec_cmpgt ((a2), (a1))

/* vec_ctf: integer-vector to float-vector conversion with a scalar second
   operand.  vector unsigned int is routed to __IA32_vcfux and
   vector signed int to __IA32_vcfsx; a2 may be int or unsigned int and is
   passed on as const char.  The result is vector float.  Anything else
   selects __ERROR_INVALID_ARGUMENT ().  */
#define vec_ctf(a1, a2) \
  __ch (__bin_args_eq (vector unsigned int, (a1), int, (a2)), \
        ((vector float) __IA32_vcfux ((vector signed int) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), unsigned int, (a2)), \
        ((vector float) __IA32_vcfux ((vector signed int) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), int, (a2)), \
        ((vector float) __IA32_vcfsx ((vector signed int) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), unsigned int, (a2)), \
        ((vector float) __IA32_vcfsx ((vector signed int) (a1), (const char) (a2))), \
        __ERROR_INVALID_ARGUMENT ()))))

/* vec_vcfsx: signed-int-vector to float conversion through __IA32_vcfsx.
   a1 must be vector signed int; a2 may be int or unsigned int and is passed
   on as const char.  */
#define vec_vcfsx(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), int, (a2)), \
        ((vector float) __IA32_vcfsx ((vector signed int) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), unsigned int, (a2)), \
        ((vector float) __IA32_vcfsx ((vector signed int) (a1), (const char) (a2))), \
        __ERROR_INVALID_ARGUMENT ()))

/* vec_vcfux: unsigned-int-vector to float conversion through __IA32_vcfux.
   a1 must be vector unsigned int; a2 may be int or unsigned int and is
   passed on as const char.  */
#define vec_vcfux(a1, a2) \
  __ch (__bin_args_eq (vector unsigned int, (a1), int, (a2)), \
        ((vector float) __IA32_vcfux ((vector signed int) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), unsigned int, (a2)), \
        ((vector float) __IA32_vcfux ((vector signed int) (a1), (const char) (a2))), \
        __ERROR_INVALID_ARGUMENT ()))

/* vec_cts: forwards both operands, unchecked, to __IA32_vctsxs (declared
   elsewhere in this header).  The result type is whatever that helper
   returns; no cast is applied here.  */
#define vec_cts(a1, a2) __IA32_vctsxs ((a1), (a2))

/* vec_ctu: forwards both operands to __IA32_vctuxs (declared elsewhere in
   this header) and casts the result to vector unsigned int.  The whole
   expansion is parenthesized so that the cast stays attached to the call
   when the macro is used inside a larger expression.  */
#define vec_ctu(a1, a2) ((vector unsigned int) __IA32_vctuxs ((a1), (a2)))

/* vec_expte: forwards its single operand, unchecked, to __IA32_vexptefp
   (declared elsewhere in this header).  */
#define vec_expte(a1) __IA32_vexptefp (a1)

/* vec_floor: forwards its single operand, unchecked, to __IA32_vrfim
   (declared elsewhere in this header).  NOTE(review): presumably the
   round-toward-minus-infinity helper, going by the name -- confirm.  */
#define vec_floor(a1) __IA32_vrfim (a1)

/* vec_ld: vector load keyed on the pointee type of b.
   b may point either to a vector type or to its scalar element type;
   unsigned long and signed long pointees are treated like the
   corresponding int element.  Integer loads use _mm_load_si128 on
   V4SI_ADDRESS ((a), (b)); float loads use _mm_load_ps on
   V4SF_ADDRESS ((a), (b)).  The result is cast to the vector type that
   matches the pointee.  NOTE(review): the *_ADDRESS macros are defined
   elsewhere; presumably they combine offset a with base b -- confirm.
   Unsupported pointee types select __ERROR_INVALID_ARGUMENT ().  */
#define vec_ld(a, b) \
  __ch (__un_args_eq (vector unsigned char, *(b)), \
        ((vector unsigned char) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (unsigned char, *(b)), \
        ((vector unsigned char) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (vector signed char, *(b)), \
        ((vector signed char) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (signed char, *(b)), \
        ((vector signed char) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (vector unsigned short, *(b)), \
        ((vector unsigned short) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (unsigned short, *(b)), \
        ((vector unsigned short) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (vector signed short, *(b)), \
        ((vector signed short) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (signed short, *(b)), \
        ((vector signed short) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (vector unsigned int, *(b)), \
        ((vector unsigned int) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (unsigned int, *(b)), \
        ((vector unsigned int) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (unsigned long, *(b)), \
        ((vector unsigned int) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (vector signed int, *(b)), \
        ((vector signed int) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (signed int, *(b)), \
        ((vector signed int) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (signed long, *(b)), \
        ((vector signed int) _mm_load_si128(V4SI_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (vector float, *(b)), \
        ((vector float) _mm_load_ps(V4SF_ADDRESS((a),(b)))), \
  __ch (__un_args_eq (float, *(b)), \
        ((vector float) _mm_load_ps(V4SF_ADDRESS((a),(b)))), \
        __ERROR_INVALID_ARGUMENT ()))))))))))))))))

/* vec_lde: element load keyed on the pointee type of b.
   Scalar pointees come first, then the matching vector pointees; in both
   cases b is cast to the scalar element pointer and handed, together with
   a, to the __IA32_lve* helper for that element type (ub/sb for chars,
   uh/sh for shorts, uw/sw for int and long, f for float).  The result is
   cast to the corresponding vector type.  Unsupported pointee types select
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_lde(a, b) \
  __ch (__un_args_eq (unsigned char, *(b)), \
        ((vector unsigned char) __IA32_lveub ((a), (unsigned char *)(b))), \
  __ch (__un_args_eq (signed char, *(b)), \
        ((vector signed char) __IA32_lvesb ((a), (signed char *)(b))), \
  __ch (__un_args_eq (unsigned short, *(b)), \
        ((vector unsigned short) __IA32_lveuh ((a), (unsigned short *)(b))), \
  __ch (__un_args_eq (signed short, *(b)), \
        ((vector signed short) __IA32_lvesh ((a), (signed short *)(b))), \
  __ch (__un_args_eq (unsigned long, *(b)), \
        ((vector unsigned int) __IA32_lveuw ((a), (unsigned int *)(b))), \
  __ch (__un_args_eq (signed long, *(b)), \
        ((vector signed int) __IA32_lvesw ((a), (signed int *)(b))), \
  __ch (__un_args_eq (unsigned int, *(b)), \
        ((vector unsigned int) __IA32_lveuw ((a), (unsigned int *)(b))), \
  __ch (__un_args_eq (signed int, *(b)), \
        ((vector signed int) __IA32_lvesw ((a), (signed int *)(b))), \
  __ch (__un_args_eq (float, *(b)), \
        ((vector float) __IA32_lvef ((a), (float *)(b))), \
  __ch (__un_args_eq (vector unsigned char, *(b)), \
        ((vector unsigned char) __IA32_lveub ((a), (unsigned char *)(b))), \
  __ch (__un_args_eq (vector signed char, *(b)), \
        ((vector signed char) __IA32_lvesb ((a), (signed char *)(b))), \
  __ch (__un_args_eq (vector unsigned short, *(b)), \
        ((vector unsigned short) __IA32_lveuh ((a), (unsigned short *)(b))), \
  __ch (__un_args_eq (vector signed short, *(b)), \
        ((vector signed short) __IA32_lvesh ((a), (signed short *)(b))), \
  __ch (__un_args_eq (vector unsigned long, *(b)), \
        ((vector unsigned int) __IA32_lveuw ((a), (unsigned int *)(b))), \
  __ch (__un_args_eq (vector signed long, *(b)), \
        ((vector signed int) __IA32_lvesw ((a), (signed int *)(b))), \
  __ch (__un_args_eq (vector unsigned int, *(b)), \
        ((vector unsigned int) __IA32_lveuw ((a), (unsigned int *)(b))), \
  __ch (__un_args_eq (vector signed int, *(b)), \
        ((vector signed int) __IA32_lvesw ((a), (signed int *)(b))), \
  __ch (__un_args_eq (vector float, *(b)), \
        ((vector float) __IA32_lvef ((a), (float *)(b))), \
        __ERROR_INVALID_ARGUMENT ()))))))))))))))))))

/* vec_lvewx: word-sized element load keyed on the pointee type of b.
   Accepts int, long and float pointees (scalar or vector); b is cast to the
   scalar element pointer and passed with a to __IA32_lveuw, __IA32_lvesw or
   __IA32_lvef.  Unsupported pointee types select
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_lvewx(a, b) \
  __ch (__un_args_eq (unsigned int, *(b)), \
        ((vector unsigned int) __IA32_lveuw ((a), (unsigned int *)(b))), \
  __ch (__un_args_eq (signed int, *(b)), \
        ((vector signed int) __IA32_lvesw ((a), (signed int *)(b))), \
  __ch (__un_args_eq (unsigned long, *(b)), \
        ((vector unsigned int) __IA32_lveuw ((a), (unsigned int *)(b))), \
  __ch (__un_args_eq (signed long, *(b)), \
        ((vector signed int) __IA32_lvesw ((a), (signed int *)(b))), \
  __ch (__un_args_eq (float, *(b)), \
        ((vector float) __IA32_lvef ((a), (float *)(b))), \
  __ch (__un_args_eq (vector unsigned int, *(b)), \
        ((vector unsigned int) __IA32_lveuw ((a), (unsigned int *)(b))), \
  __ch (__un_args_eq (vector signed int, *(b)), \
        ((vector signed int) __IA32_lvesw ((a), (signed int *)(b))), \
  __ch (__un_args_eq (vector unsigned long, *(b)), \
        ((vector unsigned int) __IA32_lveuw ((a), (unsigned int *)(b))), \
  __ch (__un_args_eq (vector signed long, *(b)), \
        ((vector signed int) __IA32_lvesw ((a), (signed int *)(b))), \
  __ch (__un_args_eq (vector float, *(b)), \
        ((vector float) __IA32_lvef ((a), (float *)(b))), \
        __ERROR_INVALID_ARGUMENT ()))))))))))

/* vec_lvehx: halfword element load keyed on the pointee type of b.
   Accepts unsigned/signed short pointees (scalar or vector); b is cast to
   the scalar short pointer and passed with a to __IA32_lveuh or
   __IA32_lvesh.  Unsupported pointee types select
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_lvehx(a, b) \
  __ch (__un_args_eq (unsigned short, *(b)), \
        ((vector unsigned short) __IA32_lveuh ((a), (unsigned short *)(b))), \
  __ch (__un_args_eq (signed short, *(b)), \
        ((vector signed short) __IA32_lvesh ((a), (signed short *)(b))), \
  __ch (__un_args_eq (vector unsigned short, *(b)), \
        ((vector unsigned short) __IA32_lveuh ((a), (unsigned short *)(b))), \
  __ch (__un_args_eq (vector signed short, *(b)), \
        ((vector signed short) __IA32_lvesh ((a), (signed short *)(b))), \
        __ERROR_INVALID_ARGUMENT ()))))

/* vec_lvebx: byte element load keyed on the pointee type of b.
   Accepts unsigned/signed char pointees (scalar or vector); b is cast to
   the scalar char pointer and passed with a to __IA32_lveub or
   __IA32_lvesb.  Unsupported pointee types select
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_lvebx(a, b) \
  __ch (__un_args_eq (unsigned char, *(b)), \
        ((vector unsigned char) __IA32_lveub ((a), (unsigned char *)(b))), \
  __ch (__un_args_eq (signed char, *(b)), \
        ((vector signed char) __IA32_lvesb ((a), (signed char *)(b))), \
  __ch (__un_args_eq (vector unsigned char, *(b)), \
        ((vector unsigned char) __IA32_lveub ((a), (unsigned char *)(b))), \
  __ch (__un_args_eq (vector signed char, *(b)), \
        ((vector signed char) __IA32_lvesb ((a), (signed char *)(b))), \
        __ERROR_INVALID_ARGUMENT ()))))


/* vec_loge: forwards its single operand, unchecked, to __IA32_vlogefp
   (declared elsewhere in this header).  */
#define vec_loge(a1) __IA32_vlogefp (a1)

/* vec_lvsl: every accepted pointee type expands to the same call,
   __IA32_lvsl ((a1), (void *) (a2)), cast to vector unsigned char; the
   dispatch exists only to restrict a2 to pointers to the scalar char,
   short, int, long and float types.  Any other pointee selects
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_lvsl(a1, a2) \
  __ch (__un_args_eq (unsigned char, *(a2)), \
        ((vector unsigned char) __IA32_lvsl ((a1), (void *) (a2))), \
  __ch (__un_args_eq (signed char, *(a2)), \
        ((vector unsigned char) __IA32_lvsl ((a1), (void *) (a2))), \
  __ch (__un_args_eq (unsigned short, *(a2)), \
        ((vector unsigned char) __IA32_lvsl ((a1), (void *) (a2))), \
  __ch (__un_args_eq (signed short, *(a2)), \
        ((vector unsigned char) __IA32_lvsl ((a1), (void *) (a2))), \
  __ch (__un_args_eq (unsigned int, *(a2)), \
        ((vector unsigned char) __IA32_lvsl ((a1), (void *) (a2))), \
  __ch (__un_args_eq (signed int, *(a2)), \
        ((vector unsigned char) __IA32_lvsl ((a1), (void *) (a2))), \
  __ch (__un_args_eq (unsigned long, *(a2)), \
        ((vector unsigned char) __IA32_lvsl ((a1), (void *) (a2))), \
  __ch (__un_args_eq (signed long, *(a2)), \
        ((vector unsigned char) __IA32_lvsl ((a1), (void *) (a2))), \
  __ch (__un_args_eq (float, *(a2)), \
        ((vector unsigned char) __IA32_lvsl ((a1), (void *) (a2))), \
        __ERROR_INVALID_ARGUMENT ())))))))))

/* vec_lvsr: every accepted pointee type expands to the same call,
   __IA32_lvsr ((a1), (void *) (a2)), cast to vector unsigned char; the
   dispatch exists only to restrict a2 to pointers to the scalar char,
   short, int, long and float types.  Any other pointee selects
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_lvsr(a1, a2) \
  __ch (__un_args_eq (unsigned char, *(a2)), \
        ((vector unsigned char) __IA32_lvsr ((a1), (void *) (a2))), \
  __ch (__un_args_eq (signed char, *(a2)), \
        ((vector unsigned char) __IA32_lvsr ((a1), (void *) (a2))), \
  __ch (__un_args_eq (unsigned short, *(a2)), \
        ((vector unsigned char) __IA32_lvsr ((a1), (void *) (a2))), \
  __ch (__un_args_eq (signed short, *(a2)), \
        ((vector unsigned char) __IA32_lvsr ((a1), (void *) (a2))), \
  __ch (__un_args_eq (unsigned int, *(a2)), \
        ((vector unsigned char) __IA32_lvsr ((a1), (void *) (a2))), \
  __ch (__un_args_eq (signed int, *(a2)), \
        ((vector unsigned char) __IA32_lvsr ((a1), (void *) (a2))), \
  __ch (__un_args_eq (unsigned long, *(a2)), \
        ((vector unsigned char) __IA32_lvsr ((a1), (void *) (a2))), \
  __ch (__un_args_eq (signed long, *(a2)), \
        ((vector unsigned char) __IA32_lvsr ((a1), (void *) (a2))), \
  __ch (__un_args_eq (float, *(a2)), \
        ((vector unsigned char) __IA32_lvsr ((a1), (void *) (a2))), \
        __ERROR_INVALID_ARGUMENT ())))))))))

/* vec_madd: multiplies a1 by a2 with _mm_mul_ps and adds a3 with
   _mm_add_ps.  No argument type test is performed here.  */
#define vec_madd(a1, a2, a3) \
  _mm_add_ps(_mm_mul_ps((a1), (a2)), (a3))

/* vec_madds: forwards its three arguments unchanged to __IA32_vmhaddshs.  */
#define vec_madds(a1, a2, a3) \
  __IA32_vmhaddshs ((a1), (a2), (a3))

/* vec_max: type-dispatched maximum over char, short, int and float vectors.
   - char:  any pair involving an unsigned operand -> _mm_max_epu8, result
            vector unsigned char; signed/signed -> __IA32_vmaxsb.
   - short: any pair involving an unsigned operand -> __IA32_vmaxuh, result
            vector unsigned short; signed/signed -> _mm_max_epi16.
   - int:   any pair involving an unsigned operand -> __IA32_vmaxuw, result
            vector unsigned int; signed/signed -> __IA32_vmaxsw.
   - float: __IA32_vmaxfp.
   Operands are cast to the signed vector type of their width before the
   call.  The last arm of the chain is __ERROR_INVALID_ARGUMENT ().  */
#define vec_max(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) _mm_max_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
    ((vector unsigned char) _mm_max_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) _mm_max_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed char) __IA32_vmaxsb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) __IA32_vmaxuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
    ((vector unsigned short) __IA32_vmaxuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) __IA32_vmaxuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed short) _mm_max_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) __IA32_vmaxuw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
    ((vector unsigned int) __IA32_vmaxuw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) __IA32_vmaxuw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed int) __IA32_vmaxsw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
    ((vector float) __IA32_vmaxfp ((vector float) (a1), (vector float) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_vmaxfp: accepts a pair of vector float operands and passes them to
   __IA32_vmaxfp, casting the result to vector float.  The alternative arm
   is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmaxfp(a1, a2) \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
    ((vector float) __IA32_vmaxfp ((vector float) (a1), (vector float) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmaxsw: accepts a pair of vector signed int operands and passes them
   to __IA32_vmaxsw, casting the result to vector signed int.  The
   alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmaxsw(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed int) __IA32_vmaxsw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmaxuw: accepts signed/unsigned, unsigned/signed and
   unsigned/unsigned int vector pairs.  Each is cast to vector signed int,
   handed to __IA32_vmaxuw, and the result is cast to vector unsigned int.
   The last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmaxuw(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) __IA32_vmaxuw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
    ((vector unsigned int) __IA32_vmaxuw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) __IA32_vmaxuw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_vmaxsh: accepts a pair of vector signed short operands and passes
   them to _mm_max_epi16, casting the result to vector signed short.  The
   alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmaxsh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed short) _mm_max_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmaxuh: accepts signed/unsigned, unsigned/signed and
   unsigned/unsigned short vector pairs.  Each is cast to vector signed
   short, handed to __IA32_vmaxuh, and the result is cast to vector unsigned
   short.  The last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmaxuh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) __IA32_vmaxuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
    ((vector unsigned short) __IA32_vmaxuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) __IA32_vmaxuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_vmaxsb: accepts a pair of vector signed char operands and passes them
   to __IA32_vmaxsb, casting the result to vector signed char.  The
   alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmaxsb(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed char) __IA32_vmaxsb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmaxub: accepts signed/unsigned, unsigned/signed and
   unsigned/unsigned char vector pairs.  Each is cast to vector signed char,
   handed to _mm_max_epu8, and the result is cast to vector unsigned char.
   The last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmaxub(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) _mm_max_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
    ((vector unsigned char) _mm_max_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) _mm_max_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_mergeh: type-dispatched "merge high" for matching operand pairs.
   char pairs go to __IA32_vmrghb, short pairs to __IA32_vmrghh, float pairs
   to __IA32_vmrghf and int pairs to __IA32_vmrghw.  Integer operands are
   cast to the signed vector type of their width for the call and the result
   is cast back to the operand type.  Only same-type pairs are listed; the
   last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_mergeh(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed char) __IA32_vmrghb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) __IA32_vmrghb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed short) __IA32_vmrghh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) __IA32_vmrghh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
    ((vector float) __IA32_vmrghf ((vector float) (a1), (vector float) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed int) __IA32_vmrghw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) __IA32_vmrghw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))))))

/* vec_vmrghw: word-sized merge-high.  Float pairs use __IA32_vmrghf;
   signed and unsigned int pairs use __IA32_vmrghw (operands cast to vector
   signed int, result cast back to the operand type).  The last arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmrghw(a1, a2) \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
    ((vector float) __IA32_vmrghf ((vector float) (a1), (vector float) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed int) __IA32_vmrghw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) __IA32_vmrghw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_vmrghh: halfword merge-high.  Signed and unsigned short pairs are
   cast to vector signed short, passed to __IA32_vmrghh, and the result is
   cast back to the operand type.  The last arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmrghh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed short) __IA32_vmrghh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) __IA32_vmrghh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_vmrghb: byte merge-high.  Signed and unsigned char pairs are cast to
   vector signed char, passed to __IA32_vmrghb, and the result is cast back
   to the operand type.  The last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmrghb(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed char) __IA32_vmrghb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) __IA32_vmrghb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_mergel: type-dispatched "merge low" for matching operand pairs.
   char pairs go to __IA32_vmrglb, short pairs to __IA32_vmrglh, float pairs
   to __IA32_vmrglf and int pairs to __IA32_vmrglw.  Integer operands are
   cast to the signed vector type of their width for the call and the result
   is cast back to the operand type.  Only same-type pairs are listed; the
   last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_mergel(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed char) __IA32_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) __IA32_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed short) __IA32_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) __IA32_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
    ((vector float) __IA32_vmrglf ((vector float) (a1), (vector float) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed int) __IA32_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) __IA32_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))))))

/* vec_vmrglw: word-sized merge-low.  Float pairs use __IA32_vmrglf; signed
   and unsigned int pairs use __IA32_vmrglw (operands cast to vector signed
   int, result cast back to the operand type).  The last arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmrglw(a1, a2) \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
    ((vector float) __IA32_vmrglf ((vector float) (a1), (vector float) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed int) __IA32_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) __IA32_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_vmrglh: halfword merge-low.  Signed and unsigned short pairs are cast
   to vector signed short, passed to __IA32_vmrglh, and the result is cast
   back to the operand type.  The last arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmrglh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed short) __IA32_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) __IA32_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_vmrglb: byte merge-low.  Signed and unsigned char pairs are cast to
   vector signed char, passed to __IA32_vmrglb, and the result is cast back
   to the operand type.  The last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmrglb(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed char) __IA32_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) __IA32_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_min: type-dispatched minimum over char, short, int and float vectors.
   - char:  any pair involving an unsigned operand -> _mm_min_epu8, result
            vector unsigned char; signed/signed -> __IA32_vminsb.
   - short: any pair involving an unsigned operand -> __IA32_vminuh, result
            vector unsigned short; signed/signed -> _mm_min_epi16.
   - int:   any pair involving an unsigned operand -> __IA32_vminuw, result
            vector unsigned int; signed/signed -> __IA32_vminsw.
   - float: __IA32_vminfp.
   Operands are cast to the signed vector type of their width before the
   call.  The last arm of the chain is __ERROR_INVALID_ARGUMENT ().  */
#define vec_min(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) _mm_min_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
    ((vector unsigned char) _mm_min_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) _mm_min_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed char) __IA32_vminsb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) __IA32_vminuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
    ((vector unsigned short) __IA32_vminuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) __IA32_vminuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed short) _mm_min_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) __IA32_vminuw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
    ((vector unsigned int) __IA32_vminuw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) __IA32_vminuw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed int) __IA32_vminsw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
    ((vector float) __IA32_vminfp ((vector float) (a1), (vector float) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_vminfp: accepts a pair of vector float operands and passes them to
   __IA32_vminfp, casting the result to vector float.  The alternative arm
   is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vminfp(a1, a2) \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
    ((vector float) __IA32_vminfp ((vector float) (a1), (vector float) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vminsw: accepts a pair of vector signed int operands and passes them
   to __IA32_vminsw, casting the result to vector signed int.  The
   alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vminsw(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed int) __IA32_vminsw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vminuw: accepts signed/unsigned, unsigned/signed and
   unsigned/unsigned int vector pairs.  Each is cast to vector signed int,
   handed to __IA32_vminuw, and the result is cast to vector unsigned int.
   The last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vminuw(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) __IA32_vminuw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
    ((vector unsigned int) __IA32_vminuw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) __IA32_vminuw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_vminsh: accepts a pair of vector signed short operands and passes
   them to _mm_min_epi16, casting the result to vector signed short.  The
   alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vminsh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed short) _mm_min_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vminuh: accepts signed/unsigned, unsigned/signed and
   unsigned/unsigned short vector pairs.  Each is cast to vector signed
   short, handed to __IA32_vminuh, and the result is cast to vector unsigned
   short.  The last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vminuh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) __IA32_vminuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
    ((vector unsigned short) __IA32_vminuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) __IA32_vminuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_vminsb: accepts a pair of vector signed char operands and passes them
   to __IA32_vminsb, casting the result to vector signed char.  The
   alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vminsb(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed char) __IA32_vminsb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vminub: accepts signed/unsigned, unsigned/signed and
   unsigned/unsigned char vector pairs.  Each is cast to vector signed char,
   handed to _mm_min_epu8, and the result is cast to vector unsigned char.
   The last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vminub(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) _mm_min_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
    ((vector unsigned char) _mm_min_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) _mm_min_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_mladd: three-operand short-vector form built on __IA32_vmladduhm.
   All operands are cast to vector signed short for the call.  The result is
   vector signed short for (signed, signed, signed),
   (signed, unsigned, unsigned) and (unsigned, signed, signed); it is vector
   unsigned short only when all three operands are unsigned.  The last arm
   is __ERROR_INVALID_ARGUMENT ().  */
#define vec_mladd(a1, a2, a3) \
  __ch (__tern_args_eq (vector signed short, (a1), vector signed short, (a2), vector signed short, (a3)), \
    ((vector signed short) __IA32_vmladduhm ((vector signed short) (a1), (vector signed short) (a2), (vector signed short) (a3))), \
  __ch (__tern_args_eq (vector signed short, (a1), vector unsigned short, (a2), vector unsigned short, (a3)), \
    ((vector signed short) __IA32_vmladduhm ((vector signed short) (a1), (vector signed short) (a2), (vector signed short) (a3))), \
  __ch (__tern_args_eq (vector unsigned short, (a1), vector signed short, (a2), vector signed short, (a3)), \
    ((vector signed short) __IA32_vmladduhm ((vector signed short) (a1), (vector signed short) (a2), (vector signed short) (a3))), \
  __ch (__tern_args_eq (vector unsigned short, (a1), vector unsigned short, (a2), vector unsigned short, (a3)), \
    ((vector unsigned short) __IA32_vmladduhm ((vector signed short) (a1), (vector signed short) (a2), (vector signed short) (a3))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_mradds: forwards its three arguments unchanged to
   __IA32_vmhraddshs.  */
#define vec_mradds(a1, a2, a3) \
  __IA32_vmhraddshs ((a1), (a2), (a3))

/* vec_msum: type-dispatched three-operand form.
   (unsigned char, unsigned char, unsigned int)    -> __IA32_vmsumubm
   (signed char, unsigned char, signed int)        -> __IA32_vmsummbm
   (unsigned short, unsigned short, unsigned int)  -> __IA32_vmsumuhm
   (signed short, signed short, signed int)        -> __IA32_vmsumshm
   Operands are cast to signed vector types for the call; the result takes
   the type of a3.  The last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_msum(a1, a2, a3) \
  __ch (__tern_args_eq (vector unsigned char, (a1), vector unsigned char, (a2), vector unsigned int, (a3)), \
    ((vector unsigned int) __IA32_vmsumubm ((vector signed char) (a1), (vector signed char) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector signed char, (a1), vector unsigned char, (a2), vector signed int, (a3)), \
    ((vector signed int) __IA32_vmsummbm ((vector signed char) (a1), (vector signed char) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector unsigned short, (a1), vector unsigned short, (a2), vector unsigned int, (a3)), \
    ((vector unsigned int) __IA32_vmsumuhm ((vector signed short) (a1), (vector signed short) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector signed short, (a1), vector signed short, (a2), vector signed int, (a3)), \
    ((vector signed int) __IA32_vmsumshm ((vector signed short) (a1), (vector signed short) (a2), (vector signed int) (a3))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_vmsumshm: accepts (vector signed short, vector signed short,
   vector signed int) and passes the operands to __IA32_vmsumshm, casting
   the result to vector signed int.  The alternative arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmsumshm(a1, a2, a3) \
  __ch (__tern_args_eq (vector signed short, (a1), vector signed short, (a2), vector signed int, (a3)), \
    ((vector signed int) __IA32_vmsumshm ((vector signed short) (a1), (vector signed short) (a2), (vector signed int) (a3))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmsumuhm: accepts (vector unsigned short, vector unsigned short,
   vector unsigned int).  Operands are cast to the signed vector types,
   passed to __IA32_vmsumuhm, and the result is cast to vector unsigned int.
   The alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmsumuhm(a1, a2, a3) \
  __ch (__tern_args_eq (vector unsigned short, (a1), vector unsigned short, (a2), vector unsigned int, (a3)), \
    ((vector unsigned int) __IA32_vmsumuhm ((vector signed short) (a1), (vector signed short) (a2), (vector signed int) (a3))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmsummbm: accepts (vector signed char, vector unsigned char,
   vector signed int).  Both char operands are cast to vector signed char,
   passed with a3 to __IA32_vmsummbm, and the result is cast to vector
   signed int.  The alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmsummbm(a1, a2, a3) \
  __ch (__tern_args_eq (vector signed char, (a1), vector unsigned char, (a2), vector signed int, (a3)), \
    ((vector signed int) __IA32_vmsummbm ((vector signed char) (a1), (vector signed char) (a2), (vector signed int) (a3))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmsumubm: accepts (vector unsigned char, vector unsigned char,
   vector unsigned int).  Operands are cast to the signed vector types,
   passed to __IA32_vmsumubm, and the result is cast to vector unsigned int.
   The alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmsumubm(a1, a2, a3) \
  __ch (__tern_args_eq (vector unsigned char, (a1), vector unsigned char, (a2), vector unsigned int, (a3)), \
    ((vector unsigned int) __IA32_vmsumubm ((vector signed char) (a1), (vector signed char) (a2), (vector signed int) (a3))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_msums: type-dispatched three-operand form on short vectors.
   (unsigned short, unsigned short, unsigned int) -> __IA32_vmsumuhs,
   result vector unsigned int; (signed short, signed short, signed int) ->
   __IA32_vmsumshs, result vector signed int.  Operands are cast to signed
   vector types for the call.  The last arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_msums(a1, a2, a3) \
  __ch (__tern_args_eq (vector unsigned short, (a1), vector unsigned short, (a2), vector unsigned int, (a3)), \
    ((vector unsigned int) __IA32_vmsumuhs ((vector signed short) (a1), (vector signed short) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector signed short, (a1), vector signed short, (a2), vector signed int, (a3)), \
    ((vector signed int) __IA32_vmsumshs ((vector signed short) (a1), (vector signed short) (a2), (vector signed int) (a3))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_vmsumshs: accepts (vector signed short, vector signed short,
   vector signed int) and passes the operands to __IA32_vmsumshs, casting
   the result to vector signed int.  The alternative arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmsumshs(a1, a2, a3) \
  __ch (__tern_args_eq (vector signed short, (a1), vector signed short, (a2), vector signed int, (a3)), \
    ((vector signed int) __IA32_vmsumshs ((vector signed short) (a1), (vector signed short) (a2), (vector signed int) (a3))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmsumuhs: accepts (vector unsigned short, vector unsigned short,
   vector unsigned int).  Operands are cast to the signed vector types,
   passed to __IA32_vmsumuhs, and the result is cast to vector unsigned int.
   The alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmsumuhs(a1, a2, a3) \
  __ch (__tern_args_eq (vector unsigned short, (a1), vector unsigned short, (a2), vector unsigned int, (a3)), \
    ((vector unsigned int) __IA32_vmsumuhs ((vector signed short) (a1), (vector signed short) (a2), (vector signed int) (a3))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_mule: type-dispatched form whose result element type is one size up
   from the operands.
   unsigned char pair  -> __IA32_vmuleub, result vector unsigned short
   signed char pair    -> __IA32_vmulesb, result vector signed short
   unsigned short pair -> __IA32_vmuleuh, result vector unsigned int
   signed short pair   -> __IA32_vmulesh, result vector signed int
   Operands are cast to the signed vector type of their width for the call.
   The last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_mule(a1, a2) \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned short) __IA32_vmuleub ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed short) __IA32_vmulesb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned int) __IA32_vmuleuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed int) __IA32_vmulesh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_vmulesh: accepts a pair of vector signed short operands and passes
   them to __IA32_vmulesh, casting the result to vector signed int.  The
   alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmulesh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed int) __IA32_vmulesh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmuleuh: accepts a pair of vector unsigned short operands, casts them
   to vector signed short for __IA32_vmuleuh, and casts the result to vector
   unsigned int.  The alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmuleuh(a1, a2) \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned int) __IA32_vmuleuh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmulesb: accepts a pair of vector signed char operands and passes
   them to __IA32_vmulesb, casting the result to vector signed short.  The
   alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmulesb(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed short) __IA32_vmulesb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmuleub: accepts a pair of vector unsigned char operands, casts them
   to vector signed char for __IA32_vmuleub, and casts the result to vector
   unsigned short.  The alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmuleub(a1, a2) \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned short) __IA32_vmuleub ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_mulo: type-dispatched form whose result element type is one size up
   from the operands.
   unsigned char pair  -> __IA32_vmuloub, result vector unsigned short
   signed char pair    -> __IA32_vmulosb, result vector signed short
   unsigned short pair -> __IA32_vmulouh, result vector unsigned int
   signed short pair   -> __IA32_vmulosh, result vector signed int
   Operands are cast to the signed vector type of their width for the call.
   The last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_mulo(a1, a2) \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned short) __IA32_vmuloub ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed short) __IA32_vmulosb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned int) __IA32_vmulouh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed int) __IA32_vmulosh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_vmulosh: accepts a pair of vector signed short operands and passes
   them to __IA32_vmulosh, casting the result to vector signed int.  The
   alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmulosh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed int) __IA32_vmulosh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmulouh: accepts a pair of vector unsigned short operands, casts them
   to vector signed short for __IA32_vmulouh, and casts the result to vector
   unsigned int.  The alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmulouh(a1, a2) \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned int) __IA32_vmulouh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmulosb: accepts a pair of vector signed char operands and passes
   them to __IA32_vmulosb, casting the result to vector signed short.  The
   alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmulosb(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed short) __IA32_vmulosb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vmuloub: accepts a pair of vector unsigned char operands, casts them
   to vector signed char for __IA32_vmuloub, and casts the result to vector
   unsigned short.  The alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vmuloub(a1, a2) \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned short) __IA32_vmuloub ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_nmsub: for three vector float operands, evaluates a3 - (a1 * a2)
   with _mm_mul_ps followed by _mm_sub_ps and casts the result to vector
   float.  The alternative arm is __ERROR_INVALID_ARGUMENT ().

   Every macro argument is wrapped in parentheses before it is cast.  A cast
   binds tighter than binary operators, so without the parentheses an
   argument such as x + y would have only x cast.  */
#define vec_nmsub(a1, a2, a3) \
__ch (__tern_args_eq (vector float, (a1), vector float, (a2), vector float, (a3)), \
      ((vector float) _mm_sub_ps ((vector float) (a3), _mm_mul_ps ((vector float) (a1), (vector float) (a2)))), \
    __ERROR_INVALID_ARGUMENT ())

/* vec_nor: type-dispatched wrapper around __IA32_vnor for same-type pairs
   of float, int, short and char vectors (signed and unsigned).  Whatever
   the operand type, both operands are cast to vector signed int for the
   call and the result is cast back to the operand type.  The last arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_nor(a1, a2) \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
    ((vector float) __IA32_vnor ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed int) __IA32_vnor ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) __IA32_vnor ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed short) __IA32_vnor ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) __IA32_vnor ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed char) __IA32_vnor ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) __IA32_vnor ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))))))

/* vec_or: type-dispatched OR.
   - float/float uses _mm_or_ps on the float operands directly.
   - Every other listed pair casts both operands to vector signed int and
     uses _mm_or_si128.
   Result type: vector float when either operand is float (float/float,
   float/signed int, signed int/float); for int, short and char pairs the
   result is the signed type only when both operands are signed, otherwise
   the unsigned type.  The last arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_or(a1, a2) \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
    ((vector float) _mm_or_ps ((vector float) (a1), (vector float) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector signed int, (a2)), \
    ((vector float) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector float, (a2)), \
    ((vector float) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed int) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
    ((vector unsigned int) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned int) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed short) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
    ((vector unsigned short) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned short) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
    ((vector signed char) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
    ((vector unsigned char) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
    ((vector unsigned char) _mm_or_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))))))))))))))

/* vec_pack: type-dispatched pack whose result element type is one size down
   from the operands.  Short pairs go to __IA32_vpkuhum (result char
   vector); int pairs go to __IA32_vpkuwum (result short vector).  Operands
   are cast to the signed vector type for the call, and the result keeps the
   signedness of the operands.  The last arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_pack(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed char) __IA32_vpkuhum ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned char) __IA32_vpkuhum ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed short) __IA32_vpkuwum ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned short) __IA32_vpkuwum ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_vpkuwum: accepts signed or unsigned int vector pairs, casts them to
   vector signed int for __IA32_vpkuwum, and casts the result to a short
   vector of the same signedness.  The last arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vpkuwum(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed short) __IA32_vpkuwum ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned short) __IA32_vpkuwum ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_vpkuhum: accepts signed or unsigned short vector pairs, casts them to
   vector signed short for __IA32_vpkuhum, and casts the result to a char
   vector of the same signedness.  The last arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vpkuhum(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed char) __IA32_vpkuhum ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned char) __IA32_vpkuhum ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_packpx: accepts a pair of vector unsigned int operands, casts them to
   vector signed int for __IA32_vpkpx, and casts the result to vector
   unsigned short.  The alternative arm is __ERROR_INVALID_ARGUMENT ().

   The result expression is wrapped in its own parentheses, matching the
   other dispatch macros in this header, so the cast stays attached to the
   call however __ch splices its arguments.  */
#define vec_packpx(a1, a2) \
__ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
      ((vector unsigned short) __IA32_vpkpx ((vector signed int) (a1), (vector signed int) (a2))), \
__ERROR_INVALID_ARGUMENT ())

/* vec_packs: type-dispatched pack whose result element type is one size
   down from the operands and keeps their signedness.
   unsigned short pair -> __IA32_vpkuhus, result vector unsigned char
   signed short pair   -> __IA32_vpkshss, result vector signed char
   unsigned int pair   -> __IA32_vpkuwus, result vector unsigned short
   signed int pair     -> __IA32_vpkswss, result vector signed short
   Operands are cast to the signed vector type for the call.  The last arm
   is __ERROR_INVALID_ARGUMENT ().  */
#define vec_packs(a1, a2) \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned char) __IA32_vpkuhus ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed char) __IA32_vpkshss ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned short) __IA32_vpkuwus ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed short) __IA32_vpkswss ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_vpkswss: accepts a pair of vector signed int operands and passes them
   to __IA32_vpkswss, casting the result to vector signed short.  The
   alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vpkswss(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
    ((vector signed short) __IA32_vpkswss ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vpkuwus: accepts a pair of vector unsigned int operands, casts them
   to vector signed int for __IA32_vpkuwus, and casts the result to vector
   unsigned short.  The alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vpkuwus(a1, a2) \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
    ((vector unsigned short) __IA32_vpkuwus ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vpkshss: accepts a pair of vector signed short operands and passes
   them to __IA32_vpkshss, casting the result to vector signed char.  The
   alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vpkshss(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
    ((vector signed char) __IA32_vpkshss ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vpkuhus: accepts a pair of vector unsigned short operands, casts them
   to vector signed short for __IA32_vpkuhus, and casts the result to vector
   unsigned char.  The alternative arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vpkuhus(a1, a2) \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
    ((vector unsigned char) __IA32_vpkuhus ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_packsu (a1, a2): generic front end that picks a helper from the
   (identical) type of both operands; every arm yields an unsigned result:
     vector unsigned short -> __IA32_vpkuhus, result vector unsigned char
     vector signed short   -> __IA32_vpkshus, result vector unsigned char
     vector unsigned int   -> __IA32_vpkuwus, result vector unsigned short
     vector signed int     -> __IA32_vpkswus, result vector unsigned short
   Operands are recast to the signed vector type of the same element
   width before the helper call.  The innermost fallback arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_packsu(a1, a2) \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned char) __IA32_vpkuhus ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector unsigned char) __IA32_vpkshus ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned short) __IA32_vpkuwus ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector unsigned short) __IA32_vpkswus ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_vpkswus (a1, a2): type-checked front end for __IA32_vpkswus.
   Accepted operands: both vector signed int; the result is recast to
   vector unsigned short.  Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vpkswus(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector unsigned short) __IA32_vpkswus ((vector signed int) (a1), (vector signed int) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_vpkshus (a1, a2): type-checked front end for __IA32_vpkshus.
   Accepted operands: both vector signed short; the result is recast to
   vector unsigned char.  Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vpkshus(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector unsigned char) __IA32_vpkshus ((vector signed short) (a1), (vector signed short) (a2))), \
        __ERROR_INVALID_ARGUMENT ())

/* vec_perm (a1, a2, a3): generic front end for __IA32_vperm_4si.
   a1 and a2 must share one of these types: vector float, vector
   signed/unsigned int, vector signed/unsigned short, vector
   signed/unsigned char.  a3 must be vector unsigned char.
   Every arm recasts a1 and a2 to vector signed int and a3 to vector
   signed char for the helper, then recasts the result to the type of a1.
   The innermost fallback arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_perm(a1, a2, a3) \
  __ch (__tern_args_eq (vector float, (a1), vector float, (a2), vector unsigned char, (a3)), \
        ((vector float) __IA32_vperm_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed char) (a3))), \
  __ch (__tern_args_eq (vector signed int, (a1), vector signed int, (a2), vector unsigned char, (a3)), \
        ((vector signed int) __IA32_vperm_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed char) (a3))), \
  __ch (__tern_args_eq (vector unsigned int, (a1), vector unsigned int, (a2), vector unsigned char, (a3)), \
        ((vector unsigned int) __IA32_vperm_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed char) (a3))), \
  __ch (__tern_args_eq (vector signed short, (a1), vector signed short, (a2), vector unsigned char, (a3)), \
        ((vector signed short) __IA32_vperm_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed char) (a3))), \
  __ch (__tern_args_eq (vector unsigned short, (a1), vector unsigned short, (a2), vector unsigned char, (a3)), \
        ((vector unsigned short) __IA32_vperm_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed char) (a3))), \
  __ch (__tern_args_eq (vector signed char, (a1), vector signed char, (a2), vector unsigned char, (a3)), \
        ((vector signed char) __IA32_vperm_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed char) (a3))), \
  __ch (__tern_args_eq (vector unsigned char, (a1), vector unsigned char, (a2), vector unsigned char, (a3)), \
        ((vector unsigned char) __IA32_vperm_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed char) (a3))), \
  __ERROR_INVALID_ARGUMENT ())))))))

/* vec_re (a1): forwards its operand unchanged to _mm_rcp_ps.  No type
   dispatch is performed here.  */
#define vec_re(a1) \
  _mm_rcp_ps ((a1))

/* vec_rl (a1, a2): generic front end that picks a helper by element width.
   a1 is a signed or unsigned vector; a2 is always the unsigned vector of
   the same element width:
     char  -> __IA32_vrlb
     short -> __IA32_vrlh
     int   -> __IA32_vrlw
   Both operands are recast to the signed vector type for the helper and
   the result is recast to the type of a1.  The innermost fallback arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_rl(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_vrlb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) __IA32_vrlb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) __IA32_vrlh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) __IA32_vrlh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) __IA32_vrlw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vrlw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))))

/* vec_vrlw (a1, a2): type-checked front end for __IA32_vrlw.
   a1 is vector signed int or vector unsigned int; a2 is vector unsigned
   int.  Both are recast to vector signed int for the helper and the
   result is recast to the type of a1.
   Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vrlw(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) __IA32_vrlw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vrlw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_vrlh (a1, a2): type-checked front end for __IA32_vrlh.
   a1 is vector signed short or vector unsigned short; a2 is vector
   unsigned short.  Both are recast to vector signed short for the helper
   and the result is recast to the type of a1.
   Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vrlh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) __IA32_vrlh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) __IA32_vrlh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_vrlb (a1, a2): type-checked front end for __IA32_vrlb.
   a1 is vector signed char or vector unsigned char; a2 is vector
   unsigned char.  Both are recast to vector signed char for the helper
   and the result is recast to the type of a1.
   Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vrlb(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_vrlb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) __IA32_vrlb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_round (a1): forwards its operand unchanged to __IA32_vrfin.  No
   type dispatch is performed here.  */
#define vec_round(a1) \
  __IA32_vrfin ((a1))

/* vec_rsqrte (a1): forwards its operand unchanged to _mm_rsqrt_ps.  No
   type dispatch is performed here.  */
#define vec_rsqrte(a1) \
  _mm_rsqrt_ps ((a1))

/* vec_sel (a1, a2, a3): generic front end for the __IA32_vsel_* helpers.
   a1 and a2 must share one type; a3 is the signed or unsigned integer
   vector listed beside it:
     vector float                 with a3 vector signed/unsigned int
     vector signed/unsigned int   with a3 vector signed/unsigned int
     vector signed/unsigned short with a3 vector signed/unsigned short
     vector signed/unsigned char  with a3 vector signed/unsigned char
   The float arms call __IA32_vsel_4sf with all three operands recast to
   vector float.  Every other arm calls __IA32_vsel_4si with all three
   operands recast to vector signed int.  The result is recast to the
   type of a1.  The innermost fallback arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_sel(a1, a2, a3) \
  __ch (__tern_args_eq (vector float, (a1), vector float, (a2), vector signed int, (a3)), \
        ((vector float) __IA32_vsel_4sf ((vector float) (a1), (vector float) (a2), (vector float) (a3))), \
  __ch (__tern_args_eq (vector float, (a1), vector float, (a2), vector unsigned int, (a3)), \
        ((vector float) __IA32_vsel_4sf ((vector float) (a1), (vector float) (a2), (vector float) (a3))), \
  __ch (__tern_args_eq (vector signed int, (a1), vector signed int, (a2), vector signed int, (a3)), \
        ((vector signed int) __IA32_vsel_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector signed int, (a1), vector signed int, (a2), vector unsigned int, (a3)), \
        ((vector signed int) __IA32_vsel_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector unsigned int, (a1), vector unsigned int, (a2), vector signed int, (a3)), \
        ((vector unsigned int) __IA32_vsel_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector unsigned int, (a1), vector unsigned int, (a2), vector unsigned int, (a3)), \
        ((vector unsigned int) __IA32_vsel_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector signed short, (a1), vector signed short, (a2), vector signed short, (a3)), \
        ((vector signed short) __IA32_vsel_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector signed short, (a1), vector signed short, (a2), vector unsigned short, (a3)), \
        ((vector signed short) __IA32_vsel_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector unsigned short, (a1), vector unsigned short, (a2), vector signed short, (a3)), \
        ((vector unsigned short) __IA32_vsel_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector unsigned short, (a1), vector unsigned short, (a2), vector unsigned short, (a3)), \
        ((vector unsigned short) __IA32_vsel_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector signed char, (a1), vector signed char, (a2), vector signed char, (a3)), \
        ((vector signed char) __IA32_vsel_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector signed char, (a1), vector signed char, (a2), vector unsigned char, (a3)), \
        ((vector signed char) __IA32_vsel_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector unsigned char, (a1), vector unsigned char, (a2), vector signed char, (a3)), \
        ((vector unsigned char) __IA32_vsel_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed int) (a3))), \
  __ch (__tern_args_eq (vector unsigned char, (a1), vector unsigned char, (a2), vector unsigned char, (a3)), \
        ((vector unsigned char) __IA32_vsel_4si ((vector signed int) (a1), (vector signed int) (a2), (vector signed int) (a3))), \
  __ERROR_INVALID_ARGUMENT ()))))))))))))))

/* vec_sl (a1, a2): generic front end that picks a helper by element width.
   a1 is a signed or unsigned vector; a2 is always the unsigned vector of
   the same element width:
     char  -> __IA32_vslb
     short -> __IA32_vslh
     int   -> __IA32_vslw
   Both operands are recast to the signed vector type for the helper and
   the result is recast to the type of a1.  The innermost fallback arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_sl(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_vslb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) __IA32_vslb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) __IA32_vslh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) __IA32_vslh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) __IA32_vslw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vslw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))))

/* vec_vslw (a1, a2): type-checked front end for __IA32_vslw.
   a1 is vector signed int or vector unsigned int; a2 is vector unsigned
   int.  Both are recast to vector signed int for the helper and the
   result is recast to the type of a1.
   Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vslw(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) __IA32_vslw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vslw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_vslh (a1, a2): type-checked front end for __IA32_vslh.
   a1 is vector signed short or vector unsigned short; a2 is vector
   unsigned short.  Both are recast to vector signed short for the helper
   and the result is recast to the type of a1.
   Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vslh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) __IA32_vslh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) __IA32_vslh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_vslb (a1, a2): type-checked front end for __IA32_vslb.
   a1 is vector signed char or vector unsigned char; a2 is vector
   unsigned char.  Both are recast to vector signed char for the helper
   and the result is recast to the type of a1.
   Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vslb(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_vslb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) __IA32_vslb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_sld (a1, a2, a3): generic front end for __IA32_vsldoi_4si.
   a1 and a2 must share one of these types: vector float, vector
   signed/unsigned int, vector signed/unsigned short, vector
   signed/unsigned char.  a3 must be int or unsigned int.
   Every arm recasts a1 and a2 to vector signed int and a3 to const char
   for the helper, then recasts the result to the type of a1.
   The innermost fallback arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_sld(a1, a2, a3) \
  __ch (__tern_args_eq (vector float, (a1), vector float, (a2), int, (a3)), \
        ((vector float) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ch (__tern_args_eq (vector float, (a1), vector float, (a2), unsigned int, (a3)), \
        ((vector float) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ch (__tern_args_eq (vector signed int, (a1), vector signed int, (a2), int, (a3)), \
        ((vector signed int) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ch (__tern_args_eq (vector signed int, (a1), vector signed int, (a2), unsigned int, (a3)), \
        ((vector signed int) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ch (__tern_args_eq (vector unsigned int, (a1), vector unsigned int, (a2), int, (a3)), \
        ((vector unsigned int) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ch (__tern_args_eq (vector unsigned int, (a1), vector unsigned int, (a2), unsigned int, (a3)), \
        ((vector unsigned int) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ch (__tern_args_eq (vector signed short, (a1), vector signed short, (a2), int, (a3)), \
        ((vector signed short) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ch (__tern_args_eq (vector signed short, (a1), vector signed short, (a2), unsigned int, (a3)), \
        ((vector signed short) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ch (__tern_args_eq (vector unsigned short, (a1), vector unsigned short, (a2), int, (a3)), \
        ((vector unsigned short) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ch (__tern_args_eq (vector unsigned short, (a1), vector unsigned short, (a2), unsigned int, (a3)), \
        ((vector unsigned short) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ch (__tern_args_eq (vector signed char, (a1), vector signed char, (a2), int, (a3)), \
        ((vector signed char) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ch (__tern_args_eq (vector signed char, (a1), vector signed char, (a2), unsigned int, (a3)), \
        ((vector signed char) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ch (__tern_args_eq (vector unsigned char, (a1), vector unsigned char, (a2), int, (a3)), \
        ((vector unsigned char) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ch (__tern_args_eq (vector unsigned char, (a1), vector unsigned char, (a2), unsigned int, (a3)), \
        ((vector unsigned char) __IA32_vsldoi_4si ((vector signed int) (a1), (vector signed int) (a2), (const char) (a3))), \
  __ERROR_INVALID_ARGUMENT ()))))))))))))))

/* vec_sll (a1, a2): generic front end for __IA32_vsl.
   a1 may be vector signed/unsigned int, vector signed/unsigned short or
   vector signed/unsigned char.  a2 may be vector unsigned int, vector
   unsigned short or vector unsigned char, independently of a1.
   Every arm recasts both operands to vector signed int for the helper
   and recasts the result to the type of a1.
   The innermost fallback arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_sll(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned short, (a2)), \
        ((vector signed int) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned char, (a2)), \
        ((vector signed int) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned short, (a2)), \
        ((vector unsigned int) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned char, (a2)), \
        ((vector unsigned int) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned int, (a2)), \
        ((vector signed short) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned char, (a2)), \
        ((vector signed short) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned int, (a2)), \
        ((vector unsigned short) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned char, (a2)), \
        ((vector unsigned short) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned int, (a2)), \
        ((vector signed char) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned short, (a2)), \
        ((vector signed char) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned int, (a2)), \
        ((vector unsigned char) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned short, (a2)), \
        ((vector unsigned char) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) __IA32_vsl ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))))))))))))))))

/* vec_slo (a1, a2): generic front end for __IA32_vslo.
   a1 may be vector float, vector signed/unsigned int, vector
   signed/unsigned short or vector signed/unsigned char.  a2 may be
   vector signed char or vector unsigned char.
   Every arm recasts both operands to vector signed int for the helper
   and recasts the result to the type of a1.
   The innermost fallback arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_slo(a1, a2) \
  __ch (__bin_args_eq (vector float, (a1), vector signed char, (a2)), \
        ((vector float) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector unsigned char, (a2)), \
        ((vector float) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed char, (a2)), \
        ((vector signed int) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned char, (a2)), \
        ((vector signed int) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed char, (a2)), \
        ((vector unsigned int) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned char, (a2)), \
        ((vector unsigned int) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed char, (a2)), \
        ((vector signed short) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned char, (a2)), \
        ((vector signed short) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed char, (a2)), \
        ((vector unsigned short) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned char, (a2)), \
        ((vector unsigned short) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        ((vector unsigned char) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) __IA32_vslo ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))))))))))))

/* vec_splat (a1, a2): generic front end that picks a helper from the
   type of a1; a2 must be int or unsigned int and is recast to const char:
     vector signed/unsigned char  -> __IA32_vspltb (a1 as vector signed char)
     vector signed/unsigned short -> __IA32_vsplth (a1 as vector signed short)
     vector float                 -> __IA32_vspltf (a1 as vector float)
     vector signed/unsigned int   -> __IA32_vspltw (a1 as vector signed int)
   The result is recast to the type of a1.  The innermost fallback arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_splat(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), int, (a2)), \
        ((vector signed char) __IA32_vspltb ((vector signed char) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), unsigned int, (a2)), \
        ((vector signed char) __IA32_vspltb ((vector signed char) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), int, (a2)), \
        ((vector unsigned char) __IA32_vspltb ((vector signed char) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), unsigned int, (a2)), \
        ((vector unsigned char) __IA32_vspltb ((vector signed char) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), int, (a2)), \
        ((vector signed short) __IA32_vsplth ((vector signed short) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), unsigned int, (a2)), \
        ((vector signed short) __IA32_vsplth ((vector signed short) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), int, (a2)), \
        ((vector unsigned short) __IA32_vsplth ((vector signed short) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), unsigned int, (a2)), \
        ((vector unsigned short) __IA32_vsplth ((vector signed short) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), int, (a2)), \
        ((vector float) __IA32_vspltf ((vector float) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), unsigned int, (a2)), \
        ((vector float) __IA32_vspltf ((vector float) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), int, (a2)), \
        ((vector signed int) __IA32_vspltw ((vector signed int) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), unsigned int, (a2)), \
        ((vector signed int) __IA32_vspltw ((vector signed int) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), int, (a2)), \
        ((vector unsigned int) __IA32_vspltw ((vector signed int) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vspltw ((vector signed int) (a1), (const char) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))))))))))))

/* vec_vspltw (a1, a2): type-checked front end for the 32-bit element
   splat helpers.  a2 must be int or unsigned int and is recast to
   const char:
     vector float               -> __IA32_vspltf (a1 as vector float)
     vector signed/unsigned int -> __IA32_vspltw (a1 as vector signed int)
   The result is recast to the type of a1.  The innermost fallback arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vspltw(a1, a2) \
  __ch (__bin_args_eq (vector float, (a1), int, (a2)), \
        ((vector float) __IA32_vspltf ((vector float) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), unsigned int, (a2)), \
        ((vector float) __IA32_vspltf ((vector float) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), int, (a2)), \
        ((vector signed int) __IA32_vspltw ((vector signed int) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), unsigned int, (a2)), \
        ((vector signed int) __IA32_vspltw ((vector signed int) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), int, (a2)), \
        ((vector unsigned int) __IA32_vspltw ((vector signed int) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vspltw ((vector signed int) (a1), (const char) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))))

/* vec_vsplth (a1, a2): type-checked front end for __IA32_vsplth.
   a1 is vector signed short or vector unsigned short and is recast to
   vector signed short.  a2 is int or unsigned int and is recast to
   const char.  The result is recast to the type of a1.
   The innermost fallback arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsplth(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), int, (a2)), \
        ((vector signed short) __IA32_vsplth ((vector signed short) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), unsigned int, (a2)), \
        ((vector signed short) __IA32_vsplth ((vector signed short) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), int, (a2)), \
        ((vector unsigned short) __IA32_vsplth ((vector signed short) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), unsigned int, (a2)), \
        ((vector unsigned short) __IA32_vsplth ((vector signed short) (a1), (const char) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_vspltb (a1, a2): type-checked front end for __IA32_vspltb.
   a1 is vector signed char or vector unsigned char and is recast to
   vector signed char.  a2 is int or unsigned int and is recast to
   const char.  The result is recast to the type of a1.
   The innermost fallback arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_vspltb(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), int, (a2)), \
        ((vector signed char) __IA32_vspltb ((vector signed char) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), unsigned int, (a2)), \
        ((vector signed char) __IA32_vspltb ((vector signed char) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), int, (a2)), \
        ((vector unsigned char) __IA32_vspltb ((vector signed char) (a1), (const char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), unsigned int, (a2)), \
        ((vector unsigned char) __IA32_vspltb ((vector signed char) (a1), (const char) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_splat_s8 (a1): passes a1 to _mm_set1_epi8 and recasts the result
   to vector signed char.  */
#define vec_splat_s8(a1) \
  ((vector signed char) _mm_set1_epi8 (a1))

/* vec_splat_s16 (a1): passes a1 to _mm_set1_epi16 and recasts the result
   to vector signed short.  */
#define vec_splat_s16(a1) \
  ((vector signed short) _mm_set1_epi16 (a1))

/* vec_splat_s32 (a1): passes a1 to _mm_set1_epi32 and recasts the result
   to vector signed int.  */
#define vec_splat_s32(a1) \
  ((vector signed int) _mm_set1_epi32 (a1))

/* vec_splat_u8 (a1): masks a1 to its low five bits, passes the masked
   value to _mm_set1_epi8 and recasts the result to vector unsigned char.
   The argument is parenthesised before masking so that an expression
   argument such as `x | y` is masked as a whole; `&` binds tighter than
   `|`, `^`, `&&`, `||` and `?:`, so the unparenthesised form masked only
   the right-most operand.
   NOTE(review): masking with 0x1F maps a negative literal such as -1 to
   31 rather than to an all-ones element -- confirm this is the intended
   semantics for the unsigned splat.  */
#define vec_splat_u8(a1) ((vector unsigned char) _mm_set1_epi8 ((a1) & 0x1F))

/* vec_splat_u16 (a1): masks a1 to its low five bits, passes the masked
   value to _mm_set1_epi16 and recasts the result to vector unsigned
   short.  The argument is parenthesised before masking so that an
   expression argument such as `x | y` is masked as a whole; the
   unparenthesised form masked only the right-most operand because `&`
   binds tighter than `|`, `^`, `&&`, `||` and `?:`.
   NOTE(review): masking with 0x1F maps a negative literal such as -1 to
   31 rather than to an all-ones element -- confirm this is the intended
   semantics for the unsigned splat.  */
#define vec_splat_u16(a1) ((vector unsigned short) _mm_set1_epi16 ((a1) & 0x1F))

/* vec_splat_u32 (a1): masks a1 to its low five bits, passes the masked
   value to _mm_set1_epi32 and recasts the result to vector unsigned int.
   The argument is parenthesised before masking so that an expression
   argument such as `x | y` is masked as a whole; the unparenthesised
   form masked only the right-most operand because `&` binds tighter
   than `|`, `^`, `&&`, `||` and `?:`.
   NOTE(review): masking with 0x1F maps a negative literal such as -1 to
   31 rather than to an all-ones element -- confirm this is the intended
   semantics for the unsigned splat.  */
#define vec_splat_u32(a1) ((vector unsigned int) _mm_set1_epi32 ((a1) & 0x1F))

/* vec_sr (a1, a2): generic front end that picks a helper by element width.
   a1 is a signed or unsigned vector; a2 is always the unsigned vector of
   the same element width:
     char  -> __IA32_vsrb
     short -> __IA32_vsrh
     int   -> __IA32_vsrw
   Both operands are recast to the signed vector type for the helper and
   the result is recast to the type of a1.  The innermost fallback arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_sr(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_vsrb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) __IA32_vsrb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) __IA32_vsrh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) __IA32_vsrh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) __IA32_vsrw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vsrw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))))

/* vec_vsrw (a1, a2): type-checked front end for __IA32_vsrw.
   a1 is vector signed int or vector unsigned int; a2 is vector unsigned
   int.  Both are recast to vector signed int for the helper and the
   result is recast to the type of a1.
   Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsrw(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) __IA32_vsrw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vsrw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_vsrh (a1, a2): type-checked front end for __IA32_vsrh.
   a1 is vector signed short or vector unsigned short; a2 is vector
   unsigned short.  Both are recast to vector signed short for the helper
   and the result is recast to the type of a1.
   Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsrh(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) __IA32_vsrh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) __IA32_vsrh ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_vsrb (a1, a2): type-checked front end for __IA32_vsrb.
   a1 is vector signed char or vector unsigned char; a2 is vector
   unsigned char.  Both are recast to vector signed char for the helper
   and the result is recast to the type of a1.
   Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsrb(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_vsrb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) __IA32_vsrb ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_sra (a1, a2): generic front end that picks a helper by element width.
   a1 is a signed or unsigned vector; a2 is always the unsigned vector of
   the same element width:
     char  -> __IA32_vsrab
     short -> __IA32_vsrah
     int   -> __IA32_vsraw
   Both operands are recast to the signed vector type for the helper and
   the result is recast to the type of a1.  The innermost fallback arm is
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_sra(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_vsrab ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) __IA32_vsrab ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) __IA32_vsrah ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) __IA32_vsrah ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) __IA32_vsraw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vsraw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))))

/* vec_vsraw (a1, a2): type-checked front end for __IA32_vsraw.
   a1 is vector signed int or vector unsigned int; a2 is vector unsigned
   int.  Both are recast to vector signed int for the helper and the
   result is recast to the type of a1.
   Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsraw(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) __IA32_vsraw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vsraw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_vsrah (a1, a2): type-checked front end for __IA32_vsrah.
   a1 is vector signed short or vector unsigned short; a2 is vector
   unsigned short.  Both are recast to vector signed short for the helper
   and the result is recast to the type of a1.
   Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsrah(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) __IA32_vsrah ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) __IA32_vsrah ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_vsrab (a1, a2): type-checked front end for __IA32_vsrab.
   a1 is vector signed char or vector unsigned char; a2 is vector
   unsigned char.  Both are recast to vector signed char for the helper
   and the result is recast to the type of a1.
   Fallback arm: __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsrab(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_vsrab ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) __IA32_vsrab ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_srl (a1, a2): generic front end for __IA32_vsr.
   a1 may be vector signed/unsigned int, vector signed/unsigned short or
   vector signed/unsigned char.  a2 may be vector unsigned int, vector
   unsigned short or vector unsigned char, independently of a1.
   Every arm recasts both operands to vector signed int for the helper
   and recasts the result to the type of a1.
   The innermost fallback arm is __ERROR_INVALID_ARGUMENT ().  */
#define vec_srl(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector signed int) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned short, (a2)), \
        ((vector signed int) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned char, (a2)), \
        ((vector signed int) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned short, (a2)), \
        ((vector unsigned int) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned char, (a2)), \
        ((vector unsigned int) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned int, (a2)), \
        ((vector signed short) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector signed short) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned char, (a2)), \
        ((vector signed short) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned int, (a2)), \
        ((vector unsigned short) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned char, (a2)), \
        ((vector unsigned short) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned int, (a2)), \
        ((vector signed char) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned short, (a2)), \
        ((vector signed char) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned int, (a2)), \
        ((vector unsigned char) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned short, (a2)), \
        ((vector unsigned char) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) __IA32_vsr ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))))))))))))))))

/* vec_sro (a1, a2): type-dispatched front end for __IA32_vsro.
   a1 may be vector float or a signed/unsigned int, short or char vector;
   a2 must be vector signed char or vector unsigned char.  Both operands
   are cast to vector signed int for the helper and the helper's result is
   cast back to the type of a1.  Any other type pairing expands to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_sro(a1, a2) \
  __ch (__bin_args_eq (vector float, (a1), vector signed char, (a2)), \
        ((vector float) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector unsigned char, (a2)), \
        ((vector float) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed char, (a2)), \
        ((vector signed int) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned char, (a2)), \
        ((vector signed int) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed char, (a2)), \
        ((vector unsigned int) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned char, (a2)), \
        ((vector unsigned int) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed char, (a2)), \
        ((vector signed short) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned char, (a2)), \
        ((vector signed short) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed char, (a2)), \
        ((vector unsigned short) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned char, (a2)), \
        ((vector unsigned short) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector signed char) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        ((vector unsigned char) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) __IA32_vsro ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))))))))))))

/* vec_st (a, b, c): store the vector a through the address produced by
   V4SI_ADDRESS (b, c) (V4SF_ADDRESS for vector float).
   Integer vectors of any element width are cast to vector signed int and
   written with _mm_store_si128; vector float is written with _mm_store_ps.
   Any other type for a expands to __ERROR_INVALID_ARGUMENT ().

   Every macro parameter is parenthesised at its point of use so that an
   expression argument (for example a pointer sum passed as c, or an
   arithmetic expression passed as a) is cast as a whole rather than having
   the cast bind only to its first operand.  */
#define vec_st(a, b, c) \
__ch (__un_args_eq (vector unsigned char, (a)), \
      (_mm_store_si128 (V4SI_ADDRESS ((b), (void *) (c)), (vector signed int) (a))), \
__ch (__un_args_eq (vector signed char, (a)), \
      (_mm_store_si128 (V4SI_ADDRESS ((b), (void *) (c)), (vector signed int) (a))), \
__ch (__un_args_eq (vector unsigned short, (a)), \
      (_mm_store_si128 (V4SI_ADDRESS ((b), (void *) (c)), (vector signed int) (a))), \
__ch (__un_args_eq (vector signed short, (a)), \
      (_mm_store_si128 (V4SI_ADDRESS ((b), (void *) (c)), (vector signed int) (a))), \
__ch (__un_args_eq (vector unsigned int, (a)), \
      (_mm_store_si128 (V4SI_ADDRESS ((b), (void *) (c)), (vector signed int) (a))), \
__ch (__un_args_eq (vector signed int, (a)), \
      (_mm_store_si128 (V4SI_ADDRESS ((b), (void *) (c)), (vector signed int) (a))), \
__ch (__un_args_eq (vector float, (a)), \
      (_mm_store_ps (V4SF_ADDRESS ((b), (void *) (c)), (vector float) (a))), \
__ERROR_INVALID_ARGUMENT ())))))))

/* vec_ste (a, b, c): element store, dispatched on the type of a.
   Each supported vector type forwards (a, b, c) to the matching
   __IA32_stve* helper, with c cast to a pointer to that vector's element
   type.  Any other type for a expands to __ERROR_INVALID_ARGUMENT ().  */
#define vec_ste(a, b, c) \
  __ch (__un_args_eq (vector unsigned char, (a)), \
        __IA32_stveub ((vector unsigned char) (a), (b), (unsigned char *)(c)), \
  __ch (__un_args_eq (vector signed char, (a)), \
        __IA32_stvesb ((vector signed char) (a), (b), (signed char *)(c)), \
  __ch (__un_args_eq (vector unsigned short, (a)), \
        __IA32_stveuh ((vector unsigned short) (a), (b), (unsigned short *)(c)), \
  __ch (__un_args_eq (vector signed short, (a)), \
        __IA32_stvesh ((vector signed short) (a), (b), (signed short *)(c)), \
  __ch (__un_args_eq (vector unsigned int, (a)), \
        __IA32_stveuw ((vector unsigned int) (a), (b), (unsigned int *)(c)), \
  __ch (__un_args_eq (vector signed int, (a)), \
        __IA32_stvesw ((vector signed int) (a), (b), (signed int *)(c)), \
  __ch (__un_args_eq (vector float, (a)), \
        __IA32_stvef ((vector float) (a), (b), (float *)(c)), \
  __ERROR_INVALID_ARGUMENT ())))))))

/* vec_stvewx (a, b, c): the 32-bit-element subset of vec_ste.
   Accepts vector unsigned int, vector signed int and vector float and
   forwards to __IA32_stveuw, __IA32_stvesw or __IA32_stvef respectively;
   other types expand to __ERROR_INVALID_ARGUMENT ().  */
#define vec_stvewx(a, b, c) \
  __ch (__un_args_eq (vector unsigned int, (a)), \
        __IA32_stveuw ((vector unsigned int) (a), (b), (unsigned int *)(c)), \
  __ch (__un_args_eq (vector signed int, (a)), \
        __IA32_stvesw ((vector signed int) (a), (b), (signed int *)(c)), \
  __ch (__un_args_eq (vector float, (a)), \
        __IA32_stvef ((vector float) (a), (b), (float *)(c)), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_stvehx (a, b, c): the 16-bit-element subset of vec_ste.
   Accepts vector unsigned short and vector signed short and forwards to
   __IA32_stveuh or __IA32_stvesh; other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_stvehx(a, b, c) \
  __ch (__un_args_eq (vector unsigned short, (a)), \
        __IA32_stveuh ((vector unsigned short) (a), (b), (unsigned short *)(c)), \
  __ch (__un_args_eq (vector signed short, (a)), \
        __IA32_stvesh ((vector signed short) (a), (b), (signed short *)(c)), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_stvebx (a, b, c): the 8-bit-element subset of vec_ste.
   Accepts vector unsigned char and vector signed char and forwards to
   __IA32_stveub or __IA32_stvesb; other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_stvebx(a, b, c) \
  __ch (__un_args_eq (vector unsigned char, (a)), \
        __IA32_stveub ((vector unsigned char) (a), (b), (unsigned char *)(c)), \
  __ch (__un_args_eq (vector signed char, (a)), \
        __IA32_stvesb ((vector signed char) (a), (b), (signed char *)(c)), \
  __ERROR_INVALID_ARGUMENT ()))

/* vec_sub (a1, a2): element-wise subtraction, dispatched on operand types.
   char vectors use _mm_sub_epi8, short vectors _mm_sub_epi16, int vectors
   _mm_sub_epi32 and float vectors _mm_sub_ps.  For the integer cases the
   result is cast to the signed vector type only when both operands are
   signed; if either operand is unsigned the result type is unsigned.
   Unsupported type pairings expand to __ERROR_INVALID_ARGUMENT ().  */
#define vec_sub(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) _mm_sub_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_sub_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        ((vector unsigned char) _mm_sub_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_sub_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) _mm_sub_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_sub_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        ((vector unsigned short) _mm_sub_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_sub_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) _mm_sub_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) _mm_sub_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        ((vector unsigned int) _mm_sub_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) _mm_sub_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        ((vector float) _mm_sub_ps ((vector float) (a1), (vector float) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_vsubfp (a1, a2): float-only subtraction via _mm_sub_ps.
   Both operands must be vector float; anything else expands to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsubfp(a1, a2) \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        ((vector float) _mm_sub_ps ((vector float) (a1), (vector float) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vsubuwm (a1, a2): 32-bit-element subtraction via _mm_sub_epi32.
   Accepts every signed/unsigned int vector pairing; the result is
   vector signed int only when both operands are signed, otherwise
   vector unsigned int.  Other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsubuwm(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) _mm_sub_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) _mm_sub_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        ((vector unsigned int) _mm_sub_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) _mm_sub_epi32 ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_vsubuhm (a1, a2): 16-bit-element subtraction via _mm_sub_epi16.
   Accepts every signed/unsigned short vector pairing; the result is
   vector signed short only when both operands are signed, otherwise
   vector unsigned short.  Other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsubuhm(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) _mm_sub_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_sub_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        ((vector unsigned short) _mm_sub_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_sub_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_vsububm (a1, a2): 8-bit-element subtraction via _mm_sub_epi8.
   Accepts every signed/unsigned char vector pairing; the result is
   vector signed char only when both operands are signed, otherwise
   vector unsigned char.  Other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsububm(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) _mm_sub_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_sub_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        ((vector unsigned char) _mm_sub_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_sub_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))

/* vec_subc (a1, a2): forwards to __IA32_vsubcuw.
   Only the (vector unsigned int, vector unsigned int) pairing is accepted;
   both operands are cast to vector signed int for the helper and the result
   is cast to vector unsigned int.  Other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_subc(a1, a2) \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vsubcuw ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_subs (a1, a2): saturating subtraction, dispatched on operand types.
   For char and short vectors, any pairing that involves an unsigned operand
   uses _mm_subs_epu8 / _mm_subs_epu16 and yields the unsigned vector type;
   the signed/signed pairing uses _mm_subs_epi8 / _mm_subs_epi16.
   For int vectors the same split is made between the __IA32_vsubuws and
   __IA32_vsubsws helpers.  Unsupported pairings expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_subs(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_subs_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        ((vector unsigned char) _mm_subs_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_subs_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) _mm_subs_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_subs_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        ((vector unsigned short) _mm_subs_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_subs_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) _mm_subs_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vsubuws ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        ((vector unsigned int) __IA32_vsubuws ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vsubuws ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) __IA32_vsubsws ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ()))))))))))))

/* vec_vsubsws (a1, a2): forwards to __IA32_vsubsws.
   Both operands must be vector signed int, as is the result; other types
   expand to __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsubsws(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        ((vector signed int) __IA32_vsubsws ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vsubuws (a1, a2): forwards to __IA32_vsubuws.
   Accepts the int vector pairings in which at least one operand is
   unsigned; operands are cast to vector signed int for the helper and the
   result is cast to vector unsigned int.  Other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsubuws(a1, a2) \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vsubuws ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        ((vector unsigned int) __IA32_vsubuws ((vector signed int) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vsubuws ((vector signed int) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_vsubshs (a1, a2): signed 16-bit saturating subtraction via
   _mm_subs_epi16.  Both operands must be vector signed short; other types
   expand to __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsubshs(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        ((vector signed short) _mm_subs_epi16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vsubuhs (a1, a2): unsigned 16-bit saturating subtraction via
   _mm_subs_epu16.  Accepts the short vector pairings in which at least one
   operand is unsigned; the result is vector unsigned short.  Other types
   expand to __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsubuhs(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_subs_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        ((vector unsigned short) _mm_subs_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        ((vector unsigned short) _mm_subs_epu16 ((vector signed short) (a1), (vector signed short) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_vsubsbs (a1, a2): signed 8-bit saturating subtraction via
   _mm_subs_epi8.  Both operands must be vector signed char; other types
   expand to __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsubsbs(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        ((vector signed char) _mm_subs_epi8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vsububs (a1, a2): unsigned 8-bit saturating subtraction via
   _mm_subs_epu8.  Accepts the char vector pairings in which at least one
   operand is unsigned; the result is vector unsigned char.  Other types
   expand to __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsububs(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_subs_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        ((vector unsigned char) _mm_subs_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        ((vector unsigned char) _mm_subs_epu8 ((vector signed char) (a1), (vector signed char) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_sum4s (a1, a2): dispatches to one of the __IA32_vsum4* helpers.
     (vector unsigned char, vector unsigned int) -> __IA32_vsum4ubs
     (vector signed char,   vector signed int)   -> __IA32_vsum4sbs
     (vector signed short,  vector signed int)   -> __IA32_vsum4shs
   The result has the type of a2.  Note that the unsigned char operand is
   handed to __IA32_vsum4ubs cast to vector signed char.  Other pairings
   expand to __ERROR_INVALID_ARGUMENT ().  */
#define vec_sum4s(a1, a2) \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vsum4ubs ((vector signed char) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed int, (a2)), \
        ((vector signed int) __IA32_vsum4sbs ((vector signed char) (a1), (vector signed int) (a2))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed int, (a2)), \
        ((vector signed int) __IA32_vsum4shs ((vector signed short) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_vsum4shs (a1, a2): forwards to __IA32_vsum4shs.
   Requires (vector signed short, vector signed int) and yields
   vector signed int; other types expand to __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsum4shs(a1, a2) \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed int, (a2)), \
        ((vector signed int) __IA32_vsum4shs ((vector signed short) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vsum4sbs (a1, a2): forwards to __IA32_vsum4sbs.
   Requires (vector signed char, vector signed int) and yields
   vector signed int; other types expand to __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsum4sbs(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed int, (a2)), \
        ((vector signed int) __IA32_vsum4sbs ((vector signed char) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vsum4ubs (a1, a2): forwards to __IA32_vsum4ubs.
   Requires (vector unsigned char, vector unsigned int) and yields
   vector unsigned int.  The operands are passed to the helper cast to
   vector signed char and vector signed int.  Other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vsum4ubs(a1, a2) \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned int, (a2)), \
        ((vector unsigned int) __IA32_vsum4ubs ((vector signed char) (a1), (vector signed int) (a2))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_sum2s (a1, a2): passes both arguments unchanged to __IA32_vsum2sws.
   No type dispatch or casting is done at this level.  */
#define vec_sum2s(a1, a2) \
  __IA32_vsum2sws ((a1), (a2))

/* vec_sums (a1, a2): passes both arguments unchanged to __IA32_vsumsws.
   No type dispatch or casting is done at this level.  */
#define vec_sums(a1, a2) \
  __IA32_vsumsws ((a1), (a2))

/* vec_trunc (a1): passes its argument unchanged to __IA32_vrfiz.
   No type dispatch or casting is done at this level.  */
#define vec_trunc(a1) \
  __IA32_vrfiz ((a1))

/* vec_unpackh (a1): dispatches on the type of a1.
     vector signed char    -> __IA32_vupkhsb, result vector signed short
     vector unsigned short -> __IA32_vupkhpx, result vector unsigned int
     vector signed short   -> __IA32_vupkhsh, result vector signed int
   Other types expand to __ERROR_INVALID_ARGUMENT ().
   NOTE(review): the signed char and signed short branches wrap a1 in
   SWAP32 before calling the helper, whereas vec_vupkhsb / vec_vupkhsh call
   the same helpers on a1 directly and the unsigned short branch here does
   not use SWAP32 either -- confirm which form is intended.  */
#define vec_unpackh(a1) \
  __ch (__un_args_eq (vector signed char, (a1)), \
        ((vector signed short) __IA32_vupkhsb ((vector signed char) SWAP32(a1))), \
  __ch (__un_args_eq (vector unsigned short, (a1)), \
        ((vector unsigned int) __IA32_vupkhpx ((vector signed short) (a1))), \
  __ch (__un_args_eq (vector signed short, (a1)), \
        ((vector signed int) __IA32_vupkhsh ((vector signed short) SWAP32(a1))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_vupkhsh (a1): forwards a vector signed short to __IA32_vupkhsh and
   casts the result to vector signed int.  Other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vupkhsh(a1) \
  __ch (__un_args_eq (vector signed short, (a1)), \
        ((vector signed int) __IA32_vupkhsh ((vector signed short) (a1))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vupkhpx (a1): forwards a vector unsigned short (cast to
   vector signed short) to __IA32_vupkhpx and casts the result to
   vector unsigned int.  Other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vupkhpx(a1) \
  __ch (__un_args_eq (vector unsigned short, (a1)), \
        ((vector unsigned int) __IA32_vupkhpx ((vector signed short) (a1))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vupkhsb (a1): forwards a vector signed char to __IA32_vupkhsb and
   casts the result to vector signed short.  Other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vupkhsb(a1) \
  __ch (__un_args_eq (vector signed char, (a1)), \
        ((vector signed short) __IA32_vupkhsb ((vector signed char) (a1))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_unpackl (a1): dispatches on the type of a1.
     vector signed char    -> __IA32_vupklsb, result vector signed short
     vector unsigned short -> __IA32_vupklpx, result vector unsigned int
     vector signed short   -> __IA32_vupklsh, result vector signed int
   Other types expand to __ERROR_INVALID_ARGUMENT ().
   NOTE(review): the signed char and signed short branches wrap a1 in
   SWAP32 before calling the helper, whereas vec_vupklsb / vec_vupklsh call
   the same helpers on a1 directly and the unsigned short branch here does
   not use SWAP32 either -- confirm which form is intended.  */
#define vec_unpackl(a1) \
  __ch (__un_args_eq (vector signed char, (a1)), \
        ((vector signed short) __IA32_vupklsb ((vector signed char) SWAP32(a1))), \
  __ch (__un_args_eq (vector unsigned short, (a1)), \
        ((vector unsigned int) __IA32_vupklpx ((vector signed short) (a1))), \
  __ch (__un_args_eq (vector signed short, (a1)), \
        ((vector signed int) __IA32_vupklsh ((vector signed short) SWAP32(a1))), \
  __ERROR_INVALID_ARGUMENT ())))

/* vec_vupklsh (a1): forwards a vector signed short to __IA32_vupklsh and
   casts the result to vector signed int.  Other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vupklsh(a1) \
  __ch (__un_args_eq (vector signed short, (a1)), \
        ((vector signed int) __IA32_vupklsh ((vector signed short) (a1))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vupklpx (a1): forwards a vector unsigned short (cast to
   vector signed short) to __IA32_vupklpx and casts the result to
   vector unsigned int.  Other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vupklpx(a1) \
  __ch (__un_args_eq (vector unsigned short, (a1)), \
        ((vector unsigned int) __IA32_vupklpx ((vector signed short) (a1))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_vupklsb (a1): forwards a vector signed char to __IA32_vupklsb and
   casts the result to vector signed short.  Other types expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_vupklsb(a1) \
  __ch (__un_args_eq (vector signed char, (a1)), \
        ((vector signed short) __IA32_vupklsb ((vector signed char) (a1))), \
  __ERROR_INVALID_ARGUMENT ())

/* vec_xor (a1, a2): bitwise XOR, dispatched on operand types.
   (vector float, vector float) uses _mm_xor_ps; every other accepted
   pairing casts both operands to vector signed int and uses _mm_xor_si128.
   Result type:
     - vector float if either operand is vector float (the other operand may
       be vector float, vector signed int or vector unsigned int);
     - for same-width integer pairings, the signed vector type only when
       both operands are signed, otherwise the unsigned vector type.
   Any other pairing expands to __ERROR_INVALID_ARGUMENT ().

   The chain selects the first branch whose type test holds, so each type
   pairing is listed exactly once.  Earlier revisions repeated several short
   and char pairings further down the chain (some with a different result
   cast); those repeats could never be selected and have been dropped.  */
#define vec_xor(a1, a2) \
__ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
      ((vector float) _mm_xor_ps ((vector float) (a1), (vector float) (a2))), \
__ch (__bin_args_eq (vector float, (a1), vector unsigned int, (a2)), \
      ((vector float) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector unsigned int, (a1), vector float, (a2)), \
      ((vector float) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector signed int, (a1), vector float, (a2)), \
      ((vector float) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector float, (a1), vector signed int, (a2)), \
      ((vector float) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
      ((vector signed int) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
      ((vector unsigned int) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
      ((vector unsigned int) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
      ((vector unsigned int) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
      ((vector unsigned short) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
      ((vector unsigned short) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
      ((vector unsigned short) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
      ((vector signed short) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
      ((vector unsigned char) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
      ((vector unsigned char) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
      ((vector signed char) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
      ((vector unsigned char) _mm_xor_si128 ((vector signed int) (a1), (vector signed int) (a2))), \
    __ERROR_INVALID_ARGUMENT ())))))))))))))))))

/* Predicates.  */

/* vec_all_eq (a1, a2): equality predicate, dispatched on operand types.
   The element-wise equality mask is produced by _mm_cmpeq_epi8,
   _mm_cmpeq_epi16, _mm_cmpeq_epi32 or _mm_cmpeq_ps according to element
   type and handed to all_true_16i, all_true_8i, all_true_4i or all_true_4f
   respectively.  All signed/unsigned pairings of a given integer width are
   accepted; unsupported pairings expand to __ERROR_INVALID_ARGUMENT ().  */
#define vec_all_eq(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        (all_true_16i((vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        (all_true_16i((vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        (all_true_16i((vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        (all_true_16i((vector unsigned char) _mm_cmpeq_epi8 ((vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        (all_true_8i((vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        (all_true_8i((vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        (all_true_8i((vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        (all_true_8i((vector unsigned short) _mm_cmpeq_epi16 ((vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        (all_true_4i((vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        (all_true_4i((vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        (all_true_4i((vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        (all_true_4i((vector unsigned int) _mm_cmpeq_epi32 ((vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        (all_true_4f((vector float) _mm_cmpeq_ps ((vector float) (a1), (vector float) (a2)))), \
  __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_all_ge (a1, a2): greater-or-equal predicate, dispatched on operand
   types.  Integer branches compute a less-than mask and pass it to
   all_false_16i / all_false_8i / all_false_4i: pairings that involve an
   unsigned operand use the __IA32_cmplt_epu8/16/32 helpers, signed/signed
   pairings use _mm_cmplt_epi8/16/32.  The float branch passes the
   _mm_cmpge_ps mask to all_true_4f instead.  Unsupported pairings expand to
   __ERROR_INVALID_ARGUMENT ().  */
#define vec_all_ge(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        (all_false_16i((vector unsigned char) __IA32_cmplt_epu8 ((vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        (all_false_16i((vector unsigned char) __IA32_cmplt_epu8 ((vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        (all_false_16i((vector unsigned char) __IA32_cmplt_epu8 ((vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        (all_false_16i((vector unsigned char) _mm_cmplt_epi8 ((vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        (all_false_8i((vector unsigned short) __IA32_cmplt_epu16 ((vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        (all_false_8i((vector unsigned short) __IA32_cmplt_epu16 ((vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        (all_false_8i((vector unsigned short) __IA32_cmplt_epu16 ((vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        (all_false_8i((vector unsigned short) _mm_cmplt_epi16 ((vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        (all_false_4i((vector unsigned int) __IA32_cmplt_epu32 ((vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        (all_false_4i((vector unsigned int) __IA32_cmplt_epu32 ((vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        (all_false_4i((vector unsigned int) __IA32_cmplt_epu32 ((vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        (all_false_4i((vector unsigned int) _mm_cmplt_epi32 ((vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        (all_true_4f((vector float) _mm_cmpge_ps ((vector float) (a1), (vector float) (a2)))), \
  __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_all_gt (a1, a2): greater-than predicate, dispatched on operand types.
   Each branch computes a greater-than mask and passes it to all_true_16i /
   all_true_8i / all_true_4i / all_true_4f.  Integer pairings that involve an
   unsigned operand use the __IA32_cmpgt_epu8/16/32 helpers, signed/signed
   pairings use _mm_cmpgt_epi8/16/32, and float uses _mm_cmpgt_ps.
   Unsupported pairings expand to __ERROR_INVALID_ARGUMENT ().  */
#define vec_all_gt(a1, a2) \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        (all_true_16i((vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        (all_true_16i((vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        (all_true_16i((vector unsigned char) __IA32_cmpgt_epu8 ((vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        (all_true_16i((vector unsigned char) _mm_cmpgt_epi8 ((vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        (all_true_8i((vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        (all_true_8i((vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        (all_true_8i((vector unsigned short) __IA32_cmpgt_epu16 ((vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        (all_true_8i((vector unsigned short) _mm_cmpgt_epi16 ((vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        (all_true_4i((vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        (all_true_4i((vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        (all_true_4i((vector unsigned int) __IA32_cmpgt_epu32 ((vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        (all_true_4i((vector unsigned int) _mm_cmpgt_epi32 ((vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        (all_true_4f((vector float) _mm_cmpgt_ps ((vector float) (a1), (vector float) (a2)))), \
  __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_all_in (a1, a2): passes both arguments unchanged to __IA32_vcmpbfp,
   casts the result to vector float and hands it to all_true_4f.
   No type dispatch is done at this level.  */
#define vec_all_in(a1, a2) \
  (all_true_4f((vector float) __IA32_vcmpbfp ((a1), (a2))))

/* vec_all_le (a1, a2): AltiVec "all elements less than or equal" predicate.
 *
 * The nested __ch / __bin_args_eq chain selects one expansion according to
 * the vector types of a1 and a2:
 *   - integer lanes (8/16/32 bit): a greater-than compare is formed and its
 *     mask is handed to all_false_16i / all_false_8i / all_false_4i.  Every
 *     type pair with an unsigned operand uses the __IA32_cmpgt_epu* helper;
 *     the signed/signed pair uses _mm_cmpgt_epi*.
 *   - float lanes: all_true_4f over the _mm_cmple_ps mask.
 * Any other type pair ends in __ERROR_INVALID_ARGUMENT ().
 */
#define vec_all_le(a1, a2) \
  /* 8-bit lanes */ \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        (all_false_16i ((vector unsigned char) __IA32_cmpgt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        (all_false_16i ((vector unsigned char) __IA32_cmpgt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        (all_false_16i ((vector unsigned char) __IA32_cmpgt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        (all_false_16i ((vector unsigned char) _mm_cmpgt_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  /* 16-bit lanes */ \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        (all_false_8i ((vector unsigned short) __IA32_cmpgt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        (all_false_8i ((vector unsigned short) __IA32_cmpgt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        (all_false_8i ((vector unsigned short) __IA32_cmpgt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        (all_false_8i ((vector unsigned short) _mm_cmpgt_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  /* 32-bit lanes */ \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        (all_false_4i ((vector unsigned int) __IA32_cmpgt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        (all_false_4i ((vector unsigned int) __IA32_cmpgt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        (all_false_4i ((vector unsigned int) __IA32_cmpgt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        (all_false_4i ((vector unsigned int) _mm_cmpgt_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  /* float lanes */ \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        (all_true_4f ((vector float) _mm_cmple_ps ( \
            (vector float) (a1), (vector float) (a2)))), \
      __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_all_lt (a1, a2): AltiVec "all elements less than" predicate.
 *
 * The nested __ch / __bin_args_eq chain selects one expansion according to
 * the vector types of a1 and a2:
 *   - integer lanes (8/16/32 bit): a less-than compare is formed and its mask
 *     is handed to all_true_16i / all_true_8i / all_true_4i.  Every type pair
 *     with an unsigned operand uses the __IA32_cmplt_epu* helper; the
 *     signed/signed pair uses _mm_cmplt_epi*.
 *   - float lanes: all_true_4f over the _mm_cmplt_ps mask.
 * Any other type pair ends in __ERROR_INVALID_ARGUMENT ().
 */
#define vec_all_lt(a1, a2) \
  /* 8-bit lanes */ \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        (all_true_16i ((vector unsigned char) __IA32_cmplt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        (all_true_16i ((vector unsigned char) __IA32_cmplt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        (all_true_16i ((vector unsigned char) __IA32_cmplt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        (all_true_16i ((vector unsigned char) _mm_cmplt_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  /* 16-bit lanes */ \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        (all_true_8i ((vector unsigned short) __IA32_cmplt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        (all_true_8i ((vector unsigned short) __IA32_cmplt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        (all_true_8i ((vector unsigned short) __IA32_cmplt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        (all_true_8i ((vector unsigned short) _mm_cmplt_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  /* 32-bit lanes */ \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        (all_true_4i ((vector unsigned int) __IA32_cmplt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        (all_true_4i ((vector unsigned int) __IA32_cmplt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        (all_true_4i ((vector unsigned int) __IA32_cmplt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        (all_true_4i ((vector unsigned int) _mm_cmplt_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  /* float lanes */ \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        (all_true_4f ((vector float) _mm_cmplt_ps ( \
            (vector float) (a1), (vector float) (a2)))), \
      __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_all_nan (a1): AltiVec "all elements are NaN" predicate.
 *
 * Compares a1 against itself with _mm_cmpneq_ps (a lane differs from itself
 * only when it holds a NaN) and passes the mask to all_true_4f.
 * Note that a1 appears twice in the expansion, so an argument with side
 * effects is evaluated twice.
 */
#define vec_all_nan(a1) \
  (all_true_4f ((vector float) _mm_cmpneq_ps ((a1), (a1))))

/* vec_all_ne (a1, a2): AltiVec "all elements not equal" predicate.
 *
 * The nested __ch / __bin_args_eq chain selects one expansion according to
 * the vector types of a1 and a2:
 *   - integer lanes (8/16/32 bit): an equality compare (_mm_cmpeq_epi*) is
 *     formed and its mask is handed to all_false_16i / all_false_8i /
 *     all_false_4i.  Signedness does not matter for equality, so all four
 *     signed/unsigned pairings share the same expansion.
 *   - float lanes: all_true_4f over the _mm_cmpneq_ps mask.
 * Any other type pair ends in __ERROR_INVALID_ARGUMENT ().
 *
 * The 8-bit compare result is cast to vector unsigned char before it is
 * passed to all_false_16i, matching the 16/32-bit branches here and the
 * 8-bit branches of the sibling predicates (vec_any_eq, vec_any_ne, ...).
 */
#define vec_all_ne(a1, a2) \
  /* 8-bit lanes */ \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        (all_false_16i ((vector unsigned char) _mm_cmpeq_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        (all_false_16i ((vector unsigned char) _mm_cmpeq_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        (all_false_16i ((vector unsigned char) _mm_cmpeq_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        (all_false_16i ((vector unsigned char) _mm_cmpeq_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  /* 16-bit lanes */ \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        (all_false_8i ((vector unsigned short) _mm_cmpeq_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        (all_false_8i ((vector unsigned short) _mm_cmpeq_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        (all_false_8i ((vector unsigned short) _mm_cmpeq_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        (all_false_8i ((vector unsigned short) _mm_cmpeq_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  /* 32-bit lanes */ \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        (all_false_4i ((vector unsigned int) _mm_cmpeq_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        (all_false_4i ((vector unsigned int) _mm_cmpeq_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        (all_false_4i ((vector unsigned int) _mm_cmpeq_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        (all_false_4i ((vector unsigned int) _mm_cmpeq_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  /* float lanes */ \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        (all_true_4f ((vector float) _mm_cmpneq_ps ( \
            (vector float) (a1), (vector float) (a2)))), \
      __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_all_nge (a1, a2): AltiVec "all elements not greater than or equal"
 * predicate.
 *
 * Expands to all_true_4f applied to the _mm_cmpnge_ps mask of a1 and a2.
 * The replacement list is parenthesised as a whole so the macro behaves as a
 * single operand inside any surrounding expression.
 */
#define vec_all_nge(a1, a2) \
  (all_true_4f ((vector float) _mm_cmpnge_ps ((a1), (a2))))

/* vec_all_ngt (a1, a2): AltiVec "all elements not greater than" predicate.
 *
 * Expands to all_true_4f applied to the _mm_cmpngt_ps mask of a1 and a2.
 * The replacement list is parenthesised as a whole so the macro behaves as a
 * single operand inside any surrounding expression.
 */
#define vec_all_ngt(a1, a2) \
  (all_true_4f ((vector float) _mm_cmpngt_ps ((a1), (a2))))

/* vec_all_nle (a1, a2): AltiVec "all elements not less than or equal"
 * predicate.
 *
 * Expands to all_true_4f applied to the _mm_cmpnle_ps mask of a1 and a2.
 * The replacement list is parenthesised as a whole so the macro behaves as a
 * single operand inside any surrounding expression.
 */
#define vec_all_nle(a1, a2) \
  (all_true_4f ((vector float) _mm_cmpnle_ps ((a1), (a2))))

/* vec_all_nlt (a1, a2): AltiVec "all elements not less than" predicate.
 *
 * Expands to all_true_4f applied to the _mm_cmpnlt_ps mask of a1 and a2.
 * The replacement list is parenthesised as a whole so the macro behaves as a
 * single operand inside any surrounding expression.
 */
#define vec_all_nlt(a1, a2) \
  (all_true_4f ((vector float) _mm_cmpnlt_ps ((a1), (a2))))

/* vec_all_numeric (a1): AltiVec "all elements are numeric (not NaN)"
 * predicate.
 *
 * Compares a1 against itself with _mm_cmpeq_ps (a lane equals itself unless
 * it holds a NaN) and passes the mask to all_true_4f.
 * The replacement list is parenthesised as a whole so the macro behaves as a
 * single operand inside any surrounding expression.
 * Note that a1 appears twice in the expansion, so an argument with side
 * effects is evaluated twice.
 */
#define vec_all_numeric(a1) \
  (all_true_4f ((vector float) _mm_cmpeq_ps ((a1), (a1))))

/* vec_any_eq (a1, a2): AltiVec "any element equal" predicate.
 *
 * The nested __ch / __bin_args_eq chain selects one expansion according to
 * the vector types of a1 and a2:
 *   - integer lanes (8/16/32 bit): an equality compare (_mm_cmpeq_epi*) is
 *     formed and its mask is handed to any_true_16i / any_true_8i /
 *     any_true_4i.  All four signed/unsigned pairings share one expansion.
 *   - float lanes: any_true_4f over the _mm_cmpeq_ps mask.
 * Any other type pair ends in __ERROR_INVALID_ARGUMENT ().
 */
#define vec_any_eq(a1, a2) \
  /* 8-bit lanes */ \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        (any_true_16i ((vector unsigned char) _mm_cmpeq_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        (any_true_16i ((vector unsigned char) _mm_cmpeq_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        (any_true_16i ((vector unsigned char) _mm_cmpeq_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        (any_true_16i ((vector unsigned char) _mm_cmpeq_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  /* 16-bit lanes */ \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        (any_true_8i ((vector unsigned short) _mm_cmpeq_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        (any_true_8i ((vector unsigned short) _mm_cmpeq_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        (any_true_8i ((vector unsigned short) _mm_cmpeq_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        (any_true_8i ((vector unsigned short) _mm_cmpeq_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  /* 32-bit lanes */ \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        (any_true_4i ((vector unsigned int) _mm_cmpeq_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        (any_true_4i ((vector unsigned int) _mm_cmpeq_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        (any_true_4i ((vector unsigned int) _mm_cmpeq_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        (any_true_4i ((vector unsigned int) _mm_cmpeq_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  /* float lanes */ \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        (any_true_4f ((vector float) _mm_cmpeq_ps ( \
            (vector float) (a1), (vector float) (a2)))), \
      __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_any_ge (a1, a2): AltiVec "any element greater than or equal" predicate.
 *
 * The nested __ch / __bin_args_eq chain selects one expansion according to
 * the vector types of a1 and a2:
 *   - integer lanes (8/16/32 bit): a less-than compare is formed and its mask
 *     is handed to any_false_16i / any_false_8i / any_false_4i.  Every type
 *     pair with an unsigned operand uses the __IA32_cmplt_epu* helper; the
 *     signed/signed pair uses _mm_cmplt_epi*.
 *   - float lanes: any_true_4f over the _mm_cmpge_ps mask.
 * Any other type pair ends in __ERROR_INVALID_ARGUMENT ().
 */
#define vec_any_ge(a1, a2) \
  /* 8-bit lanes */ \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        (any_false_16i ((vector unsigned char) __IA32_cmplt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        (any_false_16i ((vector unsigned char) __IA32_cmplt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        (any_false_16i ((vector unsigned char) __IA32_cmplt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        (any_false_16i ((vector unsigned char) _mm_cmplt_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  /* 16-bit lanes */ \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        (any_false_8i ((vector unsigned short) __IA32_cmplt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        (any_false_8i ((vector unsigned short) __IA32_cmplt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        (any_false_8i ((vector unsigned short) __IA32_cmplt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        (any_false_8i ((vector unsigned short) _mm_cmplt_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  /* 32-bit lanes */ \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        (any_false_4i ((vector unsigned int) __IA32_cmplt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        (any_false_4i ((vector unsigned int) __IA32_cmplt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        (any_false_4i ((vector unsigned int) __IA32_cmplt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        (any_false_4i ((vector unsigned int) _mm_cmplt_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  /* float lanes */ \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        (any_true_4f ((vector float) _mm_cmpge_ps ( \
            (vector float) (a1), (vector float) (a2)))), \
      __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_any_gt (a1, a2): AltiVec "any element greater than" predicate.
 *
 * The nested __ch / __bin_args_eq chain selects one expansion according to
 * the vector types of a1 and a2:
 *   - integer lanes (8/16/32 bit): a greater-than compare is formed and its
 *     mask is handed to any_true_16i / any_true_8i / any_true_4i.  Every type
 *     pair with an unsigned operand uses the __IA32_cmpgt_epu* helper; the
 *     signed/signed pair uses _mm_cmpgt_epi*.
 *   - float lanes: any_true_4f over the _mm_cmpgt_ps mask.
 * Any other type pair ends in __ERROR_INVALID_ARGUMENT ().
 */
#define vec_any_gt(a1, a2) \
  /* 8-bit lanes */ \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        (any_true_16i ((vector unsigned char) __IA32_cmpgt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        (any_true_16i ((vector unsigned char) __IA32_cmpgt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        (any_true_16i ((vector unsigned char) __IA32_cmpgt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        (any_true_16i ((vector unsigned char) _mm_cmpgt_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  /* 16-bit lanes */ \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        (any_true_8i ((vector unsigned short) __IA32_cmpgt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        (any_true_8i ((vector unsigned short) __IA32_cmpgt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        (any_true_8i ((vector unsigned short) __IA32_cmpgt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        (any_true_8i ((vector unsigned short) _mm_cmpgt_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  /* 32-bit lanes */ \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        (any_true_4i ((vector unsigned int) __IA32_cmpgt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        (any_true_4i ((vector unsigned int) __IA32_cmpgt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        (any_true_4i ((vector unsigned int) __IA32_cmpgt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        (any_true_4i ((vector unsigned int) _mm_cmpgt_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  /* float lanes */ \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        (any_true_4f ((vector float) _mm_cmpgt_ps ( \
            (vector float) (a1), (vector float) (a2)))), \
      __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_any_le (a1, a2): AltiVec "any element less than or equal" predicate.
 *
 * The nested __ch / __bin_args_eq chain selects one expansion according to
 * the vector types of a1 and a2:
 *   - integer lanes (8/16/32 bit): a greater-than compare is formed and its
 *     mask is handed to any_false_16i / any_false_8i / any_false_4i.  Every
 *     type pair with an unsigned operand uses the __IA32_cmpgt_epu* helper;
 *     the signed/signed pair uses _mm_cmpgt_epi*.
 *   - float lanes: any_true_4f over the _mm_cmple_ps mask.
 * Any other type pair ends in __ERROR_INVALID_ARGUMENT ().
 */
#define vec_any_le(a1, a2) \
  /* 8-bit lanes */ \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        (any_false_16i ((vector unsigned char) __IA32_cmpgt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        (any_false_16i ((vector unsigned char) __IA32_cmpgt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        (any_false_16i ((vector unsigned char) __IA32_cmpgt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        (any_false_16i ((vector unsigned char) _mm_cmpgt_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  /* 16-bit lanes */ \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        (any_false_8i ((vector unsigned short) __IA32_cmpgt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        (any_false_8i ((vector unsigned short) __IA32_cmpgt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        (any_false_8i ((vector unsigned short) __IA32_cmpgt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        (any_false_8i ((vector unsigned short) _mm_cmpgt_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  /* 32-bit lanes */ \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        (any_false_4i ((vector unsigned int) __IA32_cmpgt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        (any_false_4i ((vector unsigned int) __IA32_cmpgt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        (any_false_4i ((vector unsigned int) __IA32_cmpgt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        (any_false_4i ((vector unsigned int) _mm_cmpgt_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  /* float lanes */ \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        (any_true_4f ((vector float) _mm_cmple_ps ( \
            (vector float) (a1), (vector float) (a2)))), \
      __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_any_lt (a1, a2): AltiVec "any element less than" predicate.
 *
 * The nested __ch / __bin_args_eq chain selects one expansion according to
 * the vector types of a1 and a2:
 *   - integer lanes (8/16/32 bit): a less-than compare is formed and its mask
 *     is handed to any_true_16i / any_true_8i / any_true_4i.  Every type pair
 *     with an unsigned operand uses the __IA32_cmplt_epu* helper; the
 *     signed/signed pair uses _mm_cmplt_epi*.
 *   - float lanes: any_true_4f over the _mm_cmplt_ps mask.
 * Any other type pair ends in __ERROR_INVALID_ARGUMENT ().
 */
#define vec_any_lt(a1, a2) \
  /* 8-bit lanes */ \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        (any_true_16i ((vector unsigned char) __IA32_cmplt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        (any_true_16i ((vector unsigned char) __IA32_cmplt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        (any_true_16i ((vector unsigned char) __IA32_cmplt_epu8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        (any_true_16i ((vector unsigned char) _mm_cmplt_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  /* 16-bit lanes */ \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        (any_true_8i ((vector unsigned short) __IA32_cmplt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        (any_true_8i ((vector unsigned short) __IA32_cmplt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        (any_true_8i ((vector unsigned short) __IA32_cmplt_epu16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        (any_true_8i ((vector unsigned short) _mm_cmplt_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  /* 32-bit lanes */ \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        (any_true_4i ((vector unsigned int) __IA32_cmplt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        (any_true_4i ((vector unsigned int) __IA32_cmplt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        (any_true_4i ((vector unsigned int) __IA32_cmplt_epu32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        (any_true_4i ((vector unsigned int) _mm_cmplt_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  /* float lanes */ \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        (any_true_4f ((vector float) _mm_cmplt_ps ( \
            (vector float) (a1), (vector float) (a2)))), \
      __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_any_nan (a1): AltiVec "any element is NaN" predicate.
 *
 * Compares a1 against itself with _mm_cmpneq_ps (a lane differs from itself
 * only when it holds a NaN) and passes the mask to any_true_4f.
 * Note that a1 appears twice in the expansion, so an argument with side
 * effects is evaluated twice.
 */
#define vec_any_nan(a1) \
  (any_true_4f ((vector float) _mm_cmpneq_ps ((a1), (a1))))

/* vec_any_ne (a1, a2): AltiVec "any element not equal" predicate.
 *
 * The nested __ch / __bin_args_eq chain selects one expansion according to
 * the vector types of a1 and a2:
 *   - integer lanes (8/16/32 bit): an equality compare (_mm_cmpeq_epi*) is
 *     formed and its mask is handed to any_false_16i / any_false_8i /
 *     any_false_4i.  All four signed/unsigned pairings share one expansion.
 *   - float lanes: any_true_4f over the _mm_cmpneq_ps mask.
 * Any other type pair ends in __ERROR_INVALID_ARGUMENT ().
 */
#define vec_any_ne(a1, a2) \
  /* 8-bit lanes */ \
  __ch (__bin_args_eq (vector signed char, (a1), vector unsigned char, (a2)), \
        (any_false_16i ((vector unsigned char) _mm_cmpeq_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
        (any_false_16i ((vector unsigned char) _mm_cmpeq_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector signed char, (a2)), \
        (any_false_16i ((vector unsigned char) _mm_cmpeq_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
        (any_false_16i ((vector unsigned char) _mm_cmpeq_epi8 ( \
            (vector signed char) (a1), (vector signed char) (a2)))), \
  /* 16-bit lanes */ \
  __ch (__bin_args_eq (vector signed short, (a1), vector unsigned short, (a2)), \
        (any_false_8i ((vector unsigned short) _mm_cmpeq_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
        (any_false_8i ((vector unsigned short) _mm_cmpeq_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector signed short, (a2)), \
        (any_false_8i ((vector unsigned short) _mm_cmpeq_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  __ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
        (any_false_8i ((vector unsigned short) _mm_cmpeq_epi16 ( \
            (vector signed short) (a1), (vector signed short) (a2)))), \
  /* 32-bit lanes */ \
  __ch (__bin_args_eq (vector signed int, (a1), vector unsigned int, (a2)), \
        (any_false_4i ((vector unsigned int) _mm_cmpeq_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
        (any_false_4i ((vector unsigned int) _mm_cmpeq_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector signed int, (a2)), \
        (any_false_4i ((vector unsigned int) _mm_cmpeq_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  __ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
        (any_false_4i ((vector unsigned int) _mm_cmpeq_epi32 ( \
            (vector signed int) (a1), (vector signed int) (a2)))), \
  /* float lanes */ \
  __ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
        (any_true_4f ((vector float) _mm_cmpneq_ps ( \
            (vector float) (a1), (vector float) (a2)))), \
      __ERROR_INVALID_ARGUMENT ())))))))))))))

/* vec_any_nge (a1, a2): AltiVec "any element not greater than or equal"
 * predicate.
 *
 * Expands to any_true_4f applied to the _mm_cmpnge_ps mask of a1 and a2.
 * The replacement list is parenthesised as a whole so the macro behaves as a
 * single operand inside any surrounding expression.
 */
#define vec_any_nge(a1, a2) \
  (any_true_4f ((vector float) _mm_cmpnge_ps ((a1), (a2))))

/* vec_any_ngt (a1, a2): AltiVec "any element not greater than" predicate.
 *
 * Expands to any_true_4f applied to the _mm_cmpngt_ps mask of a1 and a2.
 * The replacement list is parenthesised as a whole so the macro behaves as a
 * single operand inside any surrounding expression.
 */
#define vec_any_ngt(a1, a2) \
  (any_true_4f ((vector float) _mm_cmpngt_ps ((a1), (a2))))

/* vec_any_nle (a1, a2): AltiVec "any element not less than or equal"
 * predicate.
 *
 * Expands to any_true_4f applied to the _mm_cmpnle_ps mask of a1 and a2.
 * The replacement list is parenthesised as a whole so the macro behaves as a
 * single operand inside any surrounding expression.
 */
#define vec_any_nle(a1, a2) \
  (any_true_4f ((vector float) _mm_cmpnle_ps ((a1), (a2))))

/* vec_any_nlt (a1, a2): AltiVec "any element not less than" predicate.
 *
 * Expands to any_true_4f applied to the _mm_cmpnlt_ps mask of a1 and a2.
 * The replacement list is parenthesised as a whole so the macro behaves as a
 * single operand inside any surrounding expression.
 */
#define vec_any_nlt(a1, a2) \
  (any_true_4f ((vector float) _mm_cmpnlt_ps ((a1), (a2))))

/* vec_any_numeric (a1): AltiVec "any element is numeric (not NaN)"
 * predicate.
 *
 * Compares a1 against itself with _mm_cmpeq_ps (a lane equals itself unless
 * it holds a NaN) and passes the mask to any_true_4f.
 * The replacement list is parenthesised as a whole so the macro behaves as a
 * single operand inside any surrounding expression.
 * Note that a1 appears twice in the expansion, so an argument with side
 * effects is evaluated twice.
 */
#define vec_any_numeric(a1) \
  (any_true_4f ((vector float) _mm_cmpeq_ps ((a1), (a1))))

/* vec_any_out (a1, a2): AltiVec "any element out of bounds" predicate.
 *
 * Expands to any_true_4f applied to the result of __IA32_vcmpbfp (a1, a2),
 * reinterpreted as a vector float.
 * The replacement list is parenthesised as a whole so the macro behaves as a
 * single operand inside any surrounding expression.
 *
 * NOTE(review): the AltiVec bounds compare reports "above the bound" and
 * "below the negated bound" in two different bits of each lane; confirm that
 * __IA32_vcmpbfp combined with any_true_4f observes both conditions.
 */
#define vec_any_out(a1, a2) \
  (any_true_4f ((vector float) __IA32_vcmpbfp ((a1), (a2))))

/**************************************************************************/
#endif /* __cplusplus */

#endif /* _ALTIVEC_H */
