/* 
   Test program for altivec_extras.h
   On Intel, compile with:
   gcc -I. -flax-vector-conversions -mavx -Wall -W -O  -o test_vec test_vec.c && ./sde -- ./test_vec

   On AltiVec, compile with:
   /usr/local/gcc/bin/gcc -flax-vector-conversions -maltivec test_vec.c -Wall -O -o test_vec && ./test_vec

 */

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
/* #define MEMORY_ORDER */
#include "altivec2avx.h"
/* #define HEXCHAR */ /* Output vec_print_u8 as hex. See vec_print.h */
#include "vec_print.h"


/* ALTIVEC_INTRINSIC guards the tests in this file that are wrapped in
   #ifdef ALTIVEC_INTRINSIC.  Some PPC functions give the error
     error: invalid parameter combination for AltiVec intrinsic
   so the macro is defined only when __ALTIVEC__ is NOT defined, which
   compiles those tests out of a native AltiVec build.
*/
#ifndef __ALTIVEC__
#define ALTIVEC_INTRINSIC
#endif

/* -------------------------------------------------------------------------- */
/* DONE is expanded inside the failure branch of the CHECK macros, after
   the diagnostic has been printed.  It is empty by default, so the run
   continues with the next check. */
#define DONE
/* Use
#define DONE exit(1)
to stop on an error
*/

#ifdef MEMORY_ORDER

/* CHECK1(f, A, B, R): assign f(A) to B and compare B with the expected
   value R using vec_all_eq.  On a mismatch, print the name of f, the
   current line, then A, B and R (via vec_print), and expand DONE.
   The body is wrapped in do { } while (0) so the macro behaves as one
   statement, e.g. under an unbraced if; the caller supplies the ';'. */
#define CHECK1(f, A, B, R)                          \
  do {                                              \
    B = f(A);                                       \
    if (!vec_all_eq(R, B)) {                        \
      printf(#f" FAILED at line %d\n", __LINE__);   \
      printf("A = ");                               \
      vec_print(A);                                 \
      printf("gave\n");                             \
      vec_print(B);                                 \
      printf(#R" should be\n");                     \
      vec_print(R);                                 \
      printf("\n");                                 \
      DONE;                                         \
    }                                               \
  } while (0)


/* CHECK1f(f, A, B, R): same contract as a one-operand check, but the
   operand, result and expectation are printed with vec_print_f32.
   Assigns f(A) to B, compares with R via vec_all_eq, and on a mismatch
   prints the diagnostic and expands DONE.  Wrapped in do { } while (0)
   so it is a single statement; the caller supplies the ';'. */
#define CHECK1f(f, A, B, R)                         \
  do {                                              \
    B = f(A);                                       \
    if (!vec_all_eq(R, B)) {                        \
      printf(#f" FAILED at line %d\n", __LINE__);   \
      printf("A = ");                               \
      vec_print_f32(A);                             \
      printf("gave\n");                             \
      vec_print_f32(B);                             \
      printf(#R" should be\n");                     \
      vec_print_f32(R);                             \
      printf("\n");                                 \
      DONE;                                         \
    }                                               \
  } while (0)


/* CHECK2(f, A, B, C, R): assign f(A, B) to C and compare C with the
   expected value R using vec_all_eq.  On a mismatch, print the name of
   f, the current line, then A, B, C and R (via vec_print), and expand
   DONE.  Wrapped in do { } while (0) so it is a single statement; the
   caller supplies the ';'. */
#define CHECK2(f, A, B, C, R)                       \
  do {                                              \
    C = f(A, B);                                    \
    if (!vec_all_eq(R, C)) {                        \
      printf(#f" FAILED at line %d\n", __LINE__);   \
      printf("A = ");                               \
      vec_print(A);                                 \
      printf("B = ");                               \
      vec_print(B);                                 \
      printf("gave\n");                             \
      vec_print(C);                                 \
      printf(#R" should be\n");                     \
      vec_print(R);                                 \
      printf("\n");                                 \
      DONE;                                         \
    }                                               \
  } while (0)


/* CHECK2b(f, A, B, C, R): assign f(A, B) to C and compare C with the
   expected value R using vec_all_eq.  A is printed with vec_print_int
   and B with vec_print_ptr; C and R are printed with vec_print.
   On a mismatch the diagnostic is printed and DONE is expanded, like
   the other CHECK macros, so "#define DONE exit(1)" stops here too.
   Wrapped in do { } while (0) so it is a single statement; the caller
   supplies the ';'. */
#define CHECK2b(f, A, B, C, R)                      \
  do {                                              \
    C = f(A, B);                                    \
    if (!vec_all_eq(R, C)) {                        \
      printf(#f" FAILED at line %d\n", __LINE__);   \
      printf("A = ");                               \
      vec_print_int(A);                             \
      printf("B = ");                               \
      vec_print_ptr(B);                             \
      printf("gave\n");                             \
      vec_print(C);                                 \
      printf(#R" should be\n");                     \
      vec_print(R);                                 \
      printf("\n");                                 \
      DONE;                                         \
    }                                               \
  } while (0)


/* CHECK3(f, A, B, C, D, R): assign f(A, B, C) to D and compare D with
   the expected value R using vec_all_eq.  On a mismatch, print the name
   of f, the current line, then A, B, C, D and R (via vec_print), and
   expand DONE.  Wrapped in do { } while (0) so it is a single
   statement; the caller supplies the ';'. */
#define CHECK3(f, A, B, C, D, R)                    \
  do {                                              \
    D = f(A, B, C);                                 \
    if (!vec_all_eq(R, D)) {                        \
      printf(#f" FAILED at line %d\n", __LINE__);   \
      printf("A = ");                               \
      vec_print(A);                                 \
      printf("B = ");                               \
      vec_print(B);                                 \
      printf("C = ");                               \
      vec_print(C);                                 \
      printf("gave\n");                             \
      vec_print(D);                                 \
      printf(#R" should be\n");                     \
      vec_print(R);                                 \
      printf("\n");                                 \
      DONE;                                         \
    }                                               \
  } while (0)


/* CHECK3f(f, A, B, C, D, R): assign f(A, B, C) to D and compare D with
   the expected value R using vec_all_eq.  A, B, D and R are printed
   with vec_print_f32; C is printed with "%d", so it must be an int.
   On a mismatch the diagnostic is printed and DONE is expanded.
   Wrapped in do { } while (0) so it is a single statement; the caller
   supplies the ';'. */
#define CHECK3f(f, A, B, C, D, R)                   \
  do {                                              \
    D = f(A, B, C);                                 \
    if (!vec_all_eq(R, D)) {                        \
      printf(#f" FAILED at line %d\n", __LINE__);   \
      printf("A = ");                               \
      vec_print_f32(A);                             \
      printf("B = ");                               \
      vec_print_f32(B);                             \
      printf("C = %d\n", C);                        \
      printf("gave\n");                             \
      vec_print_f32(D);                             \
      printf(#R" should be\n");                     \
      vec_print_f32(R);                             \
      printf("\n");                                 \
      DONE;                                         \
    }                                               \
  } while (0)


#else


/* CHECK1(f, A, B, R): load A through vec_ld(0, &A) into a temporary of
   A's type, assign f(temporary) to B and compare B with the expected
   value R using vec_all_eq.  On a mismatch, print the name of f, the
   current line, then the loaded operand, B and R (via vec_print), and
   expand DONE.
   Wrapped in do { } while (0) so that "if (c) CHECK1(...); else ..."
   parses; the caller supplies the ';'.  The temporary has a
   macro-private name so an argument called VA is not captured. */
#define CHECK1(f, A, B, R)                          \
  do {                                              \
    __typeof__(A) chk_a_ = vec_ld(0, &(A));         \
    B = f(chk_a_);                                  \
    if (!vec_all_eq(R, B)) {                        \
      printf(#f" FAILED at line %d\n", __LINE__);   \
      printf("A = ");                               \
      vec_print(chk_a_);                            \
      printf("gave\n");                             \
      vec_print(B);                                 \
      printf(#R" should be\n");                     \
      vec_print(R);                                 \
      printf("\n");                                 \
      DONE;                                         \
    }                                               \
  } while (0)


/* CHECK1f(f, A, B, R): load A through vec_ld(0, &A) into a temporary of
   A's type, assign f(temporary) to B and compare B with the expected
   value R using vec_all_eq.  The operand, result and expectation are
   printed with vec_print_f32.  On a mismatch the diagnostic is printed
   and DONE is expanded.
   Wrapped in do { } while (0) so that "if (c) CHECK1f(...); else ..."
   parses; the caller supplies the ';'.  The temporary has a
   macro-private name so an argument called VA is not captured. */
#define CHECK1f(f, A, B, R)                         \
  do {                                              \
    __typeof__(A) chk_a_ = vec_ld(0, &(A));         \
    B = f(chk_a_);                                  \
    if (!vec_all_eq(R, B)) {                        \
      printf(#f" FAILED at line %d\n", __LINE__);   \
      printf("A = ");                               \
      vec_print_f32(chk_a_);                        \
      printf("gave\n");                             \
      vec_print_f32(B);                             \
      printf(#R" should be\n");                     \
      vec_print_f32(R);                             \
      printf("\n");                                 \
      DONE;                                         \
    }                                               \
  } while (0)


/* CHECK2(f, A, B, C, R): load A and B through vec_ld(0, &...) into
   temporaries of their own types, assign f(tempA, tempB) to C and
   compare C with the expected value R using vec_all_eq.  On a mismatch,
   print the name of f, the current line, then both loaded operands, C
   and R (via vec_print), and expand DONE.
   Wrapped in do { } while (0) so that "if (c) CHECK2(...); else ..."
   parses; the caller supplies the ';'.  The temporaries have
   macro-private names so arguments called VA/VB are not captured. */
#define CHECK2(f, A, B, C, R)                       \
  do {                                              \
    __typeof__(A) chk_a_ = vec_ld(0, &(A));         \
    __typeof__(B) chk_b_ = vec_ld(0, &(B));         \
    C = f(chk_a_, chk_b_);                          \
    if (!vec_all_eq(R, C)) {                        \
      printf(#f" FAILED at line %d\n", __LINE__);   \
      printf("A = ");                               \
      vec_print(chk_a_);                            \
      printf("B = ");                               \
      vec_print(chk_b_);                            \
      printf("gave\n");                             \
      vec_print(C);                                 \
      printf(#R" should be\n");                     \
      vec_print(R);                                 \
      printf("\n");                                 \
      DONE;                                         \
    }                                               \
  } while (0)


/* CHECK2b(f, A, B, C, R): assign f(A, B) to C (A and B are passed
   straight through, without vec_ld) and compare C with the expected
   value R using vec_all_eq.  A is printed with vec_print_int and B with
   vec_print_ptr; C and R are printed with vec_print.  On a mismatch the
   diagnostic is printed and DONE is expanded.
   Wrapped in do { } while (0) so that "if (c) CHECK2b(...); else ..."
   parses; the caller supplies the ';'. */
#define CHECK2b(f, A, B, C, R)                      \
  do {                                              \
    C = f(A, B);                                    \
    if (!vec_all_eq(R, C)) {                        \
      printf(#f" FAILED at line %d\n", __LINE__);   \
      printf("A = ");                               \
      vec_print_int(A);                             \
      printf("B = ");                               \
      vec_print_ptr(B);                             \
      printf("gave\n");                             \
      vec_print(C);                                 \
      printf(#R" should be\n");                     \
      vec_print(R);                                 \
      printf("\n");                                 \
      DONE;                                         \
    }                                               \
  } while (0)


/* CHECK3(f, A, B, C, D, R): load A, B and C through vec_ld(0, &...)
   into temporaries of their own types, assign f(tempA, tempB, tempC) to
   D and compare D with the expected value R using vec_all_eq.  On a
   mismatch, print the name of f, the current line, then the three
   loaded operands, D and R (via vec_print), and expand DONE.
   Wrapped in do { } while (0) so that "if (c) CHECK3(...); else ..."
   parses; the caller supplies the ';'.  The temporaries have
   macro-private names so arguments called VA/VB/VC are not captured. */
#define CHECK3(f, A, B, C, D, R)                    \
  do {                                              \
    __typeof__(A) chk_a_ = vec_ld(0, &(A));         \
    __typeof__(B) chk_b_ = vec_ld(0, &(B));         \
    __typeof__(C) chk_c_ = vec_ld(0, &(C));         \
    D = f(chk_a_, chk_b_, chk_c_);                  \
    if (!vec_all_eq(R, D)) {                        \
      printf(#f" FAILED at line %d\n", __LINE__);   \
      printf("A = ");                               \
      vec_print(chk_a_);                            \
      printf("B = ");                               \
      vec_print(chk_b_);                            \
      printf("C = ");                               \
      vec_print(chk_c_);                            \
      printf("gave\n");                             \
      vec_print(D);                                 \
      printf(#R" should be\n");                     \
      vec_print(R);                                 \
      printf("\n");                                 \
      DONE;                                         \
    }                                               \
  } while (0)


/* CHECK3f(f, A, B, C, D, R): load A and B through vec_ld(0, &...) into
   temporaries of their own types, assign f(tempA, tempB, C) to D and
   compare D with the expected value R using vec_all_eq.  C is passed
   through unchanged and printed with "%d", so it must be an int; the
   vectors are printed with vec_print_f32.  On a mismatch the
   diagnostic is printed and DONE is expanded.
   Wrapped in do { } while (0) so that "if (c) CHECK3f(...); else ..."
   parses; the caller supplies the ';'.  The temporaries have
   macro-private names so arguments called VA/VB are not captured. */
#define CHECK3f(f, A, B, C, D, R)                   \
  do {                                              \
    __typeof__(A) chk_a_ = vec_ld(0, &(A));         \
    __typeof__(B) chk_b_ = vec_ld(0, &(B));         \
    D = f(chk_a_, chk_b_, C);                       \
    if (!vec_all_eq(R, D)) {                        \
      printf(#f" FAILED at line %d\n", __LINE__);   \
      printf("A = ");                               \
      vec_print_f32(chk_a_);                        \
      printf("B = ");                               \
      vec_print_f32(chk_b_);                        \
      printf("C = %d\n", C);                        \
      printf("gave\n");                             \
      vec_print_f32(D);                             \
      printf(#R" should be\n");                     \
      vec_print_f32(R);                             \
      printf("\n");                                 \
      DONE;                                         \
    }                                               \
  } while (0)

#endif	/* MEMORY_ORDER */


/* -------------------------------------------------------------------------- */


#define INF (1./0.)
void test_vec_round(void)
{
  vector float f1 = { 1.0, 2.0, 3.0, 4.0 };  
  vector float f2 = { 0.5, 501.125, -793.5, 8560.125 };  
  vector float f3 = { 0.0, 6.0, -8.0, 3.2 };  
  vector float f4 = { -2.8, 0.01,-0.7, 2.5 };  
  vector float f5 = { -1.5, 3.40282347e+38F, -3.40282347e+38F, INF };  

  /* Results */
  vector float sum01 = {0x1p+0, 0x1p+1, 0x1.8p+1, 0x1p+2};
  vector float sum02 = {0x0p+0, 0x1.f5p+8, -0x1.8dp+9, 0x1.0b8p+13};
  vector float sum03 = {0x0p+0, 0x1.8p+2, -0x1p+3, 0x1.8p+1};
  vector float sum04 = {-0x1.8p+1, 0x0p+0, -0x1p+0, 0x1p+1};
  vector float sum05 = {-0x1p+1, 0x1.fffffep+127, -0x1.fffffep+127, INF};

  vector float sum11 = {0x1p+0, 0x1p+1, 0x1.8p+1, 0x1p+2};
  vector float sum12 = {0x0p+0, 0x1.f5p+8, -0x1.8c8p+9, 0x1.0b8p+13};
  vector float sum13 = {0x0p+0, 0x1.8p+2, -0x1p+3, 0x1.8p+1};
  vector float sum14 = {-0x1p+1, 0x0p+0, 0x0p+0, 0x1p+1};
  vector float sum15 = {-0x1p+0, 0x1.fffffep+127, -0x1.fffffep+127, INF};

  vector float sum21 = {0x1p+0, 0x1p+1, 0x1.8p+1, 0x1p+2};
  vector float sum22 = {0x1p+0, 0x1.f6p+8, -0x1.8c8p+9, 0x1.0b88p+13};
  vector float sum23 = {0x0p+0, 0x1.8p+2, -0x1p+3, 0x1p+2};
  vector float sum24 = {-0x1p+1, 0x1p+0, 0x0p+0, 0x1.8p+1};
  vector float sum25 = {-0x1p+0, 0x1.fffffep+127, -0x1.fffffep+127, INF};

  vector float sum31 = {0x1p+0, 0x1p+1, 0x1.8p+1, 0x1p+2};
  vector float sum32 = {0x0p+0, 0x1.f5p+8, -0x1.8dp+9, 0x1.0b8p+13};
  vector float sum33 = {0x0p+0, 0x1.8p+2, -0x1p+3, 0x1.8p+1};
  vector float sum34 = {-0x1.8p+1, 0x0p+0, -0x1p+0, 0x1p+1};
  vector float sum35 = {-0x1p+1, 0x1.fffffep+127, -0x1.fffffep+127, INF};

  vector float rf;

  CHECK1f(vec_round, f1, rf, sum01);
  CHECK1f(vec_round, f2, rf, sum02);
  CHECK1f(vec_round, f3, rf, sum03);
  CHECK1f(vec_round, f4, rf, sum04);
  CHECK1f(vec_round, f5, rf, sum05);

  CHECK1f(vec_trunc, f1, rf, sum11);
  CHECK1f(vec_trunc, f2, rf, sum12);
  CHECK1f(vec_trunc, f3, rf, sum13);
  CHECK1f(vec_trunc, f4, rf, sum14);
  CHECK1f(vec_trunc, f5, rf, sum15);

  CHECK1f(vec_ceil, f1, rf, sum21);
  CHECK1f(vec_ceil, f2, rf, sum22);
  CHECK1f(vec_ceil, f3, rf, sum23);
  CHECK1f(vec_ceil, f4, rf, sum24);
  CHECK1f(vec_ceil, f5, rf, sum25);

  CHECK1f(vec_floor, f1, rf, sum31);
  CHECK1f(vec_floor, f2, rf, sum32);
  CHECK1f(vec_floor, f3, rf, sum33);
  CHECK1f(vec_floor, f4, rf, sum34);
  CHECK1f(vec_floor, f5, rf, sum35);

  printf("vec_round done.\n");
}


/* -------------------------------------------------------------------------- */


void test_vec_addc(void)
{
  vector unsigned int u0 = 
    { 2, 2, 2, 2 };
  vector unsigned int u1 = 
    { 2, 3, 4, 5 };
  vector unsigned int u2 = 
    { UINT_MAX-3, UINT_MAX-3, UINT_MAX-3, UINT_MAX-3 };

  /* Results */
  vector unsigned int isum1 = {   0,   0,   0,  0 };
  vector unsigned int isum2 = {   0,   0,   1,  1 };
  vector unsigned int isum3 = {   1,   0,   0,  0 };
  vector unsigned int isum4 = {   0,   0,   0,  0 };
  vector unsigned int ru;

  /* vec_addc */
  CHECK2(vec_addc, u1, u1, ru, isum1);
  CHECK2(vec_addc, u1, u2, ru, isum2);
  printf("vec_addc done.\n");

  /* vec_subc */
  CHECK2(vec_subc, u0, u1, ru, isum3);
  CHECK2(vec_subc, u0, u2, ru, isum4);
  printf("vec_subc done.\n");

  return;
}


/* -------------------------------------------------------------------------- */

/* Check saturating 32-bit addition and subtraction: vec_adds / vec_subs
   and the explicitly named forms vec_vadduws, vec_vaddsws, vec_vsubuws
   and vec_vsubsws, for the signed/unsigned operand combinations noted
   above each group.  Groups wrapped in ALTIVEC_INTRINSIC are compiled
   only when that macro is defined. */
void test_vec_adds(void)
{
  /* Inputs */
  vector unsigned int u1 = { 100, UINT_MAX-1, 100,          UINT_MAX-100 };
  vector unsigned int u2 = { 100, 1,          UINT_MAX-101, UINT_MAX-100 };

  vector int i1 = { INT_MAX-1, INT_MAX-1, INT_MIN+1, INT_MIN+1 };
  vector int i2 = { 1, 2, -1, -4 };
  vector int i3 = { INT_MAX-1, INT_MIN+1, INT_MAX-1, INT_MIN+1 };

  /* Expected results: unsigned saturating add */
  vector unsigned int isum1 = {2147483746U, 4294967295U, 2147483749U, 4294967295U};
  vector unsigned int isum2 = {101U, 4294967295U, 4294967295U, 4294967295U};
  vector unsigned int isum3 = {2147483746U, 4294967295U, 2147483749U, 4294967295U};
  vector unsigned int isum4 = {101U, 4294967295U, 4294967295U, 4294967295U};
  vector unsigned int isum5 = {200U, 4294967295U, 4294967294U, 4294967295U};
#ifdef ALTIVEC_INTRINSIC
  /* Expected results: signed saturating add.  These hold negative values
     and are compared with the signed result ri, so they are signed
     vectors like isum20..isum22. */
  vector int isum6 = {2147483647,  2147483647, INT_MIN, INT_MIN};
  vector int isum7 = {2147483647, -2147483645,  2147483645, INT_MIN};
#endif
  /* Expected results: unsigned saturating subtract */
  vector unsigned int isum10 = {2147483546U, 0U, 2147483549U, 0U};
  vector unsigned int isum11 = {2147483546U, 2147483645U, 0U, 0U};
  vector unsigned int isum12 = {0U, 0U, 4294967195U, 97U};
  vector unsigned int isum13 = {0U, 1U, 101U, 97U};
  vector unsigned int isum14 = {0U, 2147483648U, 0U, 2147483546U};
  vector unsigned int isum15 = {0U, 0U, 2147483545U, 2147483546U};
  vector unsigned int isum16 = {99U, 4294967292U, 0U, 0U};
  vector unsigned int isum17 = {99U, 0U, 0U, 0U};
  vector unsigned int isum18 = {0U, 4294967293U, 0U, 0U};
  vector unsigned int isum19 = {0U, 0U, 4294967094U, 0U};
  /* Expected results: signed saturating subtract */
  vector int isum20 = {2147483645, 2147483644, -2147483646, -2147483643};
  vector int isum21 = {-2147483645, -2147483644, 2147483646, 2147483643};
  vector int isum22 = {-2147483645, 2147483647, -2147483647, 2147483643};

  /* Result holders for the signed and unsigned checks. */
  vector signed int ri;
  vector unsigned int ru;

#ifdef ALTIVEC_INTRINSIC
  /* __IA32_vadduws */
  /* unsigned int = vec_adds(signed int a1, unsigned int a2) */
  CHECK2(vec_adds, i1, u1, ru, isum1);
  CHECK2(vec_adds, i2, u1, ru, isum2);
  /* unsigned int = vec_adds (unsigned int a1, signed int a2) */
  CHECK2(vec_adds, u1, i1, ru, isum3);
  CHECK2(vec_adds, u1, i2, ru, isum4);
#endif
  /* unsigned int = vec_adds (unsigned int a1, unsigned int a2) */
  CHECK2(vec_adds, u1, u2, ru, isum5);
  /* unsigned int = vec_vadduws (signed int a1, unsigned int a2) */
  CHECK2(vec_vadduws, i1, u1, ru, isum1);
  CHECK2(vec_vadduws, i2, u1, ru, isum2);
  /* unsigned int = vec_vadduws (unsigned int a1, signed int a2) */
  CHECK2(vec_vadduws, u1, i1, ru, isum3);
  CHECK2(vec_vadduws, u1, i2, ru, isum4);
  /* unsigned int = vec_vadduws (unsigned int a1, unsigned int a2) */
  CHECK2(vec_vadduws, u1, u2, ru, isum5);

#ifdef ALTIVEC_INTRINSIC
  /* __IA32_vaddsws */
  /* signed int = vec_adds (signed int a1, signed int a2) */
  CHECK2(vec_adds, i1, i2, ri, isum6);
  CHECK2(vec_adds, i2, i3, ri, isum7);
  /* signed int = vec_vaddsws (signed int a1, signed int a2) */
  CHECK2(vec_vaddsws, i1, i2, ri, isum6);
  CHECK2(vec_vaddsws, i2, i3, ri, isum7);
#endif
  printf("vec_adds done.\n");

#ifdef ALTIVEC_INTRINSIC
  /* __IA32_vsubuws */
  /* unsigned int = vec_subs (signed int a1, unsigned int a2) */
  CHECK2(vec_subs, i1, u1, ru, isum10);
  CHECK2(vec_subs, i1, u2, ru, isum11);
  CHECK2(vec_subs, i2, u1, ru, isum12);
  CHECK2(vec_subs, i2, u2, ru, isum13);
  /* unsigned int = vec_subs (unsigned int a1, signed int a2) */
  CHECK2(vec_subs, u1, i1, ru, isum14);
  CHECK2(vec_subs, u2, i1, ru, isum15);
  CHECK2(vec_subs, u1, i2, ru, isum16);
  CHECK2(vec_subs, u2, i2, ru, isum17);
#endif
  /* unsigned int = vec_subs (unsigned int a1, unsigned int a2) */
  CHECK2(vec_subs, u1, u2, ru, isum18);
  CHECK2(vec_subs, u2, u1, ru, isum19);
  /* unsigned int = vec_vsubuws (signed int a1, unsigned int a2) */
  CHECK2(vec_vsubuws, i1, u1, ru, isum10);
  CHECK2(vec_vsubuws, i1, u2, ru, isum11);
  CHECK2(vec_vsubuws, i2, u1, ru, isum12);
  CHECK2(vec_vsubuws, i2, u2, ru, isum13);
  /* unsigned int = vec_vsubuws (unsigned int a1, signed int a2) */
  CHECK2(vec_vsubuws, u1, i1, ru, isum14);
  CHECK2(vec_vsubuws, u2, i1, ru, isum15);
  CHECK2(vec_vsubuws, u1, i2, ru, isum16);
  CHECK2(vec_vsubuws, u2, i2, ru, isum17);
  /* unsigned int = vec_vsubuws (unsigned int a1, unsigned int a2) */
  CHECK2(vec_vsubuws, u1, u2, ru, isum18);
  CHECK2(vec_vsubuws, u2, u1, ru, isum19);

  /* __IA32_vsubsws */
  /* signed int = vec_subs (signed int a1, signed int a2) */
  CHECK2(vec_subs, i1, i2, ri, isum20);
  CHECK2(vec_subs, i2, i1, ri, isum21);
  CHECK2(vec_subs, i2, i3, ri, isum22);
  /* signed int = vec_vsubsws (signed int a1, signed int a2) */
  CHECK2(vec_vsubsws, i1, i2, ri, isum20);
  CHECK2(vec_vsubsws, i2, i1, ri, isum21);
  CHECK2(vec_vsubsws, i2, i3, ri, isum22);
  printf("vec_subs done.\n");
}


/* -------------------------------------------------------------------------- */

void test_vec_madds(void)
{
  vector signed short s0 = { 128,-127,128,-127,
			     128,-128,128,-128};
  vector signed short s1 = { -1, 2, -3, 4, -5, 6, -7, 8 };
  vector signed short s2 = { SHRT_MIN, SHRT_MAX, SHRT_MIN+5, SHRT_MAX-5, 
			     SHRT_MIN, SHRT_MAX, SHRT_MIN+5, SHRT_MAX-5};
  /* Results */
  vector short sum1 = {-32768, 32766, -32764, 32761, -32768, 32766, -32764, 32761};
  vector short sum2 = {-1, 2, -3, 4, -5, 6, -7, 8};
  vector short sum3 = {-32768, 32767, -32763, 32762, -32768, 32767, -32763, 32762};
  vector short sum4 = {0, 2, -2, 4, -4, 7, -6, 9};
  vector signed short rs;

  /* signed short = vec_madds(short, short, short) */
  CHECK3(vec_madds, s0, s1, s2, rs, sum1);
  CHECK3(vec_madds, s0, s0, s1, rs, sum2);

  /* signed short = vec_mradds(short, short, short) */
  CHECK3(vec_mradds, s0, s1, s2, rs, sum3);
  CHECK3(vec_mradds, s0, s0, s1, rs, sum4);

  printf("vec_madds done.\n");
}


/* -------------------------------------------------------------------------- */

void test_vec_msums(void)
{
  vector signed short s0 = { 128,-127,128,-127,
			     128,-128,128,-128};
  vector signed short s1 = { -1, 2, -3, 4, -5, 6, -7, 8 };
  vector signed short s2 = { SHRT_MIN, SHRT_MAX, SHRT_MIN+5, SHRT_MAX-5, 
			     SHRT_MIN, SHRT_MAX, SHRT_MIN+5, SHRT_MAX-5};
  vector unsigned short us0 = { USHRT_MAX, 0, USHRT_MAX, 0,
				100, 200, 300, 400};
  vector unsigned short us1 = { -1, 2, -3, 4, -5, 6, -7, 8 };
  vector unsigned short us2 = { 0, USHRT_MAX, 0, USHRT_MAX-5, 
				0, USHRT_MAX, 5, USHRT_MAX-5};

  vector unsigned int u1 = 
    { 100, UINT_MAX-1, 100, UINT_MAX-100 };
  vector unsigned int u2 = { 100, 1000, 10000, 10 };
  vector int i1 = { INT_MAX-1, INT_MAX-1, INT_MIN+1, INT_MIN+1 };
  vector int i2 = { 100, -100, 100, -100 };

  /* Results */
  vector unsigned int sum1 = {4294836325U, 4294967295U, 6554400U, 4294967295U};
  vector unsigned int sum2 = {100U, 1000U, 13117000U, 26213510U};
  vector signed int sum3 = {2147483264, 2147482754, INT_MIN, INT_MIN};
  vector signed int sum4 = {-8355613, -8354538, -8388380, -8387300};

  vector unsigned int ru;
  vector signed int rs;

  /* unsigned int = vec_msums(unsigned short, unsigned short, unsigned int) */
  CHECK3(vec_msums, us0, us1, u1, ru, sum1);
  CHECK3(vec_msums, us0, us2, u2, ru, sum2);

  /* int = vec_msums(signed short, signed short, signed int) */
  CHECK3(vec_msums, s0, s1, i1, rs, sum3);
  CHECK3(vec_msums, s0, s2, i2, rs, sum4);

  printf("vec_msums done.\n");
}


/* -------------------------------------------------------------------------- */

void test_vec_sums(void)
{
  vector unsigned char uc1 =
    { 255, 255, 255, 255,
      0,  1,  0,  0,
      15, 15, 15, 15,
      25, 25, 25, 25};
  vector signed char c1 = 
    { SCHAR_MIN, SCHAR_MAX, SCHAR_MIN, SCHAR_MAX,
      0,  1,  0,  0,
      SCHAR_MIN, SCHAR_MAX, SCHAR_MIN, SCHAR_MAX,
      0, -1,  0,  0};
  vector signed short s1 = 
    { 100, 200, 300, 400,
      SHRT_MIN+1, SHRT_MAX-1, SHRT_MIN+1, SHRT_MAX-1};
  vector unsigned int u1 = 
    { 100, UINT_MAX-1, 100, UINT_MAX-100 };
  vector int i1 = { INT_MAX-1, INT_MAX-1, INT_MIN+1, INT_MIN+1 };
  vector int i2 = { 1, 2, -1, -4 };
  vector int i3 = { INT_MAX-1, INT_MIN+1, INT_MAX-1, INT_MIN+1 };

  /* Results */
  vector int isum1 = {   0,   0,   0, -6  };
  vector int isum2 = {   0,   1,  0,  -5  };
  vector unsigned int isum3 = { 1120, 4294967295U, 160, 4294967295U};
  vector int isum4 = { 2147483644, 2147483647, INT_MIN, INT_MIN};
  vector int isum5 = { 2147483647, 2147483647, INT_MIN, INT_MIN};

  vector signed int ri;
  vector unsigned int ru;

  /* vec_sums */
  CHECK2(vec_sums, i1, i2, ri, isum1);
  printf("vec_sums done.\n");

  /* vec_sum2s */
  CHECK2(vec_sum2s, i3, i2, ri, isum2);
  printf("vec_sum2s done.\n");

  /* unsigned int = vec_sum4s(unsigned char, unsigned int) */
  CHECK2(vec_sum4s, uc1, u1, ru, isum3);
  /* int = vec_sum4s(signed char, int) */
  CHECK2(vec_sum4s, c1, i1, ri, isum4);
  /* int = vec_sum4s(signed short, int) */
  CHECK2(vec_sum4s, s1, i1, ri, isum5);
  printf("vec_sum4s done.\n");
}


/* -------------------------------------------------------------------------- */

void test_vec_mladd(void)
{
  vector signed short s0 = { 128,-127,128,-127,
			     128,-128,128,-128};
  vector signed short s1 = { -1, 2, -3, 4, -5, 6, -7, 8 };
  vector signed short s2 = { SHRT_MIN, SHRT_MAX, SHRT_MIN+5, SHRT_MAX-5, 
			     SHRT_MIN, SHRT_MAX, SHRT_MIN+5, SHRT_MAX-5};
  vector unsigned short us0 = { USHRT_MAX, 0, USHRT_MAX, 0,
				100, 200, 300, 400};
  vector unsigned short us1 = { -1, 2, -3, 4, -5, 6, -7, 8 };
  vector unsigned short us2 = { 0, USHRT_MAX, 0, USHRT_MAX-5, 
				0, USHRT_MAX, 5, USHRT_MAX-5};

  /* Results */
  vector signed short sum1 = 
    {32640, 32513, 32389, 32254, 32128, 31999, 31877, 31738};
  vector signed short sum2 = 
    {-32767, -32765, -32754, -32758, -32743, -32733, -32714, -32710};
  vector signed short sum3 = 
    {-128, -1, -128, -6, 12800, -25601, -27131, 14330};
  vector signed short sum4 = 
    {0, 4, 8, 16, 125, 236, 349, 464};
  vector signed short sum5 = 
    {32640, 32767, 32645, 32762, -19968, 7167, 5637, -18438};
  vector signed short sum6 = 
    {129, -123, 137, -111, 153, -92, 177, -64};
  vector unsigned short sum7 = 
    {1, 65535, 1, 65530, 10000, 39999, 24469, 28922};
  vector unsigned short sum8 = 
    {0, 4, 8, 16, 125, 236, 349, 464};

  vector unsigned short ru;
  vector signed short rs;

  /* signed short = vec_mladd(signed short, signed short, signed short) */
  CHECK3(vec_mladd, s0, s1, s2, rs, sum1);
  CHECK3(vec_mladd, s1, s1, s2, rs, sum2);
  /* signed short = vec_mladd(signed short, unsigned short, unsigned short) */
  CHECK3(vec_mladd, s0, us0, us2, rs, sum3);
  CHECK3(vec_mladd, s1, us1, us0, rs, sum4);
  /* signed short = vec_mladd(unsigned short, signed short, signed short) */
  CHECK3(vec_mladd, us0, s0, s2, rs, sum5);
  CHECK3(vec_mladd, us1, s1, s0, rs, sum6);
  /* unsigned short = vec_mladd(unsigned short, unsigned short, unsigned short) */
  CHECK3(vec_mladd, us0, us0, us2, ru, sum7);
  CHECK3(vec_mladd, us1, us1, us0, ru, sum8);

  printf("vec_mladd done.\n");
}


/* -------------------------------------------------------------------------- */

void test_vec_packs(void)
{
  vector signed char sc1 =
    { 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16 };

  vector signed short ss1 = 
    { 100, 200, 300, 400,
      SHRT_MIN+1, SHRT_MAX-1, SHRT_MIN+1, SHRT_MAX-1};
  vector signed short ss2 = 
    { 0x001f, 0x0008, 0xa3d0, 0x009c,
      0x0300, 0x5aed, 0x007f, 0x0054};

  vector unsigned short us1 = 
    { 0x0074, 0x0075, 0x741c, 0xe655, 0xda27, 0x00f7, 0x1d00, 0x005b};
  vector unsigned short us2 = 
    { 0x51d3, 0x6bc1, 0x0086, 0x00e1, 0xd3bc, 0x0022, 0x1f00, 0x0043};

  vector unsigned int u1 = 
    { 100, UINT_MAX-1, 100, UINT_MAX-100 };
  vector unsigned int u2 = 
    { 0x7057d09d, 0xa01873fa, 0x5aaf3b66, 0xe68e3e7f };

  vector int i1 = { INT_MAX-1, INT_MAX-1, INT_MIN+1, INT_MIN+1 };
  vector int i2 = { 1, 2, -1, -4 };

  /* Results */
  vector unsigned short isum1 = { 12, 65535, 12, 65523, 11091, 3551, 21740, 17647};
  vector unsigned short isum2 = { 11091, 3551, 21740, 17647, 12, 65535, 12, 65523
  };
  vector unsigned char isum3 = {116, 117, 255, 255, 255, 247, 255, 91, 255, 255, 134, 225, 255, 34, 255, 67 };
  vector unsigned char isum4 = {255, 255, 134, 225, 255, 34, 255, 67, 116, 117, 255, 255, 255, 247, 255, 91};

  vector unsigned char isum5 = {100, 200, 255, 255, 0, 255, 0, 255, 31, 8, 0, 156, 255, 255, 127, 84 };
  vector unsigned char isum6 = {31, 8, 0, 156, 255, 255, 127, 84, 100, 200, 255, 255, 0, 255, 0, 255};
  vector unsigned short isum7 = {100, 65535, 100, 65535, 65535, 65535, 65535, 65535 };
  vector unsigned short isum8 = {65535, 65535, 65535, 65535, 100, 65535, 100, 65535 };
  vector unsigned short isum9 = {65535, 65535, 0, 0, 1, 2, 0, 0 };
  vector unsigned short isum10 = { 1, 2, 0, 0, 65535, 65535, 0, 0};
  vector signed short isum11 = {1, -2, 3, -4, 5, -6, 7, -8};
  vector signed int isum12 = {100, 200, 300, 400};
  vector signed int isum13 = {31, 8, -23600, 156};
#ifdef ALTIVEC_INTRINSIC
  vector unsigned int isum14 = {788U, 789U, 1900572U, 1700373U};
#endif
  vector signed short isum15 = {9, -10, 11, -12, 13, -14, 15, -16};
  vector signed int isum16 = {-32767, 32766, -32767, 32766};
  vector signed int isum17 = {768, 23277, 127, 84};
#ifdef ALTIVEC_INTRINSIC
  vector unsigned int isum18 = {788U, 789U, 1900572U, 1700373U};
#endif

#ifdef ALTIVEC_INTRINSIC
  vector unsigned int ru;
#endif
  vector signed int ri;
  vector signed short rss;
  vector unsigned short rus;
  vector unsigned char ruc;

  /* unsigned short = vec_packpx(unsigned int, unsigned int) */
  CHECK2(vec_packpx, u1, u2, rus, isum1);
  CHECK2(vec_packpx, u2, u1, rus, isum2);
  printf("vec_packpx done.\n");

  /* unsigned char = vec_packsu(unsigned short, unsigned short) */
  /* 
vec_packsu FAILED at line 458
A = 116, 117, 29724, 58965, 55847, 247, 7424, 91
B = 20947, 27585, 134, 225, 54204, 34, 7936, 67
gave
116, 117, 255, 0,   0,   247, 255, 91, 255, 255, 134, 225,   0, 34, 255, 67
isum3 should be
116, 117, 255, 255, 255, 247, 255, 91, 255, 255, 134, 225, 255, 34, 255, 67

Error in 
__IA32_vpkuhus _mm_packus_epi16
 */
  CHECK2(vec_packsu, us1, us2, ruc, isum3);
  /* 
255, 255, 134, 225,   0, 34, 255, 67, 116, 117, 255,   0,   0, 247, 255, 91
isum4 should be
255, 255, 134, 225, 255, 34, 255, 67, 116, 117, 255, 255, 255, 247, 255, 91
Error in

 */
  CHECK2(vec_packsu, us2, us1, ruc, isum4);
  /* unsigned char = vec_packsu(signed short, signed short) */
  CHECK2(vec_packsu, ss1, ss2, ruc, isum5);
  CHECK2(vec_packsu, ss2, ss1, ruc, isum6);
  /* unsigned short = vec_packsu(unsigned int, unsigned int) */
  CHECK2(vec_packsu, u1, u2, rus, isum7);
  CHECK2(vec_packsu, u2, u1, rus, isum8);
  /* unsigned short = vec_packsu(signed int, signed int) */
  CHECK2(vec_packsu, i1, i2, rus, isum9);
  CHECK2(vec_packsu, i2, i1, rus, isum10);
  printf("vec_packsu done.\n");

  /* signed short = vec_unpackh (signed char) */
  CHECK1(vec_unpackh, sc1, rss, isum11);
  /* signed int = vec_unpackh (signed short) */
  CHECK1(vec_unpackh, ss1, ri, isum12);
  CHECK1(vec_unpackh, ss2, ri, isum13);
#ifdef ALTIVEC_INTRINSIC
  /* unsigned int = vec_unpackh (unsigned short) */
  CHECK1(vec_unpackh, us1, ru, isum14);
#endif
  printf("vec_unpackh done.\n");

  /* signed short = vec_unpackl (signed char) */ 
  CHECK1(vec_unpackl, sc1, rss, isum15);
  /* signed int = vec_unpackl (signed short) */
  CHECK1(vec_unpackl, ss1, ri, isum16);
  CHECK1(vec_unpackl, ss2, ri, isum17);
#ifdef ALTIVEC_INTRINSIC
  /* unsigned int = vec_unpackl (unsigned short) */
  CHECK1(vec_unpackl, us1, ru, isum18);
#endif
  printf("vec_unpackl done.\n");

}


/* -------------------------------------------------------------------------- */

void test_vec_sld(void)
{
  vector float f1 = { 0x1.040608p-125, 0x1.0c0e1p-117, 0x1.141618p-109, 0x1.1c1ep-101 };  
  vector float f2 = { -0x1.434546p-62, -0x1.4b4d4ep-54,-0x1.535556p-46, -0x1.5b5d5ep-38};
  vector float rf;
  vector float sum1 = {0x1.06080ap-123, 0x1.0e1012p-115, 0x1.16181ap-107, 0x1.1e014p-99};
  vector float sum2 = {0x1.080a0cp-121, 0x1.101214p-113, 0x1.181a1cp-105, 0x1.014142p-97};
  vector float sum3 = {0x1.0a0c0ep-119, 0x1.121416p-111, 0x1.1a1c1ep-103, 0x1.414344p-126};
  vector float sum4 = {0x1.0c0e1p-117, 0x1.141618p-109, 0x1.1c1ep-101, -0x1.434546p-62};
  vector float sum5 = {0x1.414344p-126, -0x1.494b4cp-56, -0x1.515354p-48, -0x1.595b5cp-40};
  /* float = vec_sld(float, float, char) */
  CHECK3f(vec_sld, f1, f2, 1, rf, sum1);
  CHECK3f(vec_sld, f1, f2, 2, rf, sum2);
  CHECK3f(vec_sld, f1, f2, 3, rf, sum3);
  CHECK3f(vec_sld, f1, f2, 4, rf, sum4);
  CHECK3f(vec_sld, f1, f2, 15, rf, sum5);
  printf("vec_sld done.\n");
}


/* -------------------------------------------------------------------------- */

void test_vec_lvsl(void)
{
  unsigned int u1[] = {1,2,3,4};
  vector unsigned char ruc;
  vector unsigned char sum1 = {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};

  vector unsigned char sum2 = {9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
  vector unsigned char sum3 = {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27};
  vector unsigned char sum4 = {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
  vector unsigned char sum5 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
  vector unsigned char sum6 = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22};
  vector unsigned char sum7 = {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
  vector unsigned char sum8 = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22};
  vector unsigned char sum9 = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
  vector unsigned char sum10 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  vector unsigned char sum11 = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};

  /* unsigned char = vec_lvsl(int, unsigned char) */
  CHECK2b(vec_lvsl, 0, u1, ruc, sum1);
  CHECK2b(vec_lvsl, 1, u1, ruc, sum2);
  CHECK2b(vec_lvsl, 4, u1, ruc, sum3);
  CHECK2b(vec_lvsl, 7, u1, ruc, sum4);
  CHECK2b(vec_lvsl, 8, u1, ruc, sum5);
  CHECK2b(vec_lvsl, 15, u1, ruc, sum6);
  printf("vec_lvsl done.\n");

  /* unsigned char = vec_lvsr(int, unsigned char) */
  CHECK2b(vec_lvsr, 0, u1, ruc, sum7);
  CHECK2b(vec_lvsr, 1, u1, ruc, sum8);
  CHECK2b(vec_lvsr, 4, u1, ruc, sum9);
  CHECK2b(vec_lvsr, 7, u1, ruc, sum10);
  CHECK2b(vec_lvsr, 8, u1, ruc, sum11);
  printf("vec_lvsr done.\n");
}


/* -------------------------------------------------------------------------- */

void test_vec_perm(void)
{
  vector unsigned char vpat0 = 
    { 0x01, 0x14, 0x18, 0x10,
      0x06, 0x15, 0x19, 0x1A,
      0x1C, 0x1C, 0x1C, 0x13,
      0x08, 0x1D, 0x1B, 0x0E};

  vector unsigned char vpat1 = {2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};

  vector unsigned char vA = 
    { 0xA0, 0xA1, 0xA2, 0xA3, 
      0xA4, 0xA5, 0xA6, 0xA7, 
      0xA8, 0xA9, 0xAA, 0xAB,
      0xAC, 0xAD, 0xAE, 0xAF};

  vector unsigned char vB = 
    { 0xB0, 0xB1, 0xB2, 0xB3, 
      0xB4, 0xB5, 0xB6, 0xB7, 
      0xB8, 0xB9, 0xBA, 0xBB,
      0xBC, 0xBD, 0xBE, 0xBF};

  vector unsigned char rc;
  vector unsigned char sum1 = {161, 180, 184, 176, 166, 181, 185, 186, 188, 188, 188, 179, 168, 189, 187, 174};
  vector unsigned char sum2 = {162, 163, 166, 167, 170, 171, 174, 175, 178, 179, 182, 183, 186, 187, 190, 191};

  /* float = vec_perm(float, float, unsigned char) */
  /* signed int = vec_perm(signed int, signed int, unsigned char) */
  /* unsigned int = vec_perm(unsigned int, unsigned int, unsigned char) */
  /* signed short = vec_perm(signed short, signed short, unsigned char) */
  /* unsigned short = vec_perm(unsigned short, unsigned short, unsigned char) */
  /* signed char = vec_perm(signed char, signed char, unsigned char) */
  /* unsigned char = vec_perm(unsigned char, unsigned char, unsigned char) */
  CHECK3(vec_perm, vA, vB, vpat0, rc, sum1);
  CHECK3(vec_perm, vA, vB, vpat1, rc, sum2);
  printf("vec_perm done.\n");
}


/* -------------------------------------------------------------------------- */

/* Run every test group in this file, in the order the groups are
   defined.  Each group reports mismatches through the CHECK macros and
   prints a "... done." line per operation.  Always returns 0; what
   happens after a failed check is controlled by the DONE macro. */
int main(void)
{
  test_vec_round();
  test_vec_addc();   /* vec_addc / vec_subc carry checks */
  test_vec_adds();
  test_vec_madds();
  test_vec_msums();
  test_vec_sums();
  test_vec_mladd();
  test_vec_packs();
  test_vec_sld();
  test_vec_lvsl();
  test_vec_perm();
  return 0;
}
