#include <immintrin.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
/* Print the four int32 lanes at v as "<label>: a\tb\tc\td\t\n". */
static void print_lanes32(const char *label, const int32_t *v)
{
    printf("%s: %" PRId32 "\t%" PRId32 "\t%" PRId32 "\t%" PRId32 "\t\n",
           label, v[0], v[1], v[2], v[3]);
}

/* Print the two signed 64-bit halves of v as "<label>: lo\thi\n". */
static void print_lanes64(const char *label, __m128i v)
{
    int64_t q[2];

    /* Store into a real int64_t array instead of casting &v to int64_t *. */
    _mm_storeu_si128((__m128i *)q, v);
    printf("%s: %" PRId64 "\t%" PRId64 "\n", label, q[0], q[1]);
}

/*
 * Demonstrates _mm_mul_epi32 and _mm_shuffle_epi32 on two vectors of four
 * int32 lanes.
 *
 * Y: pointer to at least four readable int32_t values (not modified).
 * X: pointer to at least four int32_t values; overwritten in place with its
 *    lanes swapped pairwise, i.e. { X[1], X[0], X[3], X[2] }.
 *
 * Neither pointer needs to be 16-byte aligned: unaligned loads/stores are
 * used rather than dereferencing a casted __m128i pointer.
 *
 * Output, in order: both input vectors, the 64-bit products of lanes 0 and 2,
 * the 64-bit products of lanes 1 and 3, then X before and after the shuffle.
 *
 * _mm_mul_epi32 is an SSE4.1 instruction; the target attribute lets this
 * function build even when the file is not compiled with -msse4.1 (the CPU
 * must still support SSE4.1 at run time).
 */
__attribute__((target("sse4.1")))
void test(int32_t *Y, int32_t *X)
{
    /* The shuffle control must be a compile-time constant, hence an enum. */
    enum { SWAP_PAIRS = _MM_SHUFFLE(2, 3, 0, 1) };

    __m128i x = _mm_loadu_si128((const __m128i *)X);
    __m128i y = _mm_loadu_si128((const __m128i *)Y);

    print_lanes32("In test, V1 before MUL SHUFFLE", X);
    print_lanes32("In test, V2 before MUL SHUFFLE", Y);

    /* _mm_mul_epi32 only multiplies lanes 0 and 2 (the low half of each
     * 64-bit element), giving two signed 64-bit products. */
    __m128i prod_even = _mm_mul_epi32(x, y);

    /* Swapping lanes pairwise moves lanes 1 and 3 into positions 0 and 2,
     * so the same multiply now yields the odd-lane products. */
    __m128i prod_odd = _mm_mul_epi32(_mm_shuffle_epi32(x, SWAP_PAIRS),
                                     _mm_shuffle_epi32(y, SWAP_PAIRS));

    print_lanes64("In REDC, product before SHUFFLE", prod_even);
    print_lanes64("In REDC, product after SHUFFLE", prod_odd);

    print_lanes32("In REDC, 4-way vect before SHUFFLE", X);
    _mm_storeu_si128((__m128i *)X, _mm_shuffle_epi32(x, SWAP_PAIRS));
    print_lanes32("In REDC, 4-way vect after SHUFFLE", X);
}
/*
 * Driver: runs test() on two sample vectors of four int32 lanes.
 *
 * test() reads four lanes from each argument and writes four lanes back to
 * its second argument, so each argument must be a full 4-element buffer;
 * passing the address of a single int32_t would read and write past it.
 * The buffers are 16-byte aligned so they are safe for aligned SSE access.
 * Lane 0 holds the original sample values (1234 and 5678); the remaining
 * lanes are arbitrary sample data.
 */
int main (int nb, char** argv){
    int32_t a[4] __attribute__((aligned (16))) = { 1234, 2345, 3456, 4567 };
    int32_t b[4] __attribute__((aligned (16))) = { 5678, 6789, 7890, 8901 };

    /* Command-line arguments are not used. */
    (void)nb;
    (void)argv;

    test(a, b);
    return 0;
}
/* Shuffle in SSE */
/* 15 February 2018 */