#include <immintrin.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Shuffle control that swaps the two 32-bit lanes inside each 64-bit half:
 * (a0, a1, a2, a3) -> (a1, a0, a3, a2).
 */
#define SWAP_ADJACENT_LANES _MM_SHUFFLE(2, 3, 0, 1)

/* Print four signed 32-bit lanes, tab-separated, prefixed by `label`. */
static void print_lanes32(const char *label, const int32_t lanes[4])
{
    printf("%s: %" PRId32 "\t%" PRId32 "\t%" PRId32 "\t%" PRId32 "\t\n",
           label, lanes[0], lanes[1], lanes[2], lanes[3]);
}

/* Print two signed 64-bit lanes, tab-separated, prefixed by `label`. */
static void print_lanes64(const char *label, const int64_t lanes[2])
{
    printf("%s: %" PRId64 "\t%" PRId64 "\n", label, lanes[0], lanes[1]);
}

/*
 * Demonstrates _mm_mul_epi32 combined with _mm_shuffle_epi32.
 *
 * Y, X: each must point to at least four readable int32_t values; X must
 *       also be writable. No particular alignment is required because all
 *       vector accesses use the unaligned load/store intrinsics.
 *
 * Prints both inputs, the 64-bit products of lanes 0 and 2, the 64-bit
 * products of lanes 1 and 3 (obtained by swapping adjacent lanes first),
 * and X before and after it is overwritten with its lane-swapped form.
 *
 * Side effect: on return X holds (X[1], X[0], X[3], X[2]) of its original
 * contents. Y is not modified.
 *
 * _mm_mul_epi32 is an SSE4.1 instruction; the target attribute lets this
 * function compile without -msse4.1, but the CPU must support SSE4.1 at
 * run time.
 */
__attribute__((target("sse4.1")))
void test(int32_t *Y, int32_t *X)
{
    const __m128i x = _mm_loadu_si128((const __m128i *)X);
    const __m128i y = _mm_loadu_si128((const __m128i *)Y);

    print_lanes32("In test, V1 before MUL SHUFFLE", X);
    print_lanes32("In test, V2 before MUL SHUFFLE", Y);

    const __m128i x_swapped = _mm_shuffle_epi32(x, SWAP_ADJACENT_LANES);
    const __m128i y_swapped = _mm_shuffle_epi32(y, SWAP_ADJACENT_LANES);

    /*
     * _mm_mul_epi32 multiplies only lanes 0 and 2 of its operands, giving
     * two signed 64-bit products. Swapping adjacent lanes beforehand moves
     * lanes 1 and 3 into those positions.
     */
    int64_t even_products[2];
    int64_t odd_products[2];
    _mm_storeu_si128((__m128i *)even_products, _mm_mul_epi32(x, y));
    _mm_storeu_si128((__m128i *)odd_products,
                     _mm_mul_epi32(x_swapped, y_swapped));

    print_lanes64("In REDC, product before SHUFFLE", even_products);
    print_lanes64("In REDC, product after SHUFFLE", odd_products);

    print_lanes32("In REDC, 4-way vect before SHUFFLE", X);
    _mm_storeu_si128((__m128i *)X, x_swapped);
    print_lanes32("In REDC, 4-way vect after SHUFFLE", X);
}

int main(void)
{
    /* test() reads (and, for its second argument, writes) four lanes. */
    int32_t a[4] = {1234, 2345, 3456, 4567};
    int32_t b[4] = {5678, 6789, 7890, 8901};

    test(a, b);
    return 0;
}
Shuffle in SSE
15 February 2018