#include <iostream>
#include <string>
#include <vector>
#include <functional>
#include <chrono>
#include <smmintrin.h>
#include <unistd.h>
#include <glm.hpp>
#include <gtx/simd_vec4.hpp>
#include <gtx/simd_mat4.hpp>
#include <gtc/type_ptr.hpp>
#include <immintrin.h>
// Short alias for the std::chrono namespace, used by the timing code.
namespace ch = std::chrono;

// Number of loop iterations executed by every benchmark (2^28).
const int Iter = 1 << 28;
/// Benchmark: accumulates the single-precision product `m * v` (GLM mat4 times
/// vec4) over `Iter` iterations and prints the four accumulated components.
void RunBench_GLM()
{
    glm::vec4 v(1.0f);
    // Zero the accumulator explicitly instead of relying on the default
    // constructor; whether GLM's default constructor initializes its members
    // depends on the GLM version/configuration in use.
    glm::vec4 v2(0.0f);
    // Built from the scalar 1.0f (GLM's diagonal-matrix constructor).
    glm::mat4 m(1.0f);
    for (int i = 0; i < Iter; i++)
    {
        v2 += m * v;
    }
    // Printing the sum keeps the loop's result observable.
    auto t = v2;
    std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl;
}
void RunBench_GLM_SIMD()
{
glm::detail::fvec4SIMD v(1.0f);
glm::detail::fvec4SIMD v2(0.0f);
glm::detail::fmat4x4SIMD m(1.0f);
for (int i = 0; i < Iter; i++)
{
v2 += v * m;
}
auto t = glm::vec4_cast(v2);
std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl;
}
/// Benchmark: accumulates the double-precision product `v * m` (GLM dvec4
/// times dmat4) over `Iter` iterations and prints the four accumulated
/// components.
void RunBench_Double_GLM()
{
    glm::dvec4 v(1.0);
    // Zero the accumulator explicitly instead of relying on the default
    // constructor; whether GLM's default constructor initializes its members
    // depends on the GLM version/configuration in use.
    glm::dvec4 v2(0.0);
    // Built from the scalar 1.0 (GLM's diagonal-matrix constructor).
    glm::dmat4 m(1.0);
    for (int i = 0; i < Iter; i++)
    {
        // NOTE(review): this multiplies as v * m, whereas RunBench_GLM uses
        // m * v. The results coincide for this matrix, but the two orders are
        // different operations -- confirm which one is meant to be timed.
        v2 += v * m;
    }
    // Printing the sum keeps the loop's result observable.
    auto t = v2;
    std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl;
}
/// Benchmark: accumulates a double-precision 4x4 matrix times 4-vector product
/// over `Iter` iterations using AVX intrinsics, then prints the four
/// accumulated components.
///
/// The product is formed column-wise: each column m[i] is scaled by the i-th
/// component of v (broadcast to all four lanes) and the scaled columns are
/// summed.
void RunBench_Double_AVX()
{
    const __m256d v = _mm256_set1_pd(1.0);
    __m256d s = _mm256_setzero_pd();
    // Columns of the identity matrix. _mm256_setr_pd takes its arguments in
    // memory order (element 0 first), so column i has its 1 in element i;
    // _mm256_set_pd takes them highest element first and would build the
    // anti-diagonal matrix instead.
    const __m256d m[4] =
    {
        _mm256_setr_pd(1, 0, 0, 0),
        _mm256_setr_pd(0, 1, 0, 0),
        _mm256_setr_pd(0, 0, 1, 0),
        _mm256_setr_pd(0, 0, 0, 1)
    };
    for (int i = 0; i < Iter; i++)
    {
        // Broadcast each component of v across a whole register.
        // _mm256_shuffle_pd cannot do this: it takes a 4-bit immediate and only
        // selects within each 128-bit lane. Duplicate the 128-bit halves first,
        // then duplicate the low/high element inside every lane.
        const __m256d lo = _mm256_permute2f128_pd(v, v, 0x00); // [x y x y]
        const __m256d hi = _mm256_permute2f128_pd(v, v, 0x11); // [z w z w]
        const __m256d v0 = _mm256_unpacklo_pd(lo, lo);         // [x x x x]
        const __m256d v1 = _mm256_unpackhi_pd(lo, lo);         // [y y y y]
        const __m256d v2 = _mm256_unpacklo_pd(hi, hi);         // [z z z z]
        const __m256d v3 = _mm256_unpackhi_pd(hi, hi);         // [w w w w]

        // Scale every column by its matching component ...
        const __m256d m0 = _mm256_mul_pd(m[0], v0);
        const __m256d m1 = _mm256_mul_pd(m[1], v1);
        const __m256d m2 = _mm256_mul_pd(m[2], v2);
        const __m256d m3 = _mm256_mul_pd(m[3], v3);

        // ... and sum the scaled columns pairwise.
        const __m256d a0 = _mm256_add_pd(m0, m1);
        const __m256d a1 = _mm256_add_pd(m2, m3);
        const __m256d a2 = _mm256_add_pd(a0, a1);
        s = _mm256_add_pd(s, a2);
    }
    // _mm256_store_pd requires a 32-byte-aligned destination; a plain
    // double[4] does not guarantee that, so request the alignment explicitly.
    alignas(32) double t[4];
    _mm256_store_pd(t, s);
    std::cout << t[0] << " " << t[1] << " " << t[2] << " " << t[3] << std::endl;
}
int main()
{
std::vector<std::pair<std::string, std::function<void ()>>> benches;
benches.push_back(std::make_pair("GLM", RunBench_GLM));
benches.push_back(std::make_pair("GLM_SIMD", RunBench_GLM_SIMD));
benches.push_back(std::make_pair("Double_GLM", RunBench_Double_GLM));
benches.push_back(std::make_pair("Double_AVX", RunBench_Double_AVX));
auto startInitial = ch::high_resolution_clock::now();
for (int i=0;i<500000;i++){
asm("NOP");
}
auto endInitial = ch::high_resolution_clock::now();
double elapsedInitial = (double)ch::duration_cast<ch::milliseconds>(endInitial - startInitial).count() ;
std::cout << "resolution :" <<elapsedInitial <<std::endl;
for (auto& bench : benches)
{
std::cout << "Begin [ " << bench.first << " ]" << std::endl;
auto start = ch::high_resolution_clock::now();
bench.second();
auto end = ch::high_resolution_clock::now();
double elapsed = (double)ch::duration_cast<ch::milliseconds>(end - start).count() / 1000.0;
std::cout << "End [ " << bench.first << " ] : " << elapsed << " seconds" << std::endl;
}
std::cin.get();
return 0;
}
std::chrono: a high-resolution timer?
15 February 2018