#include <iostream> #include <string> #include <vector> #include <functional> #include <chrono> #include <smmintrin.h> #include <unistd.h> #include <glm.hpp> #include <gtx/simd_vec4.hpp> #include <gtx/simd_mat4.hpp> #include <gtc/type_ptr.hpp> #include <immintrin.h> namespace ch = std::chrono; const int Iter = 1<<28; void RunBench_GLM() { glm::vec4 v(1.0f); glm::vec4 v2; glm::mat4 m(1.0f); for (int i = 0; i < Iter; i++) { v2 += m * v; } auto t = v2; std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl; } void RunBench_GLM_SIMD() { glm::detail::fvec4SIMD v(1.0f); glm::detail::fvec4SIMD v2(0.0f); glm::detail::fmat4x4SIMD m(1.0f); for (int i = 0; i < Iter; i++) { v2 += v * m; } auto t = glm::vec4_cast(v2); std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl; } void RunBench_Double_GLM() { glm::dvec4 v(1.0); glm::dvec4 v2; glm::dmat4 m(1.0); for (int i = 0; i < Iter; i++) { v2 += v * m; } auto t = v2; std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl; } void RunBench_Double_AVX() { __m256d v = _mm256_set_pd(1, 1, 1, 1); __m256d s = _mm256_setzero_pd(); __m256d m[4] = { _mm256_set_pd(1, 0, 0, 0), _mm256_set_pd(0, 1, 0, 0), _mm256_set_pd(0, 0, 1, 0), _mm256_set_pd(0, 0, 0, 1) }; for (int i = 0; i < Iter; i++) { __m256d v0 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(0, 0, 0, 0)); __m256d v1 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(1, 1, 1, 1)); __m256d v2 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(2, 2, 2, 2)); __m256d v3 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(3, 3, 3, 3)); __m256d m0 = _mm256_mul_pd(m[0], v0); __m256d m1 = _mm256_mul_pd(m[1], v1); __m256d m2 = _mm256_mul_pd(m[2], v2); __m256d m3 = _mm256_mul_pd(m[3], v3); __m256d a0 = _mm256_add_pd(m0, m1); __m256d a1 = _mm256_add_pd(m2, m3); __m256d a2 = _mm256_add_pd(a0, a1); s = _mm256_add_pd(s, a2); } double t[4]; _mm256_store_pd(t, s); std::cout << t[0] << " " << t[1] << " " << t[2] << " " << t[3] << std::endl; } int main() { std::vector<std::pair<std::string, 
std::function<void ()>>> benches; benches.push_back(std::make_pair("GLM", RunBench_GLM)); benches.push_back(std::make_pair("GLM_SIMD", RunBench_GLM_SIMD)); benches.push_back(std::make_pair("Double_GLM", RunBench_Double_GLM)); benches.push_back(std::make_pair("Double_AVX", RunBench_Double_AVX)); auto startInitial = ch::high_resolution_clock::now(); for (int i=0;i<500000;i++){ asm("NOP"); } auto endInitial = ch::high_resolution_clock::now(); double elapsedInitial = (double)ch::duration_cast<ch::milliseconds>(endInitial - startInitial).count() ; std::cout << "resolution :" <<elapsedInitial <<std::endl; for (auto& bench : benches) { std::cout << "Begin [ " << bench.first << " ]" << std::endl; auto start = ch::high_resolution_clock::now(); bench.second(); auto end = ch::high_resolution_clock::now(); double elapsed = (double)ch::duration_cast<ch::milliseconds>(end - start).count() / 1000.0; std::cout << "End [ " << bench.first << " ] : " << elapsed << " seconds" << std::endl; } std::cin.get(); return 0; }
std::chrono: a high-resolution timer?
15 February 2018