Category: Timer

Evaluation of std::chrono

If you have a look around in the web, a solution to correctly measure time is to use a new C++ package: std::chrono , which is part of the standard C++ library.
So the aim of this article is to investigate if this solution can be used to have a very high resolution timer. If you remember well as we are doing small improvement we want to be able to measure the improvement (or degradation). of optimization.

First step is to

#include <chrono>
#include <ratio>
#include <climits>
#include <algorithm>    // std::max
int main()
{
  long long  value = 0;
  double max = LONG_MIN ;
  double min = LONG_MAX;
  for (int i=  1;i<100;i++){

    auto startInitial = std::chrono::high_resolution_clock::now();              
    auto endInitial = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double, std::nano > elapsedInitial = (endInitial - startInitial) ;
    max = std::max(max,elapsedInitial.count());
    min = std::min(min,elapsedInitial.count());
    value=value+elapsedInitial.count();
  }
  std::cout <<"Sum for 100 loop"<<value<<" " <<value/100<<"ns"std::endl;
  std::cout<<" Max:" <<max <<"ns Min:"<<min<<"ns"<<std::endl;
}
fflayol@:/tmp$ g++ test1.c  -std=c++11;./a.out 
Sum for 100 loop: 2235
Mean: 22ns
Max : 53ns
Min : 21ns

This example shows that the call last at means 20ns which is quite too long for our purpose.
Indeed if we are trying to be more accurate:

#include <iostream>
#include <chrono>
#include <ratio>
#include <climits>
#include <algorithm>    // std::max
int main()
{
  {  
    long long  value = 0;
    double max = LONG_MIN ;
    double min = LONG_MAX;
    for (int i=  1;i<100;i++){
  
      auto startInitial = std::chrono::high_resolution_clock::now();              
      auto endInitial = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double, std::nano > elapsedInitial = (endInitial - startInitial) ;
      max = std::max(max,elapsedInitial.count());
      min = std::min(min,elapsedInitial.count());
      value=value+elapsedInitial.count();
    }
    std::cout <<"Sum for 100 loop"<<value<<" " <<value/100<<"ns"<<std::endl;
    std::cout<<" Max:" <<max <<"ns Min:"<<min<<"ns"<<std::endl;
  }
  std::cout <<"Second function"<<std::endl;
  { 
    long long  value = 0;
    double max = LONG_MIN ;
    double min = LONG_MAX;
    for (int i=  1;i<100;i++){
 
      auto startInitial = std::chrono::high_resolution_clock::now();

      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      asm("nop");
      auto endInitial = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double, std::nano > elapsedInitial = (endInitial - startInitial) ;
      max = std::max(max,elapsedInitial.count());
      min = std::min(min,elapsedInitial.count());
      value=value+elapsedInitial.count();
    }
    std::cout <<"Sum for 100 loop"<<value<<" " <<value/100<<"ns"<<std::endl;
    std::cout<<" Max:" <<max <<"ns Min:"<<min<<"ns"<<std::endl;
  }
}

Std Chrono, a high resolution timer ?

#include <iostream>
#include <string>
#include <vector>
#include <functional>
#include <chrono>
#include <smmintrin.h>
#include <unistd.h>
#include <glm.hpp>
#include <gtx/simd_vec4.hpp>
#include <gtx/simd_mat4.hpp>
#include <gtc/type_ptr.hpp>
#include <immintrin.h>


namespace ch = std::chrono;

const int Iter = 1<<28;

void RunBench_GLM()
{
	glm::vec4 v(1.0f);
	glm::vec4 v2;
	glm::mat4 m(1.0f);
	
	for (int i = 0; i < Iter; i++)
	{
		v2 += m * v;
	}

	auto t = v2;
	std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl;
}


void RunBench_GLM_SIMD()
{
	glm::detail::fvec4SIMD v(1.0f);
	glm::detail::fvec4SIMD v2(0.0f);
	glm::detail::fmat4x4SIMD m(1.0f);

	for (int i = 0; i < Iter; i++)
	{
		v2 += v * m;
	}

	auto t = glm::vec4_cast(v2);
	std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl;
}



void RunBench_Double_GLM()
{
	glm::dvec4 v(1.0);
	glm::dvec4 v2;
	glm::dmat4 m(1.0);

	for (int i = 0; i < Iter; i++)
	{
		v2 += v * m;
	}

	auto t = v2;
	std::cout << t.x << " " << t.y << " " << t.z << " " << t.w << std::endl;
}

void RunBench_Double_AVX()
{
	__m256d v = _mm256_set_pd(1, 1, 1, 1);
	__m256d s = _mm256_setzero_pd();
	__m256d m[4] =
	{
		_mm256_set_pd(1, 0, 0, 0),
		_mm256_set_pd(0, 1, 0, 0),
		_mm256_set_pd(0, 0, 1, 0),
		_mm256_set_pd(0, 0, 0, 1)
	};

	for (int i = 0; i < Iter; i++)
	{
		__m256d v0 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(0, 0, 0, 0));
		__m256d v1 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(1, 1, 1, 1));
		__m256d v2 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(2, 2, 2, 2));
		__m256d v3 = _mm256_shuffle_pd(v, v, _MM_SHUFFLE(3, 3, 3, 3));

		__m256d m0 = _mm256_mul_pd(m[0], v0);
		__m256d m1 = _mm256_mul_pd(m[1], v1);
		__m256d m2 = _mm256_mul_pd(m[2], v2);
		__m256d m3 = _mm256_mul_pd(m[3], v3);

		__m256d a0 = _mm256_add_pd(m0, m1);
		__m256d a1 = _mm256_add_pd(m2, m3);
		__m256d a2 = _mm256_add_pd(a0, a1);

		s = _mm256_add_pd(s, a2);
	}

	double t[4];
	_mm256_store_pd(t, s);
	std::cout << t[0] << " " << t[1] << " " << t[2] << " " << t[3] << std::endl;
}

int main()
{
	std::vector<std::pair<std::string, std::function<void ()>>> benches;
	benches.push_back(std::make_pair("GLM", RunBench_GLM));
	benches.push_back(std::make_pair("GLM_SIMD", RunBench_GLM_SIMD));
	benches.push_back(std::make_pair("Double_GLM", RunBench_Double_GLM));
	benches.push_back(std::make_pair("Double_AVX", RunBench_Double_AVX));
	auto startInitial = ch::high_resolution_clock::now();
        
        for (int i=0;i<500000;i++){
          asm("NOP");
        }
        
        auto endInitial = ch::high_resolution_clock::now();

	double elapsedInitial = (double)ch::duration_cast<ch::milliseconds>(endInitial - startInitial).count() ;
	std::cout << "resolution :" <<elapsedInitial <<std::endl;
	for (auto& bench : benches)
	{
		std::cout << "Begin [ " << bench.first << " ]" << std::endl;

		auto start = ch::high_resolution_clock::now();
		bench.second();
		auto end = ch::high_resolution_clock::now();	

		double elapsed = (double)ch::duration_cast<ch::milliseconds>(end - start).count() / 1000.0;
		std::cout << "End [ " << bench.first << " ] : " << elapsed << " seconds" << std::endl;
	}
	
	std::cin.get();
	return 0;
}