/*
* $Id: timing.c,v 1.8 2003/02/09 15:28:07 rgb Exp $
*
* See copyright in copyright.h and the accompanying file COPYING
*
*/

/*
 *========================================================================
 * timing and utility sources.  tv_start and tv_stop are globals.
 *========================================================================
 */

#include "cpu_rate.h"

#include <stdlib.h>
#include <string.h>

/*
 * The following two routines constitute a NONportable, but very accurate,
 * intel/amd timer for pentia and athlons (tsc flag in /proc/cpuinfo).
 * The init call basically extracts the CPU clock from /proc/cpuinfo
 * and converts it into nanoseconds/cpu clock cycle.  The second reads
 * the cpu cycle counter directly and converts it into nanoseconds.
 *
 * To avoid potential problems with loss of precision on the interval
 * subtraction, this program stores the register contents on the
 * first call and only counts the nanoseconds from the first call, not
 * since the last boot.
 *
 * It is quite fast (order of 40-50 nsec).
 */

/*
 * Extract the CPU clock frequency from the first usable "cpu MHz" line
 * of /proc/cpuinfo and return the length of one clock cycle in
 * nanoseconds (1000.0 / MHz).
 *
 * When NTDEF is not defined nothing is parsed and 0.0 is returned.
 *
 * When NTDEF is defined and /proc/cpuinfo cannot be opened, or holds no
 * "cpu MHz" line with a positive numeric value, a message is written to
 * stderr and the process exits with a failure status.
 */
double init_nanotimer()
{

 double nsec_per_cycle = 0.0;

#ifdef NTDEF
 char statbuf[K];
 char *value,*endptr;
 double mhz;
 FILE *cpuinfo_fd;

 cpuinfo_fd = fopen("/proc/cpuinfo","r");
 if(cpuinfo_fd == NULL){
   fprintf(stderr,"Error: Cannot open /proc/cpuinfo.\n");
   exit(EXIT_FAILURE);
 }

 /* Normal EOF (or a read error) ends the scan */
 while(fgets(statbuf,(int) sizeof(statbuf),cpuinfo_fd) != NULL){

   if(strncmp(statbuf,"cpu MHz",7) != 0) continue;

   /* The value is whatever follows the ':' separator on this line */
   value = strchr(statbuf,':');
   if(value == NULL) continue;
   value++;

   /*
    * Accept the line only if a number was actually converted and it is
    * positive; this keeps a malformed entry from turning into a
    * division by zero below.
    */
   mhz = strtod(value,&endptr);
   if(endptr != value && mhz > 0.0){
     nsec_per_cycle = 1000.0/mhz;
     break;
   }
 }

 fclose(cpuinfo_fd);

 if(nsec_per_cycle == 0.0){
   fprintf(stderr,"Error: Cannot parse out the cpu MHz from /proc/cpuinfo.\n");
   exit(EXIT_FAILURE);
 }

#endif
 return(nsec_per_cycle);

}

static double nsec_per_cycle = 0.0;
static unsigned long ax_first,dx_first;
static unsigned long long count_first;

/*
 * Return the number of nanoseconds elapsed since the first call, as
 * measured by the CPU cycle counter (rdtsc) scaled by the cycle length
 * obtained from init_nanotimer().
 *
 * The first call initializes nsec_per_cycle and records the counter
 * value as the time base; every call (including the first) then reads
 * the counter again and reports the difference from that base.
 *
 * When NTDEF is not defined no cycle-counter code is compiled in and
 * the function returns 0.0.
 */
double nanotimer()
{

#ifdef NTDEF
 unsigned long ax, dx;
 unsigned long long count;

 if(nsec_per_cycle == 0.0) {
   nsec_per_cycle = init_nanotimer();
   /*
    * We subtract off the time base to ensure that times of
    * order seconds (and then some) have resolvable differences
    * in double precision.
    */
   __asm__ __volatile__("rdtsc" : "=a" (ax_first), "=d" (dx_first));
   count_first = ((unsigned long long) dx_first << 32) + ax_first;
 }

 /* rdtsc delivers the low half in the "a" register, the high half in "d" */
 __asm__ __volatile__("rdtsc" : "=a" (ax), "=d" (dx));
 count = ((unsigned long long) dx << 32) + ax;
 count -= count_first;
 return((double) count * nsec_per_cycle);
#else
 /* Always return a defined value rather than falling off the end */
 return(0.0);
#endif
}

/*
 * This is a portable nanosecond timer.  It uses gettimeofday (wall clock
 * time) with the time of the first call subtracted off to keep intervals
 * from horribly overflowing the double with irrelevant numbers (causing
 * a loss of precision).  Note that my direct measurements show that
 * gettimeofday() itself takes about 2 usec to complete.
 */

static struct timeval tv_first;

double gettimeofday_nanotimer()
{

 struct timeval now;
 double elapsed;

 /*
  * tv_first still being all zero means no time base has been recorded
  * yet, so take it now.  Every result is reported relative to this
  * base, which keeps the large absolute seconds count out of the
  * double.
  */
 if(tv_first.tv_sec == 0 && tv_first.tv_usec == 0){
    gettimeofday(&tv_first, NULL);
 }

 gettimeofday(&now, NULL);

 /* whole seconds first, then the microsecond remainder (in seconds) */
 elapsed = (double)(now.tv_sec - tv_first.tv_sec);
 elapsed += 1.0e-6*(double)(now.tv_usec - tv_first.tv_usec);

 /* seconds -> nanoseconds */
 elapsed *= 1.e+9;
 return(elapsed);

}

/*
 * This from Patrick Reynolds on dulug.  It uses the cycle timing
 * counter on all Pentia and Athlon CPUs.  It is adapted above into
 * nanotimer().
 */
/*
 * Read the x86 time stamp counter and return it as one 64-bit value.
 * rdtsc delivers the low 32 bits in the "a" register and the high 32
 * bits in the "d" register; the two halves are combined here.
 *
 * __asm__/__volatile__ are used instead of the bare asm keyword so the
 * function also compiles in strict ISO modes (-std=c99, -std=c11).
 */
unsigned long long gettsc(void)
{
 unsigned int lo, hi;
 __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
 return ((unsigned long long)hi << 32) | lo;
}

#ifdef NTDEF
/*
 * Read the x86 time stamp counter and return it as one 64-bit value.
 *
 * The earlier form issued a bare "rdtsc" with no output operands and no
 * return statement, relying on the result happening to sit in the
 * return registers; that broke when inlined.  The halves are now
 * captured through explicit output constraints and returned properly,
 * so the function is safe to inline.
 */
unsigned long long fast_gettsc(void) {
 unsigned int lo, hi;
 __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
 return ((unsigned long long)hi << 32) | lo;
}

#endif
