/*
 * $Id: stream_add.c,v 1.12 2005/03/10 15:45:10 rgb Exp $
 * See copyright in copyright.h and the accompanying file COPYING
 */

#include "benchmaster.h"

 /*
  *==================================================================
  * This is the "add" test from the stream suite.  It is not
  * directly comparable to stream results for a variety of reasons.
  * For one, it uses malloc to allocate all vectors and consequently
  * adds one more degree of indirection to address resolution.  It
  * returns results that are SLIGHTLY LOWER/SLOWER than regular
  * stream (but which may be more realistic for general purpose code).
  *
  * It also uses a different timing harness, one that is both
  * more accurate (uses a superior timer) and which repeats the
  * computation many times to obtain both a mean and a standard
  * deviation on the test results.
  *==================================================================
  */

/*
 * Fill in *mytest so that it describes the stream "add" test.
 *
 * Sets vector to 1 and ranok to 0, installs the alloc/free/test/results
 * callbacks defined in this file, and writes the test name and its
 * one-line description (each bounded by K) into the Test structure.
 * When verbose is VERBOSE or V_INIT a short trace line is printed.
 */
void stream_add_init(Test *mytest){

 mytest->vector = 1;
 mytest->ranok = 0;
 mytest->alloc = stream_add_alloc;
 mytest->free = stream_add_free;
 mytest->test = stream_add_test;
 mytest->results = stream_add_results;
 snprintf(mytest->name,K,"stream add");
 snprintf(mytest->about,K,"d[i] = a[i] + b[i]  (standard is -s 1000000 -i 1 -n 10)");

 if(verbose == VERBOSE || verbose == V_INIT){
   printf("# Init for test %s\n",mytest->name);
 }

}

/*
 * Allocate and initialize the vectors used by the stream "add" test.
 *
 * A single malloc'd buffer of 3*size doubles is carved into three
 * size-element segments: a is the first, b the second and d the third,
 * so releasing a releases all three.  stride is forced to 1, a[] and b[]
 * are filled with xtest and d[] with 0.0.
 *
 * If the buffer cannot be allocated (or its byte count would not fit in
 * a size_t) a message is written to stderr and the program exits with
 * EXIT_FAILURE, rather than writing through a NULL pointer below.
 */
void stream_add_alloc()
{

 int i;
 size_t nelem;

 /*
  * Widen size BEFORE multiplying: in the expression 3*size*sizeof(double)
  * the product 3*size is evaluated in the type of size, which can
  * overflow if that type is narrower than size_t.
  */
 nelem = (size_t) size;

 /*
  * Only call malloc if 3*nelem*sizeof(double) is representable; a
  * wrapped-around byte count would yield a buffer that is too small.
  */
 a = NULL;
 if(nelem <= ((size_t) -1)/(3*sizeof(double))){
   a = (double *) malloc(3*nelem*sizeof(double));
 }

 /* malloc(0) may legitimately return NULL, so only nelem != 0 is fatal */
 if(a == NULL && nelem != 0){
   fprintf(stderr,"# stream add: cannot allocate 3 vectors of %lu doubles\n",
           (unsigned long) nelem);
   exit(EXIT_FAILURE);
 }

 b = &a[size];
 d = &a[2*size];
 stride = 1; /* override command line for stream */

 /* xtest is set from the command line, default PI */
 /* Initialize the vectors */
 for(i=0;i<size;i++){
   a[i] = xtest;
   b[i] = xtest;
   d[i] = 0.0;
 }

}

void stream_add_free()
{

 int i;

 /*
  * Free all the memory we just allocated, to be neat and clean and
  * all that.
  */
 free(a);

}

/*
 * Core of the stream "add" test.
 *
 * full_flag != 0: run the timed loop d[i] = a[i] + b[i] for
 *                 i = 0, stride, 2*stride, ... while i < size.
 * full_flag == 0: do nothing, giving the "empty" pass whose time is
 *                 subtracted in stream_add_results().
 *
 * Returns full_flag in both cases.  The original fell off the end of
 * this non-void function on the full_flag != 0 path, which is undefined
 * behavior as soon as a caller uses the return value.
 */
int stream_add_test(int full_flag)
{

 int i;

 if(full_flag){
   for(i=0;i<size;i+=stride){
     d[i] = a[i] + b[i];
   }
 }

 return(full_flag);

}

/*
 * Convert the raw full/empty timings stored in *mytest into per-iteration
 * times and transfer rates for the stream "add" test, then hand the
 * structure to show_results().
 */
void stream_add_results(Test *mytest)
{

 /*
  * Number of d[i] = a[i] + b[i] operations done by one pass of the core
  * loop (i goes from 0 to size in steps of stride).  Every time below is
  * divided by this so that it becomes a time per add operation.
  */
 double ops_per_pass = (double)size/stride;

 /*
  * Each add operation touches three doubles (a[i], b[i] and d[i]), so
  * the rate is normalized with 3*sizeof(double) bytes per operation.
  * Times are measured in nanoseconds, hence the factor of 1000.0 to
  * express bytes per nanosecond as a "mega" rate.
  */
 double rate_scale = 1000.0*3*sizeof(double);

 /* Average: full pass minus empty pass, per operation */
 mytest->avg_time = fabs(mytest->avg_time_full - mytest->avg_time_empty)/ops_per_pass;
 mytest->avg_megarate = rate_scale/mytest->avg_time;

 /* Spread: the full and empty deviations are simply added */
 mytest->sigma = (mytest->sigma_time_full + mytest->sigma_time_empty)/ops_per_pass;

 /* Shortest time corresponds to the highest rate ... */
 mytest->min_time = fabs(mytest->min_time - mytest->avg_time_empty)/ops_per_pass;
 mytest->max_megarate = rate_scale/mytest->min_time;

 /* ... and the longest time to the lowest rate */
 mytest->max_time = fabs(mytest->max_time - mytest->avg_time_empty)/ops_per_pass;
 mytest->min_megarate = rate_scale/mytest->max_time;

 show_results(mytest);

}

