/*
 * $Id: stream_copy.c,v 1.13 2005/04/27 21:25:01 rgb Exp $
 * See copyright in copyright.h and the accompanying file COPYING
 */

#include "benchmaster.h"

#include <stdio.h>
#include <stdlib.h>

 /*
  *==================================================================
  * This is the "copy" test from the stream suite.  It is not
  * directly comparable to stream results for a variety of reasons.
  * For one, it uses malloc to allocate all vectors and consequently
  * adds one more degree of indirection to address resolution.  It
  * returns results that are SLIGHTLY LOWER/SLOWER than regular
  * stream (but which may be more realistic for general purpose code).
  *
  * It also uses a different timing harness, one that is both
  * more accurate (uses a superior timer) and which repeats the
  * computation many times to obtain both a mean and a standard
  * deviation on the test results.
  *==================================================================
  */


void stream_copy_init(Test *mytest){

 /*
  * Register the "stream copy" test in the caller-supplied Test record:
  * fill in its name and description, hook up the four stream_copy_*
  * callbacks, and set its flags.
  */

 /* Identification strings; each write is bounded by K. */
 snprintf(mytest->name,K,"stream copy");
 snprintf(mytest->about,K,"d[i] = a[i] (standard is -s 1000000 -i 1 -n 10)");

 /* Callbacks implemented in this file. */
 mytest->alloc = stream_copy_alloc;
 mytest->free = stream_copy_free;
 mytest->test = stream_copy_test;
 mytest->results = stream_copy_results;

 /* Flags: vector is set, ranok is cleared. */
 mytest->vector = 1;
 mytest->ranok = 0;

 /* Optional trace, emitted only at the VERBOSE or V_INIT levels. */
 if(verbose == VERBOSE || verbose == V_INIT){
   printf("# Init for test %s\n",mytest->name);
 }

}

void stream_copy_alloc()
{

 int i;
 size_t nbytes;

 /*
  * Allocate vector(s) to be tested with and initialize it and all
  * associated test-specific variables.  Note that in order to be
  * JUST like stream, we have to allocate one big block and then
  * split it with pointers to assure contiguity.  Note also that
  * stream ignores stride, so stride should be set back to 1 (ignoring
  * whatever was passed).  This is because using i+=stride instead
  * of i++ in the loop increases the runtime by OVER TEN PERCENT!
  *
  * On allocation failure this reports the problem on stderr and
  * terminates the program; there is no way to return an error from
  * here and continuing would dereference a null pointer below.
  */

 if(verbose == VERBOSE || verbose == V_INIT){
   printf("# stream_copy_alloc() entry\n");
 }

 /*
  * Widen size to size_t BEFORE multiplying, so the byte count for the
  * two vectors is not first computed in a narrower integer type.
  */
 nbytes = 2 * (size_t) size * sizeof(double);

 a = (double *) malloc(nbytes);

 /*
  * A zero-byte request may legitimately return NULL; only treat NULL
  * as a failure when we actually asked for memory.
  */
 if(a == NULL && nbytes != 0){
   fprintf(stderr,"# stream_copy_alloc(): cannot allocate %lu bytes\n",
           (unsigned long) nbytes);
   exit(EXIT_FAILURE);
 }

 /* d is the second half of the single block obtained above. */
 d = &a[size];
 stride = 1; /* override command line for stream */

 /*
  * Initialize the vectors. xtest is set from the command line, default PI.
  */
 for(i=0;i<size;i++){
   a[i] = xtest;
   d[i] = 0.0;
 }

 if(verbose == VERBOSE || verbose == V_INIT){
   printf("# stream_copy_alloc() exit.\n");
 }

}

void stream_copy_free()
{

 int i;

 /*
  * Free all the memory we just allocated, to be neat and clean and
  * all that.
  */
 free(a);

}

int stream_copy_test(int full_flag)
{

 int k;

 /*
  * Timed kernel.  With full_flag zero this is the "empty" pass: no
  * work is done and the flag is handed straight back.  Otherwise the
  * first size elements of a are copied into d, one element at a time.
  * The return value is always full_flag itself.
  */
 if(!full_flag){
   return(full_flag);
 }

 for(k=0;k<size;k++){
   d[k] = a[k];
 }

 return(full_flag);

}

void stream_copy_results(Test *mytest)
{

 /*
  * Number of copy operations performed by one pass of the core loop.
  * Every timing below is divided by this to get a per-copy time.
  */
 const double copies_per_pass = (double)size/stride;

 /*
  * Rate normalization, chosen to be the SAME as that of stream, which
  * reports "megabytes/seconds": 1.0e-6*2*sizeof(double)*nsize/time (time
  * in seconds).  Each copy moves 2*sizeof(double) bytes; our times are
  * in nanoseconds, hence the factor of 1000.0 instead of 1.0e-6.
  */
 const double rate_scale = 1000.0*2*sizeof(double);

 /*
  * Per-copy times: subtract the average "empty" time from the average,
  * minimum and maximum timings, then normalize.  min_time and max_time
  * are overwritten in place with their normalized values.
  */
 mytest->avg_time = fabs(mytest->avg_time_full - mytest->avg_time_empty)/copies_per_pass;
 mytest->min_time = fabs(mytest->min_time - mytest->avg_time_empty)/copies_per_pass;
 mytest->max_time = fabs(mytest->max_time - mytest->avg_time_empty)/copies_per_pass;

 /* The reported sigma is the plain sum of the full and empty sigmas. */
 mytest->sigma = (mytest->sigma_time_full + mytest->sigma_time_empty)/copies_per_pass;

 /*
  * Rates are inversely related to times, so the fastest (minimum) time
  * gives the maximum rate and vice versa.
  */
 mytest->avg_megarate = rate_scale/mytest->avg_time;
 mytest->max_megarate = rate_scale/mytest->min_time;
 mytest->min_megarate = rate_scale/mytest->max_time;

 show_results(mytest);

}

