#!/usr/bin/perl -w
# plot-cpu_rate
#
# Copyright (c) 2000 Robert G. Brown <rgb@phy.duke.edu>
# Licensed according to GPL v2b (see file "Copying" in this distribution).
#
# $Id: log-plot-cpu-rate,v 1.4 2002/11/21 21:04:03 rgb Exp $
#

 use Getopt::Std;	# Single character options are plenty.

#========================================================================
# Set defaults for variables
# We'll assume that 100 runs are enough for decent statistics in general
#========================================================================
 # Name of this machine as reported by the hostname command.  It is used
 # to build the names of the results files and of the host config file.
 $hostname = `hostname`;
 # The backticks yield undef if the command could not be run at all.
 $hostname = "" unless defined $hostname;
 # chomp removes only a trailing newline; chop would eat the last
 # character of the name itself if the newline were ever missing.
 chomp($hostname);
 $samples = 10;		# Number of passes over the whole range of sizes
 $begin_size = 1;	# Remember that double precision is 2^3 = 8 bytes
 $log2_max = 23;	# 2^{23+3} is 32 MB, corrected for double and single.
 $floatflag = 0;	# This probably doesn't do anything.  Love the "probably".
 $verbose = 0;		# Nonzero turns on progress messages
 $statsonly = 0;	# Nonzero skips the benchmark runs and only redoes the stats
 $testid = 0;		# Test number handed to cpu_rate with -t
 $outfile = "";		# Path of the results file currently being worked on

#========================================================================
# Now we parse the CL with getopt standard (single character)
#========================================================================
# Option list
 # Letters followed by ':' take a value.  -d and -f are accepted, but
 # nothing in this script reads them.  getopts returns false when it meets
 # an unknown option or an option whose argument is missing, and in that
 # case we stop instead of running a long benchmark with the wrong settings.
 if(!getopts('b:dfhl:n:s:t:v:')) {
   Usage("Unknown option or missing option argument");
   exit(1);
 }

# Assignments
 # A value of 0 (or an empty string) counts as "not given", so the
 # defaults set earlier in the script stay in force.
 if($opt_b) {$begin_size = $opt_b;}
 if($opt_l) {$log2_max = $opt_l;}
 # The assignment to $opt_h only gives the name a second use, so that -w
 # does not report it as a possible typo.
 if($opt_h) {$opt_h=0;Usage(); exit(1);}
 if($opt_n) {$samples = $opt_n;}
 if($opt_s) {$statsonly = 1;$hostname = $opt_s;}
 if($opt_t) {$testid = $opt_t;}
 if($opt_v) {$verbose = $opt_v;}

 # Check the numeric settings before they are used.  The benchmark loop
 # doubles the size until it reaches 2**log2_max, so a begin size that is
 # zero or negative would never get there.  The test number ends up in a
 # file name and on a command line, so it has to be a plain integer.
 if($begin_size !~ /^\d+$/ || $begin_size < 1) {
   Usage("-b needs a positive integer, not '$begin_size'");
   exit(1);
 }
 if($log2_max !~ /^\d+$/) {
   Usage("-l needs a non-negative integer, not '$log2_max'");
   exit(1);
 }
 if($samples !~ /^\d+$/ || $samples < 1) {
   Usage("-n needs a positive integer, not '$samples'");
   exit(1);
 }
 if($testid !~ /^-?\d+$/) {
   Usage("-t needs an integer test number, not '$testid'");
   exit(1);
 }

 if($floatflag) { $log2_max++; }	# floats are only four bytes, need one more.

# If leftovers, punt with error message and Usage()
 if(@ARGV) {
   Usage("Arguments left over -- incorrect number or type of arguments");
   exit(1);
 }

 # $floatflag has already been folded into $log2_max just above; adding it
 # to the exponent again would count it twice.
 $end_size = 2**$log2_max;
 if($verbose) {
   print<<EOF;
log-plot-cpu_rate beginning at $begin_size, ending at $end_size, increment
in powers of two (plus odd neighbor), accumulating $samples samples.
EOF
 }

 if(!$statsonly){
   # Start clean.  It has to occur INSIDE the not-stats-only
   # conditional or the samples will be erased before reaching
   # the stats part.
   $outfile = "results/$hostname"."_$testid";

   # Every command below appends to a file under results/.  Without the
   # directory each redirect fails and the run produces no data at all.
   if(!-d "results"){
     mkdir("results", 0777) || die "$0: can't create directory results: $!";
   }
   foreach $file (<$outfile.*>) {
     unlink($file);
   }

   # Now run the benchmark suite, one at a time, $sample times.
   my $warned = 0;	# So the "cannot run" warning is printed only once
   for(my $n=1;$n<=$samples;$n++){
     $outfile = "results/$hostname"."_$testid".".$n";

     # Make sure the sample file exists even if no size is ever run.
     open(my $created, '>>', $outfile) || die "$0: can't create $outfile: $!";
     close($created);

     # Double the size each pass to build a log curve in powers of 2.
     for(my $size=$begin_size;$size<$end_size;$size*=2){
       # Each size is measured together with its neighbour five elements up.
       foreach my $vector ($size, $size+5){
         my $command = "cpu_rate -t $testid -n 10 -s $vector -q >> $outfile";
         print "executing $command\n";
         my $status = system($command);
         # system returns -1 when nothing could be started; the shell
         # reports exit code 127 when it cannot find the command.  Other
         # exit codes are left alone, as the script has always done.
         if(!$warned && ($status == -1 || ($status >> 8) == 127)){
           print STDERR "Warning:  cpu_rate could not be run.\n";
           print STDERR "  Add directory with cpu_rate binary to path.\n";
           $warned = 1;
         }
       }
     }
   }
 }

# The following is basically lmbench statsummary, except that I'm
# adding stuff to be merged back into it for handling e.g. configuration
# parsing.  The other change is that this will do stats for JUST THIS
# HOST.  The idea is that one runs "plot-cpu_rate" ONCE on a host,
# comes back in an hour, and has "perfect" results.  Eventually we'll do
# the same for "run-lm-bench".
# Read every results file for this host and test, and collect the rates
# in %bogomflops: one array of samples per size, keyed by the size.
$numruns = 0;
$outfile = "results/$hostname"."_$testid";
foreach $file (<$outfile.*>) {
  if($verbose){
    print "Processing $file.\n";
  }
  open(my $results, '<', $file) || die "$0: can't open $file: $!";
  # Strip off the path
  $file =~ s/(.*)\///;
  # Split the filename from the number.  The number follows the LAST dot,
  # so a host name that itself contains dots stays in one piece.
  $filecount = "";
  if($file =~ /^(.*)\.([^.]*)$/){
    ($file,$filecount) = ($1,$2);
  }
  # Debugging...
  # print STDERR "Found file $file with count $filecount\n";
  push(@file, $file);
  push(@filecount, $filecount);

  # This should just push UNIQUE new hosts onto @hosts.  The names are
  # compared as plain strings; used as a pattern, characters such as '.'
  # or '+' in a name would be read as regular expression operators.
  if(!@hosts || $hosts[-1] ne $file){
    push(@hosts, $file);
  }

  # Read in the results file(s) and push parsed data onto the appropriate
  # arrays.  Note that we get the data into a form we can average by
  # cleverly pushing it onto a hash of results arrays.
  while (my $record = <$results>) {
    my @fields = split(' ', $record);
    # The size is taken from the first field and the rate from the third.
    # A blank or short line has no rate, and would only add an undefined
    # sample to the statistics.
    next if @fields < 3;
    push(@{$bogomflops{$fields[0]}}, $fields[2]);
  }
  close($results);
  $numruns++;
}

#
# OK, now all those arrays are packed.  Because everything is keyed
# on raw hostname, we can do all the stats evaluations using a combination
# of @file and the array -- we march through @file and create a stats
# object (a % hash) with its name and do the obvious sums and so forth.
# should be very simple.
#
# However, to be fair to Larry, we do want to preserve the general flavor
# of the summary.  However, the summary is now going to be output BY HOST
# and so we need a separate host-description section for each host.
#
# First we have to evaluate the stats, though.
#

#
# Fine, that seems to work.  Now we break up the summary, BY HOST.
# For each host we print just ONE TIME key values that don't really
# vary (like its architecture information and clock).  Then we print
# out a modified version of Larry's old summary.
#

#
# First the header
#
 print<<EOF;
#========================================================================
# 
#                    CPU-RATE 0.0.3 PLOT DATA
#
#========================================================================
EOF

 #
 # We now need to run parseconfig($host).  This fills a set of global
 # variables with host information that we might need in our report.
 # 
 parseconfig($hostname);

 # Obviously we need a better way to fill in this information.
 # Linux provides /proc/cpuinfo, which is just perfect and trivial
 # to parse.  However, we should probably read this in from e.g.
 # config/$host.conf, which can be created either automagically or
 # by hand.  This file should also be used to control the running
 # of the benchmark suite, which in turn should be done by means of
 # a script call, not a make target.  I'm getting there...
 #
 # OK, given this wealth of detail we now print it into the
 # report/summary.
 #
 # Lines that only interpolate variables go through print, not printf:
 # the values come from a config file, and a '%' in one of them would
 # otherwise be read as a format directive.
 #
 print "# HOSTNAME:\t\t$HOSTNAME\n";
 print "# CPU Family:\t\t$CPUFAMILY\n";
 print "# Motherboard:\t\t$MOTHERBOARD\n";
 print "# OS Kernel:\t\t$SYSTEM $KERNEL\n";
 for(my $i=0;$i<$NUMCPUS;$i++){
   print "# CPU $i Type:\t\t$CPUTYPE[$i]\n";
   print "# CPU $i MHz:\t\t$CPUMHZ[$i]\n";
   print "# CPU $i L1 Cache Size:\t$CPUL1CODE[$i] (code)/$CPUL1DATA[$i] (data)\n";
   print "# CPU $i L2 Cache Size:\t$CPUL2SIZE[$i] \n";
 }
 print "# Memory:\t\t\t$MEMTOTAL of $MEMCLOCK $MEMTYPE\n";
 print "# Disk(s):\n";
 for(my $j=0;$j<@DISK;$j++){
   printf("# \t%d) %s: %s\n",$j+1,$DISK[$j],$DISKTYPE[$j]);
 }
 print "# Network(s):\n";
 for(my $j=0;$j<@NETWORK;$j++){
   printf("# \t%d) %s: %s\n",$j+1,$NETWORK[$j],$NICTYPE[$j]);
   printf("# \t   on a %s switch/hub\n",$NETHUB[$j]);
 }
 print "# System Notes:\n";
 foreach my $note (@NOTES){
   print "# $note\n";
 }
    

#
# PROCESSOR TIMES AND RATES
#
  # Column header for the table of rates.
  print "# ========================================================================\n";
  print "# size(bytes) 10^6 ops/sec Error\n";

  # One row per size, smallest size first.  makestats leaves its result
  # in %stats under the same array reference it was handed.
  my @sizes = sort { $a <=> $b } keys %bogomflops;
  foreach my $size (@sizes){
    my $samples_for_size = $bogomflops{$size};
    makestats($size, $samples_for_size);
    my $mean = $stats{$samples_for_size}{mean};
    my $error = $stats{$samples_for_size}{stddev};
    printf("%10d   %8.2f   %8.3f\n", $size, $mean, $error);
  }

  exit(0);

# makestats($size, $array)
#
# Work out the mean and its error for the samples in @$array and store
# them in the global %stats, keyed by the array reference itself:
#
#   $stats{$array}{count}   number of samples
#   $stats{$array}{mean}    arithmetic mean
#   $stats{$array}{stddev}  sqrt(variance/(count-1)), the error of the mean
#
# $size is accepted so that existing calls keep working; it is not used.
#
# Special cases:
#   no samples          all three entries are left at zero
#   first sample is -1  mean and stddev are set to -1 and a note is
#                       written to STDERR
#   one sample          mean is that sample, stddev is infinite
#
sub makestats
{

 my $size = shift;
 my $array = shift;
 # Debugging
 # print STDERR "Ready to make stats for array $array at $size\n";

 # Zero the counters
 my $result = $stats{$array} = { mean => 0.0, stddev => 0.0, count => 0 };
 my $cnt = @$array;
 return if $cnt == 0;

 # A leading -1 marks a set of results with nothing usable in it.
 if($$array[0] == -1){
   $result->{mean} = -1;
   $result->{stddev} = -1;
   # Debugging (and curiosity)
   print STDERR "Oops.  $array is empty.\n";
   return;
 }

 # Accumulate the sum and the sum of squares in one pass.
 my $sum = 0.0;
 my $sumsq = 0.0;
 foreach my $value (@$array){
   $sum += $value;
   $sumsq += $value*$value;
 }
 $result->{count} = $cnt;

 # The averages are formed exactly once.  They do not depend on how many
 # host names happen to be known, so nothing here is repeated per host.
 if($cnt > 1) {
   my $mean = $sum / $cnt;
   my $variance = $sumsq / $cnt - $mean*$mean;
   # Round-off can push the variance of (nearly) identical samples a hair
   # below zero, and sqrt of a negative number is a fatal error.
   $variance = 0.0 if $variance < 0;
   $result->{mean} = $mean;
   $result->{stddev} = sqrt($variance/($cnt-1));
 } else {
   # Wish one could assign "infinity".  This probably breaks somewhere.
   $result->{mean} = $sum;
   $result->{stddev} = 1.0e+1000;
 }

 # Debugging Only.
 # print STDERR "$array (average): $result->{mean} +/- $result->{stddev}\n";

}

# Usage([$message])
#
# Print the usage text on STDERR, preceded by "Error: $message" when a
# message is given, and leave the program.  The exit status is 1 when a
# message was given (something was wrong with the command line) and 0
# when the text was only asked for.  This sub never returns.
sub Usage {

 my $message = shift;
 if($message) {print STDERR "Error: $message\n";}
 print STDERR "
Usage:
  log-plot-cpu_rate [-t test] [-b begin_size] [-l log2_max]
                    [-h] [-n samples] [-s hostname] [-v level]
where
  -t testnumber selects benchmark to be run:
     0 is idiot = dummy (almost) empty loop
     1 is bogomflops d[i] = (d[i] + ad)*(bd - d[i])/d[i]
     2 is copy d[i] = a[i]
     3 is scale d[i] = xtest*d[i]
     4 is add d[i] = a[i] + b[i]
     5 is triad d[i] = a[i] + xtest*b[i]
     6 is trid (average +-*) d[i] = (ad + bd - cd)*d[i]
     7 is memtest sequential ai[ai[i]] swap
     8 is memtest random ai[ai[i]] swap
     9 is savage: xtest = tan(atan(exp(log(sqrt(xtest*xtest)))))
  Any other values test the empty (timing) loop

  -b sets the beginning vector size (for vector tests, best a
     power of 2)
  -l sets the maximum power of 2 times this size to be tested
     (the vector size is doubled each pass for l passes)
  -n sets the number of independent samples to be evaluated at each size
  -s hostname causes the stats ONLY to be run on preexisting data files
     in the results subdirectory this program creates.  This allows the
     results of a run to be redisplayed without rerunning the whole test.
  -v level turns on progress messages when level is not zero

NOTE WELL!  This is beta software under development and various things may
not work correctly or at all.  This script in particular isn't fully
debugged and will probably do silly things for some values of -t, and the
stats routine is NOT smart enough to recognize which test it is resumming
(it will just crunch through the output files in results, which are replaced
by each new test).  USE CAUTION, and REPORT BUGS (preferrably with fixes:-)
to rgb\@phy.duke.edu.
\n";

 # A bare exit reports success, which hid every command line error from
 # whatever started this script.
 exit($message ? 1 : 0);
}

# parseconfig($host)
#
# Read config/$host-config and fill the global configuration variables
# used by the report.  Two kinds of entry are understood:
#
#   NAME = value      sets the scalar $NAME
#   NAME = (          starts a list; every following line up to one that
#     item            begins with ')' is pushed onto the array @NAME
#   )
#
# Double quotes and leading white space are removed from every value.
# Every scalar starts out as "Unknown" (or 0 for $NUMCPUS) and every
# array starts out empty, so a report shows what the file did not set.
# If the file supplies no NOTES list, @NOTES gets a single default note.
#
# Dies if the file cannot be opened.  Exits with status 1 if the file
# has a "HOSTNAME:" line whose value after '=' does not contain $host.
sub parseconfig {

  my $host = shift;
  my $hostread;

  # Scalars
  $HOSTNAME = "Unknown";
  $MOTHERBOARD = "Unknown";
  $CPUFAMILY = "Unknown";
  $SYSTEM = "Unknown";
  $KERNEL = "Unknown";
  $NUMCPUS = 0;
  $MEMTOTAL = "Unknown";
  $MEMCLOCK = "Unknown";
  $MEMTYPE = "Unknown";

  # Arrays.  These have to be cleared as arrays: assigning () to a
  # scalar of the same name leaves the array untouched.
  @CPUTYPE = ();
  @CPUMHZ = ();
  @CPUL1CODE = ();
  @CPUL1DATA = ();
  @CPUL2SIZE = ();
  @DISK = ();
  @DISKTYPE = ();
  @NETWORK = ();
  @NICTYPE = ();
  @NETHUB = ();
  @NOTES = ();

  # The parameter names are also the names of the global variables that
  # receive the values (through symbolic references below).
  my @params = (
    'HOSTNAME', 'NUMCPUS',
    'CPUFAMILY','SYSTEM','KERNEL','CPUTYPE','CPUVENDOR',
    'CPUMHZ','MOTHERBOARD','CPUL1CODE','CPUL1DATA','CPUL2SIZE',
    'MEMTOTAL','MEMCLOCK','MEMTYPE','DISK','DISKTYPE',
    'NETWORK','NICTYPE','NETHUB','NOTES',
  );

  my $config = "config/$host-config";
  open(my $fh, '<', $config) || die "$0: can't open $config: $!";
  # Debugging
  # print STDERR "Opened $config for parsing.\n";

  LINE: while (my $line = <$fh>) {
    # chomp, not chop: the last line may have no newline to give up.
    chomp($line);
    # print STDERR "Parsing $line\n";

    if ($line =~ /^HOSTNAME:/) {
      my @pair = split(/=/, $line, 2);
      $hostread = $pair[1];
      # Only a line that really carries a value can be compared.
      if(defined $hostread){
        $hostread =~ s/"//g;
        $hostread =~ s/^\s+//;
        # \Q...\E: the host name is compared as text, not as a pattern.
        if($hostread ne "" && $hostread !~ /\Q$host\E/){
          print STDERR "Whoops!  $host isn't the same as $hostread!\n";
          exit(1);
        }
      }
    }

    # Easiest to do the following via a list, I think...
    foreach my $param (@params){
      if ($line =~ /^$param = \(/) {
        # Debugging
        # print STDERR "Loop found $param:\n";
        while (defined(my $item = <$fh>)) {
          chomp($item);
          last if $item =~ /^\)/;
          # Strip double quotes and trim leading whitespace
          $item =~ s/"//g;
          $item =~ s/^\s+//;
          push(@$param, $item);
        }
        # The line has been dealt with.  Going on to test the remaining
        # names would only match against a list line, or against nothing
        # at all if the file ended inside the list.
        next LINE;
      } elsif ($line =~ /^$param =/) {
        # Limit of 2, so that a value containing '=' is kept whole.
        my @pair = split(/=/, $line, 2);
        my $value = $pair[1];
        # Strip double quotes and leading whitespace
        $value =~ s/"//g;
        $value =~ s/^\s+//;
        $$param = $value;
        # Debugging
        # print STDERR "Loop found $param = $value\n";
        next LINE;
      }
    }
  }
  close($fh);

  # The report prints @NOTES, so the default note belongs in the array.
  if(!@NOTES){
    @NOTES = ("This system is not yet initialized.  Run host-config.");
  }
  # Debugging
  # print STDERR "Finished parsing $config.\n";
}
