#!/usr/bin/perl
#========================================================================
#
# $Id: amazon_sales_rank.pl 2 2001-02-15 14:26:25Z rgb $
#
# This is a GPL v2.0 suite of tools.
#
# See accompanying copyright and the file COPYING
#
#========================================================================

 use Getopt::Std;	# Single character options are plenty.
 use GD::Graph::lines;  # Build a straight numerical graph in jpeg or png

#========================================================================
# We parse the CL with getopt standard (single character)
#========================================================================
# Option list.  First set the defaults.
 $verbose = 1;

 #=======================================================================
 # The tool is free and open source, but if you ever read the source you
 # get to see this one advertisement.  Sorry, but I'm writing the tool
 # to USE it myself (of course:-) and this is what I'm using it on.
 #
 # The default display is MY book:
 #
 #           The Book of Lilith, by Robert G. Brown
 #
 # It is available through Amazon.  It is, in simple fact, a really,
 # really good book.  Read the reviews.  If you buy it, you won't regret
 # it.  In fact, you might really enjoy it and learn something, as it is
 # a book that will make you laugh and think.  Regardless, by buying it
 # you will more than compensate me for the use of this toolset (noting
 # that people will rent you the equivalent functionality for $6/month
 # or more).  If you visit the book website at:
 #
 #   http://www.phy.duke.edu/~rgb/Lilith/Lilith.php
 #
 # you will find links to other sales venues as well as alternative
 # (and cheaper) e-book formats such as straight PDF, Sony format e-book,
 # html format e-book and (through Amazon) Kindle format e-book.
 #=======================================================================

 $isbn = 1430322454;   # The Book of Lilith, replace with your isbn or not

 #=======================================================================
 # The default is to loop and get the rank every 60 minutes.  AFAICT,
 # the sales rank updates roughly hourly (this tool will in fact tell
 # me EXACTLY what the update interval is if I run it on a finer
 # granularity).  It forms a timestamp of the retrieval time and then
 # prints out a simple table of timestamped rankings.  It checks the
 # rank delta -- a DROP in sales rank signals at least one sale in the
 # previous sample interval.  There is no way to count how many
 # although eventually experience may permit one to find the 2+ book
 # "barrier" that indicates a probable sale of two or more.  Please
 # do NOT try hammering your Amazon site every minute as the rank is
 # NOT UPDATED IN REAL TIME and there is NO POINT in abusing Amazon's
 # servers in that way.
 #=======================================================================

 $loop_minutes = 60;

 #=======================================================================
 # Default output format is t(able).  One can select straight r(ank),
 # g(raph), h(tml table).  graph produces a running rank_$isbn.png that
 # one can embed in a web page.
 #=======================================================================
 $output_format = "t";

 #=======================================================================
 # Usage() is called below, so make sure it exists.  If a Usage routine
 # is defined anywhere in this file it is left alone; otherwise install a
 # minimal one that prints an optional error message followed by the
 # option summary on STDERR.
 #=======================================================================
 unless (defined &Usage) {
   *Usage = sub {
     my ($message) = @_;
     print STDERR "$message\n" if defined $message;
     print STDERR
       "Usage: " . $0 . " [-h] [-i isbn] [-m minutes] [-v level]\n",
       "  -h          print this message and exit\n",
       "  -i isbn     ISBN of the book to track\n",
       "  -m minutes  minutes between samples (default 60)\n",
       "  -o format   reserved; parsed but not yet used\n",
       "  -v level    verbosity level (default 1)\n";
     return;
   };
 }

 #=======================================================================
 # Parse the command line.  getopts() returns false (after printing its
 # own complaint) when it meets an option it does not know.
 #=======================================================================
 unless (getopts('hi:m:o:v:')) {
   Usage("Unrecognized option");
   exit 1;
 }

 #=======================================================================
 # Assignments.  Options are tested with defined() rather than for truth
 # so that an explicit "-v 0" is honoured.
 #=======================================================================
 if($opt_h) {Usage();exit;}

 if(defined $opt_i && length $opt_i) {
   # The ISBN ends up in a URL, in a shell command and in file names, so
   # only accept the characters an ISBN/ASIN can actually contain.
   unless ($opt_i =~ /^[0-9A-Za-z-]+$/) {
     Usage("The -i argument must be an ISBN (letters, digits and '-' only)");
     exit 1;
   }
   $isbn = $opt_i;
 }

 if(defined $opt_m) {
   # A non-numeric or zero interval would make the main loop sleep for
   # zero seconds and hit the server continuously.
   unless ($opt_m =~ /^(?:\d+\.?\d*|\.\d+)$/ && $opt_m > 0) {
     Usage("The -m interval must be a positive number of minutes");
     exit 1;
   }
   $loop_minutes = $opt_m;
 }

 # if($opt_o) {$output_format = $opt_o;}
 if(defined $opt_v) {$verbose = $opt_v;}

 #=======================================================================
 # If leftovers, punt with Usage()
 #=======================================================================
 $ARGC = @ARGV;
 if($ARGC) {
   Usage("Incorrect number or type of arguments");
   exit 1;
 }

 #=======================================================================
 # One last time, don't go under 60 unless you are just experimenting.
 #=======================================================================
 if($loop_minutes < 60){
   print STDERR
     "Warning!  Hammering Amazon's servers is likely to get you\n",
     "blacklisted!  The default loop time matches their rank update\n",
     "interval (as I determined by hammering their servers FOR you:-)\n",
     "and there is usually no point in reducing it, although you may\n",
     "well want to increase it.\n";
 }

 #=======================================================================
 # OK.  We are going to ALWAYS open a file called:
 #      amazon_sales_rank_table.$isbn
 # if it exists, for read and append.  First we will read it and
 # try to determine total hours we've had at least one sale in.
 # Then we'll (resume) adding to the table therein, forever.
 #=======================================================================
 $asr_table = "amazon_sales_rank_table.$isbn";
 get_local_time();

 #=======================================================================
 # Read in all data to date to plot.
 #
 # Each data line of the table holds, whitespace separated:
 #     sale-hours  rank  isbn  MM/DD/YYYY  HH:MM:SS
 # Lines starting with '#' are run banners and are skipped.  The last
 # data line read leaves $total_sales and $last_rank holding the values
 # the sampling loop resumes from.
 #
 # The x coordinate of each point is the number of hours since the first
 # sample in the file, computed from day-of-month and hour only.
 # NOTE(review): that offset goes negative once the table spans a month
 # boundary; $current_month/$current_year are recorded but not yet used
 # to correct for it.
 #=======================================================================
 my @graph_xy = ();
 $total_sales = 0;
 $last_rank = 0;
 $first_day_of_month = -1;   # -1 means "no sample seen yet"

 if(open(ASRT, '<', $asr_table)){
   while(my $line = <ASRT>){
     chomp $line;
     next if $line =~ /^#/;

     my @fields = split ' ', $line;
     next if @fields < 5;      # blank or truncated line: not a sample

     ($total_sales,$last_rank,$file_isbn,$rank_date,$rank_time) = @fields;
     ($rank_mon,$rank_day,$rank_yr)    = split('/', $rank_date);
     ($rank_hour,$rank_min,$rank_sec)  = split(':', $rank_time);

     # The first sample in the file defines the origin of the time axis.
     if($first_day_of_month == -1){
       $first_day_of_month = $rank_day;
       $first_hour = $rank_hour;
       $current_month = $rank_mon;
       $current_year = $rank_yr;
     }

     # Compute current hour from this offset.
     $g_day = $rank_day - $first_day_of_month;
     $g_hour = $rank_hour - $first_hour;
     $g_time = 24*$g_day + $g_hour;

     # Newest sample goes to the front of both series.
     unshift @{$graph_xy[0]},$g_time;
     unshift @{$graph_xy[1]},$last_rank;
   }
   close(ASRT);
 }
 else {
   # A missing table just means this is the first run for this ISBN;
   # anything else (permissions, ...) deserves a mention.
   my $open_error = $!;
   print STDERR "Warning: cannot read $asr_table: $open_error\n"
     if -e $asr_table;
 }

 #=======================================================================
 # Reopen the table for append (creating it if necessary) and mark the
 # start of this run with a banner, both in the table and on stdout.
 # The timestamp comes from the globals set by get_local_time().
 #=======================================================================
 open(ASRT, '>>', $asr_table)
   or die "Can't reopen $asr_table for append: $!\n";

 my $run_banner = join('',
   "#==================================================================\n",
   sprintf("# Starting run at %02d:%02d:%02d on %02d/%02d/%04d\n",
           $hour,$min,$sec,$mon,$mday,$year),
   "# Sale-hours       Rank        ISBN        Date     Time\n",
   "#==================================================================\n");

 print ASRT $run_banner;
 print $run_banner;

 #=======================================================================
 # Loop forever.
 #=======================================================================
 # Debugging appearance only, dummy call.
 # plot_rank(\@graph_xy);
 # exit;
 require IO::Handle;   # core module; provides ->flush for the handles below
 while(1){
   #=======================================================================
   # Timestamp the retrieval
   #=======================================================================
   get_local_time();

   #=======================================================================
   # Get the rank.  The function below is probably semi-portable.
   #=======================================================================
   $rank = get_rank($isbn);

   #=======================================================================
   # A failed retrieval yields no usable rank.  Treating that as rank 0
   # would look like a huge drop in rank (i.e. a sale) and put a bogus row
   # in the table, so skip the sample and try again after the usual wait.
   #=======================================================================
   unless (defined $rank && $rank =~ /^\d/) {
     printf STDERR ("Warning: no sales rank retrieved for %s at %02d:%02d:%02d on %02d/%02d/%04d; sample skipped\n",$isbn,$hour,$min,$sec,$mon,$mday,$year);
     sleep($loop_minutes*60);
     next;
   }

   #=======================================================================
   # if rank dropped by at least 5000, we are certain of at least one sale
   # in the hour or other interval.  We might want to be able to override
   # this, but for the moment we won't bother.  Note that empirically,
   # sales rank CAN drop just a tiny bit (order a hundred) during the wee
   # hours of the morning when presumably nothing much is selling, so we
   # need the discriminator.
   #=======================================================================
   if(($rank+5000) < $last_rank) {
     $total_sales++;
   }
   $last_rank = $rank;   # Save the new rank for next time

   #=======================================================================
   # table output.  Feel free to rearrange columns or add your own.
   # I'm hoping I can figure out how to feed this into one of the nifty
   # html graph tools stolen from e.g. webalyzer to get a nice sales rank
   # bar graph at various year/month/day granularities out of this.
   # The same row goes to the table file and to stdout; both are flushed
   # so that the row is visible immediately despite the long sleep.
   #=======================================================================
   my $table_row = sprintf("%7d          %7d %14s %02d/%02d/%04d %02d:%02d:%02d\n",$total_sales,$rank,$isbn,$mon,$mday,$year,$hour,$min,$sec);

   print ASRT $table_row;
   ASRT->flush;

   print $table_row;
   STDOUT->flush;

   #=======================================================================
   # graph -- we ALWAYS build the graph and the table, not as an option.
   # we might as well just echo the table onto the command line as well.
   #=======================================================================
   # Add this data to the data added above from the old file.
   # Eventually we're going to want to use "$hour" as the x coordinate
   # in some way that respects the modulus of the epoch parsed out
   # of the current table.
   #=======================================================================
   # No earlier sample (new table): this one becomes the time origin.
   # get_local_time() provides $mday and $mon.
   if($first_day_of_month == -1){
     $first_day_of_month = $mday;
     $first_hour = $hour;
     $current_month = $mon;
     $current_year = $year;
   }
   # Compute current hour from this offset.
   $g_day = $mday - $first_day_of_month;
   $g_hour = $hour - $first_hour;
   $g_time = 24*$g_day + $g_hour;
   print "($g_time,$last_rank)\n";
   unshift @{$graph_xy[0]},$g_time;
   unshift @{$graph_xy[1]},$rank;
   # print "Calling plot rank with xy_data\n";
   plot_rank(\@graph_xy);

   #=======================================================================
   # For the umpteenth time, do not reduce this below 60 minutes (3600
   # seconds) for both practical and ethical reasons.  Tracking sales and
   # rank is legitimate business; overloading amazon's servers by
   # hammering your book's page as fast as the tool can download it is just
   # dumb, especially when SALES RANK ONLY UPDATES ONCE AN HOUR!
   #=======================================================================
   sleep($loop_minutes*60);

 }

#========================================================================
# get_rank($isbn)
#
# Fetch the Amazon product page for $isbn with the external "links"
# browser (links -source URL) and pull the sales rank out of it.
#
# Only lines containing "Sales Rank" are examined.  Each such line is
# split on whitespace and every term starting with '#' is taken as the
# rank (the last one found wins); the leading '#' and ALL thousands
# separators are removed, so "#1,234,567" yields "1234567".
#
# Returns the rank string, or undef if links could not be run or no
# rank term was found.
#========================================================================
sub get_rank {

 my $isbn = shift(@_);
 my $link = "http://www.amazon.com/gp/product/$isbn";
 my $rank;

 # List-form pipe open: links is executed directly, without a shell, so
 # nothing in $isbn can be interpreted as shell syntax.
 my $links_fh;
 unless (open($links_fh, '-|', 'links', '-source', $link)) {
   print STDERR "get_rank: cannot run links: $!\n";
   return $rank;
 }

 while (my $page_line = <$links_fh>) {

   # The line(s) that contain the sales rank
   next unless $page_line =~ /Sales Rank/;

   # Split into terms
   foreach my $term (split ' ', $page_line) {

     # Find the one that contains the rank
     next unless $term =~ /^#/;

     $rank = $term;

     # Get rid of the # and every , in the rank (if any)
     $rank =~ s/#//;
     $rank =~ tr/,//d;

     # $rank is now rank!
   }
 }

 # A non-zero exit from links is not fatal here: whatever was parsed
 # (possibly nothing) is what the caller gets.
 close($links_fh);

 return $rank;

}

#========================================================================
# plot_rank(\@xy_data)
#
# Draw the sales-rank history as a line chart and write it to
# rank_$isbn.png in the current directory ($isbn is the script-wide
# global).
#
#   $xy_data  reference to an array of two array references:
#             [0] the x values (hours since the first sample) and
#             [1] the matching sales ranks.
#
# If GD::Graph cannot produce the plot a warning is printed and any
# existing image file is left untouched.  Dies if the image file cannot
# be opened or written.
#========================================================================
sub plot_rank {

 # This is a REFERENCE to the data array.
 my $xy_data = shift;

 # print "In plot_rank.\n";

 #=======================================================================
 # Create a line chart of sales rank
 #=======================================================================
 my $g = GD::Graph::lines->new(900,500);
 # my @test = ();
 # for($i=0;$i<100;$i++){
 #   unshift @{$test[0]}, $i;
 #   unshift @{$test[1]}, sin($i*1.0/10.0);
 # }

 # use GD::Graph::colour;
 # @list = GD::Graph::colour::colour_list(110);
 # print "@list\n";
 $g->set_title_font(GD::Font->Giant);
 $g->set_legend_font(GD::Font->Giant);
 $g->set(
         'title' => 'Amazon Sales Rank',

         bgclr => 'white',
         fgclr => 'black',
         dclrs => [qw(black red blue green)],
         boxclr => 'lgray',
         x_label => 'Time',
         x_label_position => 0,
         x_labels_vertical => 0,
         y_label => 'Rank',
         y_min_value => 0,
         y_max_value => 600000,
         y_label_skip => 100000,
         # y_max_value => 1.2,
         # y_label_skip => 0.2,
         y_tick_number => 6,
         x_min_value => 0,
         x_max_value => 110,
         x_label_skip => 1,
         x_tick_number => 11,
         box_axis => 1,
         # l_margin => 10,
         r_margin => 20,
         t_margin => 10,
         b_margin => 10,
         line_width => 3,
         transparent => 0 );

 $g->set_legend("ISBN: $isbn");

 # Render first, so that a failed plot neither truncates the previous
 # image nor kills the caller with a method call on undef.
 my $image = $g->plot($xy_data);
 unless ($image) {
   print STDERR "plot_rank: cannot plot rank data: " . $g->error . "\n";
   return;
 }
 # print OUT $g->plot(\@test)->png;

 open(my $png_fh, '>', "rank_$isbn.png")
   or die "Cannot open rank_$isbn.png for write: $!";
 binmode $png_fh;   # must precede the write: PNG data is binary
 print $png_fh $image->png;
 close($png_fh)
   or die "Cannot finish writing rank_$isbn.png: $!";

 return;

}

#========================================================================
# get_local_time()
#
# Refresh the script-wide timestamp globals from the current local time:
#   $sec $min $hour $mday $mon $year $wday $yday $isdst
# $year is the full four-digit year and $mon runs from 1 to 12; the
# other fields are exactly as localtime() returns them.
# Takes no arguments and returns nothing useful.
#
# Declared without a prototype: the calls in this script are compiled
# before this definition, so a prototype could never be applied to them.
#========================================================================
sub get_local_time {
 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
 $year += 1900;   # localtime() counts years from 1900
 $mon++;          # localtime() months are 0..11; make them 1..12
 return;
}

 # Not reached in normal operation: the while(1) sampling loop above never
 # leaves on its own, so the script ends only when it dies or is killed.
 exit;
