#!/usr/bin/perl
#========================================================================
#
# $Id: amazon_sales_rank.pl 2 2001-02-15 14:26:25Z rgb $
#
# This is a GPL v2.0 suite of tools.
#
# See accompanying copyright and the file COPYING
#
#========================================================================

 use Getopt::Std;	# Single character options are plenty.
 use GD::Graph::lines;  # Build a straight numerical graph in jpeg or png
 use IO::Handle;        # flush() method on the table and STDOUT handles

#========================================================================
# We parse the CL with getopt standard (single character)
#========================================================================
# Option list.  First set the defaults.
 $verbose = 1;

 #=======================================================================
 # The tool is free and open source, but if you ever read the source you
 # get to see this one advertisement.  Sorry, but I'm writing the tool
 # to USE it myself (of course:-) and this is what I'm using it on.
 #
 # The default display is MY book:
 #
 #           The Book of Lilith, by Robert G. Brown
 #
 # It is available through Amazon.  It is, in simple fact, a really,
 # really good book.  Read the reviews.  If you buy it, you won't regret
 # it.  In fact, you might really enjoy it and learn something, as it is
 # a book that will make you laugh and think.  Regardless, by buying it
 # you will more than compensate me for the use of this toolset (noting
 # that people will rent you the equivalent functionality for $6/month
 # or more).  If you visit the book website at:
 #
 #   http://www.phy.duke.edu/~rgb/Lilith/Lilith.php
 #
 # you will find links to other sales venues as well as alternative
 # (and cheaper) e-book formats such as straight PDF, Sony format e-book,
 # html format e-book and (through Amazon) Kindle format e-book.
 #=======================================================================
 # Note that one enters the ASIN (Amazon Standard Identification Number)
 # to use this tool.  For "real books" this is the ISBN-10.  For Kindle
 # and other non-book products (that might or might not have a
 # meaningful sales rank) it is the ASIN and will usually have a letter
 # prefix.
 #
 # You may modify the following line FOR YOUR OWN USE with the ASIN of
 # your own book (basically changing the default) but you may not
 # redistribute this tool with your book's ASIN in place or with my
 # Lilith advertisement removed.  Find your OWN way to promote your
 # book(s)...:-)
 #
 # The default is kept as a quoted string: an ISBN-10 may start with 0
 # or end in X, neither of which survives being a numeric literal.
 #=======================================================================
 $asin = '1430322454';   # The Book of Lilith is the default.

 #=======================================================================
 # The default is to loop and get the rank every 60 minutes.  AFAICT,
 # the sales rank updates roughly hourly (this tool will in fact tell
 # me EXACTLY what the update interval is if I run it on a finer
 # granularity).  It forms a timestamp of the retrieval time and then
 # prints out a simple table of timestamped rankings.  It checks the
 # rank delta -- a DROP in sales rank signals at least one sale in the
 # previous sample interval.  There is no way to count how many
 # although eventually experience may permit one to find the 2+ book
 # "barrier" that indicates a probable sale of two or more.  Please
 # do NOT try hammering your Amazon site every minute as the rank is
 # NOT UPDATED IN REAL TIME and there is NO POINT in abusing Amazon's
 # servers in that way.
 #
 # Note that I empirically correct 3600 seconds down by roughly three
 # seconds as the time required to access and load the page.  This keeps
 # the tool's loop better synchronized with exactly 60 minutes.
 #=======================================================================
 $loop_minutes = 60;

 #=======================================================================
 # Intended output format: t(able), straight r(ank), g(raph) or
 # h(tml table).  NOTE: this is a placeholder -- no command line option
 # sets it and nothing in this file reads it.  The table and the graph
 # (written by plot_rank() as rank_$asin.jpeg) are always produced.
 #=======================================================================
 $output_format = "t";

 #=======================================================================
 # Parse the command line.  getopts() returns false (after printing its
 # own diagnostic) on an unknown option or a missing option argument;
 # do not carry on with a half-parsed command line in that case.
 #=======================================================================
 getopts('a:hm:v:') or Usage("Unrecognized option or missing option argument");

 #=======================================================================
 # Assignments.  Help wins over everything else.  Options are tested
 # with defined() so that a legitimate value of 0 (e.g. -v 0) is honored.
 #=======================================================================
 if($opt_h) {Usage();exit;}
 if(defined $opt_a) {$asin = $opt_a;}
 if(defined $opt_m) {
   # Must be a positive number of minutes; anything else would end up
   # as a nonsensical (possibly negative) sleep time in the main loop.
   if($opt_m !~ /\A\d+(?:\.\d+)?\z/ || $opt_m <= 0){
     Usage("-m requires a positive number of minutes");
     exit 1;
   }
   $loop_minutes = $opt_m;
 }
 if(defined $opt_v) {$verbose = $opt_v;}

 #=======================================================================
 # The ASIN ends up inside a URL and inside the names of the table and
 # graph files, so insist on plain letters and digits.
 #=======================================================================
 if($asin !~ /\A[0-9A-Za-z]+\z/){
   Usage("ASIN must consist of letters and digits only");
   exit 1;
 }

 #=======================================================================
 # If leftovers, punt with Usage()
 #=======================================================================
 if(@ARGV) {
   Usage("Incorrect number or type of arguments");
   exit 1;
 }

 #=======================================================================
 # One last time, don't go under 60 unless you are just experimenting.
 #=======================================================================
 if($loop_minutes < 60){
   print STDERR
     "Warning!  Hammering Amazon's servers is likely to get you\n",
     "blacklisted!  The default loop time matches their rank update\n",
     "interval (as I determined by hammering their servers FOR you:-)\n",
     "and there is usually no point in reducing it, although you may\n",
     "well want to increase it.\n";
 }

 #=======================================================================
 # OK.  We are going to ALWAYS open a file called:
 #      amazon_sales_rank_table.$asin
 # if it exists, for read and append.  First we will read it and
 # try to determine total hours we've had at least one sale in.
 # Then we'll (resume) adding to the table therein, forever.
 #=======================================================================
 $asr_table = "amazon_sales_rank_table.$asin";
 get_local_time();

 #=======================================================================
 # Read in all data to date so it can be plotted and so the running
 # sale-hours total and the last known rank carry over a restart.
 #
 # Data rows are whitespace separated:
 #     sale-hours  rank  asin  MM/DD/YYYY  HH:MM:SS
 # Lines starting with '#' are run headers.  Blank, truncated or
 # otherwise malformed rows are skipped rather than trusted.
 #
 # @graph_xy is a file-scoped lexical shared with the sampling loop:
 # $graph_xy[0] holds x values (fractional days since the first of the
 # month), $graph_xy[1] holds the matching ranks.  Newest points go on
 # the front, exactly as the sampling loop adds them.
 #=======================================================================
 my @graph_xy = ();
 $total_sales = 0;
 $last_rank = 0;
 if(open(my $history, '<', $asr_table)){
   while(my $line = <$history>){
     chomp $line;
     next if $line =~ /^#/;

     my ($sales, $rank, $file_asin, $rank_date, $rank_time) = split ' ', $line;
     next unless defined $rank_time;
     next unless $sales =~ /\A\d+\z/ && $rank =~ /\A\d+\z/;

     # The recorded total already includes any sale detected when the
     # row was written, so it is taken as-is.  Re-detecting sales here
     # would count the sale on the final row a second time.
     $total_sales = $sales;

     # A rank of 0 is what a failed retrieval was written as (an
     # undefined rank formatted with %d); it is not a data point.
     next unless $rank > 0;

     next unless $rank_date =~ m{\A\d+/(\d+)/\d+\z};
     my $rank_day = $1;
     next unless $rank_time =~ /\A(\d+):\d+:\d+\z/;
     my $rank_hour = $1;

     # x coordinate: decimal days from the beginning of the month.
     my $g_time = ($rank_day - 1) + $rank_hour/24.0;
     unshift @{$graph_xy[0]},$g_time;
     unshift @{$graph_xy[1]},$rank;
     $last_rank = $rank;
   }
   close($history);
 }
 else {
   # No table yet simply means this is the first run for this ASIN.
   # A table that exists but cannot be read is a real problem.
   my $open_error = $!;
   die "Can't read existing $asr_table: $open_error\n" if -e $asr_table;
 }

 #=======================================================================
 # (Re)open the table for append.  ASRT stays a bareword handle because
 # the sampling loop further down writes to it by that name.
 #=======================================================================
 open(ASRT, '>>', $asr_table) or die "Can't reopen $asr_table for append: $!\n";

 # Same run header goes to the table and to the terminal.
 my $run_header = join('',
   "#==================================================================\n",
   sprintf("# Starting run at %02d:%02d:%02d on %02d/%02d/%04d\n",$hour,$min,$sec,$mon,$mday,$year),
   "# Sale-hours       Rank        ISBN        Date     Time\n",
   "#==================================================================\n");
 print ASRT $run_header;
 print $run_header;

 #=======================================================================
 # Loop forever.
 #=======================================================================
 # Debugging appearance only, dummy call.
 # plot_rank(\@graph_xy);
 # exit;
 #=======================================================================
 # A rank drop of more than this many places between two samples is
 # taken as at least one sale in the interval.  Variations of hundreds
 # hour to hour (even down) are not sales, they are artifacts of the
 # sales rank algorithm.  Kindle books need a smaller drop than regular
 # books because they don't get up very high in the first place.  This
 # cannot resolve multiple sales in a single interval.
 #=======================================================================
 my $sale_rank_drop = 1000;

 #=======================================================================
 # For the umptieth time, do not reduce this below 60 minutes (3600
 # seconds) for both practical and ethical reasons.  Tracking sales and
 # rank is legitimate business; overloading amazon's servers by
 # hammering your book's page as fast as the tool can download it is just
 # dumb, especially when SALES RANK ONLY UPDATES ONCE AN HOUR!
 #
 # The three second correction is empirical for the time required to
 # load a book page.  Never let the correction push the delay to zero
 # or below.
 #=======================================================================
 my $sleep_seconds = $loop_minutes*60 - 3;
 $sleep_seconds = 1 if $sleep_seconds < 1;

 while(1){
   # Timestamp the retrieval (sets $hour, $min, $sec, $mon, $mday, $year).
   get_local_time();

   # Get the rank; undef means the page or the rank could not be had.
   my $rank = get_rank($asin);

   if(defined $rank && $rank =~ /\A\d+\z/ && $rank > 0){

     if(($rank + $sale_rank_drop) < $last_rank) {
       $total_sales++;
     }
     $last_rank = $rank;   # Save the new rank for next time

     #===================================================================
     # table output.  The same row goes to the table file and to the
     # terminal; both are flushed so the data survives a kill.
     #===================================================================
     my $row = sprintf("%7d          %7d %14s %02d/%02d/%04d %02d:%02d:%02d\n",$total_sales,$rank,$asin,$mon,$mday,$year,$hour,$min,$sec);
     print ASRT $row;
     ASRT->flush;
     print $row;
     STDOUT->flush;

     #===================================================================
     # graph -- we ALWAYS build the graph and the table, not as an
     # option.  x is decimal days from the beginning of the month.
     # Eventually we're going to want an x coordinate that respects the
     # epoch parsed out of the current table (month rollover).
     #===================================================================
     my $g_time = ($mday - 1) + $hour/24.0;
     unshift @{$graph_xy[0]},$g_time;
     unshift @{$graph_xy[1]},$rank;
     plot_rank(\@graph_xy);
   }
   else {
     # A failed retrieval must not be recorded: an undefined rank would
     # look like a huge rank drop (a false sale), be written to the
     # table as rank 0, and wipe out $last_rank for the next comparison.
     printf STDERR ("Warning: no sales rank retrieved for %s at %02d:%02d:%02d on %02d/%02d/%04d; sample skipped\n",$asin,$hour,$min,$sec,$mon,$mday,$year);
   }

   sleep($sleep_seconds);

 }

#========================================================================
# get_rank($asin)
#
# Fetch the Amazon product page for $asin with the external "links"
# browser and pull the sales rank out of it.
#
#   $asin    ASIN / ISBN-10, letters and digits only.
#
# Returns the rank as a string of digits (thousands separators removed),
# or undef if the ASIN is malformed, links cannot be run, or no rank is
# found on the page.
#
# The page is searched for lines containing "Sales Rank"; on those lines
# the last whitespace-delimited term of the form #<digits and commas>
# wins.
#========================================================================
sub get_rank {

 my $asin = shift(@_);

 #=======================================================================
 # The ASIN becomes part of a URL handed to an external program, so
 # refuse anything that is not plainly an identifier.
 #=======================================================================
 unless(defined $asin && $asin =~ /\A[0-9A-Za-z]+\z/){
   print STDERR "get_rank: refusing malformed ASIN\n";
   return;
 }

 #=======================================================================
 # For books, ASIN is always ISBN-10.  ISBN-13 will not work.  ASIN
 # "may" work for non-book products, but that isn't the point of this
 # tool.  According to Wikipedia, the following link form is "the"
 # general ASIN-based address for books (or other related objects, e.g.
 # Kindle books) carried by Amazon.
 #=======================================================================
 my $link = "http://www.amazon.com/o/ASIN/$asin";

 #=======================================================================
 # List-form pipe open: links is exec'ed directly, no shell is involved
 # and no external grep is needed.
 #=======================================================================
 my $page;
 unless(open($page, '-|', 'links', '-source', $link)){
   print STDERR "get_rank: cannot run links: $!\n";
   return;
 }

 my $rank;
 while(my $line = <$page>){
   next unless $line =~ /Sales Rank/;
   foreach my $term (split ' ', $line){
     # The rank looks like #12,345 or #1,234,567 -- strip EVERY comma,
     # not just the first one.
     next unless $term =~ /\A#(\d[\d,]*)/;
     ($rank = $1) =~ tr/,//d;
   }
 }
 close($page);   # exit status of links is not interesting here

 return $rank;

}

#========================================================================
# plot_rank(\@xy_data)
#
# Draw the sales rank history as a line graph and write it to
# rank_$asin.jpeg in the current directory ($asin is the file-level
# global).
#
#   \@xy_data   reference to a two-row array in GD::Graph data layout:
#               row 0 is the x values, row 1 the matching ranks.
#
# Returns 1.  Dies if the graph cannot be plotted or the file cannot be
# written.
#========================================================================
sub plot_rank {

 # This is a REFERENCE to the data array.
 my $xy_data = shift;

 #=======================================================================
 # Create a line graph of sales rank
 #=======================================================================
 my $g = GD::Graph::lines->new(900,350);

 $g->set_title_font(GD::Font->Giant);
 $g->set_legend_font(GD::Font->Giant);
 $g->set(
	title => 'Amazon Sales Rank',

	bgclr => 'white',
	fgclr => 'black',
	dclrs => [qw(black red blue green)],
	boxclr => 'lgray',
	x_label => 'Time (days)',
	x_label_position => 0,
	x_labels_vertical => 0,
	y_label => 'Rank (x100K)',
	y_min_value => 0,
	y_max_value => 600000,
	y_label_skip => 100000,
	y_tick_number => 6,
	# These numbers do one month.
	x_min_value => 0,
	x_max_value => 33,
	x_label_skip => 1,
	x_tick_number => 33,
	box_axis => 1,
	r_margin => 20,
	t_margin => 10,
	b_margin => 10,
	line_width => 3,
	transparent => 0 );

 $g->set_legend("ISBN: $asin");

 # plot() returns undef on failure; report why instead of tripping over
 # a method call on an undefined value.
 my $image = $g->plot($xy_data)
   or die "Cannot plot rank graph for $asin: " . $g->error . "\n";

 open(my $out, '>', "rank_$asin.jpeg") or die "Cannot open rank_$asin.jpeg for write: $!";
 binmode $out;   # must precede the write to have any effect
 print {$out} $image->jpeg;
 close($out) or die "Cannot finish writing rank_$asin.jpeg: $!";

 return 1;

}

sub get_local_time(){
 # Refresh the file-level timestamp globals ($sec, $min, $hour, $mday,
 # $mon, $year, $wday, $yday, $isdst) from the current local time.
 # Afterwards $year is the full four-digit year and $mon runs 1..12.
 my @now = localtime(time);
 $now[5] += 1900;   # localtime() counts years from 1900
 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = @now;
 $mon++;            # localtime() months are 0..11
}

#========================================================================
# Usage([$message])
#
# Print the usage summary on STDERR, preceded by "Error: $message" when
# a message is given, then terminate the program.  Exit status is 1 when
# called with an error message and 0 for plain help.  Never returns.
#
# Declared without a prototype: the sub takes an optional argument, and
# an empty "()" prototype would forbid passing one.
#========================================================================
sub Usage {

 my $message = shift;

 if($message) {print STDERR "Error: $message\n";}
 print STDERR "Usage:\n";
 print STDERR "  amazon_sales_rank [-a ASIN] [-m delay] [-h] [-v level]\n";
 print STDERR "    -a ASIN where ASIN is the Amazon Standard Identification Number\n";
 print STDERR "       For books this is the ISBN-10.\n";
 print STDERR "    -m sampling delay loop in minutes.  DO NOT use except to debug.\n";
 print STDERR "    -v level sets verbosity or selects debug targets\n";
 print STDERR "    -h shows Usage/help (this message).\n";
 exit($message ? 1 : 0);

}
 # Effectively unreachable: the while(1) sampling loop above contains no
 # "last", so control only gets past it if the program dies or is killed.
 exit;

