use warnings;
use strict;
use Data::Dumper; 

########################################
# Built-in settings.  Each one can be overridden for a single run with
# the matching command-line option (-lines=, -chars=, -tabs=, -words=,
# -output=); edit the numbers here to change the defaults permanently.

# number of formatted lines that make up one page
my $default_lines_per_page = 25;
# maximum number of columns on a formatted line before wrapping
my $default_characters_per_line = 60;
# number of columns a tab character occupies
my $default_tab_stop = 5;
# words credited per page when computing the publisher word count
my $default_words_per_page = 250;
# when true, the re-wrapped text is printed ahead of the summary
my $default_output_text = 0;
########################################

# Print the usage text and exit when the script is called without any
# arguments, or when the first argument is a help switch.  A help
# switch is anything starting with -h or --h, so -h, -help and the
# conventional --help are all recognised.
if (scalar(@ARGV) == 0 or $ARGV[0] =~ m{\A--?h}) {

	# The heredoc interpolates the built-in defaults so that the
	# help text always reports the values this copy will really use.
	print <<"HELP";

Call this script, passing in the name of a text file
containing your story in plaintext format. This script
will then read it, and generate a publisher-friendly
word count (note that this is a different sort of 
word count than is reported by normal word processors.)

Example:

	perl wordcount.pl mystory.txt

By default, the program will assume 
$default_characters_per_line characters per line and
$default_lines_per_page lines per page and 
$default_words_per_page words per page and 
$default_tab_stop characters per tab.

If you want to change any of these values, 
add them on the command line after the filename.
For example:

	perl wordcount.pl mystory.txt -lines=23 -chars=60 -words=245 -tabs=5

If you want to change the defaults, edit the 
first few lines of your copy of the program.

If you want the script to print out the text 
that it formatted to determine the word count,
use the -output=1 option.

Copyright 2006 Greg London
This program licensed under the 
CreativeCommons-Attribution license.
http://creativecommons.org/licenses/by/2.5/

HELP

	exit;
}

###########################################################
# process arguments and check for errors
###########################################################
# The first argument is the story file; every argument after it must
# be an option of the form -name=number.
my $storyname = shift(@ARGV);

# Effective settings for this run: start from the built-in defaults,
# then let command-line options replace individual values.
my %actual_values = (
	lines => $default_lines_per_page,
	chars => $default_characters_per_line,
	tabs  => $default_tab_stop,
	words => $default_words_per_page,
	output=> $default_output_text,
);

while (scalar(@ARGV)) {
	my $arg = shift(@ARGV);

	# The pattern is anchored at both ends so that a typo such as
	# "-lines=2x" or a stray word is reported instead of being
	# half-parsed or silently skipped (which would quietly produce
	# a count based on the wrong settings).  One or two leading
	# dashes are accepted.
	unless ($arg =~ m{\A--?(\w+)=(\d+)\z}) {
		die "Error: malformed argument '$arg' (expected -name=number)";
	}
	my ($key, $val) = ($1, $2);

	# only names that already exist in the settings table are valid
	unless (exists($actual_values{$key})) {
		die "Error: unknown argument '$arg'";
	}

	$actual_values{$key} = $val;
}

# "lines" is used as a divisor when pages are computed and "chars" is
# the wrap width, so a value of zero is never usable for either.
for my $key (qw(lines chars)) {
	if ($actual_values{$key} < 1) {
		die "Error: '-$key' must be at least 1";
	}
}

unless (defined($storyname)) {
	die "Error: please provide text filename";
}

unless (-e $storyname) {
	die "Error: could not find file '$storyname'";
}



###########################################################
# only print out if -output=1
###########################################################
sub pprint {
	# Conditional print: forwards every argument to print() when
	# the "output" setting is true and does nothing otherwise.
	print @_ if $actual_values{output};
}

###########################################################
#process file
###########################################################

# Open the story read-only.  The three-argument form keeps a file name
# that starts or ends with characters such as '>', '<' or '|' from
# being interpreted as an open mode or a command.
open(my $in, '<', $storyname)
	or die "Error: unable to open $storyname: $!";

# Number of formatted lines completed so far.  A line is completed
# either by a newline in the input or by wrapping at the line width.
my $linecounter = 0;

# Number of columns already used on the current formatted line.
my $columncounter = 0;

# The piece of text most recently taken off the front of the input.
my $chunk='';

while (my $linetext = <$in>) {

	# Consume the input line from the left, one chunk at a time.
	while (length($linetext)) {

		# a tab is rendered as a fixed number of spaces
		if ($linetext =~ s{\A\t}{}) {
			$chunk = ' ' x $actual_values{tabs};
			$columncounter += $actual_values{tabs};

		# a newline always ends the current line: push the column
		# counter past the limit so that the wrap code below runs
		} elsif ($linetext =~ s{\A(\n)}{}) {
			$chunk = $1;
			$columncounter = $actual_values{chars} + 10;

		# any other single whitespace character uses one column
		} elsif ($linetext =~ s{\A(\s)}{}) {
			$chunk = $1;
			$columncounter += 1;

		# Text that must stay together, tried in this order:
		#   \w+\S+    a word plus whatever non-space text is attached
		#             to it (trailing punctuation and the like)
		#   \w+       a word with nothing attached
		#   [^\s\w]+  a run of punctuation marks
		} elsif ($linetext =~ s{\A( \w+\S+ | \w+ | [^\s\w]+ )}{}x) {
			$chunk = $1;
			$columncounter += length($chunk);

		} else {
			die "Parse Error: no match on remaining text, '$linetext'";
		}

		# now figure out if this chunk went past the end of the line
		if ($columncounter > $actual_values{chars}) {

			# whitespace at the end of a line is dropped
			if ($chunk =~ m{\A\s}) {
				$columncounter = 0;
				pprint("\n");

			# anything else moves down to start the next line
			} else {
				pprint("\n");
				pprint($chunk);
				$columncounter = length($chunk);
			}

			# Lines are counted only here, when one is completed,
			# and once more after the loop for an unfinished last
			# line.  Counting each input line as it is read as well
			# would count every newline-terminated line twice.
			$linecounter++;
		} else {

			# still room on this line, print the chunk as it is
			pprint($chunk);
		}

	} # while (linetext)
} # while (in)

# Text left on the final line (input that does not end in a newline)
# still occupies a line of its own.
$linecounter++ if $columncounter > 0;

# A failed close is reported but does not stop the report.
warn "Warning: problem closing '$storyname', I hope it's OK."
	unless close($in);

my $characters_per_line = $actual_values{chars};
my $words_per_page      = $actual_values{words};

# Pages = lines / lines-per-page, cut off (not rounded) after the
# first decimal place.
my $total_pages = int(($linecounter / $actual_values{lines}) * 10) / 10;

# The publisher word count is simply pages times words-per-page.
my $publisher_word_count = $total_pages * $words_per_page;

# Summary, framed above and below by a 40-character rule.
print "\n\n\n", '#' x 40, "\n";
print <<"REPORT";
Characters per line is $characters_per_line
total lines for this text is $linecounter
With a lines per page of $actual_values{lines}
that yields a total of $total_pages pages
With a words per page of $words_per_page,
that yields a publisher word count of $publisher_word_count
REPORT
print '#' x 40, "\n\n\n";