#!/usr/bin/perl
#
# tp - summarise the output of the pipeline
#
#     oi_identify -m -t -s | tolower | hist | sort -r -n | grep -v "\-\-"
#
# Every line read from that pipeline is split on tabs into a count and a
# word; lines whose word field is missing or empty are ignored.
#
#   (no option)  print each remaining line as it was read.
#   -n           print nothing per line; instead subtract the lines from the
#                fixed corpus totals below and report what is left as
#                "recognized" tokens and wordforms, with percentages.
#   -h           print a usage summary and exit.
#
# NOTE(review): the arithmetic treats every line the pipeline emits as an
# unrecognized wordform - presumably that is what "oi_identify -m -t -s"
# reports; confirm against that tool's documentation.

use strict;
use warnings;

# Corpus totals.  These are fixed values, not computed from the input, so
# they must be updated by hand if the corpus changes.
my $total_forms_static  = 8821;
my $total_tokens_static = 44147;

# --- Command-line options ---------------------------------------------------

my $numbers = 0;    # true when -n was given

# Consume leading "-x" arguments.  The @ARGV test stops the loop cleanly once
# the argument list is exhausted instead of calling substr() on undef.
while (@ARGV && substr($ARGV[0], 0, 1) eq "-") {
    my $option = substr(shift(@ARGV), 1);

    if ($option eq "n") {
        $numbers = 1;
    }
    elsif ($option eq "h") {
        # The error message below has always advertised -h, so honour it.
        print usage();
        exit 0;
    }
    else {
        die "Invalid argument -$option.  Use tp -h for help.\n";
    }
}

# --- Read the pipeline ------------------------------------------------------

# The command is a constant string containing shell metacharacters, so it is
# run through the shell exactly as the two-argument form did.
my $pipeline = 'oi_identify -m -t -s | tolower | hist | sort -r -n | grep -v "\-\-"';

open(my $input, '-|', $pipeline) || die "Fatal error:$!\n";

# Running totals: start from the corpus totals and subtract each line read.
my $total_forms  = $total_forms_static;
my $total_tokens = $total_tokens_static;

while (my $line = <$input>) {

    # chomp, not chop: a final line without a trailing newline must not lose
    # its last real character.
    chomp $line;

    my ($count, $word) = split(/\t/, $line);

    # Skip lines that carry no word (no tab, or nothing after it).
    next unless defined $word && $word ne "";

    if ($numbers) {
        $total_tokens -= $count;
        $total_forms--;
    }
    else {
        print $line, "\n";
    }
}

# A non-zero exit status from the pipeline is not treated as an error here:
# its last stage, grep -v, exits non-zero whenever it prints no lines.  Only
# a genuine system-level failure ($! set) is reported.
if (!close($input) && $!) {
    warn "Warning: error closing pipeline: $!\n";
}

# --- Summary (-n only) ------------------------------------------------------

if ($numbers) {

    print "\nTotal tokens in corpus: $total_tokens_static\n";
    print "Recognized tokens: $total_tokens\n";
    print "Percentage of tokens recognized: ",
        truncated_percent($total_tokens, $total_tokens_static), "%\n";
    print "\n";

    print "Total wordforms in corpus: $total_forms_static\n";
    print "Recognized wordforms: $total_forms\n";
    print "Percentage of wordforms recognized: ",
        truncated_percent($total_forms, $total_forms_static), "%\n";
    print "\n";

}

# --- Helpers ----------------------------------------------------------------

# Return $part as a percentage of $whole, truncated (not rounded) to two
# decimal places.
sub truncated_percent {
    my ($part, $whole) = @_;
    return int($part / $whole * 10000) / 100;
}

# Return the usage text printed for -h.
sub usage {
    return "Usage: tp [-n] [-h]\n"
         . "  -n  print recognition totals and percentages instead of the word list\n"
         . "  -h  print this help and exit\n";
}
