#!/usr/bin/perl


# tp: look up each word of the input file in a tab-separated table of known
# word forms.
#
#   tp        print every input line followed by a tab and the table entry
#             for that word (the entry is empty when the word is unknown)
#   tp -n     read the input as "count<TAB>word" lines and print how many
#             tokens and distinct word forms were found in the table
#   tp -h     print a short usage message
#
# Both file locations and both corpus totals are hard-coded below.

use strict;
use warnings;

# Table of known forms, one per line: word, then two more tab-separated
# fields (held in $pos and $def below; presumably part of speech and
# definition -- confirm against the data file).
my $FORMS_PATH = "/home/kurisuto/documents/linguistics/norse_class/oi_wordforms";

# Words to look up: one word per line, or "count<TAB>word" per line with -n.
my $INPUT_PATH = "/home/kurisuto/documents/linguistics/norse_class/oigt_goodhist";

# Denominators for the -n report.  These are fixed figures, NOT derived from
# the input file, so they must be updated by hand if the corpus changes.
my $CORPUS_TOKENS    = 44147;
my $CORPUS_WORDFORMS = 8821;


# Return the text printed for -h.
sub usage {
    return <<"END_USAGE";
Usage: tp [-n]
  (no option)  print each input word with its entry from the word-form table
  -n           read "count<TAB>word" lines and print recognition statistics
  -h           print this help
END_USAGE
}


# Consume leading "-x" arguments from @ARGV.
# Returns ($hist, $numbers):
#   $hist     true when input lines carry a leading count field
#   $numbers  true when statistics are printed instead of per-word output
# Currently -n is the only switch and it turns on both flags.
# Dies on any unrecognized option; -h prints usage and exits 0.
sub parse_options {
    my ($hist, $numbers) = (0, 0);

    # The @ARGV test keeps us from inspecting an undefined $ARGV[0] once all
    # arguments have been consumed (or when none were given).
    while (@ARGV && substr($ARGV[0], 0, 1) eq "-") {
        my $option = substr(shift(@ARGV), 1);

        if ($option eq "n") {
            $hist    = 1;
            $numbers = 1;
        }
        elsif ($option eq "h") {
            print usage();
            exit 0;
        }
        else {
            die "Invalid argument -$option.  Use tp -h for help.\n";
        }
    }

    return ($hist, $numbers);
}


# Read the word-form table at $path.
# Returns a hash reference mapping each word to "pos<TAB>def".
# Dies if the file cannot be opened.
sub load_wordforms {
    my ($path) = @_;

    open(my $forms, '<', $path) or die "Fatal error:$!\n";

    my %entry_for;
    while (my $line = <$forms>) {
        # chomp, not chop: a final line without a newline must keep its
        # last character.
        chomp $line;

        my ($word, $pos, $def) = split(/\t/, $line);

        # Skip blank lines and lines with an empty first field, so that the
        # empty string is never registered as a known word.
        next unless defined $word && length $word;

        $pos = '' unless defined $pos;
        $def = '' unless defined $def;

        # A later line for the same word replaces the earlier one.
        $entry_for{$word} = $pos . "\t" . $def;
    }
    close($forms);

    return \%entry_for;
}


# Return $part as a percentage of $whole, truncated (not rounded) to two
# decimal places.
sub percent_of {
    my ($part, $whole) = @_;
    return int($part / $whole * 10000) / 100;
}


my ($hist, $numbers) = parse_options();
my $entry_for = load_wordforms($FORMS_PATH);

# Start at 0 so the report prints "0" rather than nothing when no word is
# recognized.
my $total_tokens = 0;
my $total_forms  = 0;

open(my $input, '<', $INPUT_PATH) or die "Fatal error:$!\n";

while (my $line = <$input>) {
    chomp $line;

    my ($count, $word);
    if ($hist) {
        ($count, $word) = split(/\t/, $line);
    }
    else {
        $word = $line;
    }
    $word = '' unless defined $word;

    if ($numbers) {
        # Statistics mode: each recognized line contributes its count to the
        # token total and one to the word-form total.
        if (exists $entry_for->{$word}) {
            $total_tokens += $count if defined $count;
            $total_forms++;
        }
    }
    else {
        # Listing mode: [count TAB] word TAB entry, entry empty if unknown.
        my $entry  = exists $entry_for->{$word} ? $entry_for->{$word} : '';
        my @fields = ($word, $entry);
        unshift(@fields, defined $count ? $count : '') if $hist;
        print join("\t", @fields), "\n";
    }
}
close($input);


if ($numbers) {
    print "\nTotal tokens in corpus: $CORPUS_TOKENS\n";
    print "Recognized tokens: $total_tokens\n";
    print "Percentage of tokens recognized: ",
        percent_of($total_tokens, $CORPUS_TOKENS), "%\n";
    print "\n";

    print "Total wordforms in corpus: $CORPUS_WORDFORMS\n";
    print "Recognized wordforms: $total_forms\n";
    print "Percentage of wordforms recognized: ",
        percent_of($total_forms, $CORPUS_WORDFORMS), "%\n";
    print "\n";
}
