#! /usr/bin/perl -w

# dl10n-rrd -- Debian l10n statistics (rrd format)
#
# Copyright (C) 2007 Nicolas François
#
# Based on dl10n-txt:
# Copyright (C) 2004 Martin Quinson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# WARNING: This script must not be run twice per day
#          (see also the step variable below)
#          This script should not be run between 01H00 and the update of the
#          database.
#

use strict;
use Getopt::Long; #to parse the args

my $progname= $0; $progname= $& if $progname =~ m,[^/]+$,;

my $VERSION = "1.0"; #External Version Number
my $BANNER = "Debian l10n infrastructure -- rrd statistics extractor v$VERSION"; # Version Banner - text form
my $DB_FILE="./data/status";
my $IGNORE_FILE='';
my $RRD_DATE;
my $STATUS_FILE='./data/status.$lang';
my $assume_bts = 0;

use Debian::L10n::Db;

sub syntax_msg {
    my $msg = shift;
    if (defined $msg) {
        print "$progname: $msg\n";
    } else {
        print "$BANNER\n";
    }
    print 
"Syntax: $0 [options]
General options:
    -h, --help                display short help text
    -V, --version             display version and exit

Informations to display:
    -a,--assume-bts           Assume that the content bugs in the BTS were
                                applied.

Database to use:
    --db=DB_FILE              use DB_FILE as database file
                                (instead of $DB_FILE)
    --idb=IGNORE_FILE         use IGNORE_FILE as list of packages to ignore
    --sdb=STATUS_FILE         use STATUS_FILE as status file
                                (instead of $STATUS_FILE)
    --date=RRD_DATE           RRD start date (in secondes since Epoch).
                                Default date is the current time rounded to
                                the end of the current step.
";
    if (defined $msg) {
        exit 1;
    } else {
        exit 0;
    }
}

# Display Version Banner
# Options: -V|--version, --print-version
sub banner {
    if ($_[0] eq 'print-version') {
        print "$VERSION\n";
    } else {
        print "$BANNER\n";
    }
    exit 0;
}

# Hash used to process commandline options
my %opthash = (
# ------------------ general options
        "help|h"        => \&syntax_msg,
        "version|V"     => \&banner,
        "print-version" => \&banner,

# ------------------ configuration options
        "assume-bts|a"  => \$assume_bts,

        "db=s"          => \$DB_FILE,
        "idb=s"         => \$IGNORE_FILE,
        "sdb=s"         => \$STATUS_FILE,
        "date=i"        => \$RRD_DATE,
        );

# init commandline parser
Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev');

# process commandline options
GetOptions(%opthash)
    or syntax_msg("error parsing options");

#-----------------------------------------------------------------------------
#                        The main program                                     
#-----------------------------------------------------------------------------
###
### initialisation
###

my $data = Debian::L10n::Db->new();
$data->read($DB_FILE);

my %ignored_pkgs = ();
if ($IGNORE_FILE) {
    open IGNORE, "$IGNORE_FILE"
        or die "Impossible to read the ignore file $IGNORE_FILE\n";
    while (<IGNORE>) {
        chomp;
        next unless $_;
        $ignored_pkgs{$_} = 1;
    }
    close IGNORE;
}

my @poparts=qw(po podebconf po4a); # Only POs, no template or man

my %total;

foreach my $pkg ($data->list_packages()) {
    next if defined $ignored_pkgs{$pkg};

    my (%score,%ori);
    foreach my $part (@poparts) {
        my $has_part="has_$part";
        $score{$part} = {};
        if ($data->$has_part($pkg)) {
            foreach my $line (@{$data->$part($pkg)}){
                my ($pofile, $lang, $stat) = @{$line};
                if (defined $lang and length $lang) {
                    $score{$part}{$lang} = add_stat($stat,
                                                    $score{$part}{$lang});
                }
            }

            my $lang;
            if ($assume_bts and defined $total{$part}) {
            foreach $lang (keys %{$total{$part}}) {
                # FIXME: as we do not have the list of languages, we can only
                #        check the BTS for the languages with a translation in
                #        the previous packages.
                #        This should be mostly OK.
                if (defined $score{$part}{'_'} and length $score{$part}{'_'}) {
                    $score{$part}{$lang} = merge_bts_stats($pkg,
                                                           $lang,
                                                           $part,
                                                           $score{$part}{$lang},
                                                           $score{$part}{'_'});
                }
            }
            }

            unless (defined $score{$part}{'_'}) {
                # If there is not POT file, try to find the number of strings
                # from the other PO. This is usually a sign for non up to date
                # PO files, so the number of strings in the PO files may vary.
                # I choose to take the greatest number.
                # This is a bug and should be reported.
                my $t = 0;
                foreach $lang (keys %{$score{$part}}) {
                    if (tot($score{$part}{$lang})>$t) {
                        $t = tot($score{$part}{$lang});
                    }
                }
                $score{$part}{'__'}="0t0f".$t."u";
            } else {
                $score{$part}{'__'}=$score{$part}{'_'};
            }
            {
                my $t = 0;
                foreach $lang (keys %{$score{$part}}) {
                    if (tot($score{$part}{$lang})>$t) {
                        $t = tot($score{$part}{$lang});
                    }
                }
                $score{$part}{'___'}="0t0f".$t."u";
            }
            # Here we could also check that every pkg has the same number of
            # strings for each language.

            foreach $lang (keys %{$score{$part}}) {
                if (defined $score{$part}{$lang} && $score{$part}{$lang} ne '---') {
                    $total{$part}{$lang} = add_stat($score{$part}{$lang},
                                                    $total{$part}{$lang});
                }
# QA:
#                if (tot($score{$part}{$lang}) > tot($score{$part}{'__'})) {
#                    print "$lang > _ for $pkg ($part)\n";
#                    print "          '".$score{$part}{$lang}." / ".($score{$part}{'_'}||"")."\n";
#                }
            }
        }
    }
}

foreach my $part (@poparts) {
    foreach my $lang (keys %{$total{$part}}) {
        my $stats = $total{$part}{$lang};
        my $t = "0";
        my $f = "0";
        my $u = "0";

        if ($stats =~ /([0-9]+)t/) {  $t=$1;  }
        if ($stats =~ /([0-9]+)f/) {  $f=$1;  }
        if ($stats =~ /([0-9]+)u/) {  $u=$1;  }

        if ($t+$f+$u == 0) {
            # We do not process this language anymore. This is most
            # probably an (old) error in the language code and there are no
            # PO files anymore
#            warn "Wrong stats format. part: $part, lang: $lang, stats: $stats.\n";
            next;
        }

        if ( ! -d $part) {
            mkdir $part;
        }
        my $step = 60*60*24; # 1 day
        if ( ! -f "$part/$lang.rrd") {
            system "rrdtool create $part/$lang.rrd ".
                   "--step $step ".
                   ((not defined $RRD_DATE or $RRD_DATE eq "N")?"":"--start ".($RRD_DATE)." ").
                   "DS:t:GAUGE:".($step*1.5).":U:U ".
                   "DS:f:GAUGE:".($step*1.5).":U:U ".
                   "DS:u:GAUGE:".($step*1.5).":U:U ".
                   "RRA:AVERAGE:0.5:1:700";
        }
        my $date="N";
        if (defined $RRD_DATE) {
            if ($RRD_DATE ne "N") {
                $date = $RRD_DATE+1;
            }
        } else {
            use POSIX qw(strftime);
            $date = strftime "%s", localtime;
            $date = (int($date / $step)+1)*$step
        }
        system "rrdtool update $part/$lang.rrd $date:$t:$f:$u";
    }
}

sub add_stat {
    my $new=shift;
    my $old=shift;

    return $new unless ($old);
    return $new if ($old eq '---');
    $new =~ /([0-9]*)t([0-9]*)f([0-9]*)u/;
    my ($nt,$nf,$nu) = ($1||0, $2||0, $3||0);
    $old =~ /([0-9]*)t([0-9]*)f([0-9]*)u/;
    my ($ot,$of,$ou) = ($1||0, $2||0, $3||0);
    my $res= ($nt+$ot)."t".($nf+$of)."f".($nu+$ou)."u";
    return $res;
}

sub normalize_score {
    my $orig=shift;
    my $trans=shift;

    $orig =~ /([0-9]*)t([0-9]*)f([0-9]*)u/;
    my ($ot,$of,$ou) = ($1||0, $2||0, $3||0);
    $trans =~ /([0-9]*)t([0-9]*)f([0-9]*)u/;
    my ($tt,$tf,$tu) = ($1||0, $2||0, $3||0);
    my $res= ($tt)."t".($tf)."f".($ot+$of+$ou-$tf-$tt)."u";
    return $res;
}

sub output_details {
    my $stats = shift||"";
    my $t = "0";
    my $u = "0";
    my $f = "0";

    if ($stats =~ /([0-9]+)t/) {  $t=$1;  }
    if ($stats =~ /([0-9]+)u/) {  $u=$1;  }
    if ($stats =~ /([0-9]+)f/) {  $f=$1;  }
    return ($t+$f+$u == 0 ? $stats : "$t:$f:$u");
}

my %statusDB;
sub merge_bts_stats {
    my $pkg = shift;
    my $lang = shift;
    my $part = shift;
    my $stats = shift;
    my $ori = shift;

    return $stats unless $assume_bts;

    unless (defined $statusDB{$lang}) {
        my $statusDBname = "$STATUS_FILE";
        $statusDBname =~ s/\$lang/$lang/g;

        return $stats unless ( -f $statusDBname );

        $statusDB{$lang} = Debian::L10n::Db->new();
        $statusDB{$lang}->read($statusDBname,0);
    }

    return $stats unless (   $statusDB{$lang}->has_package($pkg)
                          && $statusDB{$lang}->has_status($pkg));

    my $bts_reported = 0;
    foreach my $statusline (@{$statusDB{$lang}->status($pkg)}) {
        my ($kind,$file,$date,$status_from_db,$translator,$url,$bug_nb) = @{$statusline};
        if ($kind eq $part) {
            if ($status_from_db =~ m/^(bts|done|hold|fix|wontfix)$/i) {
                $bts_reported = 1;
            } else {
                $bts_reported = 0;
            }
        }
    }

    if ($bts_reported) {
        $ori =~ /([0-9]*)t([0-9]*)f([0-9]*)u/;
        $stats = ($1+$2+$3)."t0f0u";
    }

    return $stats;
}

sub tot {
    my $stats = shift;
    return 0 unless $stats;
    return 0 if $stats eq "---";
    my $t = "0";
    my $f = "0";
    my $u = "0";

    if ($stats =~ /([0-9]+)t/) {  $t=$1;  }
    if ($stats =~ /([0-9]+)f/) {  $f=$1;  }
    if ($stats =~ /([0-9]+)u/) {  $u=$1;  }

    return $t+$f+$u;
}

