#! /usr/bin/env python
# -*- coding: utf-8 -*-

from collections import namedtuple, defaultdict
import gzip

###############
#  Utilities  #
###############

def memoize(func):
    """Return a caching version of *func*.

    Results are stored in a dictionary keyed by the tuple of positional
    arguments, so *func* is executed only once per distinct argument tuple.
    Only positional arguments are supported and they must be hashable.

    :param func: the function to wrap.
    :return: a wrapper with the same name and docstring as *func*.
    """
    # local import: the module header does not import functools
    import functools

    cache = {}

    @functools.wraps(func)  # keep __name__ / __doc__ of the wrapped function
    def wrapper(*args):
        if args in cache:
            return cache[args]
        res = func(*args)
        cache[args] = res
        return res
    return wrapper

def group_by(logs, func):
    """Gather the items of *logs* in buckets keyed by ``func(item)``.

    :param logs: an iterable of items.
    :param func: a callable computing the (hashable) bucket key of an item.
    :return: a ``defaultdict(list)`` mapping each key to the list of items
             which produced it, in the order they were encountered.
    """
    buckets = defaultdict(list)
    for entry in logs:
        buckets[func(entry)].append(entry)
    return buckets

def count(logs, key):
    """Count how many items of *logs* share each value of attribute *key*.

    :param logs: an iterable of objects having an attribute named *key*.
    :param key: the name of the attribute to tally.
    :return: a list of ``(value, occurrences)`` tuples sorted by
             increasing number of occurrences.
    """
    tally = {}
    for entry in logs:
        value = getattr(entry, key)
        tally[value] = tally.get(value, 0) + 1
    return sorted(tally.items(), key=lambda pair: pair[1])
###############
#    core     #
###############

# One parsed line of an access log. day_of_month and year are stored as
# integers, every other field is kept as the text found in the file.
log = namedtuple('log', "day_of_week day_of_month month year hour prog email ip portal")


def parse(log_paths):
    """Parse access log files into a list of ``log`` records.

    Each non blank line is split on whitespace; the columns are mapped as
    follows: 0 day_of_week, 1 day_of_month, 2 month, 3 year, 4 hour, 5 prog,
    7 email, 8 ip and 9 portal (column 6 is not used). When a line does not
    have exactly 10 columns the portal is set to ``'UNKNOW_PORTAL'``.

    :param log_paths: the paths of the files to parse. A path ending with
                      ``.gz`` is read through :mod:`gzip`, any other path is
                      read as a regular text file.
    :return: the list of ``log`` records, in file then line order.
    :raises IndexError: if a non blank line has fewer than 9 columns.
    :raises ValueError: if the day of month or the year is not an integer.
    """
    logs = []
    for log_path in log_paths:
        opener = gzip.open if log_path.endswith('.gz') else open
        with opener(log_path, 'r') as log_file:
            for line in log_file:
                if not isinstance(line, str):
                    # gzip.open(..., 'r') yields bytes on Python 3; decode so
                    # that records hold text whatever the file type is.
                    line = line.decode('utf-8', 'replace')
                fields = line.split()
                if not fields:
                    # blank line (e.g. trailing newline): nothing to parse
                    continue
                portal = fields[9] if len(fields) == 10 else 'UNKNOW_PORTAL'
                logs.append(log(fields[0],
                                int(fields[1]),
                                fields[2],
                                int(fields[3]),
                                fields[4],
                                fields[5],
                                fields[7],
                                fields[8],
                                portal))
    return logs

 
def jobs_per_user(logs):
    """Compute the number of jobs submitted by each e-mail address.

    :param logs: an iterable of objects having an ``email`` attribute.
    :return: a list of ``(email, number of jobs)`` tuples sorted by
             increasing number of jobs.
    """
    jobs = {}
    for entry in logs:
        address = entry.email
        jobs[address] = jobs.get(address, 0) + 1
    return sorted(jobs.items(), key=lambda pair: pair[1])

def user_dict_2_list(d):
    """Turn a sequence of ``(key, count)`` pairs into two parallel lists.

    :param d: an iterable of indexable items whose second element (index 1)
              is a strictly positive number, e.g. the list returned by
              :func:`jobs_per_user`.
    :return: a tuple ``(x_data, y_data)`` where ``x_data`` is the 1-based
             rank of each item and ``y_data`` the natural logarithm of its
             second element.
    :raises ValueError: if a count is not strictly positive.
    """
    # local import: math is not imported in the module header
    import math

    y_data = [math.log(item[1]) for item in d]
    x_data = list(range(1, len(y_data) + 1))
    return x_data, y_data

def is_pasteurien(log):
    """Tell whether the e-mail address of *log* ends with ``pasteur.mg``.

    :param log: an object having an ``email`` string attribute.
    :return: True if the address ends with ``pasteur.mg``, False otherwise.
    """
    suffix = 'pasteur.mg'
    return log.email.endswith(suffix)

def progs_sorter(log):
    """Grouping key: return the ``prog`` attribute of *log*."""
    program = log.prog
    return program

def day_of_week_sorter(log):
    """Grouping key: return the ``day_of_week`` attribute of *log*."""
    week_day = log.day_of_week
    return week_day

def month_sorter(log):
    """Grouping key: return the ``month`` attribute of *log*."""
    month = log.month
    return month

def year_sorter(log):
    """Grouping key: return the ``year`` attribute of *log*."""
    year = log.year
    return year

def by_user(log):
    """Grouping key: return the ``email`` attribute of *log*."""
    address = log.email
    return address

def by_day(log):
    """Grouping key identifying a calendar day.

    :param log: an object having ``day_of_month``, ``month`` and ``year``
                attributes.
    :return: the tuple ``(day_of_month, month, year)``.
    """
    day, month, year = log.day_of_month, log.month, log.year
    return day, month, year


if __name__ == "__main__":
    # Command line entry point: parse the access logs given as arguments,
    # write a usage report in a file and optionally send it by email.
    import os
    import sys
    import argparse

    parser = argparse.ArgumentParser(description = """parse mobyle access log 
and generate a report
""")
    parser.add_argument("logs",
                        nargs = '+',
                        help = "the access files log in gz format")
    parser.add_argument("-o", "--output",
                        dest = "output",
                        action = "store",
                        default = "mob_stat.out",
                        help = "the output file"
                       )
    parser.add_argument("-m", "--email",
                        dest = "email",
                        action = "store_true",
                        help = "send the results by email (by default to the Mobyle maintainers)"
                       )
    parser.add_argument("--to",
                        dest = "to",
                        nargs = '+',
                        action = "store",
                        default = None,
                        help = "replace the dest of the email (the -m option must be set)"
                       )
    args = parser.parse_args()
    logs = parse(args.logs)

    TOTAL_JOBS = len(logs)
    UNIC_USERS = count(logs, 'email')

    ##################
    #  programs used #
    ##################
    # the grouping by program is computed once and shared by the two
    # program statistics below
    by_progs = group_by(logs, progs_sorter)
    PROG_USED = len(by_progs)
    PROG_USED_TOP_10 = count(logs, 'prog')[-10:]

    #####################################
    # how many user use a given program #
    #####################################
    progs = [(prog, len(count(prog_logs, 'email')))
             for prog, prog_logs in by_progs.items()]
    progs.sort(key = lambda x : x[1])
    NB_OF_USER_BY_PROG_TOP10 = progs[-10:]

    NB_OF_JOBS_BY_USER_TOP10 = jobs_per_user(logs)[-10:]

    ##########
    # Report #
    ##########
    def write_top10(report, title, header, rows):
        # Write one section of the report: the title underlined with '=',
        # the column header underlined with '-', then the rows from the
        # highest to the lowest (rows are sorted in increasing order).
        report.write(title)
        report.write('{}\n'.format('=' * (len(title)-2)))
        report.write('\t{}\n'.format(header))
        report.write('\t{}\n'.format('-' * len(header)))
        for row in reversed(rows):
            report.write('\t{} : {}\n'.format(*row))

    with open(args.output, 'w') as report:
        if TOTAL_JOBS:
            report.write('Total number of jobs = {}\n'.format(TOTAL_JOBS))
            nb_of_days = len(group_by(logs, by_day))
            report.write('nb of jobs / days = {:.0f}\n'.format(TOTAL_JOBS / nb_of_days))
            report.write("number of users = {0}\n".format(len(UNIC_USERS)))

            write_top10(report,
                        '\n{0} programs used (top 10)\n'.format(PROG_USED),
                        'program : nb jobs',
                        PROG_USED_TOP_10)
            write_top10(report,
                        "\nNumber of users by program (top 10)\n",
                        'program : users',
                        NB_OF_USER_BY_PROG_TOP10)
            write_top10(report,
                        "\nNumber of jobs by user (top 10)\n",
                        'user : jobs',
                        NB_OF_JOBS_BY_USER_TOP10)
        else:
            report.write('No jobs')

    ##########################################
    # email the report to mobyle maintainers #
    ##########################################
    if args.email:
        # dict.has_key does not exist anymore in Python 3, .get works with
        # every version and also covers the "defined but empty" case
        MOBYLEHOME = os.environ.get('MOBYLEHOME')
        if not MOBYLEHOME:
            sys.exit('MOBYLEHOME must be defined in your environment if you want to send statistics by email')
        mobyle_src = os.path.join(MOBYLEHOME, 'Src')
        if mobyle_src not in sys.path:
            sys.path.append(mobyle_src)

        # the Mobyle package is importable only once sys.path is updated
        from Mobyle.ConfigManager import Config
        config = Config()
        from Mobyle.Net import EmailAddress , Email

        if args.to:
            email_addr = EmailAddress(args.to)
        else:
            email_addr = EmailAddress(config.maintainer())

        email_checked = email_addr.check()
        if not email_checked:
            msg = email_addr.getMessage()
            # "print >> sys.stderr" is Python 2 only syntax
            sys.stderr.write('{}\n'.format(msg))
            sys.exit(2)

        mail = Email(email_addr)
        mail.send('STAT', {'SENDER'     : config.sender() ,
                           'HELP'       : config.mailHelp() ,
                           'SERVER_NAME': config.portal_url()},
                  files = [args.output])