#!/usr/bin/env python3
#
# Format a result
#

import pscheduler
import sys
import math
from validate import result_max_schema
from validate import result_is_valid

'''
Format a labelled float value as one dot-padded line of report text
'''
def format_float(label, value, units="", default="Not Reported"):
    """Return one report line of the form ``<label> ....... <value> <units>``.

    The label is followed by " ..." and then padded with dots to a width of
    22 characters so that values line up in a column. Labels that are
    already longer than that are left unpadded.

    Args:
        label: Text shown at the start of the line.
        value: Number to render with two decimals, or None when there is
            nothing to report.
        units: Suffix written after the number (may be empty).
        default: Text written instead of the number when value is None.

    Returns:
        The formatted line, terminated by a newline.
    """
    prefix = (label + " ...").ljust(22, ".")

    # Compare against None explicitly: 0.0 is a perfectly valid measurement
    # (e.g. a zero minimum or zero variance) and must not be reported as
    # missing.
    if value is None:
        return "%s %s\n" % (prefix, default)

    return "%s %.2f %s\n" % (prefix, value, units)

'''
Percentile: Used by histogram class to calculate percentiles using the NIST
algorithm (http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm)
'''
class Percentile(object):
    """Tracks one percentile while a sorted histogram is walked bucket by bucket.

    The rank is computed as n = (percentile / 100) * (sample_size + 1) and
    split into its integer part ``k`` and fractional part ``d``. ``key`` is
    the name under which the result is reported: "median" for the 50th
    percentile, "percentile-<p>" otherwise.
    """

    def __init__(self, percentile, sample_size):
        self.percentile = percentile
        self.sample_size = sample_size

        # Nothing has been observed yet.
        self.value = None
        self.is_calculated = False

        # Rank of the requested percentile, split into whole and fraction.
        rank = (percentile / 100.0) * (sample_size + 1)
        self.n = rank
        self.k = math.floor(rank)
        self.d = rank - self.k

        self.key = "median" if percentile == 50 else "percentile-%d" % percentile

    def findvalue(self, count, hist_value):
        """Feed the next candidate bucket into the percentile calculation.

        Args:
            count: Cumulative number of samples seen up to and including
                this bucket.
            hist_value: Numeric value of this bucket.

        After the call ``value`` holds the current estimate, and
        ``is_calculated`` is True once that estimate is final.
        """
        if self.value is not None:
            # A provisional value was stored on an earlier call; interpolate
            # toward this bucket by the fractional part of the rank.
            self.value += self.d * (hist_value - self.value)
            self.is_calculated = True
            return

        # First call: every remaining case starts from this bucket's value.
        self.value = hist_value

        # The value is final when the rank falls before the first sample, at
        # or beyond the last sample, or strictly before the running count.
        # Otherwise it stays provisional until the next call.
        if (self.k == 0
                or (count >= self.sample_size and self.k >= self.sample_size)
                or (self.k + self.d) < count):
            self.is_calculated = True

'''
Histogram class used to calculate common histogram metrics
'''
class Histogram(object):
    """Summary statistics over a histogram stored as {bucket label: count}."""

    def __init__(self, hist_dict):
        # Mapping of bucket label -> number of samples in that bucket.
        # get_stats() may replace this with a rescaled copy (see below).
        self.hist_dict = hist_dict

    def get_stats(self, bucket_width=None, units=None):
        """Compute descriptive statistics for the histogram.

        Args:
            bucket_width: Optional width of one bucket. When given together
                with ``units``, every bucket label is multiplied by
                ``bucket_width / units`` and re-rendered as a fixed-point
                string before the statistics are computed.
            units: Optional size of the reporting unit, expressed in the
                same scale as ``bucket_width``.

        Returns:
            A dict with 'mode' (list of floats), 'mean', 'minimum',
            'maximum', 'variance', 'standard-deviation' and, for each
            percentile that could be determined, 'percentile-25', 'median',
            'percentile-75' and 'percentile-95'. An empty dict is returned
            when any bucket label is not numeric or when the histogram
            holds no samples.

        Side effects:
            When rescaling is requested, ``self.hist_dict`` is replaced by
            the rescaled histogram, so calling this twice with the same
            arguments rescales twice.
        """
        # Statistics are only defined for numeric buckets. Check this before
        # rescaling so a non-numeric label yields {} instead of an uncaught
        # ValueError from the conversion below, and leaves hist_dict intact.
        for bucket in self.hist_dict:
            try:
                float(bucket)
            except ValueError:
                return {}

        # Rescale the bucket labels if bucket_width and units are provided.
        # The empty-histogram guard keeps log10() from being evaluated when
        # there is nothing to rescale.
        if bucket_width and units and self.hist_dict:
            scale = bucket_width / units
            digits = math.log10(bucket_width)
            if digits >= 0:
                digits = 0
            else:
                digits = abs(digits) - 1  # remove 1 otherwise always have extra digit
            precision = int(digits)

            rescaled = {}
            for bucket, count in self.hist_dict.items():
                label = '{:.{prec}f}'.format(float(bucket) * scale, prec=precision)
                # NOTE: if two labels render identically the later one wins.
                rescaled[label] = count
            self.hist_dict = rescaled

        # Pass one: mode, mean and sample size.
        mode_keys = []
        mode_count = None
        weighted_sum = 0
        sample_size = 0
        for bucket, count in self.hist_dict.items():
            if not mode_keys or count > mode_count:
                mode_keys = [bucket]
                mode_count = count
            elif count == mode_count:
                mode_keys.append(bucket)
            weighted_sum += float(bucket) * count
            sample_size += count

        if sample_size == 0:
            return {}

        stats = {
            'mode': [float(bucket) for bucket in mode_keys],
            'mean': weighted_sum / (1.0 * sample_size),
        }

        # Sort buckets as numbers, not strings.
        sorted_hist = sorted(self.hist_dict.items(), key=lambda item: float(item[0]))
        stats['minimum'] = float(sorted_hist[0][0])
        stats['maximum'] = float(sorted_hist[-1][0])

        # Pass two: quantiles, variance and standard deviation.
        squared_dev_sum = 0
        pending = iter([Percentile(q, sample_size) for q in (25, 50, 75, 95)])
        percentile = next(pending, None)
        curr_count = 0
        for bucket, count in sorted_hist:
            value = float(bucket)
            squared_dev_sum += math.pow(value - stats['mean'], 2) * count

            curr_count += count
            while percentile is not None and curr_count >= percentile.k:
                percentile.findvalue(curr_count, value)
                if not percentile.is_calculated:
                    # This percentile needs the next bucket to interpolate,
                    # so resume with it on the next iteration.
                    break
                stats[percentile.key] = percentile.value
                percentile = next(pending, None)

        stats['variance'] = squared_dev_sum / sample_size
        stats['standard-deviation'] = math.sqrt(stats['variance'])
        return stats


'''
Main Program
'''
#Get format. Currently only support text/plain
try:
    format = sys.argv[1]
except IndexError:
    format = 'text/plain'

if format != 'text/plain':
    pscheduler.fail("Unsupported format '%s'" % format)

#parse JSON input
input = pscheduler.json_load(exit_on_error=True, max_schema=result_max_schema())

#validate against JSON schema file
if "result" not in input:
    pscheduler.fail("Missing 'result' key in input passed to result-format")
valid, message = result_is_valid(input["result"])
if not valid:
    pscheduler.fail(message)

json = input["result"]
# get bucket width and default to 1ms
bucket_width = input.get('spec', {}).get('bucket-width', 0.001)


def _bucket_sort_key(item):
    """Sort key for (label, count) pairs: numeric labels first, in numeric
    order; any non-numeric labels afterwards, alphabetically."""
    label = item[0]
    try:
        return (0, float(label), label)
    except ValueError:
        return (1, 0.0, label)


#Output basic stats
output = "\nPacket Statistics\n"
output += "-----------------\n"
output += "Packets Sent ......... %s packets\n" % json.get('packets-sent', 'Not Reported')
output += "Packets Received ..... %s packets\n" % json.get('packets-received', 'Not Reported')
output += "Packets Lost ......... %s packets\n" % json.get('packets-lost', 'Not Reported')
output += "Packets Duplicated ... %s packets\n" % json.get('packets-duplicated', 'Not Reported')
output += "Packets Reordered .... %s packets\n" % json.get('packets-reordered', 'Not Reported')

#Output one-way delay histogram
output += "\nOne-way Latency Statistics\n"
output += "--------------------------\n"
owd_hist = Histogram(json.get('histogram-latency', {}))
# get_stats() also rewrites owd_hist.hist_dict with the rescaled bucket
# labels, which the "Histogram:" listing below relies on.
stats = owd_hist.get_stats(bucket_width=bucket_width, units=0.001)
output += format_float("Delay Median", stats.get('median', None), units="ms")
output += format_float("Delay Minimum", stats.get('minimum', None), units="ms")
output += format_float("Delay Maximum", stats.get('maximum', None), units="ms")
output += format_float("Delay Mean", stats.get('mean', None), units="ms")
output += "Delay Mode ........... "
for mode in stats.get('mode', []):
    output += "%.2f ms " % mode
output += "\n"
output += format_float("Delay 25th Percentile", stats.get('percentile-25', None), units="ms")
output += format_float("Delay 75th Percentile", stats.get('percentile-75', None), units="ms")
output += format_float("Delay 95th Percentile", stats.get('percentile-95', None), units="ms")
output += "Max Clock Error ...... %s ms\n" % json.get('max-clock-error', 'Not Reported')
output += "Common Jitter Measurements:\n"

# Test for presence rather than truthiness: a percentile of exactly 0.0 is a
# valid measurement and must not suppress the jitter lines.
p25 = stats.get('percentile-25', None)
p50 = stats.get('median', None)
p75 = stats.get('percentile-75', None)
p95 = stats.get('percentile-95', None)
if p95 is not None and p50 is not None:
    output += "    P95 - P50 ........ %.2f ms\n" % (p95 - p50)
if p75 is not None and p25 is not None:
    output += "    P75 - P25 ........ %.2f ms\n" % (p75 - p25)
# NOTE(review): variance of millisecond values is in ms^2; the label is kept
# as "ms" here so existing output is unchanged -- confirm before altering.
output += format_float("    Variance", stats.get('variance', None), units="ms")
output += format_float("    Std Deviation", stats.get('standard-deviation', None), units="ms")
output += "Histogram:\n"
for owd_bucket in sorted(owd_hist.hist_dict.items(), key=lambda k: float(k[0])):
    output += "    %s ms: %d packets\n" % (owd_bucket[0], owd_bucket[1])

#Output TTL histogram
output += "\nTTL Statistics\n"
output += "--------------\n"
ttl_hist = Histogram(json.get('histogram-ttl', {}))
ttl_stats = ttl_hist.get_stats()
output += format_float("TTL Median", ttl_stats.get('median', None))
output += format_float("TTL Minimum", ttl_stats.get('minimum', None))
output += format_float("TTL Maximum", ttl_stats.get('maximum', None))
output += format_float("TTL Mean", ttl_stats.get('mean', None))
output += "TTL Mode ............. "
for mode in ttl_stats.get('mode', []):
    output += "%.2f " % mode
output += "\n"
output += format_float("TTL 25th Percentile", ttl_stats.get('percentile-25', None))
output += format_float("TTL 75th Percentile", ttl_stats.get('percentile-75', None))
output += format_float("TTL 95th Percentile", ttl_stats.get('percentile-95', None))
output += "Histogram:\n"
# Bucket labels are strings; sort them by numeric value so that, for
# example, "64" is listed before "128".
for ttl_bucket in sorted(json.get('histogram-ttl', {}).items(), key=_bucket_sort_key):
    output += "    %s: %d packets\n" % (ttl_bucket[0], ttl_bucket[1])

#output raw packets if we have them
if 'raw-packets' in json:
    output += "\nRaw packets\n"
    output += "----------\n"
    output += "SEQ SRC-TS SRC-CLOCK-SYNC SRC-CLOCK-ERR DST-TS DST-CLOCK-SYNC DST-CLOCK-ERR TTL\n"
    for p in json['raw-packets']:
        output += "%d %d %s %s %d %s %s %d\n" % (p['seq-num'], p['src-ts'], p['src-clock-sync'], p.get('src-clock-err', 'n/a'),  p['dst-ts'], p['dst-clock-sync'], p.get('dst-clock-err', 'n/a'), p['ip-ttl'])

#Print to stdout
print(output)
