#!/usr/bin/env python

import sys
import re

# Patterns are written as raw strings so that regex escapes such as \s and \d
# are not interpreted (and warned about) as string-literal escapes.

# A numeric literal (optional leading minus, optional decimal part) or a
# single-quoted string literal (with '' as the embedded-quote escape).
# Group 1 captures what precedes the literal -- either the start of the query
# or a run of delimiter characters -- so a substitution can keep it.  The
# literal must be followed by a delimiter, a semicolon, or the end of input.
LITERAL_RE = re.compile(r"(^|[-\s<>*=()+,]+?)(?:-?\d+(?:\.\d+)?|'(?:''|[^'])*')(?=[-\s<>*=()+,;]|$)")
# DECLARE "<name>" <rest>: group 1 is the keyword, group 2 the rest.
DECLARE_RE = re.compile(r'^(DECLARE )"[^"]*"( .*)$')
# FETCH ALL|<count> FROM "<name>": group 1 is everything before the name.
FETCH_RE = re.compile(r'^(FETCH (?:ALL|\d+) FROM )"[^"]*"$')
# CLOSE "<name>": group 1 is the keyword.
CLOSE_RE = re.compile(r'^(CLOSE )"[^"]*"$')
def make_generic(query):
    """Return *query* with cursor names and literals replaced by '?'.

    If the query starts with DECLARE, FETCH or CLOSE, the double-quoted
    name matched by the corresponding pattern is replaced first (only the
    first matching keyword is considered).  Afterwards every numeric or
    single-quoted literal matched by LITERAL_RE is replaced as well.
    """
    # (statement prefix, pattern to apply, replacement template)
    cursor_rules = (
        ('DECLARE', DECLARE_RE, '\\1?\\2'),
        ('FETCH', FETCH_RE, '\\1?'),
        ('CLOSE', CLOSE_RE, '\\1?'),
    )
    for prefix, pattern, template in cursor_rules:
        if query.startswith(prefix):
            query = pattern.sub(template, query)
            break
    return LITERAL_RE.sub('\\1?', query)

def _collect_durations(lines, emit):
    """Pair every logged statement with the duration logged after it.

    Args:
        lines: iterable of raw log lines (trailing newlines allowed).
        emit: callable taking one string; used to report the offending
            line just before RuntimeError is raised.

    Returns:
        A ``(queries, query_stack)`` tuple.  ``queries`` maps each generic
        query (as produced by make_generic) to the list of its durations as
        floats.  ``query_stack`` maps each pid seen on a statement line
        (None when the line has no pid prefix) to the statements still
        waiting for a duration line.

    Raises:
        RuntimeError: a line is neither a statement, a duration, a skipped
            line, nor an acceptable continuation line.
    """
    statement_re = re.compile(r'^(?:(\d+) )?LOG:\s+statement:\s+(.*)$')
    duration_re = re.compile(r'^(?:(\d+) )?LOG:\s+duration:\s+(.*)\s+ms$')
    skip_re = re.compile(
        r'^(?:\d+ )?(?:(?:DETAIL:\s+|\s*)!\s+.*'
        r'|LOG:\s+(?:QUERY STATISTICS|connection.*|disconnection.*|database.*'
        r'|checkpoint.*|redo.*|next.*|unexpected.*)'
        r'|WARNING:.*|HINT:.*|ERROR:.*|STATEMENT:.*)$')

    query_stack = {}
    queries = {}
    line_number = 0

    last_pid = None
    got_first_statement = False
    last_line_skipped = False

    for line in lines:
        line_number += 1

        if skip_re.match(line):
            last_line_skipped = True
            continue

        found = statement_re.match(line)
        if found:
            last_line_skipped = False
            got_first_statement = True
            last_pid = found.group(1)
            query_stack.setdefault(last_pid, []).append(found.group(2))
            continue

        found = duration_re.match(line)
        if found:
            last_line_skipped = False
            pid = found.group(1)
            pending = query_stack.get(pid)
            if not pending:
                # No statement is waiting for this duration (unknown pid, or
                # every statement of this pid already has one): ignore it
                # instead of failing on an empty list.
                continue
            last_pid = pid
            generic = make_generic(pending.pop(0))
            queries.setdefault(generic, []).append(float(found.group(2)))
            continue

        # Anything else is treated as the continuation of a multi-line
        # statement, provided one is still pending for the last pid seen.
        if not got_first_statement:
            continue
        # NOTE(review): when log lines carry no pid prefix, last_pid is None
        # and continuation lines are never joined -- confirm this is intended.
        if last_pid is not None and query_stack[last_pid]:
            query_stack[last_pid][-1] += ' ' + line.strip()
        elif not last_line_skipped:
            emit("LINE %d: %s\n" % (line_number, line.strip()))
            raise RuntimeError('bad line in input')

    return queries, query_stack


def _emit_ranking(emit, heading, row_format, ranked, stats, tot_queries, tot_time):
    """Write a blank line, *heading*, and one row for each of the last 25 keys of *ranked*."""
    emit("\n")
    emit(heading + "\n")
    for key in ranked[-25:]:
        count, total, average = stats[key]
        if tot_time:
            time_share = total * 100.0 / tot_time
        else:
            # Every recorded duration was zero; avoid dividing by zero.
            time_share = 0.0
        emit(row_format % (count, average, count * 100.0 / tot_queries,
                           time_share, key) + "\n")


def main():
    """Summarize statement timings from a database log read on stdin.

    Lines of the form ``[pid ]LOG: statement: ...`` are paired with the
    following ``[pid ]LOG: duration: ... ms`` line of the same pid, grouped
    by their generic form, and reported on stdout: overall totals followed
    by the top 25 generic queries by frequency, by average duration and by
    total time.  Each row shows the count, the average duration in ms, the
    share of all queries, the share of total time, and the generic query.

    Empty input (or input without any timed statement) yields a report with
    zero totals and empty rankings.

    Raises:
        RuntimeError: the input contains a line that cannot be interpreted.
    """
    emit = sys.stdout.write
    queries, query_stack = _collect_durations(sys.stdin, emit)

    # Compute (count, total, average) once per generic query rather than
    # re-summing the durations in every sort comparison.
    stats = {}
    tot_time = 0.0
    tot_queries = 0
    for generic in queries:
        durations = queries[generic]
        count = len(durations)
        total = sum(durations)
        stats[generic] = (count, total, total / float(count))
        tot_time += total
        tot_queries += count

    emit("TOTAL QUERIES: %d generics from %d total\n" % (len(queries), tot_queries))
    emit("TOTAL DB PIDS: %d\n" % (len(query_stack),))
    emit("TOTAL DB TIME: %0.2fm\n" % (tot_time / 60000,))

    # Rankings are sorted ascending, so the largest entries are printed last.
    by_frequency = sorted(queries, key=lambda k: stats[k][0])
    _emit_ranking(emit, "TOP 25 QUERIES BY FREQUENCY:",
                  "%d: %0.2fms: %0.2f%%: %0.2f%%: %s",
                  by_frequency, stats, tot_queries, tot_time)

    by_average = sorted(queries, key=lambda k: stats[k][2])
    _emit_ranking(emit, "TOP 25 QUERIES BY AVG DURATION:",
                  "%d: %0.2fms: %0.2f%%: %0.2f%%: %s",
                  by_average, stats, tot_queries, tot_time)

    # The heading and row format of this section lack the colon used by the
    # other two; they are kept as-is so existing output stays unchanged.
    by_total = sorted(queries, key=lambda k: stats[k][1])
    _emit_ranking(emit, "TOP 25 QUERIES BY TOTAL TIME",
                  "%d: %0.2fms: %0.2f%%: %0.2f%% %s",
                  by_total, stats, tot_queries, tot_time)

    
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
