def test_should_convert_different_data_types_as_defined_in_column_definition(self):
    panda = panda_parser.convert_to_panda(self.data, self.columns)
    # col1 is parsed as a string and col3 as an integer, per the column definitions
    self.assertEqual('c1', panda['col1'][0])
    self.assertEqual(13, panda['col3'][0])

import math

import numpy
import pandas as pd

import panda_parser
import table_parser


# agg_throughput and Workloads are defined elsewhere in this module.
def throughput(dir):
    # Create the raw table from the log files
    raw = table_parser.data_table(dir)
    # Convert to a pandas DataFrame
    df = panda_parser.convert_to_panda(raw, table_parser.column_defs)
    if df.empty:
        raise Exception('No data found')

    # Calculate how much data is added per 'run'
    inserts_per_iter = int(df[(df['workload'] == 'load') &
                              (df['insertcount'] > 0)]['insertcount'].unique().mean())
    field_count = int(df['fieldcount'].unique().mean())
    field_len = int(df['fieldlength'].unique().mean())
    # Keep numbers round by approximating a KB as 10^3 bytes
    data_per_iteration = inserts_per_iter * field_count * field_len // 1000

    # Define the x axis as the cumulative data added per iteration
    iterations = len(df['key-start'].unique()) + 1
    x_axis = list(numpy.arange(data_per_iteration,
                               data_per_iteration * iterations,
                               data_per_iteration))
    x_axis = [int(i) for i in x_axis]
    x_axis.insert(0, 'x')

    # Define the columns we want in the chart and merge them into a single
    # table (joining on the index, which is key-start)
    merged = pd.concat(
        [agg_throughput(df, Workloads.LOAD), agg_throughput(df, Workloads.A)],
        axis=1)
    merged.columns = ['load-throughput', 'wla-throughput']

    # Pull out each column, prefix it with its name, and replace NaNs
    # (introduced by the outer join) with 0 so the chart gets clean numbers
    plots = (x_axis, )
    for column in merged.columns:
        plot = list(merged[column])
        plot.insert(0, column)
        plot = [0 if isinstance(x, numpy.floating) and math.isnan(x) else x
                for x in plot]
        plots += (plot, )

    output = "x:'x', columns:[%s,%s,%s]" % plots
    return output

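# A minimal usage sketch, assuming the '../../logs' layout used by the
# reporting script below. The returned fragment matches the shape of a
# c3.js 'data' config; the chart library and the embedding shown here are
# assumptions, not confirmed by this module:
if __name__ == '__main__':
    fragment = throughput('../../logs')
    # e.g. paste into: c3.generate({data: {x:'x', columns:[...]}})
    print("{%s}" % fragment)
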
def test_should_convert_something_larger(self):
    panda = panda_parser.convert_to_panda(self.larger, self.columns)
    self.assertEqual(3, len(panda))
    self.assertEqual(13, panda['col3'][0])
    self.assertEqual(14, panda['col3'][1])
    self.assertEqual(17, panda['col3'][2])

def test_should_convert_basic_table_to_panda_columns(self):
    panda = panda_parser.convert_to_panda(self.data, self.columns)
    self.assertEqual('c1', panda['col1'][0])
    self.assertEqual('mongodb', panda['col2'][0])

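# The fixtures used by the tests above (self.columns, self.data,
# self.larger) are not shown in this section. A hypothetical setUp sketch,
# assuming convert_to_panda takes raw string rows plus (name, type) column
# definitions; the container shapes are assumptions, though the values
# mirror the assertions above:
def setUp(self):
    self.columns = [('col1', str), ('col2', str), ('col3', int)]
    self.data = [['c1', 'mongodb', '13']]
    self.larger = [['c1', 'mongodb', '13'],
                   ['c2', 'mongodb', '14'],
                   ['c3', 'mongodb', '17']]
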
import pandas as pd

import panda_parser
import table_parser

dir = '../../logs'

# Create the raw table from the log files
raw = table_parser.data_table(dir)
# Convert to a pandas DataFrame
df = panda_parser.convert_to_panda(raw, table_parser.column_defs)
print(df)
print('')

# Aggregate once by thread count; the mean collapses repeated runs
agg = df.groupby('threadcount').agg('mean')

tp = agg['throughput']
print('Throughput (ops/sec) by threadcount (higher is better)')
print(tp)
print('')

insert_lat = agg['insert-lat']
update_lat = agg['update-lat']
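
# The script stops after extracting the latency series; a sketch of the
# matching output step, mirroring the throughput print above (the label
# text and the 'lower is better' framing are assumptions):
print('Insert latency by threadcount (lower is better)')
print(insert_lat)
print('')
print('Update latency by threadcount (lower is better)')
print(update_lat)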