Example #1
    def test_should_convert_different_data_types_as_defined_in_column_definition(
            self):
        panda = panda_parser.convert_to_panda(self.data, self.columns)

        print(panda)

        self.assertEqual('c1', panda['col1'][0])
        self.assertEqual(13, panda['col3'][0])
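Taken together, these tests pin down the contract of convert_to_panda: it takes raw parsed rows plus column definitions and returns a pandas DataFrame whose columns are cast to the declared types. A minimal sketch of such a function, assuming column definitions are (name, type) pairs; the actual shape of table_parser.column_defs is not shown in these examples, so this is illustrative only:

import pandas as pd


def convert_to_panda(rows, column_defs):
    # Hypothetical sketch, not the project's implementation: build a
    # DataFrame from the raw rows, then cast each column to the type
    # named in its (name, type) definition.
    names = [name for name, _ in column_defs]
    df = pd.DataFrame(rows, columns=names)
    for name, col_type in column_defs:
        df[name] = df[name].astype(col_type)
    return df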
Example #2
import math

import numpy
import pandas as pd

import panda_parser
import table_parser

# NOTE: Workloads and agg_throughput are referenced below but defined
# elsewhere in this module; they are not shown in this example.


def throughput(log_dir):
    # Create the raw table from the log files
    raw = table_parser.data_table(log_dir)

    # Convert to panda
    df = panda_parser.convert_to_panda(raw, table_parser.column_defs)

    if df.empty:
        raise ValueError('No data found')

    # Calculate how much data is added per 'run'
    # Calculate how much data is added per 'run'
    inserts_per_iter = int(
        df[(df['workload'] == 'load')
           & (df['insertcount'] > 0)]['insertcount'].unique().mean())
    field_count = int(df['fieldcount'].unique().mean())
    field_len = int(df['fieldlength'].unique().mean())
    # Keep the numbers round by approximating KB as 10^3; integer division
    # keeps the axis values whole.
    data_per_iteration = inserts_per_iter * field_count * field_len // 1000

    # Define the x axes as the incremental data we are adding
    iterations = len(df['key-start'].unique()) + 1
    x_axis = list(
        numpy.arange(data_per_iteration, data_per_iteration * iterations,
                     data_per_iteration))
    x_axis = [int(i) for i in x_axis]
    x_axis.insert(0, 'x')

    # Define the columns we want in the chart and merge them into a single table
    # (merging on the key which is key-start)
    merged = pd.concat(
        [agg_throughput(df, Workloads.LOAD),
         agg_throughput(df, Workloads.A)],
        axis=1)
    merged.columns = ['load-throughput', 'wla-throughput']

    # Pull out each column, give it a name, format the output string
    plots = (x_axis, )
    for column in merged.columns:
        plot = list(merged[column])
        plot.insert(0, column)
        # Replace NaN cells (runs with no data) with 0 so the output stays clean
        plot = [
            0 if isinstance(x, (float, numpy.floating)) and math.isnan(x)
            else x for x in plot
        ]
        plots += (plot, )

    output = "x:'x', columns:[%s,%s,%s]" % plots

    return output
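The returned string is the body of a c3.js data object (the 'x' axis plus one named column per workload). A hedged usage sketch; the template file names and the {{DATA}} placeholder are assumptions, not part of the project shown here:

# Splice the data fragment into a hypothetical HTML template whose
# c3.generate() call contains a {{DATA}} placeholder.
chart_data = throughput('../../logs')
with open('chart.html.template') as template:
    html = template.read().replace('{{DATA}}', chart_data)
with open('chart.html', 'w') as out:
    out.write(html)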
Example #3
    def test_should_convert_something_larger(self):
        panda = panda_parser.convert_to_panda(self.larger, self.columns)
        self.assertEqual(3, len(panda))
        self.assertEqual(13, panda['col3'][0])
        self.assertEqual(14, panda['col3'][1])
        self.assertEqual(17, panda['col3'][2])
Example #4
    def test_should_convert_basic_table_to_panda_columns(self):
        panda = panda_parser.convert_to_panda(self.data, self.columns)

        self.assertEqual('c1', panda['col1'][0])
        self.assertEqual('mongodb', panda['col2'][0])
Example #5

import pandas as pd

import panda_parser
import table_parser

log_dir = '../../logs'

# Create the raw table from the log files
raw = table_parser.data_table(log_dir)

# Convert to panda
df = panda_parser.convert_to_panda(raw, table_parser.column_defs)

print(df)
print('')

grouped = df.groupby('threadcount')
agg = grouped.mean(numeric_only=True)  # average only the numeric columns
tp = agg['throughput']

print('Throughput (ops/sec) by threadcount (higher is better)')
print(tp)
print('')

# The grouped means above already cover the latency columns, so agg can be
# reused instead of repeating the groupby.
insert_lat = agg['insert-lat']
update_lat = agg['update-lat']

print('Mean insert latency by threadcount (lower is better)')
print(insert_lat)
print('')

print('Mean update latency by threadcount (lower is better)')
print(update_lat)
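For a quick visual check, the same aggregates can be plotted directly; matplotlib is an assumption here, not a dependency these examples use:

import matplotlib.pyplot as plt

# Plot mean latency per threadcount for both operations on one chart.
plt.plot(insert_lat.index, insert_lat.values, label='insert-lat')
plt.plot(update_lat.index, update_lat.values, label='update-lat')
plt.xlabel('threadcount')
plt.ylabel('mean latency')
plt.legend()
plt.show()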