def benchmark_table(self):
    """Benchmark table creation from different formats.

    For each of the ``df``, ``dict`` and ``records`` attributes of this
    instance, wrap a ``Table(data)`` call in a ``Benchmark`` and store it on
    the instance as ``table_<name>``.
    """
    for name in ("df", "dict", "records"):
        data = getattr(self, name)
        test_meta = make_meta("table", name)
        # Bind `data` as a default argument. A plain closure looks the
        # variable up at call time, so a benchmark invoked after the loop
        # has advanced would build the table from a later dataset instead
        # of the one it was registered for.
        func = Benchmark(lambda data=data: Table(data), meta=test_meta)
        setattr(self, "table_{0}".format(name), func)
def benchmark_table_arrow(self):
    """Benchmark table creation from Arrow data.

    Kept apart from the other table benchmarks because the Arrow payload
    has to be read from the file at ``SUPERSTORE_ARROW`` first. The bytes
    are read once here; the benchmark itself only wraps ``Table(...)`` and
    is stored on the instance as ``table_arrow``.
    """
    with open(SUPERSTORE_ARROW, "rb") as arrow_file:
        arrow_bytes = arrow_file.read()

    self.table_arrow = Benchmark(
        lambda: Table(arrow_bytes), meta=make_meta("table", "arrow")
    )
def benchmark_view_one(self):
    """Benchmark view creation with different pivots.

    Registers one benchmark per non-empty entry of
    ``PerspectiveBenchmark.ROW_PIVOT_OPTIONS``. Each benchmark calls
    ``self._table.view`` with that entry as ``row_pivots`` and is stored on
    the instance as ``view_<meta name>``.
    """
    for pivot in PerspectiveBenchmark.ROW_PIVOT_OPTIONS:
        if not pivot:
            # The empty pivot case is not benchmarked here.
            continue
        test_meta = make_meta("view", "one_{0}_pivot".format(len(pivot)))
        view_constructor = partial(self._table.view, row_pivots=pivot)
        # Bind the constructor as a default argument. A plain closure
        # resolves `view_constructor` at call time, so benchmarks invoked
        # after the loop would all use the constructor from the final
        # iteration.
        func = Benchmark(
            lambda view_constructor=view_constructor: view_constructor(),
            meta=test_meta
        )
        setattr(self, "view_{0}".format(test_meta["name"]), func)
def benchmark_view_two(self):
    """Benchmark view creation with row and column pivots.

    Pairs the entries of ``PerspectiveBenchmark.ROW_PIVOT_OPTIONS`` and
    ``PerspectiveBenchmark.COLUMN_PIVOT_OPTIONS`` by position and registers
    one benchmark per pair, skipping the pair where both sides are empty.
    Each benchmark is stored on the instance as ``view_<meta name>``.
    """
    option_pairs = zip(
        PerspectiveBenchmark.ROW_PIVOT_OPTIONS,
        PerspectiveBenchmark.COLUMN_PIVOT_OPTIONS
    )
    for RP, CP in option_pairs:
        if not RP and not CP:
            continue
        test_meta = make_meta(
            "view", "two_{0}x{1}_pivot".format(len(RP), len(CP)))
        view_constructor = partial(
            self._table.view, row_pivots=RP, column_pivots=CP
        )
        # Bind the constructor as a default argument. A plain closure
        # resolves `view_constructor` at call time, so benchmarks invoked
        # after the loop would all use the constructor from the final
        # iteration.
        func = Benchmark(
            lambda view_constructor=view_constructor: view_constructor(),
            meta=test_meta
        )
        setattr(self, "view_{0}".format(test_meta["name"]), func)
def benchmark_view_one_df_updates(self):
    """Benchmark dataframe updates for one-sided views.

    Builds a table from ``self._df_schema``, opens 25 views pivoted by
    rows on ``State`` and ``City``, and attaches ``empty_callback`` as the
    update callback of each. The registered benchmark applies a DataFrame
    built from ``self._get_update_data(1000)`` and then calls
    ``table.size()``. It is stored on the instance as ``update_one_df``.
    """
    table = Table(self._df_schema)

    pivoted_views = []
    for _ in range(25):
        pivoted_views.append(table.view(row_pivots=["State", "City"]))
    for view in pivoted_views:
        view.on_update(empty_callback)

    update_frame = pd.DataFrame(self._get_update_data(1000))

    def resolve_update():
        table.update(update_frame)
        table.size()

    self.update_one_df = Benchmark(
        resolve_update, meta=make_meta("update", "one_df")
    )
def benchmark_to_format_two_column_only(self):
    """Benchmark each `to_format` method for two-sided column-only contexts.

    For every output format (``dict``, ``records``, ``df``, ``arrow``) and
    every non-empty entry of ``PerspectiveBenchmark.COLUMN_PIVOT_OPTIONS``,
    create a view with that entry as ``column_pivots`` and register a
    benchmark that calls the view's ``to_<format>`` method. Each benchmark
    is stored on the instance as ``to_format_<meta name>``.
    """
    for name in ("dict", "records", "df", "arrow"):
        for pivot in PerspectiveBenchmark.COLUMN_PIVOT_OPTIONS:
            if not pivot:
                continue
            test_meta = make_meta(
                "to_format", "{0}_{1}_column".format(name, len(pivot)))
            view = self._table.view(column_pivots=pivot)
            # Bind `view` and `name` as default arguments. A plain closure
            # resolves both at call time, so benchmarks invoked after the
            # loops would all serialise the last view with the last format.
            func = Benchmark(
                lambda view=view, name=name: getattr(
                    view, "to_{0}".format(name))(),
                meta=test_meta
            )
            setattr(self, "to_format_{0}".format(test_meta["name"]), func)
def benchmark_view_two(self):
    """Benchmark view creation with Group By and Split By.

    Pairs the entries of ``PerspectiveBenchmark.group_by_OPTIONS`` and
    ``PerspectiveBenchmark.split_by_OPTIONS`` by position and registers one
    benchmark per pair, skipping the pair where both sides are empty. Each
    benchmark is stored on the instance as ``view_<meta name>``.
    """
    option_pairs = zip(
        PerspectiveBenchmark.group_by_OPTIONS,
        PerspectiveBenchmark.split_by_OPTIONS
    )
    for RP, CP in option_pairs:
        if not RP and not CP:
            continue
        test_meta = make_meta(
            "view", "two_{0}x{1}_pivot".format(len(RP), len(CP)))
        view_constructor = partial(
            self._table.view, group_by=RP, split_by=CP
        )
        # Bind the constructor as a default argument. A plain closure
        # resolves `view_constructor` at call time, so benchmarks invoked
        # after the loop would all use the constructor from the final
        # iteration.
        func = Benchmark(
            lambda view_constructor=view_constructor: view_constructor(),
            meta=test_meta
        )
        setattr(self, "view_{0}".format(test_meta["name"]), func)
def benchmark_to_format_two(self):
    """Benchmark each `to_format` method for two-sided contexts.

    For every output format (``numpy``, ``dict``, ``records``, ``df``,
    ``arrow``) and every positional pair taken from
    ``PerspectiveBenchmark.ROW_PIVOT_OPTIONS`` and
    ``PerspectiveBenchmark.COLUMN_PIVOT_OPTIONS`` (skipping the pair where
    both sides are empty), create a view and register a benchmark that
    calls the view's ``to_<format>`` method. Each benchmark is stored on
    the instance as ``to_format_<meta name>``.
    """
    for name in ("numpy", "dict", "records", "df", "arrow"):
        option_pairs = zip(
            PerspectiveBenchmark.ROW_PIVOT_OPTIONS,
            PerspectiveBenchmark.COLUMN_PIVOT_OPTIONS
        )
        for RP, CP in option_pairs:
            if not RP and not CP:
                continue
            test_meta = make_meta(
                "to_format", "{0}_{1}x{2}".format(name, len(RP), len(CP)))
            view = self._table.view(row_pivots=RP, column_pivots=CP)
            # Bind `view` and `name` as default arguments. A plain closure
            # resolves both at call time, so benchmarks invoked after the
            # loops would all serialise the last view with the last format.
            func = Benchmark(
                lambda view=view, name=name: getattr(
                    view, "to_{0}".format(name))(),
                meta=test_meta
            )
            setattr(self, "to_format_{0}".format(test_meta["name"]), func)
def benchmark_view_one_updates(self):
    """Benchmark how long each update takes to resolve across 25 views.

    Builds a table from ``self._schema``, opens 25 views pivoted by rows
    on ``State`` and ``City``, and attaches ``empty_callback`` as the
    update callback of each. The registered benchmark applies the data
    from ``self._get_update_data(1000)`` and then calls ``table.size()``.
    It is stored on the instance as ``update_one``.
    """
    table = Table(self._schema)

    pivoted_views = []
    for _ in range(25):
        pivoted_views.append(table.view(row_pivots=["State", "City"]))
    for view in pivoted_views:
        view.on_update(empty_callback)

    rows = self._get_update_data(1000)

    def resolve_update():
        table.update(rows)
        table.size()

    self.update_one = Benchmark(
        resolve_update, meta=make_meta("update", "one")
    )
def benchmark_view_two_column_only_df_updates(self):
    """Benchmark dataframe updates for two-sided column-only views.

    Builds a table from ``self._df_schema``, opens 25 views with column
    pivots on ``Category`` and ``Sub-Category``, and attaches
    ``empty_callback`` as the update callback of each. The registered
    benchmark applies a DataFrame built from
    ``self._get_update_data(1000)`` and then calls ``table.size()``. It is
    stored on the instance as ``update_two_column_only_df``.
    """
    table = Table(self._df_schema)

    column_views = []
    for _ in range(25):
        column_views.append(
            table.view(column_pivots=["Category", "Sub-Category"])
        )
    for view in column_views:
        view.on_update(empty_callback)

    update_frame = pd.DataFrame(self._get_update_data(1000))

    def resolve_update():
        table.update(update_frame)
        table.size()

    self.update_two_column_only_df = Benchmark(
        resolve_update, meta=make_meta("update", "two_column_only_df")
    )
def benchmark_view_zero_df_updates(self):
    """Benchmark how long each dataframe update takes across 25 views.

    Builds a table from ``self._df_schema``, opens 25 unpivoted views, and
    attaches ``empty_callback`` as the update callback of each. The
    registered benchmark applies a DataFrame built from
    ``self._get_update_data(1000)`` and then calls ``table.size()``. It is
    stored on the instance as ``update_zero_df``.

    This variant updates with a DataFrame so its timings can be compared
    with the regular data-structure update benchmarks, showing the
    overhead of dataframe loading.
    """
    table = Table(self._df_schema)

    flat_views = []
    for _ in range(25):
        flat_views.append(table.view())
    for view in flat_views:
        view.on_update(empty_callback)

    update_frame = pd.DataFrame(self._get_update_data(1000))

    def resolve_update():
        table.update(update_frame)
        table.size()

    self.update_zero_df = Benchmark(
        resolve_update, meta=make_meta("update", "zero_df")
    )
def benchmark_view_two_column_only_updates(self):
    """Benchmark how long each update takes to resolve across 25 views.

    Builds a table from ``self._schema``, opens 25 views split by
    ``Category`` and ``Sub-Category``, and attaches ``empty_callback`` as
    the update callback of each. The registered benchmark applies the data
    from ``self._get_update_data(1000)`` and then calls ``table.size()``.
    It is stored on the instance as ``update_two_column_only``.
    """
    table = Table(self._schema)

    split_views = []
    for _ in range(25):
        split_views.append(
            table.view(split_by=["Category", "Sub-Category"])
        )
    for view in split_views:
        view.on_update(empty_callback)

    rows = self._get_update_data(1000)

    def resolve_update():
        table.update(rows)
        table.size()

    self.update_two_column_only = Benchmark(
        resolve_update, meta=make_meta("update", "two_column_only")
    )
def benchmark_view_zero(self):
    """Benchmark view creation with zero pivots.

    Registers a benchmark that calls ``self._table.view()`` with no
    arguments and stores it on the instance as ``view_zero``.
    """
    self.view_zero = Benchmark(
        lambda: self._table.view(),
        meta=make_meta("view", "zero")
    )
(1, 2**10), (1, 2**13), (1, 2**16), (1, 2**19), #~ (1, 2**18), #~ (1, 2**20), ] """ # ----------------------------------- if type(Wname) is str: Wname = [Wname] bench = Benchmark() #thetitle = str("%s (%s)" % (what_to_params[what]["name"], wname)) thetitle = str("%s" % what_to_params[what]["name"]) bench.new_figure(thetitle, xlabel="Number of points", ylabel="Time (ms)", xlog=True, ylog=True, xlims=(1.1e-2, 1), ylims=(1e-2, 1)) markers = ["o--", "o-."] for i,wname in enumerate(Wname): bench.new_curve("pywt: " + wname, marker=markers[i]) bench.new_curve("PDWT: " + wname, marker=markers[i]) leg = bench.legend() leg.draggable() results_pywt = [] results_pypwt = []