def build_detail_site(data, label_func, j2_env, linestyles, batch=False):
    """Write one detail page (HTML plus summary PNG) per entry of ``data``.

    For every ``(name, runs)`` pair this creates one plot per entry of
    ``args.plottype`` (plus a scatter variant when ``args.scatter`` is set),
    writes a k-nn/qps PNG for the summary page, and renders the
    ``detail_page.html`` template.  Both files are written under
    ``args.outputdir`` and named after ``get_algorithm_name(name, batch)``.

    Relies on the module-level ``args`` and ``plot_variants`` objects.

    Args:
        data: Mapping from a name to the ``runs`` mapping plotted for it.
        label_func: Callable turning a name into the page title.
        j2_env: Template environment that provides ``detail_page.html``; it
            is also handed to ``create_plot``.
        linestyles: Line styles; passed through ``convert_linestyle`` for the
            page plots and unchanged for the PNG.
        batch: Forwarded to ``get_algorithm_name``, ``plot.create_plot`` and
            the template.
    """
    for name, runs in data.items():
        print("Building '%s'" % name)
        label = label_func(name)

        # Kept under its own name so the ``data`` argument is not shadowed.
        plot_data = {"normal": [], "scatter": []}
        for plottype in args.plottype:
            xn, yn = plot_variants[plottype]
            plot_data["normal"].append(
                create_plot(runs, xn, yn, convert_linestyle(linestyles),
                            j2_env))
            if args.scatter:
                plot_data["scatter"].append(
                    create_plot(runs, xn, yn, convert_linestyle(linestyles),
                                j2_env, "Scatterplot ", "bubble"))

        # create png plot for summary page
        data_for_plot = {
            key: prepare_data(run, 'k-nn', 'qps')
            for key, run in runs.items()
        }
        # The PNG and the HTML page share the same path prefix.
        output_base = args.outputdir + get_algorithm_name(name, batch)
        plot.create_plot(
            data_for_plot, False, False, True, 'k-nn', 'qps',
            output_base + ".png", linestyles, batch)
        with open(output_base + ".html", "w") as text_file:
            text_file.write(
                j2_env.get_template("detail_page.html").render(
                    title=label, plot_data=plot_data, args=args,
                    batch=batch))
def build_detail_site(data, label_func, j2_env, linestyles, batch=False):
    """Generate the detail page and summary PNG for each entry of ``data``.

    Each ``(name, runs)`` pair yields one plot per ``args.plottype`` entry
    (and a scatter plot per entry when ``args.scatter`` is set), a k-nn/qps
    PNG for the summary page, and a rendered ``detail_page.html``.  Output
    files go to ``args.outputdir`` and are named after
    ``get_algorithm_name(name, batch)``.

    Relies on the module-level ``args`` and ``plot_variants`` objects.

    Args:
        data: Mapping from a name to the ``runs`` mapping plotted for it.
        label_func: Callable producing the page title from a name.
        j2_env: Template environment that provides ``detail_page.html``; it
            is also handed to ``create_plot``.
        linestyles: Line styles; converted with ``convert_linestyle`` for the
            page plots and passed unchanged to the PNG plot.
        batch: Forwarded to ``get_algorithm_name``, ``plot.create_plot`` and
            the template.
    """
    for name, runs in data.items():
        print("Building '%s'" % name)
        label = label_func(name)

        # A separate name avoids rebinding the ``data`` argument mid-loop.
        page_plots = {"normal": [], "scatter": []}
        for plottype in args.plottype:
            xn, yn = plot_variants[plottype]
            page_plots["normal"].append(create_plot(
                runs, xn, yn, convert_linestyle(linestyles), j2_env))
            if args.scatter:
                page_plots["scatter"].append(
                    create_plot(runs, xn, yn, convert_linestyle(linestyles),
                                j2_env, "Scatterplot ", "bubble"))

        # create png plot for summary page
        data_for_plot = {
            key: prepare_data(run, 'k-nn', 'qps')
            for key, run in runs.items()
        }
        # Resolve the shared file stem once for both output files.
        output_stem = "".join(
            [args.outputdir, get_algorithm_name(name, batch)])
        plot.create_plot(
            data_for_plot, False, False, True, 'k-nn', 'qps',
            output_stem + ".png", linestyles, batch)
        with open(output_stem + ".html", "w") as text_file:
            text_file.write(j2_env.get_template("detail_page.html").render(
                title=label, plot_data=page_plots, args=args, batch=batch))
def run(definition, dataset, count, run_count, batch):
    """Fit the algorithm described by ``definition`` and run its queries.

    Loads ``dataset``, applies the ``dataset_transform`` registered for the
    dataset's distance to the train and test sets, fits the algorithm on the
    train set, then runs ``run_individual_query`` once per query argument
    group and stores each result with ``store_results``.  ``algo.done()`` is
    always called once fitting has been attempted.

    Args:
        definition: Algorithm definition; its ``module``, ``constructor``,
            ``arguments``, ``algorithm`` and ``query_argument_groups``
            attributes are read here.
        dataset: Dataset identifier passed to ``get_dataset`` and recorded in
            the result descriptor.
        count: Forwarded to ``run_individual_query`` and ``store_results``.
        run_count: Forwarded to ``run_individual_query``.
        batch: Forwarded to ``run_individual_query``,
            ``get_algorithm_name`` and ``store_results``.

    Raises:
        AssertionError: If query argument groups are specified but the
            instantiated algorithm has no ``set_query_arguments``.
    """
    algo = instantiate_algorithm(definition)
    # Raised explicitly instead of via ``assert`` so the check is not
    # stripped under ``python -O``; the exception type stays the same.
    if definition.query_argument_groups \
            and not hasattr(algo, "set_query_arguments"):
        raise AssertionError(
            "error: query argument groups have been specified for "
            "%s.%s(%s), but the algorithm instantiated from it does not "
            "implement the set_query_arguments function"
            % (definition.module, definition.constructor,
               definition.arguments))

    D = get_dataset(dataset)
    X_train = numpy.array(D['train'])
    X_test = numpy.array(D['test'])
    distance = D.attrs['distance']
    print("type D: ", type(D))
    print("type x_train: ", type(X_train))
    print("type x_test: ", type(X_test))
    print("type distance: ", type(distance))
    print('got a train set of size (%d * %d)' % X_train.shape)
    print('got %d queries' % len(X_test))
    X_train = dataset_transform[distance](X_train)
    X_test = dataset_transform[distance](X_test)

    try:
        # The result is not needed in this function; the call itself is kept.
        # NOTE(review): confirm whether this probe can be dropped entirely.
        if hasattr(algo, "supports_prepared_queries"):
            algo.supports_prepared_queries()

        t0 = time.time()
        memory_usage_before = algo.get_memory_usage()
        algo.fit(X_train)
        build_time = time.time() - t0
        index_size = algo.get_memory_usage() - memory_usage_before
        print('Built index in', build_time)
        print('Index size: ', index_size)

        query_argument_groups = definition.query_argument_groups
        # Make sure that algorithms with no query argument groups still get
        # run once by providing them with a single, empty, harmless group
        if not query_argument_groups:
            query_argument_groups = [[]]

        for pos, query_arguments in enumerate(query_argument_groups, 1):
            print("Running query argument group %d of %d..."
                  % (pos, len(query_argument_groups)))
            if query_arguments:
                algo.set_query_arguments(*query_arguments)
            descriptor, results = run_individual_query(
                algo, X_train, X_test, distance, count, run_count, batch)
            descriptor["build_time"] = build_time
            descriptor["index_size"] = index_size
            descriptor["algo"] = get_algorithm_name(
                definition.algorithm, batch)
            descriptor["dataset"] = dataset
            store_results(dataset, count, definition, query_arguments,
                          descriptor, results, batch)
    finally:
        algo.done()
def run(definition, dataset, count, run_count, batch):
    """Fit the algorithm described by ``definition`` and run its queries.

    Loads ``dataset``, fits the algorithm on the train set, then runs
    ``run_individual_query`` once per query argument group and stores each
    result with ``store_results``.  ``algo.done()`` is always called once
    fitting has been attempted.

    Args:
        definition: Algorithm definition; its ``module``, ``constructor``,
            ``arguments``, ``algorithm`` and ``query_argument_groups``
            attributes are read here.
        dataset: Dataset identifier passed to ``get_dataset`` and recorded in
            the result descriptor.
        count: Forwarded to ``run_individual_query`` and ``store_results``.
        run_count: Forwarded to ``run_individual_query``.
        batch: Forwarded to ``run_individual_query``,
            ``get_algorithm_name`` and ``store_results``.

    Raises:
        AssertionError: If query argument groups are specified but the
            instantiated algorithm has no ``set_query_arguments``.
    """
    algo = instantiate_algorithm(definition)
    # Raised explicitly instead of via ``assert`` so the check is not
    # stripped under ``python -O``; the exception type stays the same.
    if definition.query_argument_groups \
            and not hasattr(algo, "set_query_arguments"):
        raise AssertionError(
            "error: query argument groups have been specified for "
            "%s.%s(%s), but the algorithm instantiated from it does not "
            "implement the set_query_arguments function"
            % (definition.module, definition.constructor,
               definition.arguments))

    D = get_dataset(dataset)
    X_train = numpy.array(D['train'])
    X_test = numpy.array(D['test'])
    distance = D.attrs['distance']
    print('got a train set of size (%d * %d)' % X_train.shape)
    print('got %d queries' % len(X_test))

    try:
        # The result is not needed in this function; the call itself is kept.
        # NOTE(review): confirm whether this probe can be dropped entirely.
        if hasattr(algo, "supports_prepared_queries"):
            algo.supports_prepared_queries()

        t0 = time.time()
        memory_usage_before = algo.get_memory_usage()
        algo.fit(X_train)
        build_time = time.time() - t0
        index_size = algo.get_memory_usage() - memory_usage_before
        print('Built index in', build_time)
        print('Index size: ', index_size)

        query_argument_groups = definition.query_argument_groups
        # Make sure that algorithms with no query argument groups still get
        # run once by providing them with a single, empty, harmless group
        if not query_argument_groups:
            query_argument_groups = [[]]

        for pos, query_arguments in enumerate(query_argument_groups, 1):
            print("Running query argument group %d of %d..."
                  % (pos, len(query_argument_groups)))
            if query_arguments:
                algo.set_query_arguments(*query_arguments)
            descriptor, results = run_individual_query(
                algo, X_train, X_test, distance, count, run_count, batch)
            descriptor["build_time"] = build_time
            descriptor["index_size"] = index_size
            descriptor["algo"] = get_algorithm_name(
                definition.algorithm, batch)
            descriptor["dataset"] = dataset
            store_results(dataset, count, definition, query_arguments,
                          descriptor, results, batch)
    finally:
        algo.done()
def run(definition, dataset, count, run_count, batch):
    """Fit the algorithm described by ``definition`` and evaluate it.

    Loads ``dataset`` and fits the algorithm.  When ``algo.builds_graph()``
    is true the test set is prepended to the train set before fitting and
    each query argument group is evaluated with ``check_graph``; otherwise
    ``run_individual_query`` is used.  Every result is stored with
    ``store_results``, and ``algo.done()`` is always called once fitting has
    been attempted.

    Args:
        definition: Algorithm definition; its ``module``, ``constructor``,
            ``arguments``, ``algorithm`` and ``query_argument_groups``
            attributes are read here.
        dataset: Dataset identifier passed to ``get_dataset`` and recorded in
            the result descriptor.
        count: Forwarded to the evaluation helpers and ``store_results``;
            recorded in the descriptor as ``int(count)``.
        run_count: Forwarded to ``run_individual_query``.
        batch: Forwarded to ``run_individual_query``,
            ``get_algorithm_name`` and ``store_results``; recorded in the
            descriptor as ``batch_mode``.

    Raises:
        AssertionError: If query argument groups are specified but the
            instantiated algorithm has no ``set_query_arguments``.
    """
    algo = instantiate_algorithm(definition)
    # Raised explicitly instead of via ``assert`` so the check is not
    # stripped under ``python -O``; the exception type stays the same.
    if definition.query_argument_groups \
            and not hasattr(algo, "set_query_arguments"):
        raise AssertionError(
            "error: query argument groups have been specified for "
            "%s.%s(%s), but the algorithm instantiated from it does not "
            "implement the set_query_arguments function"
            % (definition.module, definition.constructor,
               definition.arguments))

    D = get_dataset(dataset)
    X_train = numpy.array(D['train'])
    X_test = numpy.array(D['test'])
    if algo.builds_graph():
        # Test data first to avoid converting test set index to graph index
        X_train = numpy.concatenate((X_test, X_train))
        # The protocol expects the count to be given at query time, so it has
        # to be set as a parameter beforehand.
        # NOTE(review): treated as graph-builder-only setup — confirm that
        # set_count is not meant to run for every algorithm.
        algo.set_count(count)
    distance = D.attrs['distance']
    print('got a train set of size (%d * %d)' % X_train.shape)
    print('got %d queries' % len(X_test))

    try:
        # The result is not needed in this function; the call itself is kept.
        # NOTE(review): confirm whether this probe can be dropped entirely.
        if hasattr(algo, "supports_prepared_queries"):
            algo.supports_prepared_queries()

        t0 = time.time()
        memory_usage_before = algo.get_memory_usage()
        algo.fit(X_train)
        build_time = time.time() - t0
        index_size = algo.get_memory_usage() - memory_usage_before
        print('Built index in', build_time)
        print('Index size: ', index_size)

        query_argument_groups = definition.query_argument_groups
        # Make sure that algorithms with no query argument groups still get
        # run once by providing them with a single, empty, harmless group
        if not query_argument_groups:
            query_argument_groups = [[]]

        for pos, query_arguments in enumerate(query_argument_groups, 1):
            print("Running query argument group %d of %d..."
                  % (pos, len(query_argument_groups)))
            if query_arguments:
                algo.set_query_arguments(*query_arguments)
            if algo.builds_graph():
                descriptor, results = check_graph(
                    algo, X_train, X_test, distance, count)
            else:
                descriptor, results = run_individual_query(
                    algo, X_train, X_test, distance, count, run_count, batch)
            descriptor["build_time"] = build_time
            descriptor["index_size"] = index_size
            descriptor["algo"] = get_algorithm_name(
                definition.algorithm, batch)
            descriptor["dataset"] = dataset
            descriptor["count"] = int(count)
            descriptor["batch_mode"] = batch
            store_results(dataset, count, definition, query_arguments,
                          descriptor, results, batch)
    finally:
        algo.done()