Пример #1
0
def build_detail_site(data, label_func, j2_env, linestyles, batch=False):
    """Write a detail HTML page and a summary PNG for every entry of ``data``.

    For each ``(name, runs)`` pair, one plot is created per entry of
    ``args.plottype`` (plus a "bubble" variant when ``args.scatter`` is
    truthy). A PNG is then drawn through ``plot.create_plot`` from the runs
    prepared with ``prepare_data(..., 'k-nn', 'qps')``, and the
    "detail_page.html" template is rendered into
    ``args.outputdir + get_algorithm_name(name, batch) + ".html"``.

    Args:
        data: Mapping of a name to its runs; ``runs`` is indexed by key
            and also passed to ``create_plot`` as a whole.
        label_func: Callable that turns ``name`` into the page title.
        j2_env: Template environment; must provide ``get_template``.
        linestyles: Forwarded to ``convert_linestyle`` and
            ``plot.create_plot``.
        batch: Forwarded to ``get_algorithm_name``, ``plot.create_plot``
            and the template.

    Relies on the free names ``args``, ``plot_variants``, ``create_plot``,
    ``convert_linestyle``, ``prepare_data``, ``plot`` and
    ``get_algorithm_name`` being defined in the enclosing module.
    """
    for name, runs in data.items():
        print("Building '%s'" % name)
        label = label_func(name)
        # Deliberately not called ``data``: that would shadow the parameter
        # currently being iterated over.
        plot_data = {"normal": [], "scatter": []}

        for plottype in args.plottype:
            xn, yn = plot_variants[plottype]
            plot_data["normal"].append(
                create_plot(runs, xn, yn, convert_linestyle(linestyles),
                            j2_env))
            if args.scatter:
                plot_data["scatter"].append(
                    create_plot(runs, xn, yn, convert_linestyle(linestyles),
                                j2_env, "Scatterplot ", "bubble"))

        # Both output files share the same base path; build it once.
        output_base = args.outputdir + get_algorithm_name(name, batch)

        # create png plot for summary page
        data_for_plot = {
            key: prepare_data(runs[key], 'k-nn', 'qps') for key in runs
        }
        plot.create_plot(
            data_for_plot, False, False, True, 'k-nn', 'qps',
            output_base + ".png",
            linestyles, batch)

        page = j2_env.get_template("detail_page.html").render(
            title=label, plot_data=plot_data, args=args, batch=batch)
        with open(output_base + ".html", "w") as text_file:
            text_file.write(page)
Пример #2
0
def build_detail_site(data, label_func, j2_env, linestyles, batch=False):
    """Generate the per-entry detail pages and their summary PNG plots.

    Iterates over ``data`` and, for every ``(name, runs)`` pair:

    * builds one plot per ``args.plottype`` entry, and an additional
      "bubble" plot per entry when ``args.scatter`` is truthy;
    * draws a PNG with ``plot.create_plot`` from the runs prepared by
      ``prepare_data(..., 'k-nn', 'qps')``;
    * renders the "detail_page.html" template and writes it next to the
      PNG, under ``args.outputdir + get_algorithm_name(name, batch)``.

    Args:
        data: Mapping of a name to its runs; ``runs`` is indexed by key
            and also handed to ``create_plot`` as a whole.
        label_func: Callable producing the page title from ``name``.
        j2_env: Template environment; must provide ``get_template``.
        linestyles: Forwarded to ``convert_linestyle`` and
            ``plot.create_plot``.
        batch: Forwarded to ``get_algorithm_name``, ``plot.create_plot``
            and the template.

    Relies on the free names ``args``, ``plot_variants``, ``create_plot``,
    ``convert_linestyle``, ``prepare_data``, ``plot`` and
    ``get_algorithm_name`` being defined in the enclosing module.
    """
    for name, runs in data.items():
        print("Building '%s'" % name)
        label = label_func(name)
        # Use a distinct name so the ``data`` parameter is not shadowed
        # while it is being iterated.
        plot_data = {"normal": [], "scatter": []}

        for plottype in args.plottype:
            xn, yn = plot_variants[plottype]
            plot_data["normal"].append(create_plot(
                runs, xn, yn, convert_linestyle(linestyles), j2_env))
            if args.scatter:
                plot_data["scatter"].append(
                    create_plot(runs, xn, yn, convert_linestyle(linestyles),
                                j2_env, "Scatterplot ", "bubble"))

        # The PNG and the HTML page share one base path; compute it once.
        output_base = args.outputdir + get_algorithm_name(name, batch)

        # create png plot for summary page
        data_for_plot = {
            key: prepare_data(runs[key], 'k-nn', 'qps') for key in runs
        }
        plot.create_plot(
            data_for_plot, False,
            False, True, 'k-nn', 'qps',
            output_base + ".png",
            linestyles, batch)

        template = j2_env.get_template("detail_page.html")
        with open(output_base + ".html", "w") as text_file:
            text_file.write(template.render(title=label, plot_data=plot_data,
                                            args=args, batch=batch))
Пример #3
0
def run(definition, dataset, count, run_count, batch):
    """Fit one algorithm on a dataset and run each of its query groups.

    The algorithm is instantiated from ``definition`` and fitted on the
    'train' split of ``dataset`` after both splits have been passed through
    ``dataset_transform[distance]``. It is then queried once per entry of
    ``definition.query_argument_groups``, or exactly once with no query
    arguments when that collection is empty. Each group's descriptor and
    results are handed to ``store_results``.

    Args:
        definition: Algorithm definition; its ``query_argument_groups``,
            ``module``, ``constructor``, ``arguments`` and ``algorithm``
            attributes are read here.
        dataset: Dataset identifier passed to ``get_dataset`` and recorded
            in every descriptor.
        count: Forwarded to ``run_individual_query`` and ``store_results``.
        run_count: Forwarded to ``run_individual_query``.
        batch: Forwarded to ``run_individual_query``,
            ``get_algorithm_name`` and ``store_results``.

    Raises:
        AssertionError: If query argument groups are specified but the
            instantiated algorithm has no ``set_query_arguments``.
    """
    algo = instantiate_algorithm(definition)
    assert not definition.query_argument_groups \
        or hasattr(algo, "set_query_arguments"), (
            "error: query argument groups have been specified for %s.%s(%s), "
            "but the algorithm instantiated from it does not implement the "
            "set_query_arguments function"
            % (definition.module, definition.constructor,
               definition.arguments))

    D = get_dataset(dataset)
    X_train = numpy.array(D['train'])
    X_test = numpy.array(D['test'])
    distance = D.attrs['distance']
    print("type D: ", type(D))
    print("type x_train: ", type(X_train))
    print("type x_test: ", type(X_test))
    print("type distance: ", type(distance))
    print('got a train set of size (%d * %d)' % X_train.shape)
    print('got %d queries' % len(X_test))

    X_train = dataset_transform[distance](X_train)
    X_test = dataset_transform[distance](X_test)

    try:
        # Take the memory baseline before starting the clock so that only
        # the fit itself is timed, and use a monotonic clock so system
        # clock adjustments cannot distort the measured interval.
        memory_usage_before = algo.get_memory_usage()
        t0 = time.perf_counter()
        algo.fit(X_train)
        build_time = time.perf_counter() - t0
        index_size = algo.get_memory_usage() - memory_usage_before
        print('Built index in', build_time)
        print('Index size: ', index_size)

        # Make sure that algorithms with no query argument groups still get
        # run once by providing them with a single, empty, harmless group
        query_argument_groups = definition.query_argument_groups or [[]]

        for pos, query_arguments in enumerate(query_argument_groups, 1):
            print("Running query argument group %d of %d..." %
                  (pos, len(query_argument_groups)))
            if query_arguments:
                algo.set_query_arguments(*query_arguments)
            descriptor, results = run_individual_query(
                algo, X_train, X_test, distance, count, run_count, batch)
            descriptor["build_time"] = build_time
            descriptor["index_size"] = index_size
            descriptor["algo"] = get_algorithm_name(definition.algorithm,
                                                    batch)
            descriptor["dataset"] = dataset
            store_results(dataset, count, definition, query_arguments,
                          descriptor, results, batch)
    finally:
        # Always give the algorithm a chance to clean up, even when fitting
        # or querying raised.
        algo.done()
Пример #4
0
def run(definition, dataset, count, run_count, batch):
    """Fit one algorithm on a dataset and run each of its query groups.

    The algorithm is instantiated from ``definition`` and fitted on the
    'train' split of ``dataset``. It is then queried once per entry of
    ``definition.query_argument_groups``, or exactly once with no query
    arguments when that collection is empty. Each group's descriptor and
    results are handed to ``store_results``.

    Args:
        definition: Algorithm definition; its ``query_argument_groups``,
            ``module``, ``constructor``, ``arguments`` and ``algorithm``
            attributes are read here.
        dataset: Dataset identifier passed to ``get_dataset`` and recorded
            in every descriptor.
        count: Forwarded to ``run_individual_query`` and ``store_results``.
        run_count: Forwarded to ``run_individual_query``.
        batch: Forwarded to ``run_individual_query``,
            ``get_algorithm_name`` and ``store_results``.

    Raises:
        AssertionError: If query argument groups are specified but the
            instantiated algorithm has no ``set_query_arguments``.
    """
    algo = instantiate_algorithm(definition)
    assert not definition.query_argument_groups \
        or hasattr(algo, "set_query_arguments"), (
            "error: query argument groups have been specified for %s.%s(%s), "
            "but the algorithm instantiated from it does not implement the "
            "set_query_arguments function"
            % (definition.module, definition.constructor,
               definition.arguments))

    D = get_dataset(dataset)
    X_train = numpy.array(D['train'])
    X_test = numpy.array(D['test'])
    distance = D.attrs['distance']
    print('got a train set of size (%d * %d)' % X_train.shape)
    print('got %d queries' % len(X_test))

    try:
        # Take the memory baseline before starting the clock so that only
        # the fit itself is timed, and use a monotonic clock so system
        # clock adjustments cannot distort the measured interval.
        memory_usage_before = algo.get_memory_usage()
        t0 = time.perf_counter()
        algo.fit(X_train)
        build_time = time.perf_counter() - t0
        index_size = algo.get_memory_usage() - memory_usage_before
        print('Built index in', build_time)
        print('Index size: ', index_size)

        # Make sure that algorithms with no query argument groups still get
        # run once by providing them with a single, empty, harmless group
        query_argument_groups = definition.query_argument_groups or [[]]

        for pos, query_arguments in enumerate(query_argument_groups, 1):
            print("Running query argument group %d of %d..." %
                  (pos, len(query_argument_groups)))
            if query_arguments:
                algo.set_query_arguments(*query_arguments)
            descriptor, results = run_individual_query(
                algo, X_train, X_test, distance, count, run_count, batch)
            descriptor["build_time"] = build_time
            descriptor["index_size"] = index_size
            descriptor["algo"] = get_algorithm_name(
                definition.algorithm, batch)
            descriptor["dataset"] = dataset
            store_results(dataset, count, definition,
                          query_arguments, descriptor, results, batch)
    finally:
        # Always give the algorithm a chance to clean up, even when fitting
        # or querying raised.
        algo.done()
Пример #5
0
def run(definition, dataset, count, run_count, batch):
    """Fit one algorithm on a dataset and evaluate each of its query groups.

    The algorithm is instantiated from ``definition`` and fitted on the
    'train' split of ``dataset``. When ``algo.builds_graph()`` is truthy,
    the 'test' split is prepended to the training data, ``count`` is set on
    the algorithm up front, and each query group is evaluated with
    ``check_graph``; otherwise ``run_individual_query`` is used. One
    evaluation happens per entry of ``definition.query_argument_groups``,
    or exactly once with no query arguments when that collection is empty.
    Each group's descriptor and results are handed to ``store_results``.

    Args:
        definition: Algorithm definition; its ``query_argument_groups``,
            ``module``, ``constructor``, ``arguments`` and ``algorithm``
            attributes are read here.
        dataset: Dataset identifier passed to ``get_dataset`` and recorded
            in every descriptor.
        count: Forwarded to the evaluation helpers and ``store_results``;
            stored in the descriptor as ``int(count)``.
        run_count: Forwarded to ``run_individual_query``.
        batch: Forwarded to ``run_individual_query``,
            ``get_algorithm_name`` and ``store_results``; recorded in the
            descriptor as "batch_mode".

    Raises:
        AssertionError: If query argument groups are specified but the
            instantiated algorithm has no ``set_query_arguments``.
    """
    algo = instantiate_algorithm(definition)
    assert not definition.query_argument_groups \
        or hasattr(algo, "set_query_arguments"), (
            "error: query argument groups have been specified for %s.%s(%s), "
            "but the algorithm instantiated from it does not implement the "
            "set_query_arguments function"
            % (definition.module, definition.constructor,
               definition.arguments))

    D = get_dataset(dataset)
    X_train = numpy.array(D['train'])
    X_test = numpy.array(D['test'])
    # Queried once and reused below; treated as a fixed property of the
    # algorithm for the duration of this run.
    builds_graph = algo.builds_graph()
    if builds_graph:
        # Test data first to avoid converting test set index to graph index
        X_train = numpy.concatenate((X_test, X_train))
        # The protocol expects the count to be given at query time, so it has
        # to be set as a parameter beforehand.
        algo.set_count(count)
    distance = D.attrs['distance']
    print('got a train set of size (%d * %d)' % X_train.shape)
    print('got %d queries' % len(X_test))

    try:
        # Take the memory baseline before starting the clock so that only
        # the fit itself is timed, and use a monotonic clock so system
        # clock adjustments cannot distort the measured interval.
        memory_usage_before = algo.get_memory_usage()
        t0 = time.perf_counter()
        algo.fit(X_train)
        build_time = time.perf_counter() - t0
        index_size = algo.get_memory_usage() - memory_usage_before
        print('Built index in', build_time)
        print('Index size: ', index_size)

        # Make sure that algorithms with no query argument groups still get
        # run once by providing them with a single, empty, harmless group
        query_argument_groups = definition.query_argument_groups or [[]]

        for pos, query_arguments in enumerate(query_argument_groups, 1):
            print("Running query argument group %d of %d..." %
                  (pos, len(query_argument_groups)))
            if query_arguments:
                algo.set_query_arguments(*query_arguments)
            if builds_graph:
                descriptor, results = check_graph(
                    algo, X_train, X_test, distance, count)
            else:
                descriptor, results = run_individual_query(
                    algo, X_train, X_test, distance, count, run_count, batch)
            descriptor["build_time"] = build_time
            descriptor["index_size"] = index_size
            descriptor["algo"] = get_algorithm_name(
                definition.algorithm, batch)
            descriptor["dataset"] = dataset
            descriptor["count"] = int(count)
            descriptor["batch_mode"] = batch
            store_results(dataset, count, definition,
                          query_arguments, descriptor, results, batch)
    finally:
        # Always give the algorithm a chance to clean up, even when fitting
        # or querying raised.
        algo.done()