# Example 1
# 0
def main(param_fn):
    """Merge newly generated kernel parameters into an existing parameter file.

    Reads "parameters.json" (new results) and *param_fn* (existing results),
    keeps the new entry whenever both files define parameters for the same
    (m, n, k) triple, and writes the merged, sorted list to
    "parameters.new.json".
    """
    # Read new kernel parameters
    param_new = "parameters.json"
    with open(param_new) as fh:
        fresh_kernels = [params_dict_to_kernel(**p) for p in json.load(fh)]

    # Read old kernel parameters
    with open(param_fn) as fh:
        existing_kernels = [params_dict_to_kernel(**p) for p in json.load(fh)]

    # Merge two parameter lists: index both by (m, n, k); new entries win
    print("Merging", param_new, "with", param_fn)
    merged = {(kern.m, kern.n, kern.k): kern for kern in existing_kernels}
    merged.update({(kern.m, kern.n, kern.k): kern for kern in fresh_kernels})

    # Write kernel parameters to new file
    new_file = "parameters.new.json"
    with open(new_file, "w") as fh:
        dumped = json.dumps(
            [merged[key].as_dict_for_parameters_json for key in sorted(merged)]
        )
        # Poor-man's pretty-printing: one parameter set per line
        dumped = dumped.replace("}, ", "},\n")
        dumped = dumped.replace("[", "[\n")
        dumped = dumped.replace("]", "\n]")
        fh.write(dumped)

    print("Wrote", new_file)
# Example 2
# 0
def find_optimal_kernel(
    mnk, algo, tree, tree_features, gpu_properties, autotuning_properties
):
    """
    Find the optimal kernel parameter set for a given (m, n, k) and a given algorithm

    :param mnk: (m, n, k) triple of block sizes
    :param algo: algorithm name (key into ``kernel_algorithm``)
    :param tree: trained model exposing ``predict()``
    :param tree_features: feature names expected by the model
    :param gpu_properties: GPU properties dictionary
    :param autotuning_properties: autotuning properties dictionary
    :return: optimal_kernels: dictionary, keys: (m, n, k), values: Kernel object describing best parameters
    """
    # Get parameter space for this (m, n, k) and this algorithm
    m, n, k = mnk
    parameter_space_ = kernel_algorithm[algo].promising_parameters(
        m, n, k, gpu_properties, autotuning_properties
    )
    parameter_space = pd.DataFrame(parameter_space_)
    del parameter_space_  # the raw parameter space can be large; free it early
    # Add "algorithm" column (scalar assignment broadcasts over all rows)
    parameter_space["algorithm"] = algo

    # No promising parameters for this (m, n, k)/algorithm combination
    if len(parameter_space.index) == 0:
        return dict()

    # Get predictor features from raw parameters
    parameter_sets = PredictiveParameters(
        parameter_space, gpu_properties, autotuning_properties, None
    )
    predictors = parameter_sets.get_features(tree_features)
    if algo == "medium":
        # The "medium" model was trained on anonymized feature names f0, f1, ...
        # Fix: the original built len+1 names and relied on zip truncation;
        # enumerate produces exactly one name per column.
        predictors = predictors.rename(
            columns={
                col: "f{}".format(i) for i, col in enumerate(predictors.columns)
            }
        )

    # Predict performances
    performances_scaled = tree.predict(predictors)
    del predictors
    parameter_performances = parameter_sets.params
    del parameter_sets
    parameter_performances["perf"] = performances_scaled
    del performances_scaled

    # Pick optimal kernel: the parameter set with the highest predicted performance
    optimal_kernel = max(
        parameter_performances.to_dict("records"), key=lambda x: x["perf"]
    )
    del parameter_performances
    optimal_kernels = dict()
    optimal_kernels[(m, n, k)] = params_dict_to_kernel(
        **optimal_kernel, source="predicted"
    )

    return optimal_kernels
# Example 3
# 0
def main(params, njobs, baseline, paths_to_models, chunk_size):
    """
    Update parameter file with new optimal parameter predictions given newly trained decision trees

    :param params: path to the JSON parameter file to update; its basename
        encodes the GPU architecture (key into ``gpu_architectures``)
    :param njobs: number of parallel jobs for prediction
    :param baseline: if truthy, use baseline kernels instead of model predictions
    :param paths_to_models: paths to the trained decision-tree models
    :param chunk_size: chunk size for parallelized prediction
    """
    # ===============================================================================
    # Load GPU and autotuning properties
    assert (
        os.path.basename(params) in gpu_architectures
    ), "Cannot find compute version for file " + str(params)
    arch_code = gpu_architectures[os.path.basename(params)]
    with open("../kernels/gpu_properties.json") as f:
        gpu_properties = json.load(f)[arch_code]
    with open("../kernels/autotuning_properties.json") as f:
        autotuning_properties = json.load(f)

    # Load autotuned kernel parameters
    # NOTE: loop variable renamed from "params" to avoid shadowing the
    # function parameter of the same name inside the comprehension.
    with open(params) as f:
        all_kernels = [params_dict_to_kernel(**p) for p in json.load(f)]
    print("libsmm_acc: Found %d existing parameter sets." % len(all_kernels))
    autotuned_kernels = {
        (kern.m, kern.n, kern.k): kern for kern in all_kernels if kern.autotuned
    }

    # ===============================================================================
    # Construct the list of (m,n,k)-triplets for which parameter sets should be made available to libcusmm
    mnks = combinations(list(range(4, 46)))
    mnks = set(mnks) | set(autotuned_kernels)

    # ===============================================================================
    # Compute parameter sets: keep autotuned sets as-is, predict the rest
    mnks_to_predict = list()
    kernels_to_print = dict()
    for m, n, k in mnks:
        if (m, n, k) in autotuned_kernels:
            kernels_to_print[(m, n, k)] = autotuned_kernels[(m, n, k)]
        else:
            mnks_to_predict.append((m, n, k))

    if baseline:
        kernels = get_baseline_kernels(
            mnks_to_predict, gpu_properties, autotuning_properties
        )
    else:
        kernels = get_optimal_kernels(
            mnks_to_predict,
            njobs,
            chunk_size,
            paths_to_models,
            gpu_properties,
            autotuning_properties,
            1,
        )

    kernels_to_print.update(kernels)

    # ===============================================================================
    # Write to file (one parameter set per line, sorted by (m, n, k))
    with open(params, "w") as f:
        s = json.dumps(
            [
                kernels_to_print[kernel].as_dict_for_parameters_json
                for kernel in sorted(kernels_to_print.keys())
            ]
        )
        s = s.replace("}, ", "},\n")
        s = s.replace("[", "[\n")
        s = s.replace("]", "\n]")
        f.write(s)
    print("Wrote new predicted parameters to file", params)
# Example 4
# 0
def main(
    param_fn,
    compiler,
    cpus_per_node,
    max_num_nodes,
    blocksizes,
    blocks_from_param_file,
    tune_dir,
):
    """Set up autotuning directories for all requested (m, n, k) triplets.

    Triplets already covered by an autotuned parameter set in *param_fn*, or
    whose tuning directory already exists, are skipped. For every remaining
    triplet a directory with benchmark sources, job file and makefile is
    generated under *tune_dir*.
    """
    # Read existing parameters
    base_name = os.path.basename(param_fn)
    assert (
        base_name in gpu_architectures.keys()
    ), "Cannot find GPU architecture for file " + base_name
    arch_code = gpu_architectures[base_name]
    with open("../kernels/gpu_properties.json") as fh:
        gpu_properties = json.load(fh)[arch_code]
    with open("../kernels/autotuning_properties.json") as fh:
        autotuning_properties = json.load(fh)
    with open(param_fn) as fh:
        all_kernels = [params_dict_to_kernel(**p) for p in json.load(fh)]
    print("Reading parameters from %s" % param_fn)
    autotuned_kernels = [kern for kern in all_kernels if kern.autotuned]
    predicted_kernels = [kern for kern in all_kernels if not kern.autotuned]
    print(
        "libsmm_acc: found %d existing parameter sets, of which %d are autotuned and %d are predicted."
        % (len(all_kernels), len(autotuned_kernels), len(predicted_kernels))
    )

    # Get blocksizes to be autotuned
    if blocks_from_param_file:  # open and read file
        with open(blocksizes) as fh:
            all_kernels_ref = [params_dict_to_kernel(**p) for p in json.load(fh)]
        print("Reading parameters to autotune from %s" % blocksizes)
        triples = [
            (kern.m, kern.n, kern.k) for kern in all_kernels_ref if kern.autotuned
        ]
    else:
        assert len(set(blocksizes)) == len(blocksizes)
        blocksizes.sort()
        # Get (m, n, k) triplets to be autotuned
        triples = combinations(*blocksizes)
    print("Requested to autotune %d triplets" % len(triples))

    for m, n, k in triples:
        # Skip triplets already covered by an existing autotuned parameter set
        if any(kern.can_handle(m, n, k) for kern in autotuned_kernels):
            print(
                "Found existing autotuned parameter set for %dx%dx%d, skipping."
                % (m, n, k)
            )
            continue

        outdir = os.path.join(tune_dir, "tune_%dx%dx%d/" % (m, n, k))
        if os.path.exists(outdir):
            print("Directory %s exists already, skipping." % outdir)
            continue
        os.mkdir(outdir)
        gen_benchmark(
            outdir, gpu_properties, autotuning_properties, compiler, m, n, k
        )
        gen_jobfile(outdir, compiler, m, n, k, cpus_per_node, max_num_nodes)
        gen_makefile(outdir, compiler, arch_code)