def main():
    """Inspect the solution estimated after a given training epoch.

    Command-line arguments are read through the module-level ``parser``.
    The problem and its hyperparameters are loaded from ``args.dir`` and the
    estimates for ``args.epoch`` are loaded with ``load_estimates``.

    Depending on the parsed flags the function then either

    * ``args.components``: prints one row per vertex containing the
      ``olaf`` summaries of columns ``2 * i`` and ``2 * i + 1`` of the
      estimate, followed by the vertex's tree path, sorted by path; or
    * ``args.opt_info``: evaluates the loss tensors built by
      ``learn_problem.construct_objective`` and prints them; or
    * otherwise prints the parser's help text.

    Output lines are produced with single-argument ``print(...)`` calls so
    that they render identically to the former Python 2 print statements.
    """
    logging.getLogger().setLevel(logging.INFO)
    args = parser.parse_args()
    directory = args.dir
    epoch = args.epoch
    problem = inlining_tree.Problem.load(directory)
    hyperparams = learn_problem.load_hyperparams(directory)

    tree_path_to_ids = problem.properties.tree_path_to_ids
    id_to_tree_path = {v: k for k, v in tree_path_to_ids.items()}
    num_vertices = len(tree_path_to_ids)

    execution_times = problem.execution_times
    time_average = np.mean(execution_times)
    reference_benefit = learn_problem.ALPHA * np.log(
        execution_times / time_average)

    X_estimate = load_estimates(directory, epoch)
    problem_matrices = learn_problem.construct_problem_matrices(
        problem, hyperparams=hyperparams)

    # The objective is built unconditionally (as before), even though only
    # the opt_info branch evaluates it.
    objective_tensors = learn_problem.construct_objective(
        reference_benefit=reference_benefit,
        benefit_relations=problem_matrices.benefit_relations,
        participation_mask=problem_matrices.participation_mask,
        X_init=tf.constant_initializer(X_estimate),
        hyperparams=hyperparams)

    print("Hyperparameters = %s" % (hyperparams,))
    print(">>>>>>> Solution after %d epoch <<<<<<<<" % epoch)

    if args.components:
        participation_counts = np.sum(problem_matrices.participation_mask,
                                      axis=0)
        rows = []
        for i in range(num_vertices):
            path = id_to_tree_path[i]
            lhs = olaf(X_estimate[:, i * 2], participation_counts[i * 2])
            rhs = olaf(X_estimate[:, i * 2 + 1],
                       participation_counts[i * 2 + 1])
            rows.append((path, lhs, rhs))

        rows.sort(key=lambda row: row[0])

        # Guard against an empty problem: max() of an empty sequence raises.
        if rows:
            lhs_size = max(len(lhs) for (_, lhs, _) in rows) + 1
            rhs_size = max(len(rhs) for (_, _, rhs) in rows) + 1

            for (path, lhs, rhs) in rows:
                lhs_pad = " " * (lhs_size - len(lhs))
                # Pad the right-hand column to its own width; it used to be
                # padded with lhs_size, which misaligned the path column.
                rhs_pad = " " * (rhs_size - len(rhs))
                # The spacing reproduces what the chained Python 2
                # ``print x,`` statements emitted (one separator space
                # between consecutive items).
                print("%s %s  |  %s %s  | %s"
                      % (lhs, lhs_pad, rhs, rhs_pad, path))

    elif args.opt_info:
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            loss, benefit_loss, variance_loss = sess.run(
                [
                    objective_tensors.loss,
                    objective_tensors.benefit_loss,
                    objective_tensors.variance_loss,
                ],
                feed_dict={})

            print("- Loss = %.6f" % loss)
            print("- Benefit loss = %.6f" % benefit_loss)
            print("- Variance loss = %.6f" % variance_loss)

    else:
        parser.print_help()
Пример #2
0
def run(args):
    """Search for a lasso factor and save the resulting contributions.

    ``args`` is the argument list handed to the module-level ``parser``.

    The first 20% of the rows of the benefit-relation matrix form the
    validation set and the remainder the training set.  Candidate lasso
    factors are generated by repeatedly halving ``hi`` (``lo`` is never
    raised), i.e. 0.5, 0.25, ... until the interval is narrower than 1e-6.
    The candidate with the highest validation R^2 is kept, the model is
    refitted on all rows with that factor, and ``lambda.txt``,
    ``hyperparams.pkl`` and ``contributions.npy`` are written to the
    experiment directory.  Candidates whose training R^2 is at least 0.05
    are also logged to ``auto_search_log.csv``.

    Returns early without computing anything when ``contributions.npy``
    already exists and ``args.force`` is not set.

    Raises:
        AssertionError: if ``args.skip_normalisation`` is not set.
        ValueError: if no candidate produced a comparable validation score
            (e.g. every score was NaN), so no lasso factor could be chosen.
    """
    logging.getLogger().setLevel(logging.INFO)
    args = parser.parse_args(args)
    problem_directory = args.directory
    logging.info("Loading problem definition ...")
    hyperparams = HyperParameters(
            decay_factor=args.decay_factor,
            benefit_function=args.benefit_function,
            lasso_factor=None)
    assert args.skip_normalisation
    experiment_name = "lasso"
    exp_directory = os.path.join(
            problem_directory, experiment_name, hyperparams.directory_name())

    if os.path.exists(os.path.join(exp_directory, "contributions.npy")) and not args.force:
        logging.info("A solution already exist for %s/%s/%s! Pass --force to recompute"
                % (problem_directory, experiment_name, hyperparams.directory_name()))
        return

    problem = inlining_tree.Problem.load(problem_directory)

    if not os.path.exists(exp_directory):
        os.makedirs(exp_directory)

    normalise_with_num_children = not args.skip_normalisation
    problem_matrices = learn_problem.construct_problem_matrices(
            problem, hyperparams, normalise_with_num_children)

    target_benefit = construct_benefit_from_exec_time(
            args.benefit_function, problem)

    logging.info("Computing analytical solution for %s." % (experiment_name))
    logging.info("  decay factor = %.6f" % (args.decay_factor))
    logging.info("  benefit function = %s" % args.benefit_function)

    A = problem_matrices.benefit_relations

    best_lambda = None
    best_validation_r_squared = -np.inf

    search_log_path = os.path.join(exp_directory, "auto_search_log.csv")
    # The context manager guarantees the log is flushed and closed even if
    # fitting raises; previously the handle was never closed.
    with open(search_log_path, "w") as search_log:
        print("Logging to %s" % (search_log_path,))
        wrt = csv.writer(search_log)
        # Header matches the three values written per row below.
        wrt.writerow(["alpha", "r_squared", "r_squared_validation"])

        lo = 0.0
        hi = 1.0

        m = int(0.2 * len(A))
        print("Number of entries in validation set = %s" % (m,))
        print("Number of variables in linear eqn = %s" % (A.shape[1],))

        A_validation = A[:m, :]
        target_benefit_validation = target_benefit[:m]
        A_train = A[m:, :]
        target_benefit_train = target_benefit[m:]

        while (hi - lo) > 1e-6:
            mid = (hi + lo) / 2.0
            lambda_ = mid

            model = sklearn.linear_model.Lasso(alpha=lambda_, fit_intercept=False)
            model.fit(A_train, target_benefit_train)

            w = model.coef_
            print("lambda = %s" % (mid,))
            sparse_w = model.sparse_coef_
            if len(sparse_w.indices) > 0:
                # Statistics over the non-zero coefficients only.
                w_abs = abs(w[sparse_w.indices])
                print(w_abs.shape)
                print("- mean = %s" % (np.mean(w_abs),))
                print("- min = %s" % (np.min(w_abs),))
                print("- max = %s" % (np.max(w_abs),))
                print("- median = %s" % (np.median(w_abs),))
            else:
                print("- <skip>")

            assert w.shape == (A_train.shape[1],)
            r_squared = model.score(A_train, target_benefit_train)
            validation_r_squared = model.score(A_validation, target_benefit_validation)

            if validation_r_squared > best_validation_r_squared:
                best_validation_r_squared = validation_r_squared
                best_lambda = lambda_

            # Only the upper bound moves, so the next candidate is mid / 2.
            hi = mid
            if r_squared < 0.05:
                # Poorly fitting candidates are not logged.
                continue

            row = [lambda_, r_squared, validation_r_squared]
            wrt.writerow([str(x) for x in row])
            # Flush after writing so the newest row is visible on disk.
            search_log.flush()

    if best_lambda is None:
        raise ValueError(
            "Lasso search did not yield a usable validation score; "
            "cannot choose a lasso factor")

    logging.info("Found analytical solution for %s, saving to %s! Best lasso factor is %s"
            % (hyperparams.directory_name(), exp_directory, str(best_lambda)))

    logging.info("Retraining with all data")
    model = sklearn.linear_model.Lasso(
            alpha=best_lambda, fit_intercept=False, max_iter=5000, tol=1e-10)
    model.fit(A, target_benefit)
    print("Best validataion r squared %s" % (best_validation_r_squared,))

    # The directory was created above; re-check explicitly instead of
    # swallowing every exception from os.makedirs.
    if not os.path.isdir(exp_directory):
        os.makedirs(exp_directory)
    # Text mode: the content is a str, not bytes.
    with open(os.path.join(exp_directory, "lambda.txt"), "w") as f:
        f.write(str(best_lambda))
    # hyperparams is the object built at the top of the function; it was
    # previously rebuilt here with identical arguments.
    with open(os.path.join(exp_directory, "hyperparams.pkl"), "wb") as f:
        pickle.dump(hyperparams, f)
    with open(os.path.join(exp_directory, "contributions.npy"), "wb") as f:
        np.save(f, model.coef_)
Пример #3
0
def run(argv):
    """Inspect a saved set of contributions against a problem definition.

    ``argv`` is the argument list handed to the module-level ``parser``.
    The hyperparameters (``hyperparams.pkl``) and contribution vector
    (``contributions.npy``) are read from ``args.experiment_dir`` and the
    problem from ``args.problem_dir``.  Exactly one of the following
    actions is then performed, in this order of precedence:

    * ``args.opt_info``: print the mean squared error and the extremes of
      the projected benefits and squared errors.
    * ``args.dump_rewards``: print an s-expression with the per-node
      rewards.
    * ``args.inspect_rewards``: print the reward tree with ``print_tree``.
    * ``args.optimal_decision``: write the optimal tree, as an
      s-expression, to ``args.output``.
    * ``args.inspect_run``: print diagnostics for the run with that index.

    Raises:
        AssertionError: if none of the actions above was selected.
    """
    logging.getLogger().setLevel(logging.INFO)
    args = parser.parse_args(argv)
    hyperparams_path = os.path.join(args.experiment_dir, "hyperparams.pkl")

    problem = inlining_tree.Problem.load(args.problem_dir)
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only use experiment directories that come from a trusted source.
    with open(hyperparams_path, "rb") as f:
        hyperparams = pickle.load(f)

    normalise_with_num_children = not args.skip_normalisation
    problem_matrices = learn_problem.construct_problem_matrices(
        problem,
        hyperparams,
        normalise_with_num_children=normalise_with_num_children)
    target_benefit = learn_linear_general_reward.construct_benefit_from_exec_time(
        hyperparams.benefit_function, problem)
    # Floor division keeps num_nodes an int (it is used with range() and
    # list repetition below) regardless of the division semantics in force.
    num_nodes = problem_matrices.participation_mask.shape[1] // 2
    participation_count = np.sum(problem_matrices.participation_mask, axis=0)
    w = np.load(os.path.join(args.experiment_dir, "contributions.npy"))

    tree_path_to_ids = problem.properties.tree_path_to_ids
    id_to_tree_path = {v: k for k, v in tree_path_to_ids.items()}

    def contribution_or_none(column):
        """Return w[column], or None when no run participates in it."""
        if participation_count[column] > 0:
            return w[column]
        return None

    def fill_node_values(node):
        """Attach the (2 * id, 2 * id + 1) contribution pair to a node."""
        node_id = node.name
        lhs = contribution_or_none(node_id * 2)
        rhs = contribution_or_none(node_id * 2 + 1)
        return (node.name, (lhs, rhs))

    def rename_id_to_path(node):
        """Replace a node's numeric id with its tree path."""
        return (id_to_tree_path[node.name], node.value)

    def build_edge_adjacency_list():
        """Build the adjacency list over all of the problem's edge lists."""
        return inlining_tree.adjacency_list_from_edge_lists(
            num_nodes=num_nodes, edge_lists=problem.edges_lists)

    def build_reward_tree():
        """Build the tree keyed by path whose values are contribution pairs."""
        adjacency_list = build_edge_adjacency_list()
        root = tree_path_to_ids[inlining_tree.Absolute_path([])]
        tree = inlining_tree.build_from_adjacency_list([None] * num_nodes,
                                                       root, adjacency_list)
        tree = tree.map(f=fill_node_values)
        return tree.map(f=rename_id_to_path)

    if args.opt_info:
        A = problem_matrices.benefit_relations
        projected_benefits = np.matmul(A, w)
        squared_errors = np.power(target_benefit - projected_benefits, 2)
        mse = np.mean(squared_errors)

        print("Mean squared error: %s" % (mse,))
        print("Mimimum projected: %s" % (min(projected_benefits),))
        print("Maximum projected: %s" % (max(projected_benefits),))
        print("Mimimum error: %s" % (min(squared_errors),))
        print("Maximum error: %s" % (max(squared_errors),))

    elif args.dump_rewards:
        tree = build_reward_tree()
        record_path_long_term_rewards = {}
        # The returned tree and value are not needed here; the call is made
        # because the dict passed in is read below (presumably filled in by
        # build_optimal_tree -- confirm against its definition).
        build_optimal_tree(
            tree,
            hyperparams,
            normalise_with_num_children,
            record_path_long_term_rewards=record_path_long_term_rewards)

        arr = []
        for i in range(num_nodes):
            if participation_count[2 * i] > 0:
                long_term = record_path_long_term_rewards[id_to_tree_path[i]]
                inline_reward = [[["immediate", w[2 * i]],
                                  ["long_term", long_term]]]
            else:
                inline_reward = []

            no_inline_reward = inlining_tree.sexp_of_option(
                contribution_or_none(2 * i + 1), f=str)

            arr.append([
                ["path", id_to_tree_path[i].to_sexp()],
                ["inline_reward", inline_reward],
                ["no_inline_reward", no_inline_reward],
            ])
        print(sexpdata.dumps(arr))

    elif args.inspect_rewards:
        print_tree(build_reward_tree())

    elif args.optimal_decision:
        tree = build_reward_tree()
        (optimal_tree,
         value) = build_optimal_tree(tree,
                                     hyperparams,
                                     normalise_with_num_children,
                                     record_path_long_term_rewards={})
        sexp_optimal_tree = inlining_tree.sexp_of_top_level(optimal_tree)
        logging.info("Optimal decision has a value of %f" % value)
        sexp_buffer = StringIO.StringIO()
        sexp_utils.dump_without_quotes(sexp_buffer, sexp_optimal_tree)
        with open(args.output, "w") as f:
            f.write(sexp_buffer.getvalue())

    elif args.inspect_run is not None:
        index = args.inspect_run

        adjacency_list = build_edge_adjacency_list()

        A = problem_matrices.benefit_relations
        run_target_benefit = target_benefit[index]
        projected_benefit = np.matmul(A, w)[index]
        participation_mask = problem_matrices.participation_mask[index, :]
        assert participation_mask.shape == (num_nodes * 2, )
        visited = set()
        projected_benefit_with_dfs = project_benefit_tree(
            root=tree_path_to_ids[inlining_tree.Absolute_path([])],
            hyperparams=hyperparams,
            adjacency_list=adjacency_list,
            id_to_tree_path=id_to_tree_path,
            contributions=w,
            mask=participation_mask,
            normalise_with_num_children=normalise_with_num_children,
            visited=visited)

        # Count the nodes that participate in this run and report any that
        # the traversal above did not record in ``visited``.
        visited_count = 0
        for i in range(num_nodes):
            if participation_mask[i * 2] or participation_mask[i * 2 + 1]:
                visited_count += 1
                if i not in visited:
                    print("DID NOT VISIT %s %s" % (i, id_to_tree_path[i]))

        print("--- Information on run %d ---" % index)
        print("Execution directory = %s"
              % (problem.execution_directories[index],))
        print("Target benefit = %s" % (run_target_benefit,))
        print("Execution time = %s" % (problem.execution_times[index],))
        print("Projected benefit (with matmul) = %s" % (projected_benefit,))
        print("Projected benefit (with DFS) = %s"
              % (projected_benefit_with_dfs,))
        print("Number of visited nodes = %s" % (visited_count,))
        print("Number of nodes in problem = %s" % (num_nodes,))

        # Adjacency sets restricted to the edges of this single run.
        run_adjacency = [set() for _ in range(num_nodes)]
        for edge in problem.edges_lists[index]:
            run_adjacency[edge[0]].add(edge[1])
        bfs_edge_list(run_adjacency, id_to_tree_path)

    else:
        # Raised explicitly so the failure is not stripped under ``-O``.
        raise AssertionError("no action was selected on the command line")
Пример #4
0
def run(args):
    """Fit lasso models over a sweep of factors and save each solution.

    ``args`` is the argument list handed to the module-level ``parser``.

    The first 20% of the rows of the benefit-relation matrix form the
    validation set and the remainder the training set.  Candidate lasso
    factors are generated by repeatedly halving ``hi`` (``lo`` is never
    raised), i.e. 0.5, 0.25, ... until the interval is narrower than 1e-7.
    Every candidate whose training R^2 is at least 0.05 is appended to
    ``search_log.csv`` and its ``hyperparams.pkl`` / ``contributions.npy``
    are written to a per-candidate directory under ``lasso-with-alpha``.
    The sweep stops after the eleventh such candidate.

    Returns early without computing anything when ``contributions.npy``
    already exists in the ``lasso`` experiment directory and ``args.force``
    is not set.

    Raises:
        AssertionError: if ``args.skip_normalisation`` is not set.
        OSError: if a per-candidate directory cannot be created.
    """
    logging.getLogger().setLevel(logging.INFO)
    args = parser.parse_args(args)
    problem_directory = args.directory
    logging.info("Loading problem definition ...")
    hyperparams = HyperParameters(decay_factor=args.decay_factor,
                                  benefit_function=args.benefit_function,
                                  lasso_factor=None)
    assert args.skip_normalisation
    experiment_name = "lasso"
    exp_directory = os.path.join(problem_directory, experiment_name,
                                 hyperparams.directory_name())

    if os.path.exists(os.path.join(exp_directory,
                                   "contributions.npy")) and not args.force:
        logging.info(
            "A solution already exist for %s/%s/%s! Pass --force to recompute"
            %
            (problem_directory, experiment_name, hyperparams.directory_name()))
        return

    problem = inlining_tree.Problem.load(problem_directory)

    if not os.path.exists(exp_directory):
        os.makedirs(exp_directory)

    normalise_with_num_children = not args.skip_normalisation
    problem_matrices = learn_problem.construct_problem_matrices(
        problem, hyperparams, normalise_with_num_children)

    target_benefit = construct_benefit_from_exec_time(args.benefit_function,
                                                      problem)

    logging.info("Computing analytical solution for %s." % (experiment_name))
    logging.info("  decay factor = %.6f" % (args.decay_factor))
    logging.info("  benefit function = %s" % args.benefit_function)

    A = problem_matrices.benefit_relations

    search_log_path = os.path.join(exp_directory, "search_log.csv")
    # The context manager closes the log even when fitting or saving raises;
    # previously the handle leaked on any exception inside the loop.
    with open(search_log_path, "w") as search_log:
        print("Logging to %s" % (search_log_path,))
        wrt = csv.writer(search_log)
        wrt.writerow([
            "alpha", "r_squared", "r_squared_validation", "sum(abs(w))",
            "sum(abs(w) > 0.0)"
        ])

        ctr = 0
        lo = 0.0
        hi = 1.0

        m = int(0.2 * len(A))
        print("Number of entries in validation set = %s" % (m,))
        print("Number of variables in linear eqn = %s" % (A.shape[1],))

        A_validation = A[:m, :]
        target_benefit_validation = target_benefit[:m]
        A_train = A[m:, :]
        target_benefit_train = target_benefit[m:]

        while (hi - lo) > 1e-7:
            mid = (hi + lo) / 2.0
            lambda_ = mid

            model = sklearn.linear_model.Lasso(alpha=lambda_,
                                               fit_intercept=False)
            model.fit(A_train, target_benefit_train)

            w = model.coef_
            assert w.shape == (A_train.shape[1], )
            r_squared = model.score(A_train, target_benefit_train)
            validation_r_squared = model.score(A_validation,
                                               target_benefit_validation)

            # Only the upper bound moves, so the next candidate is mid / 2.
            hi = mid
            if r_squared < 0.05:
                # Poorly fitting candidates are neither logged nor saved.
                continue
            ctr += 1

            row = [
                lambda_, r_squared, validation_r_squared,
                np.sum(abs(w)),
                np.sum(abs(w) > 0.000000000001)
            ]
            wrt.writerow([str(x) for x in row])
            # Flush after writing so the newest row is visible on disk.
            search_log.flush()

            hyperparams = HyperParameters(
                decay_factor=args.decay_factor,
                benefit_function=args.benefit_function,
                lasso_factor=lambda_)

            sub_exp_directory = os.path.join(problem_directory,
                                             "lasso-with-alpha",
                                             hyperparams.directory_name())
            logging.info("Found analytical solution for %s, saving to %s!" %
                         (hyperparams.directory_name(), sub_exp_directory))
            try:
                os.makedirs(sub_exp_directory)
            except OSError:
                # An already existing directory is fine; anything else
                # (permissions, a file in the way, ...) is a real error.
                if not os.path.isdir(sub_exp_directory):
                    raise
            with open(os.path.join(sub_exp_directory, "hyperparams.pkl"),
                      "wb") as f:
                pickle.dump(hyperparams, f)
            with open(os.path.join(sub_exp_directory, "contributions.npy"),
                      "wb") as f:
                np.save(f, w)
            if ctr > 10:
                break