Exemplo n.º 1
0
def main(
    out_path,
    portfolio_name,
    train_bundle,
    test_bundle,
    single = None,
    workers = 0,
    local = False,
    ):
    """Simulate solver or portfolio runs and write the resulting rows as CSV.

    Run data is loaded from ``train_bundle`` and ``test_bundle``. When
    ``portfolio_name`` is "-", one maker is built per solver name found in
    the training data (or only for ``single`` when it is given); otherwise a
    single portfolio maker is used. Every maker is submitted four times as a
    ``simulate_split`` job, and the rows that come back are written to
    ``out_path`` below a header line.
    """

    header = ["path", "solver", "budget", "cost", "success", "answer", "split"]

    def yield_runs():
        # bundles are only read once the generator is first advanced
        train_data = borg.storage.RunData.from_bundle(train_bundle)
        test_data = borg.storage.RunData.from_bundle(test_bundle)

        if portfolio_name != "-":
            makers = [PortfolioMaker(portfolio_name)]
        elif single is not None:
            makers = [SolverMaker(single)]
        else:
            makers = [SolverMaker(solver_name) for solver_name in train_data.solver_names]

        for maker in makers:
            for _ in xrange(4):
                yield (simulate_split, [maker, train_data, test_data])

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)

        # presumably condor.do invokes this with (task, result) per finished job
        def store_rows(_, rows):
            return writer.writerows(rows)

        condor.do(yield_runs(), workers, store_rows, local)
Exemplo n.º 2
0
def main(out_path, bundle, workers=0, local=False):
    """Score the mixture models for a range of component counts.

    For each shuffle split of the run data loaded from ``bundle``, one
    ``evaluate_split`` job is queued per component count K in 1..63 and per
    model name ("mul-dirmix", "mul-dirmatmix"). Each split is tagged with a
    fresh UUID. Result rows are appended to the CSV file at ``out_path``.
    """
    model_names = ["mul-dirmix", "mul-dirmatmix"]
    header = [
        "model_name",
        "components",
        "instances",
        "split",
        "mean_log_probability",
    ]

    def yield_jobs():
        run_data = borg.storage.RunData.from_bundle(bundle)
        splits = sklearn.cross_validation.ShuffleSplit(
            len(run_data), 64, test_fraction=0.2, indices=False)

        for (train_mask, test_mask) in splits:
            # one identifier shared by all jobs of this split
            split = uuid.uuid4()

            for K in range(1, 64):
                for model_name in model_names:
                    job_args = [run_data, model_name, K, split, train_mask, test_mask]

                    yield (evaluate_split, job_args)

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)

        for (_, result_row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(result_row)
            # push each row out as soon as it arrives
            out_file.flush()
def main(out_path, num_boards = 10000, min_rollouts = 128, workers = 0, replacement = True,
        player = None, opponent = None, error_thresh = 0.002):

    logger.info("generating state, value pairs using samples from given policy") 

    games = [] 
    grids = numpy.zeros((0,9,9), numpy.int8)
    boards_seen = set()
    while (grids.shape[0] < num_boards) if replacement else (len(boards_seen) < num_boards):
        new_game = gen_game()
        games.append(new_game)
        boards_seen = boards_seen.union(set(map(pyfeat.go.BoardState, new_game.grids)))
        grids = numpy.vstack((grids,new_game.grids))
        logger.info("number of boards in last game's grid: %i", new_game.grids.shape[0])
        logger.info("number of boards gathered: %i", len(boards_seen))
        

    def yield_jobs():
        logger.info("distributing jobs for %i games", len(games))

        for game in games:
            yield (find_values, [game, min_rollouts, None, None, error_thresh])

    evaluated = {}

    for (job, values) in condor.do(yield_jobs(), workers = workers):
        (game, _, _, _, _) = job.args

        evaluated[game] = values
        print 'value of empty board: ', values[0]
        print 'empty board? : ', game.grids[0]
    
    logger.info("about to pickle")
    with pyfeat.util.openz(out_path, "wb") as out_file:
        pickle.dump(evaluated, out_file, protocol = -1)
Exemplo n.º 4
0
def main(out_path, games_path, name = None, samples = None, rollouts = 256, workers = 0):
    """Compute values for stored games and pickle them keyed by game name.

    The games mapping is unpickled from ``games_path``. If ``name`` is given
    only that game is processed; otherwise, if ``samples`` is given, a random
    selection of at most that many names is used; otherwise every game is
    processed. One ``find_values`` job is queued per selected game and the
    collected results are pickled to ``out_path``.
    """
    logger.info("reading games from %s", games_path)

    with specmine.util.openz(games_path) as games_file:
        games = pickle.load(games_file)

    # decide which games to process
    if name is not None:
        names = [name]
    elif samples is not None:
        # random order via random sort keys, then truncate
        names = sorted(games, key = lambda _: random.random())[:samples]
    else:
        names = games

    def yield_jobs():
        logger.info("distributing jobs for %i games", len(names))

        for game_name in names:
            yield (find_values, [game_name, games[game_name], rollouts])

    evaluated = {}

    for (job, values) in condor.do(yield_jobs(), workers = workers):
        (finished_name, _, _) = job.args
        evaluated[finished_name] = values

    with specmine.util.openz(out_path, "wb") as out_file:
        pickle.dump(evaluated, out_file, protocol = -1)
def clustered_affinity_test(out_path, games_path, values_path, neighbors = 8, workers = 0, interpolate = True, off_graph = False):
    """Score value prediction using features learned from a clustered graph.

    Value samples come from ``get_value_list(games_path, values_path)`` and
    are shuffled once. For each training-set size (20000 up to, but not
    including, 260000 in steps of 60000) an affinity graph with ``neighbors``
    neighbours is built over the sampled boards, and template, random and
    clustered-Laplacian feature jobs are queued for ten feature counts
    between 0 and 300. Result rows are written as CSV to ``out_path``.

    With ``off_graph`` true the jobs are scored on held-out samples that
    follow the training prefix in the shuffled list (at most 100000 of them);
    otherwise they are scored on the training samples themselves. With
    ``interpolate`` true the random and clustered jobs additionally receive
    an ``aff_map`` keyword argument.
    """
    value_list = get_value_list(games_path,values_path)

    logger.info("number of value samples total: %i", len(value_list))

    def yield_jobs():
        min_samples =  20000
        max_samples = 260000
        step_samples = 60000
        cluster_size = 10000 #average
        max_test_samples = 100000

        shuffled_values = sorted(value_list, key = lambda _: numpy.random.rand())

        # NOTE(review): affinity_map is not defined in this function;
        # presumably it is a module-level name -- confirm.
        if interpolate:
            job_kwargs = (dict(aff_map = affinity_map),)
        else:
            job_kwargs = ()

        for samples in xrange(min_samples,max_samples,step_samples):
            # explicit float division so the rounding is not silently a
            # floor under integer division
            num_clusters = int(round(float(samples) / cluster_size))

            logger.info("number of clusters used: %i", num_clusters)

            # randomly sample subset of games
            value_dict = dict(shuffled_values[:samples])

            if off_graph:
                # Held-out window of at most max_test_samples items right
                # after the training prefix. The previous end index,
                # max_test_samples + 1, was absolute and produced an empty
                # test set once samples exceeded max_test_samples.
                test_values = dict(shuffled_values[samples:samples + max_test_samples])
            else:
                test_values = dict(shuffled_values[:samples])

            boards = value_dict.keys()
            num_boards = len(boards)

            logger.info("kept %i board samples", num_boards)

            index = dict(zip(boards, xrange(num_boards)))
            avectors_ND = numpy.array(map(specmine.go.board_to_affinity, boards))
            affinity_NN = specmine.discovery.affinity_graph(avectors_ND, neighbors, sigma = 1e6)

            for B in numpy.r_[0:300:10j].round().astype(int):
                yield (run_template_features, [2, 2, B, test_values])
                yield (run_random_features, [B, avectors_ND, index, test_values, interpolate]) + job_kwargs
                yield (
                    run_clustered_laplacian_features,
                    ["affinity", B, avectors_ND, affinity_NN, index, test_values, num_clusters, interpolate],
                    ) + job_kwargs

    with open(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["map_name", "features", "samples", "score_mean", "score_variance"])

        for (_, row) in condor.do(yield_jobs(), workers):
            writer.writerow(row)
Exemplo n.º 6
0
def main(out_path, bundle, workers=0, local=False):
    """Score the pure multinomial model across a sweep of smoothing values.

    The run data loaded from ``bundle`` is divided by a 10-way ``KFold``.
    For every fold, one ``evaluate_split`` job is queued for each of 64
    evenly spaced alpha values between 1e-8 and 1e-1, all tagged with a
    per-fold UUID. Result rows are appended to the CSV file at ``out_path``.
    """
    header = ["alpha", "instances", "split", "mean_log_probability"]

    def yield_jobs():
        run_data = borg.storage.RunData.from_bundle(bundle)
        folds = sklearn.cross_validation.KFold(
            len(run_data), 10, indices=False)

        for (train_mask, test_mask) in folds:
            # identifier shared by every job of this fold
            split = uuid.uuid4()

            for alpha in numpy.r_[1e-8:1e-1:64j]:
                job_args = [run_data, alpha, split, train_mask, test_mask]

                yield (evaluate_split, job_args)

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)

        for (_, result_row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(result_row)
            # push each row out as soon as it arrives
            out_file.flush()
def flat_affinity_test(out_path, games_path, values_path, neighbors = 8, workers = 0, interpolate = True, off_graph = True):
    """Test value prediction in Go using features from a flat affinity graph.

    Value samples come from ``get_value_list(games_path, values_path)`` and
    are shuffled once. For each training-set size (currently only 10000) an
    affinity graph with ``neighbors`` neighbours is built over the sampled
    boards, and template (2x2 and 3x3), random and Laplacian feature jobs are
    queued for ten feature counts between 0 and 250. Result rows are written
    as CSV to ``out_path``.

    With ``off_graph`` true the jobs are scored on held-out samples that
    follow the training prefix in the shuffled list (at most 250000 of them);
    otherwise they are scored on the training samples themselves. With
    ``interpolate`` true the random and Laplacian jobs additionally receive
    an ``aff_map`` keyword argument.
    """

    value_list = get_value_list(games_path,values_path)

    logger.info("number of value samples total: %i", len(value_list))

    def yield_jobs():
        min_samples = 10000
        max_samples = 15000
        step_samples = 5000
        max_test_samples = 250000

        shuffled_values = sorted(value_list, key = lambda _: numpy.random.rand())

        # NOTE(review): affinity_map is not defined in this function;
        # presumably it is a module-level name -- confirm.
        if interpolate:
            job_kwargs = (dict(aff_map = affinity_map),)
        else:
            job_kwargs = ()

        for samples in xrange(min_samples,max_samples,step_samples):
            # randomly sample subset of games
            value_dict = dict(shuffled_values[:samples])

            # if testing off-graph use held-out samples
            if off_graph:
                # Window of at most max_test_samples items after the training
                # prefix; an absolute end index would yield an empty test set
                # whenever samples exceeded max_test_samples.
                test_values = dict(shuffled_values[samples:samples + max_test_samples])
            else:
                test_values = dict(shuffled_values[:samples])

            boards = value_dict.keys()
            num_boards = len(boards)

            logger.info("kept %i board samples", num_boards)

            index = dict(zip(boards, xrange(num_boards)))
            avectors_ND = numpy.array(map(specmine.go.board_to_affinity, boards))
            affinity_NN = specmine.discovery.affinity_graph(avectors_ND, neighbors, sigma = 1e6)

            for B in numpy.r_[0:250:10j].round().astype(int):
                yield (run_template_features, [2, 2, B, test_values])
                yield (run_template_features, [3, 3, B, test_values])
                yield (run_random_features, [B, avectors_ND, index, test_values, interpolate]) + job_kwargs
                yield (
                    run_laplacian_features,
                    ["Laplacian",B,avectors_ND, affinity_NN, index, test_values, interpolate],
                    ) + job_kwargs

    with open(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["map_name", "features", "samples", "score_mean", "score_variance"])

        for (_, row) in condor.do(yield_jobs(), workers):
            writer.writerow(row)
Exemplo n.º 8
0
    def do(self, tasks):
        """Run ``tasks`` through condor with this object's worker count.

        Before dispatching, the module-wide ``condor.defaults.condor_matching``
        expression is overwritten with a fixed machine-requirements string.
        Returns whatever ``condor.do`` returns.
        """
        import condor

        # individual clauses, AND-ed together into one matching expression
        clauses = [
            'InMastodon',
            'regexp("rhavan-.*", ParallelSchedulingGroup)',
            '(Arch == "X86_64")',
            '(OpSys == "LINUX")',
            '(Memory > 1024)',
            ]

        condor.defaults.condor_matching = " && ".join(clauses)

        return condor.do(tasks, workers=self._workers)
Exemplo n.º 9
0
def main(out_path, runs, repeats=128, workers=0, local=False):
    """Simulate solver and portfolio behavior over growing training sizes.

    For each entry of ``runs`` the bundle named by ``run["bundle"]`` is
    loaded (memoized), ``repeats`` shuffle splits holding out 20% are drawn,
    and for every split a ``simulate_run`` job is queued for each of 32
    instance counts between 10 and 80% of the data and for each maker. The
    makers are one per solver when ``run["portfolio_name"]`` is "-", or a
    single portfolio maker otherwise. Result rows are written as CSV through
    ``borg.util.openz`` to ``out_path``.
    """

    logger.info("simulating %i runs", len(runs))

    get_run_data = borg.util.memoize(borg.storage.RunData.from_bundle)

    def yield_jobs():
        for run in runs:
            all_data = get_run_data(run["bundle"])
            splits = sklearn.cross_validation.ShuffleSplit(
                len(all_data), repeats, test_fraction=0.2, indices=False)

            portfolio_name = run["portfolio_name"]

            if portfolio_name == "-":
                makers = [
                    borg.experiments.simulate_runs.SolverMaker(solver_name)
                    for solver_name in all_data.solver_names
                ]
            else:
                makers = [
                    borg.experiments.simulate_runs.PortfolioMaker(portfolio_name)
                ]

            # upper end of the sweep: 80% of the available data
            max_instances = len(all_data) * 0.8

            for (train_mask, test_mask) in splits:
                sweep = numpy.r_[10.0:max_instances:32j]

                for instances in [int(round(point)) for point in sweep]:
                    for maker in makers:
                        job_args = [
                            run,
                            maker,
                            all_data,
                            train_mask,
                            test_mask,
                            instances,
                            run["independent"],
                            run["mixture"],
                        ]

                        yield (simulate_run, job_args)

    header = [
        "description",
        "solver",
        "instances",
        "successes",
        "mean_time",
        "median_time",
    ]

    with borg.util.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)

        for (_, result_row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(result_row)
            # push each row out as soon as it arrives
            out_file.flush()
Exemplo n.º 10
0
    def do(self, tasks):
        """Dispatch ``tasks`` via condor using ``self._workers`` workers.

        The module-wide ``condor.defaults.condor_matching`` expression is set
        to a fixed machine-requirements string first; the result of
        ``condor.do`` is returned unchanged.
        """
        import condor

        matching = (
            'InMastodon'
            ' && regexp("rhavan-.*", ParallelSchedulingGroup)'
            ' && (Arch == "X86_64")'
            ' && (OpSys == "LINUX")'
            ' && (Memory > 1024)'
            )
        condor.defaults.condor_matching = matching

        results = condor.do(tasks, workers=self._workers)

        return results
Exemplo n.º 11
0
def main(out_path, experiments, workers=0, local=False):
    """Evaluate trained models on random feature subsets of varying size.

    For each experiment the run data named by ``experiment["run_data"]`` is
    loaded (memoized) and the first fold of a 5-way ``KFold`` provides the
    train/test masks. Every model in ``experiment["model_names"]`` is trained
    once, then one ``evaluate_features`` job is queued per even feature count
    (0 up to the number of common features) and per each of 32 replications,
    each with a freshly shuffled feature selection. Result rows are written
    as CSV through ``borg.util.openz`` to ``out_path``.
    """

    logger.info("running %i experiments", len(experiments))

    get_run_data = borg.util.memoize(borg.storage.RunData.from_bundle)

    def yield_jobs():
        for experiment in experiments:
            logger.info("preparing experiment: %s", experiment)

            run_data = get_run_data(experiment["run_data"])
            folds = sklearn.cross_validation.KFold(
                len(run_data), 5, indices=False)

            # only the first fold is used
            (train_mask, test_mask) = next(iter(folds))

            training = run_data.masked(train_mask).collect_systematic([2])
            testing = run_data.masked(test_mask).collect_systematic([4])
            feature_counts = range(0, len(run_data.common_features) + 1, 2)

            for model_name in experiment["model_names"]:
                model = borg.experiments.common.train_model(
                    model_name, training)
                model.name = model_name

                for feature_count in feature_counts:
                    for _ in xrange(32):
                        # random order via random sort keys, then truncate
                        shuffled_names = sorted(
                            run_data.common_features,
                            key=lambda _: numpy.random.random())
                        selected_names = sorted(shuffled_names[:feature_count])

                        yield (evaluate_features, [model, testing, selected_names])

    header = ["model_name", "features", "score_name", "score"]

    with borg.util.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)

        for (_, result_rows) in condor.do(yield_jobs(), workers, local):
            writer.writerows(result_rows)
            # push each batch out as soon as it arrives
            out_file.flush()
Exemplo n.º 12
0
def main(
    out_path,
    portfolio_name,
    train_bundle,
    test_bundle,
    single=None,
    workers=0,
    local=False,
):
    """Simulate solver or portfolio runs and write the resulting rows as CSV.

    Training and test run data are loaded from the two bundles. A
    ``portfolio_name`` of "-" selects per-solver makers (all solver names in
    the training data, or just ``single`` when given); any other value
    selects one portfolio maker. Each maker yields four ``simulate_split``
    jobs, and the rows produced are written to ``out_path`` after a header.
    """

    def build_makers(train_data):
        # choose which solvers / portfolio to simulate
        if portfolio_name != "-":
            return [PortfolioMaker(portfolio_name)]

        if single is None:
            solver_names = train_data.solver_names
        else:
            solver_names = [single]

        return [SolverMaker(solver_name) for solver_name in solver_names]

    def yield_runs():
        train_data = borg.storage.RunData.from_bundle(train_bundle)
        test_data = borg.storage.RunData.from_bundle(test_bundle)

        for maker in build_makers(train_data):
            for _ in xrange(4):
                yield (simulate_split, [maker, train_data, test_data])

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(
            ["path", "solver", "budget", "cost", "success", "answer", "split"])

        # presumably condor.do invokes this with (task, result) per finished job
        def write_result(_, rows):
            return writer.writerows(rows)

        condor.do(yield_runs(), workers, write_result, local)
Exemplo n.º 13
0
def main(out_path, runs, repeats=5, workers=0, local=False):
    """Simulate solver and portfolio behavior on train/test data sets.

    For each run the training bundle is loaded (memoized) and optionally
    reduced with ``only_nontrivial()``. When ``run["test_bundle"]`` is "-",
    a ``repeats``-way ``KFold`` over the training data supplies the data-set
    pairs; otherwise the separately loaded test bundle is paired with the
    training data ``repeats`` times. One ``simulate_run`` job is queued per
    maker and data-set pair, and the returned rows are written as CSV through
    ``borg.util.openz`` to ``out_path``.
    """

    logger.info("simulating %i runs", len(runs) * repeats)

    get_run_data = borg.util.memoize(borg.storage.RunData.from_bundle)

    def load_data(run, bundle_key):
        # load one bundle of a run, applying the optional nontrivial filter
        data = get_run_data(run[bundle_key])

        if run.get("only_nontrivial", False):
            data = data.only_nontrivial()

        return data

    def yield_jobs():
        for run in runs:
            train_data = load_data(run, "train_bundle")

            if run["test_bundle"] != "-":
                test_data = load_data(run, "test_bundle")
                data_sets = [(train_data, test_data)] * repeats
            else:
                folds = sklearn.cross_validation.KFold(
                    len(train_data), repeats, indices=False)
                data_sets = [(train_data.masked(v), train_data.masked(e))
                             for (v, e) in folds]

            if run["portfolio_name"] == "-":
                makers = [SolverMaker(solver_name)
                          for solver_name in train_data.solver_names]
            else:
                makers = [PortfolioMaker(run["portfolio_name"])]

            for maker in makers:
                for (train_fold_data, test_fold_data) in data_sets:
                    job_args = [run, maker, train_fold_data, test_fold_data]

                    yield (simulate_run, job_args)

    header = ["category", "solver", "budget", "cost", "success", "split"]

    with borg.util.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)

        for (_, result_rows) in condor.do(yield_jobs(), workers, local):
            writer.writerows(result_rows)
            # push each batch out as soon as it arrives
            out_file.flush()
Exemplo n.º 14
0
def main(out_path, experiments, workers = 0, local = False):
    """Run the requested model evaluations over random feature subsets.

    Each experiment names a run-data bundle (loaded once via a memoized
    loader) and a list of model names. The first fold of a 5-way ``KFold``
    gives the train/test masks. Each model is trained once and then
    evaluated, via ``evaluate_features`` jobs, on 32 random feature
    selections for every even feature count from 0 up to the number of
    common features. Rows are written as CSV to ``out_path``.
    """

    logger.info("running %i experiments", len(experiments))

    get_run_data = borg.util.memoize(borg.storage.RunData.from_bundle)

    def random_selection(names, count):
        # shuffle by random sort keys, keep the first `count`, return sorted
        shuffled = sorted(names, key = lambda _: numpy.random.random())

        return sorted(shuffled[:count])

    def yield_jobs():
        for experiment in experiments:
            logger.info("preparing experiment: %s", experiment)

            run_data = get_run_data(experiment["run_data"])
            validation = sklearn.cross_validation.KFold(len(run_data), 5, indices = False)
            # only the first fold is used
            (train_mask, test_mask) = next(iter(validation))
            training = run_data.masked(train_mask).collect_systematic([2])
            testing = run_data.masked(test_mask).collect_systematic([4])
            feature_counts = range(0, len(run_data.common_features) + 1, 2)
            parameters = list(itertools.product(feature_counts, xrange(32)))

            for model_name in experiment["model_names"]:
                model = borg.experiments.common.train_model(model_name, training)
                model.name = model_name

                for (feature_count, _) in parameters:
                    selected_names = random_selection(run_data.common_features, feature_count)

                    yield (evaluate_features, [model, testing, selected_names])

    with borg.util.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["model_name", "features", "score_name", "score"])

        for (_, batch) in condor.do(yield_jobs(), workers, local):
            writer.writerows(batch)
            # push each batch out as soon as it arrives
            out_file.flush()
Exemplo n.º 15
0
def main(out_path, runs, repeats = 128, workers = 0, local = False):
    """Simulate portfolio and solver behavior for a sweep of instance counts.

    Every run's bundle is loaded through a memoized loader and split
    ``repeats`` times with a 20% held-out shuffle split. For each split, 32
    instance counts between 10 and 80% of the data are combined with every
    maker (one per solver if ``run["portfolio_name"]`` is "-", else one
    portfolio maker) into ``simulate_run`` jobs. Rows are written as CSV to
    ``out_path``.
    """

    logger.info("simulating %i runs", len(runs))

    get_run_data = borg.util.memoize(borg.storage.RunData.from_bundle)

    def makers_for(run, all_data):
        # one maker per solver, or a single portfolio maker
        if run["portfolio_name"] != "-":
            return [borg.experiments.simulate_runs.PortfolioMaker(run["portfolio_name"])]

        return [borg.experiments.simulate_runs.SolverMaker(solver_name) for solver_name in all_data.solver_names]

    def yield_jobs():
        for run in runs:
            all_data = get_run_data(run["bundle"])
            validation = sklearn.cross_validation.ShuffleSplit(len(all_data), repeats, test_fraction = 0.2, indices = False)
            makers = makers_for(run, all_data)
            max_instances = len(all_data) * 0.8

            for (train_mask, test_mask) in validation:
                for point in numpy.r_[10.0:max_instances:32j]:
                    instances = int(round(point))

                    for maker in makers:
                        yield (
                            simulate_run,
                            [run, maker, all_data, train_mask, test_mask, instances, run["independent"], run["mixture"]],
                            )

    with borg.util.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["description", "solver", "instances", "successes", "mean_time", "median_time"])

        for (_, result_row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(result_row)
            # push each row out as soon as it arrives
            out_file.flush()
Exemplo n.º 16
0
def main(out_path, runs, repeats = 5, workers = 0, local = False):
    """Simulate portfolio and solver behavior on per-run data sets.

    Training data for each run comes from ``run["train_bundle"]`` (memoized
    load, optionally filtered by ``only_nontrivial()``). A test bundle of "-"
    means ``repeats``-way ``KFold`` over the training data; any other value
    is loaded (and filtered the same way) and reused ``repeats`` times. Each
    maker is paired with each data set in a ``simulate_run`` job, and the
    rows returned are written as CSV to ``out_path``.
    """

    logger.info("simulating %i runs", len(runs) * repeats)

    get_run_data = borg.util.memoize(borg.storage.RunData.from_bundle)

    def yield_jobs():
        for run in runs:
            train_data = get_run_data(run["train_bundle"])

            if run.get("only_nontrivial", False):
                train_data = train_data.only_nontrivial()

            if run["test_bundle"] == "-":
                # no separate test bundle: cross-validate on the training data
                data_sets = []

                for (v, e) in sklearn.cross_validation.KFold(len(train_data), repeats, indices = False):
                    data_sets.append((train_data.masked(v), train_data.masked(e)))
            else:
                test_data = get_run_data(run["test_bundle"])

                if run.get("only_nontrivial", False):
                    test_data = test_data.only_nontrivial()

                data_sets = [(train_data, test_data) for _ in xrange(repeats)]

            if run["portfolio_name"] != "-":
                makers = [PortfolioMaker(run["portfolio_name"])]
            else:
                makers = [SolverMaker(solver_name) for solver_name in train_data.solver_names]

            for maker in makers:
                for (train_fold_data, test_fold_data) in data_sets:
                    yield (simulate_run, [run, maker, train_fold_data, test_fold_data])

    with borg.util.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["category", "solver", "budget", "cost", "success", "split"])

        for (_, batch) in condor.do(yield_jobs(), workers, local):
            writer.writerows(batch)
            # push each batch out as soon as it arrives
            out_file.flush()
Exemplo n.º 17
0
def main(out_path, experiments, workers=0, local=False):
    """Evaluate models on shuffle splits for a sweep of instance counts.

    For every experiment the run data named by ``experiment["run_data"]`` is
    loaded (memoized) and 32 shuffle splits holding out 10% are drawn. One
    ``evaluate_split`` job is queued per split and per each of 24 instance
    counts between 10 and ``floor(0.9 * len(run_data)) - 10``. Result rows
    are written as CSV through ``borg.util.openz`` to ``out_path``.
    """

    logger.info("running %i experiments", len(experiments))

    get_run_data = borg.util.memoize(borg.storage.RunData.from_bundle)

    def yield_jobs():
        for experiment in experiments:
            logger.info("preparing experiment: %s", experiment)

            run_data = get_run_data(experiment["run_data"])
            splits = sklearn.cross_validation.ShuffleSplit(
                len(run_data), 32, test_fraction=0.1, indices=False)

            # largest training size in the sweep
            max_instance_count = numpy.floor(0.9 * len(run_data)) - 10
            sweep = numpy.r_[10:max_instance_count:24j]
            instance_counts = [int(round(point)) for point in sweep]

            for (train_mask, test_mask) in splits:
                for instance_count in instance_counts:
                    job_args = [
                        run_data,
                        experiment["model_name"],
                        experiment["mixture"],
                        experiment["independent"],
                        instance_count,
                        train_mask,
                        test_mask,
                    ]

                    yield (evaluate_split, job_args)

    header = ["model_name", "sampling", "instances", "mean_log_probability"]

    with borg.util.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)

        for (_, result_row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(result_row)
            # push each row out as soon as it arrives
            out_file.flush()
Exemplo n.º 18
0
def main(out_path, bundle, workers=0, local=False):
    """Run planner experiments over a range of bin counts and write a CSV.

    Run data is loaded from ``bundle``. For each planner ("knapsack",
    "streeter", "bellman") and each bin count from 1 to 120, sixteen
    ``run_experiment`` jobs are queued; the "bellman" planner is only run for
    bin counts up to 5. Result rows are written to ``out_path`` under the
    header ``planner, bins, rate``.
    """
    def yield_jobs():
        run_data = borg.storage.RunData.from_bundle(bundle)

        for planner_name in ["knapsack", "streeter", "bellman"]:
            for bin_count in xrange(1, 121):
                # bellman is restricted to the five smallest bin counts
                if planner_name == "bellman" and bin_count > 5:
                    continue

                for _ in xrange(16):
                    yield (run_experiment, [run_data, planner_name, bin_count])

    header = ["planner", "bins", "rate"]

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)

        for (_, result_row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(result_row)
Exemplo n.º 19
0
def main(out_path, bundle, workers = 0, local = False):
    """Run each planner for a range of bin counts and record the results.

    The run data from ``bundle`` is passed to ``run_experiment`` jobs: sixteen
    replications for every planner name and every bin count from 1 to 120,
    except that "bellman" stops at 5 bins. Rows returned by the jobs are
    written to the CSV file at ``out_path``.
    """

    planner_names = ["knapsack", "streeter", "bellman"]

    def yield_jobs():
        run_data = borg.storage.RunData.from_bundle(bundle)

        for planner_name in planner_names:
            # bellman only runs for the five smallest bin counts
            max_bins = 5 if planner_name == "bellman" else 120

            for bin_count in xrange(1, max_bins + 1):
                for _ in xrange(16):
                    yield (run_experiment, [run_data, planner_name, bin_count])

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["planner", "bins", "rate"])

        for (_, result_row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(result_row)
Exemplo n.º 20
0
def main(out_path, experiments, workers = 0, local = False):
    """Run the requested model evaluations across training-set sizes.

    Each experiment's run data is loaded through a memoized loader and split
    32 times with a 10% held-out shuffle split. For each split, 24 instance
    counts between 10 and ``floor(0.9 * len(run_data)) - 10`` are turned
    into ``evaluate_split`` jobs carrying the experiment's model name,
    mixture and independent settings. Rows are written as CSV to
    ``out_path``.
    """

    logger.info("running %i experiments", len(experiments))

    get_run_data = borg.util.memoize(borg.storage.RunData.from_bundle)

    def yield_jobs():
        for experiment in experiments:
            logger.info("preparing experiment: %s", experiment)

            run_data = get_run_data(experiment["run_data"])
            validation = sklearn.cross_validation.ShuffleSplit(len(run_data), 32, test_fraction = 0.1, indices = False)
            max_instance_count = numpy.floor(0.9 * len(run_data)) - 10

            instance_counts = []

            for point in numpy.r_[10:max_instance_count:24j]:
                instance_counts.append(int(round(point)))

            for (train_mask, test_mask) in validation:
                for instance_count in instance_counts:
                    yield (
                        evaluate_split,
                        [
                            run_data,
                            experiment["model_name"],
                            experiment["mixture"],
                            experiment["independent"],
                            instance_count,
                            train_mask,
                            test_mask,
                            ],
                        )

    with borg.util.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["model_name", "sampling", "instances", "mean_log_probability"])

        for (_, result_row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(result_row)
            # push each row out as soon as it arrives
            out_file.flush()
Exemplo n.º 21
0
def main(domain_name,
         instances_root,
         suffix=".features.csv",
         skip_existing=False,
         workers=0):
    """Collect task features and store them next to each instance file.

    ``domain_name`` is treated as a solvers file when a path of that name
    exists, and as a domain name otherwise. Every file under
    ``instances_root`` with one of the domain's extensions gets a
    ``features_for_path`` job, unless ``skip_existing`` is set and
    ``path + suffix`` already exists. For each finished job a two-line CSV
    (names, then values) is written to ``path + suffix``.

    Also overwrites the module-wide ``condor.defaults.condor_matching``.
    """

    condor.defaults.condor_matching = " && ".join([
        'InMastodon',
        'regexp("rhavan-.*", ParallelSchedulingGroup)',
        '(Arch == "X86_64")',
        '(OpSys == "LINUX")',
        '(Memory > 1024)',
    ])

    def yield_runs():
        if os.path.exists(domain_name):
            domain = borg.load_solvers(domain_name).domain
        else:
            domain = borg.get_domain(domain_name)

        paths = list(borg.util.files_under(instances_root, domain.extensions))
        count = 0

        for path in paths:
            already_done = skip_existing and os.path.exists(path + suffix)

            if not already_done:
                count += 1

                yield (features_for_path, [domain, path])

        # only reached once every job has been handed out
        logger.info("collecting features for %i instances", count)

    for (task, (names, values)) in condor.do(yield_runs(), workers):
        (_, cnf_path) = task.args

        with open(cnf_path + suffix, "wb") as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(names)
            writer.writerow(values)
Exemplo n.º 22
0
def main(out_path, games_path, values_path, neighbors = 8, workers = 0, off_graph = True):
    """Test value prediction in Go with affinity-graph features.

    Value samples from ``get_value_list(games_path, values_path)`` are
    shuffled; the first 20000 form the sampled set whose boards are turned
    into affinity vectors. The test set is the last 20000 shuffled samples
    when ``off_graph`` is true and the first 20000 otherwise. A single
    ``run_graph_features`` job with 200 features is queued and its row is
    written as CSV to ``out_path``.

    NOTE(review): ``neighbors`` is accepted but not used in this function.
    """

    value_list = get_value_list(games_path, values_path)

    logger.info("number of value samples total: %i", len(value_list))

    def yield_jobs():
        samples = 20000
        test_samples = 20000

        # random order via random sort keys
        shuffled_values = sorted(value_list, key = lambda _: numpy.random.rand())

        # randomly sample subset
        value_dict = dict(shuffled_values[:samples])

        if off_graph:
            held_out = shuffled_values[-test_samples:]
        else:
            held_out = shuffled_values[:test_samples]

        test_values = dict(held_out)

        logger.info("kept %i board samples", len(value_dict))

        avectors_ND = numpy.array([specmine.go.board_to_affinity(board) for board in value_dict])

        for B in [200]:
            yield (run_graph_features, ["affinity", B, avectors_ND, test_values])

    header = ["map_name", "features", "score_mean", "score_variance"]

    with open(out_path, "wb") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)

        for (_, result_row) in condor.do(yield_jobs(), workers):
            writer.writerow(result_row)
            # push each row out as soon as it arrives
            out_file.flush()
Exemplo n.º 23
0
def main(out_path, bundle, workers = 0, local = False):
    """Sweep the smoothing value of the pure multinomial model and log the scores.

    One job is scheduled per (cross-validation fold, alpha) pair; every row
    returned by a job is appended to the CSV at out_path and flushed.

    Arguments:
        out_path: CSV file to (over)write.
        bundle: passed to borg.storage.RunData.from_bundle() to load run data.
        workers, local: forwarded positionally to condor.do().
    """

    def yield_jobs():
        run_data = borg.storage.RunData.from_bundle(bundle)
        folds = sklearn.cross_validation.KFold(len(run_data), 10, indices = False)

        for (train_mask, test_mask) in folds:
            # every fold is tagged with its own random identifier
            split = uuid.uuid4()

            # 64 evenly spaced smoothing values covering [1e-8, 1e-1]
            for alpha in numpy.r_[1e-8:1e-1:64j]:
                yield (evaluate_split, [run_data, alpha, split, train_mask, test_mask])

    column_names = ["alpha", "instances", "split", "mean_log_probability"]

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(column_names)

        for (_, result_row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(result_row)
            out_file.flush()
Exemplo n.º 24
0
def main(domain_name, instances_root, suffix = ".features.csv", skip_existing = False, workers = 0):
    """Compute features for every instance file and store them beside the instance.

    Arguments:
        domain_name: either a path that exists on disk (then the domain is taken
            from borg.load_solvers()) or a name resolved by borg.get_domain().
        instances_root: directory scanned for files with the domain's extensions.
        suffix: appended to each instance path to form its output CSV path.
        skip_existing: when true, instances whose output CSV exists are skipped.
        workers: worker count handed to condor.do().
    """

    # restrict the condor machines that jobs may be matched to
    condor.defaults.condor_matching = " && ".join([
        'InMastodon',
        'regexp("rhavan-.*", ParallelSchedulingGroup)',
        '(Arch == "X86_64")',
        '(OpSys == "LINUX")',
        '(Memory > 1024)',
        ])

    def yield_runs():
        if os.path.exists(domain_name):
            domain = borg.load_solvers(domain_name).domain
        else:
            domain = borg.get_domain(domain_name)

        scheduled = 0

        # the path list is materialized before any job is handed out
        for path in list(borg.util.files_under(instances_root, domain.extensions)):
            already_done = skip_existing and os.path.exists(path + suffix)

            if not already_done:
                scheduled += 1

                yield (features_for_path, [domain, path])

        # only reached once the consumer has drained the generator
        logger.info("collecting features for %i instances", scheduled)

    for (task, (names, values)) in condor.do(yield_runs(), workers):
        (_, instance_path) = task.args

        with open(instance_path + suffix, "wb") as csv_file:
            # header row first, then the single row of feature values
            csv.writer(csv_file).writerows([names, values])
Exemplo n.º 25
0
def main(out_path, bundle, experiments, workers=0, local=False):
    """Infer distributions for several experiments and dump them to one CSV.

    Arguments:
        out_path: CSV file to (over)write.
        bundle: passed to borg.storage.RunData.from_bundle() to load run data.
        experiments: iterable of mappings providing the keys "model_name",
            "instance" and "exclude".
        workers, local: forwarded positionally to condor.do().
    """
    def yield_jobs():
        run_data = borg.storage.RunData.from_bundle(bundle)

        for experiment in experiments:
            model_name = experiment["model_name"]
            instance = experiment["instance"]
            exclude = experiment["exclude"]

            yield (infer_distributions,
                   [run_data, model_name, instance, exclude])

    header = ["model_name", "instance", "solver", "bin", "probability"]

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)

        # each job result is a batch of rows, hence writerows()
        for (_, rows) in condor.do(yield_jobs(), workers, local):
            writer.writerows(rows)
Exemplo n.º 26
0
def main(out_path, bundle, experiments, workers = 0, local = False):
    """Write the distributions inferred by multiple models to a CSV file.

    Every entry of experiments must provide "model_name", "instance" and
    "exclude"; one infer_distributions job is scheduled per entry, and the
    batch of rows each job returns is appended to out_path.
    """

    experiment_keys = ("model_name", "instance", "exclude")

    def yield_jobs():
        run_data = borg.storage.RunData.from_bundle(bundle)

        for experiment in experiments:
            # job arguments: the shared run data, then the per-experiment fields
            job_args = [run_data] + [experiment[key] for key in experiment_keys]

            yield (infer_distributions, job_args)

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["model_name", "instance", "solver", "bin", "probability"])

        results = condor.do(yield_jobs(), workers, local)

        for (_, row_batch) in results:
            writer.writerows(row_batch)
Exemplo n.º 27
0
def main(out_path, bundle, workers = 0, local = False):
    """Score the mixture models for every component count from 1 through 63.

    For each of the 64 shuffle splits, one job is scheduled per combination
    of component count and model name; every returned row is written to the
    CSV at out_path and flushed immediately.

    Arguments:
        out_path: CSV file to (over)write.
        bundle: passed to borg.storage.RunData.from_bundle() to load run data.
        workers, local: forwarded positionally to condor.do().
    """

    model_names = ["mul-dirmix", "mul-dirmatmix"]

    def yield_jobs():
        run_data = borg.storage.RunData.from_bundle(bundle)
        splits = sklearn.cross_validation.ShuffleSplit(
            len(run_data),
            64,
            test_fraction = 0.2,
            indices = False,
            )

        for (train_mask, test_mask) in splits:
            # random identifier shared by all jobs of this split
            split = uuid.uuid4()

            for K in range(1, 64):
                for model_name in model_names:
                    job_args = [run_data, model_name, K, split, train_mask, test_mask]

                    yield (evaluate_split, job_args)

    header = ["model_name", "components", "instances", "split", "mean_log_probability"]

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)

        for (_, result_row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(result_row)
            out_file.flush()
def main(workers = 0,
         k = 36,
         encoding = 'tile',
         env_size = 9,
         n_runs = 1,
         n_reward_samples = 2500,
         n_reward_runs = 25,
         lam = 0.,
         gam = 0.995,
         beta = 0.995,
         alpha = 1.,
         eta = 0.5,
         eps = 1e-5,
         patience = 16,
         max_iter = 8,
         l1theta = None,
         l1code = None,
         l2code = None,
         n_samples = None,
         nonlin = None,
         nonzero = None,
         training_methods = None,
         min_imp = 0.0001,
         min_delta = 1e-6,
         fldir = '/scratch/cluster/ccor/feature-learning/',
         req_rew = False,
         record_runs = False,
         ):
    """Train feature bases on a grid world via condor and save the losses as CSV.

    One train_basis job is scheduled for every combination of sample count,
    run and training method. Each job returns a (keys, values) pair; the
    values are written as one row of a compressed CSV under
    fldir + 'sirf/output/csv/'.

    Arguments:
        workers: worker count handed to condor.do().
        k: number of columns of the initial theta matrix.
        encoding: 'tabular' or 'tile'; 'factored' raises NotImplementedError
            and any other value raises ValueError.
        env_size: grid world size; the state count is env_size ** 2.
        n_runs: number of random initializations per sample count.
        n_reward_samples, n_reward_runs, eta: only recorded in the run
            parameters that are passed to each job.
        lam, gam, eps: passed to calc_discount_horizon() and
            BellmanBasis.get_mixing_matrices(); gam is also given to Model.
        beta: beta / gam is passed to the jobs as the beta ratio.
        alpha, nonlin, nonzero: forwarded to the jobs in the basis keyword dict.
        patience, max_iter, min_imp, min_delta: optimization parameters
            forwarded to train_basis.
        l1theta, l1code, l2code: regularization strengths; if several are
            given, the last non-None one in this order wins.
        n_samples: comma-separated sample counts; when empty or None the
            perfect-information data set is used instead of sampling.
        training_methods: list of (loss_list, wrt_list) pairs of equal length;
            a built-in default list is used when None.
        fldir: root directory, forwarded to train_basis and used as the
            prefix of the output directory.
        req_rew: forwarded to mdp.sample_encoding(); when true the sampled
            reward is asserted to be positive.
        record_runs: forwarded to train_basis.

    Raises:
        NotImplementedError: for the 'factored' encoding.
        ValueError: for an unrecognised encoding.
    """

    if n_samples:
        n_samples = [int(count) for count in n_samples.split(',')]

    beta_ratio = beta/gam
    # append reward to basis when using perfect info?
    if training_methods is None:
        training_methods = [
            #(['covariance', 'prediction', 'value_prediction', 'bellman'],[['theta-all'],['theta-all'],['theta-all'],['theta-all','w']]),
            #(['prediction', 'value_prediction', 'bellman'],[['theta-all'],['theta-all'],['theta-all','w']]),
            #(['value_prediction'],[['theta-all']]),
            #(['value_prediction', 'bellman'],[['theta-all'],['theta-all','w']]),
            #(['prediction'],[['theta-all']]),
            #(['prediction', 'bellman'], [['theta-all'],['theta-all','w']]),
            #(['covariance'], [['theta-all']]),
            (['prediction', 'bellman'], [['theta-all'],['theta-all','w']]),
            (['bellman', 'prediction', 'bellman'], [['theta-all','w'], ['theta-all'],['theta-all','w']]),
            (['full_covariance', 'bellman'], [['theta-all'],['theta-all','w']]),
            (['covariance', 'bellman'], [['theta-all'],['theta-all','w']]),
            (['full_laplacian'], [['theta-all', 'w']]),
            (['laplacian'], [['theta-all', 'w']]),
            (['bellman'], [['theta-all', 'w']]), # baseline
            ]

    # test-* losses are only tracked when data is sampled
    if n_samples:
        losses = ['sample-reward', 'test-lsbellman', 'test-bellman', 'test-reward',  'test-model', 'test-fullmodel', # test-training
                  'true-policy', 'true-policy-uniform', 'true-bellman', 'true-lsbellman', 'true-reward', 'true-model', 'true-fullmodel', 'true-lsq']
    else:
        losses = ['sample-reward', 'true-policy-uniform', 'true-policy', 'true-bellman',  'true-lsbellman', 'true-reward', 'true-model', 'true-fullmodel', 'true-lsq']

    logger.info('building environment of size %i' % env_size)
    mdp = grid_world.MDP(walls_on = True, size = env_size)
    env = mdp.env
    n_states = env_size**2
    m = Model(mdp.R, mdp.P, gam = gam)

    # create raw data encoder (constant appended in encoder by default)
    # strings are compared by value: identity (`is`) only matches when the
    # interpreter happens to intern both operands
    if encoding == 'tabular':
        encoder = TabularFeatures(env_size, append_const = True)
    elif encoding == 'tile':
        encoder = TileFeatures(env_size, append_const = True)
    elif encoding == 'factored':
        raise NotImplementedError
    else:
        # fail here rather than with a NameError on `encoder` further down
        raise ValueError('unknown encoding: %r' % (encoding,))

    def sample(n):
        """Draw training, validation and test sets from the grid world."""
        logger.info('sampling from a grid world')
        # currently defaults to on-policy sampling

        n_extra = calc_discount_horizon(lam, gam, eps) - 1 # mdp returns n+1 states and n rewards
        kw = dict(n_samples = n + n_extra, encoder = encoder, req_rew = req_rew)
        R, X, _ = mdp.sample_encoding(**kw)

        if req_rew:
            logger.info('reward required')
            assert sum(R.todense()) > 0

        logger.info('reward sum: %.2f' % sum(R.todense()))

        R_val, X_val, _ = mdp.sample_encoding(**kw)
        R_test, X_test, _ = mdp.sample_encoding(**kw)
        weighting = 'policy'

        return (X, X_val, X_test), (R, R_val, R_test), weighting

    def full_info():
        """Build the perfect-information data set from the model itself."""
        logger.info('using perfect information')
        # gen stacked matrices of I, P, P^2, ...
        R = numpy.array([])
        S = sp.eye(n_states, n_states)
        P = sp.eye(n_states, n_states)
        for _ in xrange(calc_discount_horizon(lam, gam, eps)): # decay epsilon
            R = numpy.append(R, P * m.R)
            P = m.P * P
            S = sp.vstack((S, P))

        X = encoder.encode(S)
        R = sp.csr_matrix(R[:,None])
        # the same matrices serve as training, validation and test data
        X_val = X_test = X
        R_val = R_test = R
        weighting = 'uniform'

        return (X, X_val, X_test), (R, R_val, R_test), weighting

    # later regularizers take precedence over earlier ones
    reg = None
    if l1theta is not None:
        reg = ('l1theta', l1theta)
    if l1code is not None:
        reg = ('l1code', l1code)
    if l2code is not None:
        reg = ('l2code', l2code)

    reg_label = reg[0]+str(reg[1]) if reg else 'None'
    nonlin_label = nonlin if nonlin else 'None'

    run_param_keys = ['k','method','encoding','samples', 'reward_samples', 'reward_runs', 'size','weighting',
                      'lambda','gamma','alpha', 'eta', 'regularization','nonlinear']

    def yield_jobs():

        for n in n_samples or [n_states]:

            logger.info('creating job with %i samples/states' % n)

            # build bellman operator matrices
            logger.info('making mixing matrices')
            Mphi, Mrew = BellmanBasis.get_mixing_matrices(n, lam, gam,
                                    sampled = bool(n_samples), eps = eps)

            for _ in xrange(n_runs):

                n_features = encoder.n_features
                # initialize parameters
                theta_init = numpy.random.standard_normal((n_features, k))
                theta_init /= numpy.sqrt((theta_init * theta_init).sum(axis=0))
                # w_init is not passed to the jobs; the draw is kept so the
                # random stream matches earlier runs
                w_init = numpy.random.standard_normal((k+1,1))
                w_init = w_init / numpy.linalg.norm(w_init)

                # sample or gather full info data
                X_data, R_data, weighting = sample(n) if n_samples else full_info()

                bb_params = [n_features, [k], beta_ratio]
                bb_dict = dict( alpha = alpha, reg_tuple = reg, nonlin = nonlin,
                                nonzero = nonzero, thetas = [theta_init])

                for tm in training_methods:
                    loss_list, wrt_list = tm
                    assert len(loss_list) == len(wrt_list)

                    run_param_values = [k, tm, encoder, n,
                                n_reward_samples, n_reward_runs,
                                env_size, weighting,
                                lam, gam, alpha, eta,
                                reg_label,
                                nonlin_label]

                    d_run_params = dict(izip(run_param_keys, run_param_values))

                    yield (train_basis,[d_run_params, bb_params, bb_dict,
                                        env, m, losses, # environment, model and loss list
                                        X_data, R_data, Mphi, Mrew, # training data
                                        max_iter, patience, min_imp, min_delta, # optimization params
                                        fldir, record_runs]) # recording params

    # create output file path
    date_str = time.strftime('%y%m%d.%X').replace(':','')
    out_dir = fldir + 'sirf/output/csv/'
    root =  '%s.%s_results' % (
               date_str,
               'n_samples' if n_samples else 'full_info')

    d_experiment_params = dict(izip(['k','encoding','size',
                      'lambda','gamma','alpha','regularization','nl'],
                      [k, encoder, env_size, lam, gam, alpha,
                              reg_label,
                              nonlin_label]))
    save_path = out_string(out_dir, root, d_experiment_params, '.csv.gz')
    logger.info('saving results to %s' % save_path)

    # get column title list ordered params | losses using dummy dicts
    d_param = dict(izip(run_param_keys, numpy.zeros(len(run_param_keys))))
    # size the dummy values by the loss list itself so that no loss column
    # is dropped if it ever outgrows the parameter key list
    d_loss = dict(izip(losses, numpy.zeros(len(losses))))
    col_keys_array,_ = reorder_columns(d_param, d_loss)

    with openz(save_path, "wb") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(col_keys_array)

        for (_, out) in condor.do(yield_jobs(), workers):
            keys, vals = out
            assert (keys == col_keys_array).all() # todo catch
            writer.writerow(vals)
def experiment(workers = 80, n_runs = 9, k = 16, env_size = 15, gam = 0.998, lam = 0., eps = 1e-5,  
    partition = None, patience = 8, max_iter = 8, weighting = 'uniform', reward_init = False,
    nonlin = 1e-8, n_samples = None, beta_ratio = 1.,
    training_methods = None):
    """Train BellmanBasis models over several sample counts, then pickle and plot the losses.

    One condor_job is scheduled for every (sample count, run, training method)
    triple. The tracked losses are collected into arrays of shape
    (len(n_samples), n_runs, len(training_methods)), pickled below
    ./sirf/output/pickle/ and plotted into a PDF below ./sirf/output/plots/
    (both relative to the current working directory).

    Arguments:
        workers: worker count handed to condor.do().
        n_runs: number of random initializations per sample count.
        k: number of columns of the initial theta matrix.
        env_size: grid world size; the state dimension is env_size ** 2.
        gam, lam, eps: passed to BellmanBasis.get_mixing_matrices(); gam is
            also given to Model.
        partition: passed to BellmanBasis; defaults to
            {'theta-model': k-1, 'theta-reward': 1}.
        patience, max_iter: forwarded to each condor_job.
        weighting: sampling distribution passed to mdp.sample_grid_world() and
            forwarded to each job.
        reward_init: when true, the last theta column is initialized from the
            model reward and the 'theta-model' training methods are used.
        nonlin, beta_ratio: passed to BellmanBasis.
        n_samples: sequence of sample counts; defaults to six rounded values
            evenly spaced between 50 and 1500.
        training_methods: list of (loss_list, wrt_list) pairs; a default list
            depending on reward_init is used when None.
    """

    if training_methods is None:
        # note: for each loss string, you need a corresponding wrt list
        # the two default lists only differ in which theta block is trained;
        # the baseline always trains 'theta-all'
        theta = 'theta-model' if reward_init else 'theta-all'
        training_methods = [
            (['prediction'],[[theta]]),
            (['prediction', 'layered'], [[theta],[theta,'w']]),
            (['covariance'],[[theta]]), # with reward, without fine-tuning
            (['covariance', 'layered'], [[theta],[theta,'w']]), # theta-model here for 2nd wrt?
            (['layered'], [['theta-all','w']])] # baseline

    theano.gof.compilelock.set_lock_status(False)
    theano.config.on_unused_input = 'ignore'
    theano.config.warn.sum_div_dimshuffle_bug = False

    if n_samples is None:
        #n_samples = [100,500]
        n_samples = numpy.round(numpy.linspace(50,1500,6)).astype(int)

    if partition is None:
        partition = {'theta-model':k-1, 'theta-reward':1}

    mdp = grid_world.MDP(walls_on = True, size = env_size)
    m = Model(mdp.env.R, mdp.env.P, gam = gam)
    dim = env_size**2

    # tracked losses
    losses = ['test-bellman', 'test-reward', 'test-model', 'true-bellman', 'true-lsq']
    logger.info('losses tracked: '+ str(losses))

    # one result array per loss, indexed by (sample count, run, training method)
    d_loss_data = {}
    for key in losses:
        d_loss_data[key] = numpy.zeros((len(n_samples), n_runs, len(training_methods)))

    def yield_jobs():

        for i,n in enumerate(n_samples):

            Mphi, Mrew = BellmanBasis.get_mixing_matrices(n, lam, gam, sampled = True, eps = eps)

            for r in xrange(n_runs):

                # initialize features with unit norm
                theta_init = numpy.random.standard_normal((dim+1, k))
                if reward_init:
                    theta_init[:-1,-1] = m.R # XXX set last column to reward
                    theta_init[-1,-1] = 0
                theta_init /= numpy.sqrt((theta_init * theta_init).sum(axis=0))

                w_init = numpy.random.standard_normal((k+1,1))
                w_init = w_init / numpy.linalg.norm(w_init)

                # sample data: training, validation, and test sets
                S, Sp, R, _, = mdp.sample_grid_world(n, distribution = weighting)
                # NOTE(review): the training set is stacked with numpy.vstack while
                # the validation and test sets use scipy.sparse.vstack -- confirm
                # that this difference is intended
                S = numpy.vstack((S, Sp[-1,:]))
                S_val, Sp_val, R_val, _, = mdp.sample_grid_world(n, distribution = weighting)
                S_val = scipy.sparse.vstack((S_val, Sp_val[-1,:]))
                S_test, Sp_test, R_test, _, = mdp.sample_grid_world(n, distribution = weighting)
                S_test = scipy.sparse.vstack((S_test, Sp_test[-1,:]))

                bb = BellmanBasis(dim+1, k, beta_ratio, partition = partition,
                    theta = theta_init, w = w_init, record_loss = losses, nonlin = nonlin)

                for j,tm in enumerate(training_methods):

                    # (i, r, j) tells the aggregation loop where to store the result
                    yield (condor_job,[(i,r,j), bb, m, tm,
                            S, R, S_val, R_val, S_test, R_test,
                            Mphi, Mrew, patience, max_iter, weighting])

    # aggregate the condor data
    for (_, result) in condor.do(yield_jobs(), workers):
        d_batch_loss, ind_tuple = result
        for name, batch_loss in d_batch_loss.items():
            d_loss_data[name][ind_tuple] = batch_loss

    # save results!
    pi_root = 'n_samples_results_rinit' if reward_init else 'n_samples_results'
    out_path = os.getcwd()+'/sirf/output/pickle/%s.no_r.k=%i.l=%s.g=%s.%s.size=%i.r=%i..pickle.gz' \
                    % (pi_root, k, str(lam), str(gam), weighting, env_size, n_runs)
    logger.info('saving results to %s' % out_path)
    with util.openz(out_path, "wb") as out_file:
        pickle.dump(d_loss_data, out_file, protocol = -1)

    x = numpy.array(n_samples, dtype = numpy.float64) #range(len(n_samples))
    f = plt.figure()
    logger.info('plotting')
    plot_styles = ['r-', 'b-', 'g-', 'k-', 'c-', 'm-']
    for i,(key,mat) in enumerate(d_loss_data.items()):

        ax = f.add_subplot(2,3,i+1) # todo generalize for arb length

        for h,tm in enumerate(training_methods):

            std = numpy.std(mat[:,:,h], axis=1)
            mn = numpy.mean(mat[:,:,h], axis=1)
            if 'test' in key:
                # test losses are divided by the sample count
                mn = mn/x
                std = std/x
            ax.fill_between(x, mn-std, mn+std, facecolor='yellow', alpha=0.15)
            ax.plot(x, mn, plot_styles[h], label = str(tm[0]))
            plt.title(key)
            #plt.axis('off')
            #plt.legend(loc = 3) # lower left

    pl_root = 'n_samples_rinit' if reward_init else 'n_samples'
    # the argument order must follow the format string: the root name fills the
    # leading %s and the first/last sample counts fill the two %i after "n="
    plt.savefig(os.getcwd()+'/sirf/output/plots/%s.n=%i-%i.k=%i.l=%s.g=%s.%s.size=%i.r=%i.pdf'
        % (pl_root, n_samples[0], n_samples[-1], k,
        str(lam), str(gam), weighting, env_size, n_runs))
Exemplo n.º 30
0
def do(*args, **kwargs):
    """Delegate to condor.do() with the given arguments and return its result."""
    outcome = condor.do(*args, **kwargs)

    return outcome
Exemplo n.º 31
0
def measure_feature_performance(
    games_path, values_path,  workers = 0,
    affinity_neighbors = 8, interp_neighbors = 8, interp_sigma_sq = -1,
    num_graph_samples = 20000, num_test_samples = 80000,
    max_num_features = 500, ridge_param = 0.01, feature_boost = True, eig_solver='arpack'):
    """Compare template, Laplacian and random feature maps on Go value data.

    For ten feature counts between 0 and max_num_features, one job per feature
    map is scheduled through condor; every returned row is written to a CSV
    below specmine/static/experiments/ whose name encodes the settings.

    Arguments:
        games_path, values_path: forwarded to get_value_list(); values_path is
            also searched for 'random' / 'alp' to label the output file.
        workers: worker count handed to condor.do().
        affinity_neighbors: neighbor count used when building Laplacian maps.
        interp_neighbors, interp_sigma_sq: forwarded to the Laplacian and
            random feature jobs.
        num_graph_samples: number of boards used to build the Laplacian maps
            (half of it, as a float, for the "small" map).
        num_test_samples: number of shuffled samples used for testing.
        max_num_features: eigenvector limit and upper end of the feature sweep.
        ridge_param: forwarded to every job.
        feature_boost: also build and evaluate a Laplacian map whose affinity
            map is the 2x2 template map.
        eig_solver: forwarded to get_laplacian_map().
    """

    if 'random' in values_path:
        value_player = 'random'
    elif 'alp' in values_path:
        value_player = 'alp'
    else:
        value_player = ''

    out_path = 'specmine/static/experiments/go_feature_performance.RMSE.{p}.rp={r}.ngs={g}.nts={t}.nf={f}.nan={an}.nin={inn}.is={sig}.{ei}'.format(
        p = value_player,
        r = ridge_param,
        g = num_graph_samples,
        t = num_test_samples,
        f = max_num_features,
        an = affinity_neighbors,
        inn = interp_neighbors,
        sig = interp_sigma_sq,
        ei = eig_solver,
        )

    if feature_boost:
        out_path += '.boost.csv'
    else:
        out_path += '.csv'

    logger.info('out path: %s', out_path)
    logger.info('interpolation sigma_sq: %f', interp_sigma_sq)

    def yield_jobs():

        def shuffled(pairs):
            # shuffle by sorting on a freshly drawn random key per element
            return sorted(pairs, key = lambda _: numpy.random.rand())

        def build_laplacian_map(affinity_map, num_samples):
            return get_laplacian_map(
                sample_boards,
                num_samples = num_samples,
                max_eigs = max_num_features,
                neighbors = affinity_neighbors,
                affinity_map = affinity_map,
                eig_solver = eig_solver,
                )

        def interp_options():
            # a fresh dict per job so that no two jobs share keyword storage
            return dict(
                interp_neighbors = interp_neighbors,
                interp_sigma_sq = interp_sigma_sq,
                ridge_param = ridge_param,
                )

        values = shuffled(get_value_list(games_path, values_path))

        sample_boards = list(dict(values))[:num_graph_samples]

        # load or compute full feature maps
        template_map = get_template_map(2, 2, B=numpy.inf, symmetric=True)

        if feature_boost: # use 2x2 template features as affinity map for building graph
            boost_affinity = template_map
            boosted_map = build_laplacian_map(boost_affinity, num_graph_samples)
            boosted_name = 'Boosted Laplacian'

        flat_affinity = specmine.feature_maps.flat_affinity_map
        laplacian_map = build_laplacian_map(flat_affinity, num_graph_samples)
        laplacian_name = 'Laplacian'
        small_map = build_laplacian_map(flat_affinity, num_graph_samples/2.)
        small_name = 'Laplacian-small'

        ball_tree = laplacian_map.ball_tree

        # shuffle again before testing
        values = shuffled(values)
        test_values = dict(values[:num_test_samples])

        logger.info("number of samples being used for graph features: %i", num_graph_samples)

        for NF in numpy.r_[0:max_num_features:10j].round().astype(int):
            yield (run_template_features, [test_values, template_map, NF], dict(ridge_param = ridge_param))
            yield (run_laplacian_features, [test_values, laplacian_name, laplacian_map, NF, flat_affinity], interp_options())
            yield (run_laplacian_features, [test_values, small_name, small_map, NF, flat_affinity], interp_options())

            if feature_boost:
                yield (run_laplacian_features, [test_values, boosted_name, boosted_map, NF, boost_affinity], interp_options())

            yield (run_random_features, [test_values, NF, ball_tree, flat_affinity], interp_options())

    header = ["map_name", "features", "samples", "score_mean", "score_variance"]

    with open(out_path, "wb") as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)

        for (_, result_row) in condor.do(yield_jobs(), workers):
            writer.writerow(result_row)
Exemplo n.º 32
0
def main(
    suite_path,
    tasks_root,
    budget,
    only_missing=False,
    store_answers=False,
    only_solver=None,
    runs=4,
    suffix=".runs.csv",
    workers=0,
):
    """Collect solver running-time data.

    Every solver is run on every task file found under tasks_root; each
    outcome is appended as one row to the CSV file next to the task
    (task path + suffix), with a header row if the file is new.

    Arguments:
        suite_path: solver suite, loaded with borg.load_solvers() and also
            passed to each run_solver_on job.
        tasks_root: directory scanned for files with the suite domain's
            extensions.
        budget: budget passed to each run_solver_on job.
        only_missing: when true and a task's CSV already exists, only as many
            runs are scheduled per solver as are missing to reach `runs`.
        store_answers: forwarded to run_solver_on.
        only_solver: restrict the runs to this solver name; all solvers of the
            suite are used when None.
        runs: number of runs per solver and task.
        suffix: appended to a task path to form its CSV path.
        workers: worker count handed to condor.do().

    Raises:
        ValueError: if no task files are found under tasks_root.
    """

    # restrict the condor machines that jobs may be matched to
    condor.defaults.condor_matching = \
        "InMastodon" \
        " && regexp(\"rhavan-.*\", ParallelSchedulingGroup)" \
        " && (Arch == \"X86_64\")" \
        " && (OpSys == \"LINUX\")" \
        " && (Memory > 1024)"

    def yield_runs():
        suite = borg.load_solvers(suite_path)

        logger.info("scanning paths under %s", tasks_root)

        paths = list(borg.util.files_under(tasks_root,
                                           suite.domain.extensions))

        if not paths:
            raise ValueError("no paths found under specified root")

        if only_solver is None:
            solver_names = suite.solvers.keys()
        else:
            solver_names = [only_solver]

        for path in paths:
            run_data = None

            if only_missing and os.path.exists(path + suffix):
                run_data = numpy.recfromcsv(path + suffix, usemask=True)

            for solver_name in solver_names:
                if only_missing and run_data is not None:
                    # schedule only the runs that are not on record yet
                    count = max(
                        0, runs - numpy.sum(run_data.solver == solver_name))
                else:
                    count = runs

                logger.info("scheduling %i run(s) of %s on %s", count,
                            solver_name, os.path.basename(path))

                for _ in xrange(count):
                    seed = numpy.random.randint(sys.maxint)

                    yield (run_solver_on, [
                        suite_path, solver_name, path, budget, store_answers,
                        seed
                    ])

    for (_, row) in condor.do(yield_runs(), workers):
        # unpack run outcome; the reported budget gets its own name so that it
        # does not rebind `budget`, which yield_runs() reads for later jobs
        (cnf_path, solver_name, run_budget, cost, succeeded, answer) = row

        if answer is None:
            answer_text = None
        else:
            answer_text = base64.b64encode(zlib.compress(pickle.dumps(answer)))

        # write it to disk
        csv_path = cnf_path + suffix
        existed = os.path.exists(csv_path)

        with open(csv_path, "a") as csv_file:
            writer = csv.writer(csv_file)

            if not existed:
                writer.writerow(
                    ["solver", "budget", "cost", "succeeded", "answer"])

            writer.writerow(
                [solver_name, run_budget, cost, succeeded, answer_text])
Exemplo n.º 33
0
def main(
    suite_path,
    tasks_root,
    budget,
    only_missing = False,
    store_answers = False,
    only_solver = None,
    runs = 4,
    suffix = ".runs.csv",
    workers = 0,
    ):
    """Collect solver running-time data.

    Schedules run_solver_on jobs through condor for each (task, solver) pair
    and appends every outcome to the per-task CSV file at task path + suffix.
    A header row is written first when that file does not exist yet.

    Arguments:
        suite_path: solver suite, loaded with borg.load_solvers() and also
            passed to each job.
        tasks_root: directory scanned for files with the suite domain's
            extensions.
        budget: budget passed to each job.
        only_missing: when true, runs already recorded in an existing per-task
            CSV count towards `runs`, and only the remainder is scheduled.
        store_answers: forwarded to run_solver_on.
        only_solver: single solver name to run; None selects every solver of
            the suite.
        runs: number of runs per solver and task.
        suffix: appended to a task path to form its CSV path.
        workers: worker count handed to condor.do().

    Raises:
        ValueError: if no task files are found under tasks_root.
    """

    # restrict the condor machines that jobs may be matched to
    condor.defaults.condor_matching = \
        "InMastodon" \
        " && regexp(\"rhavan-.*\", ParallelSchedulingGroup)" \
        " && (Arch == \"X86_64\")" \
        " && (OpSys == \"LINUX\")" \
        " && (Memory > 1024)"

    def yield_runs():
        suite = borg.load_solvers(suite_path)

        logger.info("scanning paths under %s", tasks_root)

        paths = list(borg.util.files_under(tasks_root, suite.domain.extensions))

        if not paths:
            raise ValueError("no paths found under specified root")

        if only_solver is None:
            solver_names = suite.solvers.keys()
        else:
            solver_names = [only_solver]

        for path in paths:
            run_data = None

            if only_missing and os.path.exists(path + suffix):
                run_data = numpy.recfromcsv(path + suffix, usemask = True)

            for solver_name in solver_names:
                if only_missing and run_data is not None:
                    # top up to `runs`, never scheduling a negative number
                    count = max(0, runs - numpy.sum(run_data.solver == solver_name))
                else:
                    count = runs

                logger.info("scheduling %i run(s) of %s on %s", count, solver_name, os.path.basename(path))

                for _ in xrange(count):
                    seed = numpy.random.randint(sys.maxint)

                    yield (run_solver_on, [suite_path, solver_name, path, budget, store_answers, seed])

    for (_, row) in condor.do(yield_runs(), workers):
        # unpack run outcome; binding the reported budget to `budget` would
        # overwrite the value that yield_runs() still reads for later jobs
        (cnf_path, solver_name, run_budget, cost, succeeded, answer) = row

        if answer is None:
            answer_text = None
        else:
            answer_text = base64.b64encode(zlib.compress(pickle.dumps(answer)))

        # write it to disk
        csv_path = cnf_path + suffix
        existed = os.path.exists(csv_path)

        with open(csv_path, "a") as csv_file:
            writer = csv.writer(csv_file)

            if not existed:
                writer.writerow(["solver", "budget", "cost", "succeeded", "answer"])

            writer.writerow([solver_name, run_budget, cost, succeeded, answer_text])
Exemplo n.º 34
0
def do(*args, **kwargs):
    """Import condor at call time and delegate to condor.do() with the given arguments."""
    import condor as condor_module

    return condor_module.do(*args, **kwargs)
Exemplo n.º 35
0
def do(*args, **kwargs):
    """Call condor.do() with all arguments passed through unchanged.

    The condor module is imported inside the function, so it is only
    loaded when the function is actually called.
    """
    import condor as _condor

    outcome = _condor.do(*args, **kwargs)

    return outcome
Exemplo n.º 36
0
def main():
    """Run f on the integers 0 through 15 via condor with 4 workers and print each outcome."""
    calls = [(f, [x]) for x in range(16)]

    for (call, result) in condor.do(calls, 4):
        # a single pre-formatted string prints the same text under both the
        # Python 2 print statement and the Python 3 print function
        print("%s %s" % (call.args, result))