def main(
    out_path,
    portfolio_name,
    train_bundle,
    test_bundle,
    single=None,
    workers=0,
    local=False,
):
    """Simulate portfolio and solver behavior."""

    # generate jobs
    def yield_runs():
        train_data = borg.storage.RunData.from_bundle(train_bundle)
        test_data = borg.storage.RunData.from_bundle(test_bundle)

        if portfolio_name == "-":
            if single is None:
                makers = map(SolverMaker, train_data.solver_names)
            else:
                makers = map(SolverMaker, [single])
        else:
            makers = [PortfolioMaker(portfolio_name)]

        for maker in makers:
            for _ in xrange(4):
                yield (simulate_split, [maker, train_data, test_data])

    # and run them
    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["path", "solver", "budget", "cost", "success", "answer", "split"])

        condor.do(yield_runs(), workers, lambda _, r: writer.writerows(r), local)
def main(out_path, bundle, workers=0, local=False):
    """Evaluate the mixture model(s) over a range of component counts."""

    def yield_jobs():
        run_data = borg.storage.RunData.from_bundle(bundle)
        validation = sklearn.cross_validation.ShuffleSplit(len(run_data), 64, test_fraction=0.2, indices=False)

        for (train_mask, test_mask) in validation:
            split = uuid.uuid4()

            for K in range(1, 64):
                for model_name in ["mul-dirmix", "mul-dirmatmix"]:
                    yield (evaluate_split, [run_data, model_name, K, split, train_mask, test_mask])

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["model_name", "components", "instances", "split", "mean_log_probability"])

        for (_, row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(row)

            out_file.flush()
def main(
    out_path,
    num_boards=10000,
    min_rollouts=128,
    workers=0,
    replacement=True,
    player=None,
    opponent=None,
    error_thresh=0.002,
):
    logger.info("generating state, value pairs using samples from given policy")

    games = []
    grids = numpy.zeros((0, 9, 9), numpy.int8)
    boards_seen = set()

    while (grids.shape[0] < num_boards) if replacement else (len(boards_seen) < num_boards):
        new_game = gen_game()

        games.append(new_game)

        boards_seen = boards_seen.union(set(map(pyfeat.go.BoardState, new_game.grids)))
        grids = numpy.vstack((grids, new_game.grids))

        logger.info("number of boards in last game's grid: %i", new_game.grids.shape[0])
        logger.info("number of boards gathered: %i", len(boards_seen))

    def yield_jobs():
        logger.info("distributing jobs for %i games", len(games))

        for game in games:
            yield (find_values, [game, min_rollouts, None, None, error_thresh])

    evaluated = {}

    for (job, values) in condor.do(yield_jobs(), workers=workers):
        (game, _, _, _, _) = job.args

        evaluated[game] = values

        print 'value of empty board: ', values[0]
        print 'empty board? : ', game.grids[0]

    logger.info("about to pickle")

    with pyfeat.util.openz(out_path, "wb") as out_file:
        pickle.dump(evaluated, out_file, protocol=-1)
def main(out_path, games_path, name=None, samples=None, rollouts=256, workers=0):
    logger.info("reading games from %s", games_path)

    with specmine.util.openz(games_path) as games_file:
        games = pickle.load(games_file)

    if name is None:
        if samples is None:
            names = games
        else:
            names = sorted(games, key=lambda _: random.random())[:samples]
    else:
        names = [name]

    def yield_jobs():
        logger.info("distributing jobs for %i games", len(names))

        for name in names:
            yield (find_values, [name, games[name], rollouts])

    evaluated = {}

    for (job, values) in condor.do(yield_jobs(), workers=workers):
        (name, _, _) = job.args

        evaluated[name] = values

    with specmine.util.openz(out_path, "wb") as out_file:
        pickle.dump(evaluated, out_file, protocol=-1)
def clustered_affinity_test(out_path, games_path, values_path, neighbors=8, workers=0, interpolate=True, off_graph=False):
    """Value prediction using features learned from a clustered graph."""

    value_list = get_value_list(games_path, values_path)

    logger.info("number of value samples total: %i", len(value_list))

    def yield_jobs():
        min_samples = 20000
        max_samples = 260000
        step_samples = 60000
        cluster_size = 10000  # average
        max_test_samples = 100000

        shuffled_values = sorted(value_list, key=lambda _: numpy.random.rand())

        for samples in xrange(min_samples, max_samples, step_samples):
            num_clusters = int(round(samples / cluster_size))

            logger.info("number of clusters used: %i", num_clusters)

            # randomly sample a subset of games
            value_dict = dict(shuffled_values[:samples])

            if off_graph:
                # limit the max number of samples tested
                test_values = dict(shuffled_values[samples:max_test_samples + 1])
            else:
                test_values = dict(shuffled_values[:samples])

            boards = value_dict.keys()
            num_boards = len(boards)

            logger.info("kept %i board samples", num_boards)

            index = dict(zip(boards, xrange(num_boards)))
            avectors_ND = numpy.array(map(specmine.go.board_to_affinity, boards))
            affinity_NN = specmine.discovery.affinity_graph(avectors_ND, neighbors, sigma=1e6)

            for B in numpy.r_[0:300:10j].round().astype(int):
                if interpolate:
                    yield (run_template_features, [2, 2, B, test_values])
                    yield (run_random_features, [B, avectors_ND, index, test_values, interpolate], dict(aff_map=affinity_map))
                    #yield (run_laplacian_features, ["Laplacian", B, avectors_ND, affinity_NN, index, test_values, interpolate], dict(aff_map=affinity_map))
                    yield (run_clustered_laplacian_features, ["affinity", B, avectors_ND, affinity_NN, index, test_values, num_clusters, interpolate], dict(aff_map=affinity_map))
                else:
                    yield (run_template_features, [2, 2, B, test_values])
                    yield (run_random_features, [B, avectors_ND, index, test_values, interpolate])
                    #yield (run_laplacian_features, ["Laplacian", B, avectors_ND, affinity_NN, index, test_values, interpolate])
                    #yield (run_graph_features, ["gameplay", B, avectors_ND, gameplay_NN, gameplay_index, test_values, num_clusters, interpolate])
                    yield (run_clustered_laplacian_features, ["affinity", B, avectors_ND, affinity_NN, index, test_values, num_clusters, interpolate])

    with open(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["map_name", "features", "samples", "score_mean", "score_variance"])

        for (_, row) in condor.do(yield_jobs(), workers):
            writer.writerow(row)
def main(out_path, bundle, workers=0, local=False):
    """Evaluate the pure multinomial model over a range of smoothing values."""

    def yield_jobs():
        run_data = borg.storage.RunData.from_bundle(bundle)
        validation = sklearn.cross_validation.KFold(len(run_data), 10, indices=False)

        for (train_mask, test_mask) in validation:
            split = uuid.uuid4()
            alphas = numpy.r_[1e-8:1e-1:64j]

            for alpha in alphas:
                yield (evaluate_split, [run_data, alpha, split, train_mask, test_mask])

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["alpha", "instances", "split", "mean_log_probability"])

        for (_, row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(row)

            out_file.flush()
def flat_affinity_test(out_path, games_path, values_path, neighbors=8, workers=0, interpolate=True, off_graph=True):
    """Test value prediction in Go."""

    value_list = get_value_list(games_path, values_path)

    logger.info("number of value samples total: %i", len(value_list))

    def yield_jobs():
        min_samples = 10000
        max_samples = 15000
        step_samples = 5000
        max_test_samples = 250000

        shuffled_values = sorted(value_list, key=lambda _: numpy.random.rand())

        for samples in xrange(min_samples, max_samples, step_samples):
            # randomly sample a subset of games
            value_dict = dict(shuffled_values[:samples])

            # if testing off-graph, use held-out samples
            if off_graph:
                test_values = dict(shuffled_values[samples:max_test_samples + 1])
            else:
                test_values = dict(shuffled_values[:samples])

            boards = value_dict.keys()
            num_boards = len(boards)

            logger.info("kept %i board samples", num_boards)

            index = dict(zip(boards, xrange(num_boards)))
            avectors_ND = numpy.array(map(specmine.go.board_to_affinity, boards))
            affinity_NN = specmine.discovery.affinity_graph(avectors_ND, neighbors, sigma=1e6)

            for B in numpy.r_[0:250:10j].round().astype(int):
                if interpolate:
                    yield (run_template_features, [2, 2, B, test_values])
                    #yield (run_template_features, [2, 3, B, test_values])
                    yield (run_template_features, [3, 3, B, test_values])
                    yield (run_random_features, [B, avectors_ND, index, test_values, interpolate], dict(aff_map=affinity_map))
                    yield (run_laplacian_features, ["Laplacian", B, avectors_ND, affinity_NN, index, test_values, interpolate], dict(aff_map=affinity_map))
                else:
                    yield (run_template_features, [2, 2, B, test_values])
                    #yield (run_template_features, [2, 3, B, test_values])
                    yield (run_template_features, [3, 3, B, test_values])
                    yield (run_random_features, [B, avectors_ND, index, test_values, interpolate])
                    yield (run_laplacian_features, ["Laplacian", B, avectors_ND, affinity_NN, index, test_values, interpolate])

    with open(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["map_name", "features", "samples", "score_mean", "score_variance"])

        for (_, row) in condor.do(yield_jobs(), workers):
            writer.writerow(row)
def do(self, tasks):
    import condor

    condor.defaults.condor_matching = (
        "InMastodon"
        " && regexp(\"rhavan-.*\", ParallelSchedulingGroup)"
        " && (Arch == \"X86_64\")"
        " && (OpSys == \"LINUX\")"
        " && (Memory > 1024)")

    return condor.do(tasks, workers=self._workers)
def main(out_path, runs, repeats=128, workers=0, local=False):
    """Simulate portfolio and solver behavior."""

    logger.info("simulating %i runs", len(runs))

    get_run_data = borg.util.memoize(borg.storage.RunData.from_bundle)

    def yield_jobs():
        for run in runs:
            all_data = get_run_data(run["bundle"])
            validation = sklearn.cross_validation.ShuffleSplit(len(all_data), repeats, test_fraction=0.2, indices=False)

            if run["portfolio_name"] == "-":
                makers = map(borg.experiments.simulate_runs.SolverMaker, all_data.solver_names)
            else:
                makers = [borg.experiments.simulate_runs.PortfolioMaker(run["portfolio_name"])]

            max_instances = len(all_data) * 0.8

            for (train_mask, test_mask) in validation:
                for instances in map(int, map(round, numpy.r_[10.0:max_instances:32j])):
                    for maker in makers:
                        yield (
                            simulate_run,
                            [
                                run,
                                maker,
                                all_data,
                                train_mask,
                                test_mask,
                                instances,
                                run["independent"],
                                run["mixture"],
                            ],
                        )

    with borg.util.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["description", "solver", "instances", "successes", "mean_time", "median_time"])

        for (_, row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(row)

            out_file.flush()
def main(out_path, experiments, workers=0, local=False):
    """Run the specified model evaluations."""

    logger.info("running %i experiments", len(experiments))

    get_run_data = borg.util.memoize(borg.storage.RunData.from_bundle)

    def yield_jobs():
        for experiment in experiments:
            logger.info("preparing experiment: %s", experiment)

            run_data = get_run_data(experiment["run_data"])
            validation = sklearn.cross_validation.KFold(len(run_data), 5, indices=False)
            (train_mask, test_mask) = iter(validation).next()
            training = run_data.masked(train_mask).collect_systematic([2])
            testing = run_data.masked(test_mask).collect_systematic([4])
            feature_counts = range(0, len(run_data.common_features) + 1, 2)
            replications = xrange(32)
            parameters = list(itertools.product(feature_counts, replications))

            for model_name in experiment["model_names"]:
                model = borg.experiments.common.train_model(model_name, training)
                model.name = model_name

                for (feature_count, _) in parameters:
                    shuffled_names = sorted(run_data.common_features, key=lambda _: numpy.random.random())
                    selected_names = sorted(shuffled_names[:feature_count])

                    yield (evaluate_features, [model, testing, selected_names])

    with borg.util.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["model_name", "features", "score_name", "score"])

        for (_, rows) in condor.do(yield_jobs(), workers, local):
            writer.writerows(rows)

            out_file.flush()
def main(out_path, runs, repeats=5, workers=0, local=False):
    """Simulate portfolio and solver behavior."""

    logger.info("simulating %i runs", len(runs) * repeats)

    get_run_data = borg.util.memoize(borg.storage.RunData.from_bundle)

    def yield_jobs():
        for run in runs:
            train_data = get_run_data(run["train_bundle"])

            if run.get("only_nontrivial", False):
                train_data = train_data.only_nontrivial()

            if run["test_bundle"] == "-":
                validation = sklearn.cross_validation.KFold(len(train_data), repeats, indices=False)
                data_sets = [(train_data.masked(v), train_data.masked(e)) for (v, e) in validation]
            else:
                test_data = get_run_data(run["test_bundle"])

                if run.get("only_nontrivial", False):
                    test_data = test_data.only_nontrivial()

                data_sets = [(train_data, test_data)] * repeats

            if run["portfolio_name"] == "-":
                makers = map(SolverMaker, train_data.solver_names)
            else:
                makers = [PortfolioMaker(run["portfolio_name"])]

            for maker in makers:
                for (train_fold_data, test_fold_data) in data_sets:
                    yield (simulate_run, [run, maker, train_fold_data, test_fold_data])

    with borg.util.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["category", "solver", "budget", "cost", "success", "split"])

        for (_, rows) in condor.do(yield_jobs(), workers, local):
            writer.writerows(rows)

            out_file.flush()
def main(out_path, experiments, workers=0, local=False):
    """Run the specified model evaluations."""

    logger.info("running %i experiments", len(experiments))

    get_run_data = borg.util.memoize(borg.storage.RunData.from_bundle)

    def yield_jobs():
        for experiment in experiments:
            logger.info("preparing experiment: %s", experiment)

            run_data = get_run_data(experiment["run_data"])
            validation = sklearn.cross_validation.ShuffleSplit(len(run_data), 32, test_fraction=0.1, indices=False)
            max_instance_count = numpy.floor(0.9 * len(run_data)) - 10
            instance_counts = map(int, map(round, numpy.r_[10:max_instance_count:24j]))

            for (train_mask, test_mask) in validation:
                for instance_count in instance_counts:
                    yield (
                        evaluate_split,
                        [
                            run_data,
                            experiment["model_name"],
                            experiment["mixture"],
                            experiment["independent"],
                            instance_count,
                            train_mask,
                            test_mask,
                        ],
                    )

    with borg.util.openz(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["model_name", "sampling", "instances", "mean_log_probability"])

        for (_, row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(row)

            out_file.flush()
def main(out_path, bundle, workers=0, local=False):
    """Evaluate planner performance over a range of bin counts."""

    def yield_jobs():
        run_data = borg.storage.RunData.from_bundle(bundle)
        planner_names = ["knapsack", "streeter", "bellman"]
        bin_counts = xrange(1, 121)
        replications = xrange(16)
        experiments = itertools.product(planner_names, bin_counts, replications)

        for (planner_name, bin_count, _) in experiments:
            if planner_name != "bellman" or bin_count <= 5:
                yield (run_experiment, [run_data, planner_name, bin_count])

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["planner", "bins", "rate"])

        for (_, row) in condor.do(yield_jobs(), workers, local):
            writer.writerow(row)
def main(domain_name, instances_root, suffix=".features.csv", skip_existing=False, workers=0): """Collect task features.""" condor.defaults.condor_matching = \ "InMastodon" \ " && regexp(\"rhavan-.*\", ParallelSchedulingGroup)" \ " && (Arch == \"X86_64\")" \ " && (OpSys == \"LINUX\")" \ " && (Memory > 1024)" def yield_runs(): if os.path.exists(domain_name): domain = borg.load_solvers(domain_name).domain else: domain = borg.get_domain(domain_name) paths = list(borg.util.files_under(instances_root, domain.extensions)) count = 0 for path in paths: if skip_existing and os.path.exists(path + suffix): continue count += 1 yield (features_for_path, [domain, path]) logger.info("collecting features for %i instances", count) for (task, (names, values)) in condor.do(yield_runs(), workers): (_, cnf_path) = task.args csv_path = cnf_path + suffix with open(csv_path, "wb") as csv_file: csv.writer(csv_file).writerow(names) csv.writer(csv_file).writerow(values)
def main(out_path, games_path, values_path, neighbors=8, workers=0, off_graph=True):
    """Test value prediction in Go."""

    value_list = get_value_list(games_path, values_path)

    logger.info("number of value samples total: %i", len(value_list))

    def yield_jobs():
        samples = 20000
        shuffled_values = sorted(value_list, key=lambda _: numpy.random.rand())

        # randomly sample a subset
        value_dict = dict(shuffled_values[:samples])

        test_samples = 20000

        if off_graph:
            test_values = dict(shuffled_values[-test_samples:])
        else:
            test_values = dict(shuffled_values[:test_samples])

        logger.info("kept %i board samples", len(value_dict))

        avectors_ND = numpy.array(map(specmine.go.board_to_affinity, value_dict))

        #for B in numpy.r_[1:200:8j].round().astype(int):
        for B in [200]:
            yield (run_graph_features, ["affinity", B, avectors_ND, test_values])

    with open(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["map_name", "features", "score_mean", "score_variance"])

        for (_, row) in condor.do(yield_jobs(), workers):
            writer.writerow(row)

            out_file.flush()
def main(out_path, bundle, experiments, workers=0, local=False):
    """Write the actual output of multiple models."""

    def yield_jobs():
        run_data = borg.storage.RunData.from_bundle(bundle)

        for experiment in experiments:
            yield (
                infer_distributions,
                [
                    run_data,
                    experiment["model_name"],
                    experiment["instance"],
                    experiment["exclude"],
                ],
            )

    with open(out_path, "w") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["model_name", "instance", "solver", "bin", "probability"])

        for (_, rows) in condor.do(yield_jobs(), workers, local):
            writer.writerows(rows)
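# A hypothetical stub for infer_distributions, shaped to match the CSV schema
# written above: each job returns one row per (solver, bin) cell of the
# inferred run-time distribution. The solver name and uniform placeholder
# probabilities are illustrative only.

def infer_distributions(run_data, model_name, instance, exclude):
    return [[model_name, instance, "solver-0", b, 0.1] for b in xrange(10)]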
def main(
    workers=0,
    k=36,
    encoding='tile',
    env_size=9,
    n_runs=1,
    n_reward_samples=2500,
    n_reward_runs=25,
    lam=0.,
    gam=0.995,
    beta=0.995,
    alpha=1.,
    eta=0.5,
    eps=1e-5,
    patience=16,
    max_iter=8,
    l1theta=None,
    l1code=None,
    l2code=None,
    n_samples=None,
    nonlin=None,
    nonzero=None,
    training_methods=None,
    min_imp=0.0001,
    min_delta=1e-6,
    fldir='/scratch/cluster/ccor/feature-learning/',
    req_rew=False,
    record_runs=False,
):
    if n_samples:
        n_samples = map(int, n_samples.split(','))

    beta_ratio = beta / gam

    # append reward to basis when using perfect info?
    if training_methods is None:
        training_methods = [
            (['prediction', 'bellman'], [['theta-all'], ['theta-all', 'w']]),
            (['bellman', 'prediction', 'bellman'], [['theta-all', 'w'], ['theta-all'], ['theta-all', 'w']]),
            (['full_covariance', 'bellman'], [['theta-all'], ['theta-all', 'w']]),
            (['covariance', 'bellman'], [['theta-all'], ['theta-all', 'w']]),
            (['full_laplacian'], [['theta-all', 'w']]),
            (['laplacian'], [['theta-all', 'w']]),
            (['bellman'], [['theta-all', 'w']]),  # baseline
        ]

    if n_samples:
        losses = ['sample-reward', 'test-lsbellman', 'test-bellman', 'test-reward',
                  'test-model', 'test-fullmodel',
                  'true-policy', 'true-policy-uniform', 'true-bellman', 'true-lsbellman',
                  'true-reward', 'true-model', 'true-fullmodel', 'true-lsq']
    else:
        losses = ['sample-reward', 'true-policy-uniform', 'true-policy', 'true-bellman',
                  'true-lsbellman', 'true-reward', 'true-model', 'true-fullmodel', 'true-lsq']

    logger.info('building environment of size %i' % env_size)

    mdp = grid_world.MDP(walls_on=True, size=env_size)
    env = mdp.env
    n_states = env_size ** 2
    m = Model(mdp.R, mdp.P, gam=gam)

    # create the raw data encoder (constant appended in encoder by default)
    if encoding == 'tabular':
        encoder = TabularFeatures(env_size, append_const=True)
    elif encoding == 'tile':
        encoder = TileFeatures(env_size, append_const=True)
    elif encoding == 'factored':
        raise NotImplementedError

    def sample(n):
        logger.info('sampling from a grid world')

        # currently defaults to on-policy sampling; the mdp returns
        # n+1 states and n rewards
        n_extra = calc_discount_horizon(lam, gam, eps) - 1
        kw = dict(n_samples=n + n_extra, encoder=encoder, req_rew=req_rew)

        R, X, _ = mdp.sample_encoding(**kw)

        if req_rew:
            logger.info('reward required')

            assert sum(R.todense()) > 0

            logger.info('reward sum: %.2f' % sum(R.todense()))

        R_val, X_val, _ = mdp.sample_encoding(**kw)
        R_test, X_test, _ = mdp.sample_encoding(**kw)

        weighting = 'policy'

        return (X, X_val, X_test), (R, R_val, R_test), weighting

    def full_info():
        logger.info('using perfect information')

        # generate stacked matrices of I, P, P^2, ...
        R = numpy.array([])
        S = sp.eye(n_states, n_states)
        P = sp.eye(n_states, n_states)

        for i in xrange(calc_discount_horizon(lam, gam, eps)):
            R = numpy.append(R, P * m.R)
            P = m.P * P
            S = sp.vstack((S, P))

        X = encoder.encode(S)
        R = sp.csr_matrix(R[:, None])
        X_val = X_test = X
        R_val = R_test = R
        weighting = 'uniform'

        return (X, X_val, X_test), (R, R_val, R_test), weighting

    reg = None

    if l1theta is not None:
        reg = ('l1theta', l1theta)
    if l1code is not None:
        reg = ('l1code', l1code)
    if l2code is not None:
        reg = ('l2code', l2code)

    run_param_keys = ['k', 'method', 'encoding', 'samples',
                      'reward_samples', 'reward_runs', 'size', 'weighting',
                      'lambda', 'gamma', 'alpha', 'eta',
                      'regularization', 'nonlinear']

    def yield_jobs():
        for i, n in enumerate(n_samples or [n_states]):
            logger.info('creating job with %i samples/states' % n)

            # build the bellman operator mixing matrices
            logger.info('making mixing matrices')

            Mphi, Mrew = BellmanBasis.get_mixing_matrices(n, lam, gam, sampled=bool(n_samples), eps=eps)

            for r in xrange(n_runs):
                n_features = encoder.n_features

                # initialize parameters
                theta_init = numpy.random.standard_normal((n_features, k))
                theta_init /= numpy.sqrt((theta_init * theta_init).sum(axis=0))
                w_init = numpy.random.standard_normal((k + 1, 1))
                w_init = w_init / numpy.linalg.norm(w_init)

                # sample data, or gather the full-information matrices
                X_data, R_data, weighting = sample(n) if n_samples else full_info()

                bb_params = [n_features, [k], beta_ratio]
                bb_dict = dict(alpha=alpha, reg_tuple=reg, nonlin=nonlin, nonzero=nonzero, thetas=[theta_init])

                for j, tm in enumerate(training_methods):
                    loss_list, wrt_list = tm

                    assert len(loss_list) == len(wrt_list)

                    run_param_values = [
                        k, tm, encoder, n, n_reward_samples, n_reward_runs,
                        env_size, weighting, lam, gam, alpha, eta,
                        reg[0] + str(reg[1]) if reg else 'None',
                        nonlin if nonlin else 'None']
                    d_run_params = dict(izip(run_param_keys, run_param_values))

                    yield (train_basis,
                           [d_run_params, bb_params, bb_dict, env, m, losses,  # environment, model, and loss list
                            X_data, R_data, Mphi, Mrew,  # training data
                            max_iter, patience, min_imp, min_delta,  # optimization params
                            fldir, record_runs])  # recording params

    # create the output file path
    date_str = time.strftime('%y%m%d.%X').replace(':', '')
    out_dir = fldir + 'sirf/output/csv/'
    root = '%s.%s_results' % (date_str, 'n_samples' if n_samples else 'full_info')
    d_experiment_params = dict(izip(
        ['k', 'encoding', 'size', 'lambda', 'gamma', 'alpha', 'regularization', 'nl'],
        [k, encoder, env_size, lam, gam, alpha,
         reg[0] + str(reg[1]) if reg else 'None',
         nonlin if nonlin else 'None']))
    save_path = out_string(out_dir, root, d_experiment_params, '.csv.gz')

    logger.info('saving results to %s' % save_path)

    # get the ordered column title list (params | losses) using dummy dicts
    d_param = dict(izip(run_param_keys, numpy.zeros(len(run_param_keys))))
    d_loss = dict(izip(losses, numpy.zeros(len(losses))))
    col_keys_array, _ = reorder_columns(d_param, d_loss)

    with openz(save_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(col_keys_array)

        for (_, out) in condor.do(yield_jobs(), workers):
            keys, vals = out

            assert (keys == col_keys_array).all()  # TODO: catch mismatches gracefully

            writer.writerow(vals)
def experiment(
    workers=80,
    n_runs=9,
    k=16,
    env_size=15,
    gam=0.998,
    lam=0.,
    eps=1e-5,
    partition=None,
    patience=8,
    max_iter=8,
    weighting='uniform',
    reward_init=False,
    nonlin=1e-8,
    n_samples=None,
    beta_ratio=1.,
    training_methods=None,
):
    if training_methods is None:
        # note: each loss string needs a corresponding wrt list
        if reward_init:
            training_methods = [
                (['prediction'], [['theta-model']]),
                (['prediction', 'layered'], [['theta-model'], ['theta-model', 'w']]),
                (['covariance'], [['theta-model']]),  # with reward, without fine-tuning
                (['covariance', 'layered'], [['theta-model'], ['theta-model', 'w']]),  # theta-model here for 2nd wrt?
                (['layered'], [['theta-all', 'w']]),  # baseline
            ]
        else:
            training_methods = [
                (['prediction'], [['theta-all']]),
                (['prediction', 'layered'], [['theta-all'], ['theta-all', 'w']]),
                (['covariance'], [['theta-all']]),  # with reward, without fine-tuning
                (['covariance', 'layered'], [['theta-all'], ['theta-all', 'w']]),  # theta-model here for 2nd wrt?
                (['layered'], [['theta-all', 'w']]),  # baseline
            ]

    theano.gof.compilelock.set_lock_status(False)
    theano.config.on_unused_input = 'ignore'
    theano.config.warn.sum_div_dimshuffle_bug = False

    if n_samples is None:
        n_samples = numpy.round(numpy.linspace(50, 1500, 6)).astype(int)

    if partition is None:
        partition = {'theta-model': k - 1, 'theta-reward': 1}

    mdp = grid_world.MDP(walls_on=True, size=env_size)
    m = Model(mdp.env.R, mdp.env.P, gam=gam)
    dim = env_size ** 2

    # tracked losses
    losses = ['test-bellman', 'test-reward', 'test-model', 'true-bellman', 'true-lsq']

    logger.info('losses tracked: ' + str(losses))

    d_loss_data = {}

    for key in losses:
        d_loss_data[key] = numpy.zeros((len(n_samples), n_runs, len(training_methods)))

    def yield_jobs():
        for i, n in enumerate(n_samples):
            Mphi, Mrew = BellmanBasis.get_mixing_matrices(n, lam, gam, sampled=True, eps=eps)

            for r in xrange(n_runs):
                # initialize features with unit norm
                theta_init = numpy.random.standard_normal((dim + 1, k))

                if reward_init:
                    theta_init[:-1, -1] = m.R  # XXX set last column to reward
                    theta_init[-1, -1] = 0

                theta_init /= numpy.sqrt((theta_init * theta_init).sum(axis=0))

                w_init = numpy.random.standard_normal((k + 1, 1))
                w_init = w_init / numpy.linalg.norm(w_init)

                # sample data: training, validation, and test sets
                S, Sp, R, _ = mdp.sample_grid_world(n, distribution=weighting)
                S = numpy.vstack((S, Sp[-1, :]))
                S_val, Sp_val, R_val, _ = mdp.sample_grid_world(n, distribution=weighting)
                S_val = scipy.sparse.vstack((S_val, Sp_val[-1, :]))
                S_test, Sp_test, R_test, _ = mdp.sample_grid_world(n, distribution=weighting)
                S_test = scipy.sparse.vstack((S_test, Sp_test[-1, :]))

                bb = BellmanBasis(dim + 1, k, beta_ratio, partition=partition,
                                  theta=theta_init, w=w_init, record_loss=losses, nonlin=nonlin)

                for j, tm in enumerate(training_methods):
                    yield (condor_job,
                           [(i, r, j), bb, m, tm,
                            S, R, S_val, R_val, S_test, R_test,
                            Mphi, Mrew, patience, max_iter, weighting])

    # aggregate the condor results
    for (_, result) in condor.do(yield_jobs(), workers):
        d_batch_loss, ind_tuple = result

        for name in d_batch_loss.keys():
            d_loss_data[name][ind_tuple] = d_batch_loss[name]

    # save results
    pi_root = 'n_samples_results_rinit' if reward_init else 'n_samples_results'
    out_path = os.getcwd() + '/sirf/output/pickle/%s.no_r.k=%i.l=%s.g=%s.%s.size=%i.r=%i..pickle.gz' \
        % (pi_root, k, str(lam), str(gam), weighting, env_size, n_runs)

    logger.info('saving results to %s' % out_path)

    with util.openz(out_path, "wb") as out_file:
        pickle.dump(d_loss_data, out_file, protocol=-1)

    # plot the mean and std of each loss across sample sizes
    x = numpy.array(n_samples, dtype=numpy.float64)
    f = plt.figure()

    logger.info('plotting')

    plot_styles = ['r-', 'b-', 'g-', 'k-', 'c-', 'm-']

    for i, (key, mat) in enumerate(d_loss_data.items()):
        ax = f.add_subplot(2, 3, i + 1)  # todo: generalize for arbitrary length

        for h, tm in enumerate(training_methods):
            std = numpy.std(mat[:, :, h], axis=1)
            mn = numpy.mean(mat[:, :, h], axis=1)

            if 'test' in key:
                mn = mn / x
                std = std / x

            ax.fill_between(x, mn - std, mn + std, facecolor='yellow', alpha=0.15)
            ax.plot(x, mn, plot_styles[h], label=str(tm[0]))

        plt.title(key)

    pl_root = 'n_samples_rinit' if reward_init else 'n_samples'

    plt.savefig(os.getcwd() + '/sirf/output/plots/%s.n=%i-%i.k=%i.l=%s.g=%s.%s.size=%i.r=%i.pdf'
                % (pl_root, n_samples[0], n_samples[-1], k, str(lam), str(gam), weighting, env_size, n_runs))
def do(*args, **kwargs):
    return condor.do(*args, **kwargs)
def measure_feature_performance(
    games_path,
    values_path,
    workers=0,
    affinity_neighbors=8,
    interp_neighbors=8,
    interp_sigma_sq=-1,
    num_graph_samples=20000,
    num_test_samples=80000,
    max_num_features=500,
    ridge_param=0.01,
    feature_boost=True,
    eig_solver='arpack',
):
    value_player = ''

    if 'random' in values_path:
        value_player = 'random'
    elif 'alp' in values_path:
        value_player = 'alp'

    out_path = str.format(
        'specmine/static/experiments/go_feature_performance.RMSE.{p}.rp={r}.ngs={g}.nts={t}.nf={f}.nan={an}.nin={inn}.is={sig}.{ei}',
        p=value_player, r=ridge_param, g=num_graph_samples, t=num_test_samples, f=max_num_features,
        an=affinity_neighbors, inn=interp_neighbors, sig=interp_sigma_sq, ei=eig_solver)
    out_path += '.boost.csv' if feature_boost else '.csv'

    logger.info('out path: %s', out_path)
    logger.info('interpolation sigma_sq: %f', interp_sigma_sq)

    def yield_jobs():
        values = get_value_list(games_path, values_path)
        values = sorted(values, key=lambda _: numpy.random.rand())  # shuffle values
        full_value_dict = dict(values)
        sample_boards = full_value_dict.keys()[:num_graph_samples]

        # load or compute the full feature maps
        full_2x2_temp_map = get_template_map(2, 2, B=numpy.inf, symmetric=True)

        if feature_boost:
            # use 2x2 template features as the affinity map for building the graph
            aff_map_boost = full_2x2_temp_map
            full_laplacian_map_boosted = get_laplacian_map(
                sample_boards, num_samples=num_graph_samples, max_eigs=max_num_features,
                neighbors=affinity_neighbors, affinity_map=aff_map_boost, eig_solver=eig_solver)
            laplace_map_name_boosted = 'Boosted Laplacian'

        aff_map = specmine.feature_maps.flat_affinity_map
        full_laplacian_map = get_laplacian_map(
            sample_boards, num_samples=num_graph_samples, max_eigs=max_num_features,
            neighbors=affinity_neighbors, affinity_map=aff_map, eig_solver=eig_solver)
        laplace_map_name = 'Laplacian'

        full_laplacian_map_small = get_laplacian_map(
            sample_boards, num_samples=num_graph_samples / 2., max_eigs=max_num_features,
            neighbors=affinity_neighbors, affinity_map=aff_map, eig_solver=eig_solver)
        laplace_map_name_small = 'Laplacian-small'

        ball_tree = full_laplacian_map.ball_tree

        values = sorted(values, key=lambda _: numpy.random.rand())  # shuffle again before testing
        test_values = dict(values[:num_test_samples])

        logger.info("number of samples being used for graph features: %i", num_graph_samples)

        for NF in numpy.r_[0:max_num_features:10j].round().astype(int):
            yield (run_template_features,
                   [test_values, full_2x2_temp_map, NF],
                   dict(ridge_param=ridge_param))
            yield (run_laplacian_features,
                   [test_values, laplace_map_name, full_laplacian_map, NF, aff_map],
                   dict(interp_neighbors=interp_neighbors, interp_sigma_sq=interp_sigma_sq, ridge_param=ridge_param))
            yield (run_laplacian_features,
                   [test_values, laplace_map_name_small, full_laplacian_map_small, NF, aff_map],
                   dict(interp_neighbors=interp_neighbors, interp_sigma_sq=interp_sigma_sq, ridge_param=ridge_param))

            if feature_boost:
                yield (run_laplacian_features,
                       [test_values, laplace_map_name_boosted, full_laplacian_map_boosted, NF, aff_map_boost],
                       dict(interp_neighbors=interp_neighbors, interp_sigma_sq=interp_sigma_sq, ridge_param=ridge_param))

            yield (run_random_features,
                   [test_values, NF, ball_tree, aff_map],
                   dict(interp_neighbors=interp_neighbors, interp_sigma_sq=interp_sigma_sq, ridge_param=ridge_param))

    with open(out_path, "wb") as out_file:
        writer = csv.writer(out_file)

        writer.writerow(["map_name", "features", "samples", "score_mean", "score_variance"])

        for (_, row) in condor.do(yield_jobs(), workers):
            writer.writerow(row)
def main(
    suite_path,
    tasks_root,
    budget,
    only_missing=False,
    store_answers=False,
    only_solver=None,
    runs=4,
    suffix=".runs.csv",
    workers=0,
):
    """Collect solver running-time data."""

    condor.defaults.condor_matching = \
        "InMastodon" \
        " && regexp(\"rhavan-.*\", ParallelSchedulingGroup)" \
        " && (Arch == \"X86_64\")" \
        " && (OpSys == \"LINUX\")" \
        " && (Memory > 1024)"

    def yield_runs():
        suite = borg.load_solvers(suite_path)

        logger.info("scanning paths under %s", tasks_root)

        paths = list(borg.util.files_under(tasks_root, suite.domain.extensions))

        if not paths:
            raise ValueError("no paths found under specified root")

        if only_solver is None:
            solver_names = suite.solvers.keys()
        else:
            solver_names = [only_solver]

        for path in paths:
            run_data = None

            if only_missing and os.path.exists(path + suffix):
                run_data = numpy.recfromcsv(path + suffix, usemask=True)

            for solver_name in solver_names:
                if only_missing and run_data is not None:
                    count = max(0, runs - numpy.sum(run_data.solver == solver_name))
                else:
                    count = runs

                logger.info("scheduling %i run(s) of %s on %s", count, solver_name, os.path.basename(path))

                for _ in xrange(count):
                    seed = numpy.random.randint(sys.maxint)

                    yield (run_solver_on, [suite_path, solver_name, path, budget, store_answers, seed])

    for (task, row) in condor.do(yield_runs(), workers):
        # unpack the run outcome
        (cnf_path, solver_name, budget, cost, succeeded, answer) = row

        if answer is None:
            answer_text = None
        else:
            answer_text = base64.b64encode(zlib.compress(pickle.dumps(answer)))

        # append it to the instance's run file
        csv_path = cnf_path + suffix
        existed = os.path.exists(csv_path)

        with open(csv_path, "a") as csv_file:
            writer = csv.writer(csv_file)

            if not existed:
                writer.writerow(["solver", "budget", "cost", "succeeded", "answer"])

            writer.writerow([solver_name, budget, cost, succeeded, answer_text])
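# A hypothetical stand-in for run_solver_on, showing only the row shape that
# the collection loop above unpacks; actual solver execution is elided. Here
# every run is treated as a timeout: cost equals the budget, with no success
# and no answer.

def run_solver_on(suite_path, solver_name, task_path, budget, store_answers, seed):
    return (task_path, solver_name, budget, budget, False, None)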
def do(*args, **kwargs):
    import condor

    return condor.do(*args, **kwargs)
def main():
    calls = [(f, [x]) for x in range(16)]

    for (call, result) in condor.do(calls, 4):
        print call.args, result
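# A minimal, self-contained sketch of the condor.do calling convention that the
# snippets above assume: jobs are (callable, args) tuples (some snippets also
# pass a kwargs dict as a third element), and condor.do yields (task, result)
# pairs as jobs complete, with the submitted arguments available as task.args.
# The serial stand-in below (sketch_do) is hypothetical and only illustrates
# that contract; it ignores the workers count rather than distributing jobs
# over a Condor pool.

import collections

Task = collections.namedtuple("Task", ["args"])

def f(x):
    return x * x

def sketch_do(calls, workers):
    # run each job in-process, yielding results in submission order
    for (func, args) in calls:
        yield (Task(tuple(args)), func(*args))

def sketch_main():
    calls = [(f, [x]) for x in range(16)]

    for (task, result) in sketch_do(calls, 4):
        print task.args, result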