def _run_experiment_args(self, results_file, data_and_splits, method_results, i_labels, split):
    """Train and test the configured learner on one (label count, split) pair.

    If `_load_temp_split_file` returns a truthy value for this pair, it is
    returned immediately and nothing is trained. Otherwise a deep copy of
    `self.configs.learner` is trained and tested on
    `data_and_splits.get_split(split, num_labels)`, the group master saves
    the result to the per-split temp file, and a progress line is printed.
    The master process additionally times the run with `timer.tic()` /
    `timer.toc()`.

    `i_labels` is an index into `self.configs.num_labels`. `method_results`
    is accepted but not used in this body.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source. The scratch-directory delete is assumed to sit inside the
    group-master guard together with the save -- confirm against history.
    """
    num_labels = self.configs.num_labels[i_labels]
    s = '-'.join([str(num_labels), str(split)])

    cached_results = _load_temp_split_file(results_file, num_labels, split)
    if cached_results:
        return cached_results

    if mpi_utility.is_master():
        timer.tic()

    # The scratch directory is the temp split file path minus its '.pkl' suffix.
    split_file = _temp_split_file_name(results_file, num_labels, split)
    scratch_root = helper_functions.remove_suffix(split_file, '.pkl')
    scratch_dir = scratch_root + '/CV-temp/'
    split_data = data_and_splits.get_split(split, num_labels)

    # Work on a copy so the per-split attributes never touch the shared learner.
    learner = self.configs.learner
    split_learner = copy.deepcopy(learner)
    split_learner.split_idx_str = s
    split_learner.temp_dir = scratch_dir
    split_results = split_learner.train_and_test(split_data)

    if mpi_utility.is_group_master():
        helper_functions.save_object(
            _temp_split_file_name(results_file, num_labels, split),
            split_results
        )
        helper_functions.delete_dir_if_exists(scratch_root)

    # Read from the shared learner's configs, not from the copy.
    instance_subset = learner.configs.instance_subset
    results_features = learner.configs.results_features
    test_error_to_print = 'is_train'

    if mpi_utility.is_group_master():
        if not hasattr(split_learner, 'best_params'):
            # Single parenthesised argument: same output as the Python 2
            # print statement.
            print(s + ' Done')
        else:
            prefix = s + '-' + str(split_learner.best_params) + ' Error: '
            error = split_results.compute_error(
                self.configs.loss_function,
                results_features,
                test_error_to_print
            )
            print(prefix + str(error))

    if mpi_utility.is_master():
        timer.toc()
    return split_results
def _run_experiment_args(self, results_file, data_and_splits, method_results, i_labels, split):
    """Train and test the configured learner on one (label count, split) pair.

    A truthy value from `_load_temp_split_file` short-circuits the call and
    is returned as-is. Otherwise a deep copy of `self.configs.learner` is
    trained and tested on `data_and_splits.get_split(split, num_labels)`,
    the group master saves the result to the per-split temp file, and a
    progress line is printed.

    `i_labels` is an index into `self.configs.num_labels`. `method_results`
    is accepted but not used in this body.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source. The scratch-directory delete is assumed to sit inside the
    group-master guard together with the save -- confirm against history.
    """
    num_labels = self.configs.num_labels[i_labels]
    s = '-'.join([str(num_labels), str(split)])

    previous = _load_temp_split_file(results_file, num_labels, split)
    if previous:
        return previous

    # The scratch directory is the temp split file path minus its '.pkl' suffix.
    pkl_path = _temp_split_file_name(results_file, num_labels, split)
    work_root = helper_functions.remove_suffix(pkl_path, '.pkl')
    work_dir = work_root + '/CV-temp/'
    fold_data = data_and_splits.get_split(split, num_labels)

    # Deep copy keeps the per-split attributes off the shared learner.
    learner = self.configs.learner
    fold_learner = copy.deepcopy(learner)
    fold_learner.split_idx_str = s
    fold_learner.temp_dir = work_dir
    fold_results = fold_learner.train_and_test(fold_data)

    if mpi_utility.is_group_master():
        helper_functions.save_object(
            _temp_split_file_name(results_file, num_labels, split),
            fold_results
        )
        helper_functions.delete_dir_if_exists(work_root)

    if mpi_utility.is_group_master():
        if not hasattr(fold_learner, 'best_params'):
            # Single parenthesised argument: same output as the Python 2
            # print statement.
            print(s + ' Done')
        else:
            prefix = s + '-' + str(fold_learner.best_params) + ' Error: '
            error = fold_results.compute_error(self.configs.loss_function)
            print(prefix + str(error))
    return fold_results
def run_experiments(self):
    """Run every configured (label count, split) experiment and save the results.

    Loads the data-and-splits object from `self.configs.data_file`, copies
    the label settings from the configs onto it, and returns early if
    `self.configs.results_file` already exists on disk. Otherwise
    `_run_experiment` is invoked once per (label-count index, split) pair --
    through a `LoggingPool` when `self.configs.use_pool` is set, serially
    otherwise -- and every non-None result is stored in a fresh
    `self.configs.method_results_class` instance.

    When `self.configs.split_idx` is not None only that split is run.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source. Deleting the temp split files and temp folder is assumed to
    happen only inside the `should_load_temp_data` branch, after the save --
    confirm against history.
    """
    cfg = self.configs
    data_and_splits = helper_functions.load_object(cfg.data_file)
    data_and_splits.data.repair_data()
    assert self.configs.num_splits <= len(data_and_splits.splits)
    data_and_splits.labels_to_keep = self.configs.labels_to_keep
    data_and_splits.labels_to_not_sample = self.configs.labels_to_not_sample
    data_and_splits.target_labels = self.configs.target_labels
    # The source calls repair_data() a second time after the label settings
    # are assigned; kept as-is.
    data_and_splits.data.repair_data()

    results_file = self.configs.results_file
    comm = mpi_utility.get_comm()

    if os.path.isfile(results_file):
        if mpi_utility.is_group_master():
            print(results_file + ' already exists - skipping')
        return

    if mpi_utility.is_group_master():
        hostname = helper_functions.get_hostname()
        print('(' + hostname + ') Running experiments: ' + results_file)

    learner = self.configs.learner
    learner.run_pre_experiment_setup(data_and_splits)

    n_label_settings = len(self.configs.num_labels)
    n_splits = self.configs.num_splits
    method_results = self.configs.method_results_class(
        n_exp=n_label_settings, n_splits=n_splits
    )
    for idx, label_count in enumerate(self.configs.num_labels):
        method_results.results_list[idx].num_labels = label_count

    # A configured split_idx restricts the run to that single split.
    split_idx = self.configs.split_idx
    splits_to_run = range(n_splits) if split_idx is None else [split_idx]
    cells = list(itertools.product(range(n_label_settings), splits_to_run))

    shared_args = (self, results_file, data_and_splits, method_results)
    args = [shared_args + (i_labels, split) for i_labels, split in cells]

    if not self.configs.use_pool:
        all_results = [_run_experiment(a) for a in args]
    else:
        pool = multiprocessing_utility.LoggingPool(processes=self.configs.pool_size)
        all_results = pool.map(_run_experiment, args)

    for cell_results, (i_labels, split) in zip(all_results, cells):
        if cell_results is not None:
            method_results.set(cell_results, i_labels, split)

    method_results.configs = self.configs
    if self.configs.should_load_temp_data:
        helper_functions.save_object(results_file, method_results)
        for i_labels, split in cells:
            label_count = self.configs.num_labels[i_labels]
            _delete_temp_split_files(results_file, label_count, split)
        _delete_temp_folder(results_file)
def run_main(num_labels=None, split_idx=None, no_viz=None, configs=None, comm=None):
    """Parse command-line options, run the experiments, then optionally visualize.

    `-num_labels`, `-split_idx` and `-no_viz` are read from `sys.argv`; any
    of the matching keyword arguments that is not None overrides the parsed
    value. `comm` is stored on `configs_lib.comm`. When the module-level
    `test_mpi` flag is truthy the function only prints
    `<rank>-<num_labels>-<split_idx>` and returns.

    Otherwise the parsed arguments are published on `configs_lib.arguments`,
    DeprecationWarnings are silenced, and `run_experiments(configs)` is
    called if `configs_lib.run_experiments` is truthy. Visualization runs
    only on a laptop, without `no_viz`, and with a single MPI process.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source. Two nestings are assumed and should be confirmed: the
    'Starting experiments...' branch is the `else` of the world-size check,
    and the winsound beep is not nested under the group-master guard.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument('-num_labels', type=int)
    parser.add_argument('-split_idx', type=int)
    parser.add_argument('-no_viz', action='store_true')
    arguments = parser.parse_args(sys.argv[1:])

    # Explicit keyword arguments take precedence over the command line.
    overrides = (
        ('num_labels', num_labels),
        ('split_idx', split_idx),
        ('no_viz', no_viz),
    )
    for attr_name, value in overrides:
        if value is not None:
            setattr(arguments, attr_name, value)

    configs_lib.comm = comm
    if test_mpi:
        from mpi4py import MPI
        # Single parenthesised argument: same output as the Python 2 print
        # statement.
        print(str(MPI.COMM_WORLD.Get_rank()) + '-' + str(arguments.num_labels) + '-' + str(arguments.split_idx))
        return

    configs_lib.arguments = arguments
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    if MPI.COMM_WORLD.Get_size() <= 1:
        print('Starting experiments...')
    elif mpi_utility.is_group_master():
        print('(' + socket.gethostname() + ')''Process ' + str(comm.Get_rank()) + ': Starting experiments...')

    if mpi_utility.is_group_master():
        timer.tic()
    if configs_lib.run_experiments:
        run_experiments(configs)
    if mpi_utility.is_group_master():
        timer.toc()

    if helper_functions.is_laptop():
        # Audible "finished" signal.
        import winsound
        winsound.Beep(440, 1000)

    should_visualize = (
        helper_functions.is_laptop()
        and not arguments.no_viz
        and MPI.COMM_WORLD.Get_size() == 1
    )
    if should_visualize:
        vis_configs = configs_lib.VisualizationConfigs()
        if vis_configs.vis_table:
            create_table()
        else:
            run_visualization()
def run_experiments(self):
    """Run every configured (label count, split) experiment and save the results.

    Obtains the data-and-splits object via `self.load_data_and_splits` and
    returns early if `self.configs.results_file` already exists on disk.
    Otherwise `_run_experiment` is invoked once per (label-count index,
    split) pair -- through a `LoggingPool` when `self.configs.use_pool` is
    set, serially otherwise -- and every non-None result is stored in a
    fresh `self.configs.method_results_class` instance.

    When `self.configs.split_idx` is not None only that split is run.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source. Deleting the temp split files and temp folder is assumed to
    happen only inside the `should_load_temp_data` branch, after the save --
    confirm against history.
    """
    data_file = self.configs.data_file
    data_and_splits = self.load_data_and_splits(data_file)
    results_file = self.configs.results_file
    comm = mpi_utility.get_comm()

    if os.path.isfile(results_file):
        if mpi_utility.is_group_master():
            # Single parenthesised argument: same output as the Python 2
            # print statement.
            print(results_file + ' already exists - skipping')
        return

    if mpi_utility.is_group_master():
        hostname = helper_functions.get_hostname()
        print('(' + hostname + ') Running experiments: ' + results_file)

    learner = self.configs.learner
    learner.run_pre_experiment_setup(data_and_splits)

    label_setting_count = len(self.configs.num_labels)
    split_count = self.configs.num_splits
    method_results = self.configs.method_results_class(
        n_exp=label_setting_count, n_splits=split_count
    )
    for position, n_labeled in enumerate(self.configs.num_labels):
        method_results.results_list[position].num_labels = n_labeled

    # A configured split_idx restricts the run to that single split.
    split_idx = self.configs.split_idx
    chosen_splits = range(split_count) if split_idx is None else [split_idx]
    grid = list(itertools.product(range(label_setting_count), chosen_splits))

    shared_args = (self, results_file, data_and_splits, method_results)
    args = [shared_args + (i_labels, split) for i_labels, split in grid]

    if not self.configs.use_pool:
        all_results = [_run_experiment(a) for a in args]
    else:
        pool = multiprocessing_utility.LoggingPool(processes=self.configs.pool_size)
        all_results = pool.map(_run_experiment, args)

    for outcome, (i_labels, split) in zip(all_results, grid):
        if outcome is not None:
            method_results.set(outcome, i_labels, split)

    method_results.configs = self.configs
    if self.configs.should_load_temp_data:
        helper_functions.save_object(results_file, method_results)
        for i_labels, split in grid:
            n_labeled = self.configs.num_labels[i_labels]
            _delete_temp_split_files(results_file, n_labeled, split)
        _delete_temp_folder(results_file)