예제 #1
0
def _run_experiment_args(self, results_file, data_and_splits, method_results, i_labels, split):
    """Train and evaluate one (label count, split) cell of an experiment.

    If ``_load_temp_split_file`` returns a truthy value for this cell, that
    value is returned immediately and nothing is trained. Otherwise a deep
    copy of the configured learner is trained and tested on the requested
    split; the group-master process then saves the result to the per-split
    temp file, deletes the scratch directory derived from that file name,
    and prints a one-line summary. On the master process the work is
    bracketed by ``timer.tic()`` / ``timer.toc()``.

    Args:
        self: Experiment object exposing ``configs`` (``num_labels``,
            ``learner``, ``loss_function``).
        results_file: Path of the final results file; the temp file name
            for this cell is derived from it.
        data_and_splits: Object providing ``get_split(split, num_labels)``.
        method_results: Not used by this function; kept so the signature
            is unchanged.
        i_labels: Index into ``self.configs.num_labels``.
        split: Index of the split to run.

    Returns:
        The previously saved results when available, otherwise the object
        returned by the learner's ``train_and_test``.
    """
    num_labels = self.configs.num_labels[i_labels]
    s = str(num_labels) + '-' + str(split)
    curr_results = _load_temp_split_file(results_file, num_labels, split)
    # Truthiness test: a falsy loaded value is treated as "nothing cached".
    if curr_results:
        return curr_results
    if mpi_utility.is_master():
        timer.tic()
    # Computed once and reused both for the scratch directory and the save.
    temp_file_name = _temp_split_file_name(results_file, num_labels, split)
    temp_dir_root = helper_functions.remove_suffix(temp_file_name, '.pkl')
    temp_dir = temp_dir_root + '/CV-temp/'
    curr_data = data_and_splits.get_split(split, num_labels)
    learner = self.configs.learner
    # The per-split attributes below are set on a copy, so the configured
    # learner object itself is left unmodified.
    curr_learner = copy.deepcopy(learner)
    curr_learner.split_idx_str = s
    curr_learner.temp_dir = temp_dir
    curr_results = curr_learner.train_and_test(curr_data)
    if mpi_utility.is_group_master():
        helper_functions.save_object(temp_file_name, curr_results)
        helper_functions.delete_dir_if_exists(temp_dir_root)
    # Read from the configured learner, not from the per-split copy.
    results_features = learner.configs.results_features
    test_error_to_print = 'is_train'
    if mpi_utility.is_group_master():
        # Single-argument print(...) behaves the same on Python 2 and 3.
        if hasattr(curr_learner, 'best_params'):
            print(s + '-' + str(curr_learner.best_params) + ' Error: ' +
                  str(curr_results.compute_error(self.configs.loss_function,
                                                 results_features,
                                                 test_error_to_print)))
        else:
            print(s + ' Done')
    if mpi_utility.is_master():
        timer.toc()
    return curr_results
예제 #2
0
def _run_experiment_args(self, results_file, data_and_splits, method_results, i_labels, split):
    """Run the learner for a single (label count, split) combination.

    A truthy value returned by ``_load_temp_split_file`` short-circuits the
    run and is returned unchanged. Otherwise a deep copy of
    ``self.configs.learner`` is trained and tested on the split; the
    group-master process saves the outcome to the per-split temp file,
    removes the scratch directory derived from that file name, and prints
    a short status line.

    Args:
        self: Experiment object exposing ``configs``.
        results_file: Path of the final results file; temp names derive
            from it.
        data_and_splits: Object providing ``get_split(split, num_labels)``.
        method_results: Not used by this function.
        i_labels: Index into ``self.configs.num_labels``.
        split: Index of the split to run.

    Returns:
        The previously saved results, or the object returned by the
        learner's ``train_and_test``.
    """
    label_count = self.configs.num_labels[i_labels]
    split_tag = '-'.join((str(label_count), str(split)))

    cached = _load_temp_split_file(results_file, label_count, split)
    if cached:
        return cached

    split_file = _temp_split_file_name(results_file, label_count, split)
    scratch_root = helper_functions.remove_suffix(split_file, '.pkl')
    split_data = data_and_splits.get_split(split, label_count)

    # Per-split attributes are attached to a copy of the configured learner.
    split_learner = copy.deepcopy(self.configs.learner)
    split_learner.split_idx_str = split_tag
    split_learner.temp_dir = scratch_root + '/CV-temp/'
    outcome = split_learner.train_and_test(split_data)

    if mpi_utility.is_group_master():
        helper_functions.save_object(
            _temp_split_file_name(results_file, label_count, split), outcome)
        helper_functions.delete_dir_if_exists(scratch_root)

    # Only the group master reports progress.
    if not mpi_utility.is_group_master():
        return outcome
    if hasattr(split_learner, 'best_params'):
        message = (split_tag + '-' + str(split_learner.best_params) + ' Error: ' +
                   str(outcome.compute_error(self.configs.loss_function)))
    else:
        message = split_tag + ' Done'
    print(message)
    return outcome
예제 #3
0
    def run_experiments(self):
        """Run every configured (label count, split) job and gather results.

        Loads the data-and-splits object from ``self.configs.data_file``,
        copies the label settings from the configs onto it, and returns
        early (after a message on the group master) when the results file
        already exists. Otherwise one job is built per label-count index
        and split (or only ``self.configs.split_idx`` when that is set).
        The jobs go through ``_run_experiment``, either via a
        ``LoggingPool`` when ``self.configs.use_pool`` is set or
        sequentially, and each non-``None`` outcome is stored in the
        method-results object. When ``self.configs.should_load_temp_data``
        is set, the results are saved and the per-split temp files and
        temp folder are deleted.

        Returns:
            None.
        """
        dataset = helper_functions.load_object(self.configs.data_file)
        dataset.data.repair_data()
        assert self.configs.num_splits <= len(dataset.splits)
        dataset.labels_to_keep = self.configs.labels_to_keep
        dataset.labels_to_not_sample = self.configs.labels_to_not_sample
        dataset.target_labels = self.configs.target_labels
        # NOTE(review): repair_data() is invoked a second time here, as in
        # the original; confirm whether both calls are required.
        dataset.data.repair_data()
        output_path = self.configs.results_file
        # Return value is unused; the call is kept in case it has side
        # effects (TODO confirm).
        mpi_utility.get_comm()

        if os.path.isfile(output_path):
            if mpi_utility.is_group_master():
                print(output_path + ' already exists - skipping')
            return
        if mpi_utility.is_group_master():
            host = helper_functions.get_hostname()
            print('(' + host + ') Running experiments: ' + output_path)

        self.configs.learner.run_pre_experiment_setup(dataset)
        n_label_settings = len(self.configs.num_labels)
        n_splits = self.configs.num_splits
        collected = self.configs.method_results_class(n_exp=n_label_settings,
                                                      n_splits=n_splits)
        for idx, label_count in enumerate(self.configs.num_labels):
            collected.results_list[idx].num_labels = label_count

        # A configured split_idx restricts the run to that single split.
        chosen_split = self.configs.split_idx
        if chosen_split is None:
            splits_to_run = range(n_splits)
        else:
            splits_to_run = [chosen_split]
        jobs = list(itertools.product(range(n_label_settings), splits_to_run))

        # Each job is an (i_labels, split) tuple appended to the shared prefix.
        common = (self, output_path, dataset, collected)
        job_args = [common + job for job in jobs]
        if self.configs.use_pool:
            # NOTE(review): the pool is never explicitly closed - confirm
            # whether LoggingPool needs close()/join().
            pool = multiprocessing_utility.LoggingPool(processes=self.configs.pool_size)
            outcomes = pool.map(_run_experiment, job_args)
        else:
            outcomes = [_run_experiment(one_job) for one_job in job_args]

        for outcome, (label_idx, split) in zip(outcomes, jobs):
            if outcome is not None:
                collected.set(outcome, label_idx, split)

        collected.configs = self.configs
        # NOTE(review): results are only written when should_load_temp_data
        # is set - confirm this is intended.
        if not self.configs.should_load_temp_data:
            return
        helper_functions.save_object(output_path, collected)
        for label_idx, split in jobs:
            _delete_temp_split_files(output_path,
                                     self.configs.num_labels[label_idx],
                                     split)
        _delete_temp_folder(output_path)
예제 #4
0
def run_main(num_labels=None, split_idx=None, no_viz=None, configs=None, comm=None):
    """Parse command-line options, run the experiments, then visualize.

    ``-num_labels``, ``-split_idx`` and ``-no_viz`` are parsed from
    ``sys.argv``; each same-named keyword argument that is not ``None``
    overrides the parsed value. When the module-level ``test_mpi`` flag is
    truthy, the function only prints the MPI rank with the parsed options
    and returns. Otherwise it stores the options on ``configs_lib``,
    silences ``DeprecationWarning``, runs ``run_experiments(configs)`` if
    ``configs_lib.run_experiments`` is set (timed on the group master),
    and finally - on a laptop, with visualization enabled and a single MPI
    process - builds a table or runs the visualization.

    Args:
        num_labels: Optional override for ``-num_labels``.
        split_idx: Optional override for ``-split_idx``.
        no_viz: Optional override for ``-no_viz``.
        configs: Passed through to ``run_experiments``.
        comm: Stored on ``configs_lib.comm``.

    Returns:
        None.
    """
    import argparse
    import sys

    cli = argparse.ArgumentParser()
    cli.add_argument('-num_labels', type=int)
    cli.add_argument('-split_idx', type=int)
    cli.add_argument('-no_viz', action='store_true')
    options = cli.parse_args(sys.argv[1:])

    # Explicit keyword arguments win over whatever was on the command line.
    overrides = (
        ('num_labels', num_labels),
        ('split_idx', split_idx),
        ('no_viz', no_viz),
    )
    for option_name, forced_value in overrides:
        if forced_value is not None:
            setattr(options, option_name, forced_value)

    configs_lib.comm = comm
    if test_mpi:
        from mpi4py import MPI
        print(str(MPI.COMM_WORLD.Get_rank()) + '-' + str(options.num_labels) + '-' + str(options.split_idx))
        return

    configs_lib.arguments = options
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Imported only after the warning filter is installed, as in the original.
    from mpi4py import MPI
    world = MPI.COMM_WORLD
    if MPI.COMM_WORLD.Get_size() <= 1:
        print('Starting experiments...')
    elif mpi_utility.is_group_master():
        # The adjacent literals ')' and 'Process ' concatenate with no space.
        print('(' + socket.gethostname() + ')''Process ' + str(world.Get_rank()) + ': Starting experiments...')

    if mpi_utility.is_group_master():
        timer.tic()
    if configs_lib.run_experiments:
        run_experiments(configs)
    if mpi_utility.is_group_master():
        timer.toc()

    if helper_functions.is_laptop():
        # NOTE(review): winsound is a Windows-only module - presumably
        # is_laptop() implies a Windows machine; confirm.
        import winsound
        winsound.Beep(440, 1000)

    wants_plots = (helper_functions.is_laptop()
                   and not options.no_viz
                   and MPI.COMM_WORLD.Get_size() == 1)
    if not wants_plots:
        return
    if configs_lib.VisualizationConfigs().vis_table:
        create_table()
    else:
        run_visualization()
예제 #5
0
    def run_experiments(self):
        """Execute all configured experiments and store their results.

        The data-and-splits object comes from
        ``self.load_data_and_splits``. If the results file already exists,
        the group master prints a notice and the method returns. Otherwise
        the learner's pre-experiment setup runs, and one job is created for
        every label-count index paired with every split (or only with
        ``self.configs.split_idx`` when that is not ``None``). Jobs are
        dispatched to ``_run_experiment`` through a ``LoggingPool`` when
        ``self.configs.use_pool`` is set, or one after another otherwise.
        Outcomes that are not ``None`` are recorded in the method-results
        object. When ``self.configs.should_load_temp_data`` is set, the
        results are saved and the temp split files and temp folder are
        removed.

        Returns:
            None.
        """
        source_path = self.configs.data_file
        dataset = self.load_data_and_splits(source_path)
        target_path = self.configs.results_file
        # Return value is unused; the call is kept in case it has side
        # effects (TODO confirm).
        mpi_utility.get_comm()

        if os.path.isfile(target_path):
            if mpi_utility.is_group_master():
                print(target_path + ' already exists - skipping')
            return
        if mpi_utility.is_group_master():
            host = helper_functions.get_hostname()
            print('(' + host + ') Running experiments: ' + target_path)

        base_learner = self.configs.learner
        base_learner.run_pre_experiment_setup(dataset)

        setting_count = len(self.configs.num_labels)
        split_count = self.configs.num_splits
        gathered = self.configs.method_results_class(n_exp=setting_count,
                                                     n_splits=split_count)
        for position, label_count in enumerate(self.configs.num_labels):
            gathered.results_list[position].num_labels = label_count

        # Either every split, or just the one pinned by configs.split_idx.
        pinned_split = self.configs.split_idx
        split_choices = [pinned_split] if pinned_split is not None else range(split_count)
        work_items = list(itertools.product(range(setting_count), split_choices))

        # Each work item is an (i_labels, split) tuple appended to the prefix.
        prefix = (self, target_path, dataset, gathered)
        call_args = [prefix + item for item in work_items]
        if self.configs.use_pool:
            # NOTE(review): the pool is never explicitly closed - confirm
            # whether LoggingPool needs close()/join().
            workers = multiprocessing_utility.LoggingPool(processes=self.configs.pool_size)
            produced = workers.map(_run_experiment, call_args)
        else:
            produced = [_run_experiment(packed) for packed in call_args]

        for result, (label_idx, split) in zip(produced, work_items):
            if result is None:
                continue
            gathered.set(result, label_idx, split)

        gathered.configs = self.configs
        # NOTE(review): results are only written when should_load_temp_data
        # is set - confirm this is intended.
        if not self.configs.should_load_temp_data:
            return
        helper_functions.save_object(target_path, gathered)
        for label_idx, split in work_items:
            label_count = self.configs.num_labels[label_idx]
            _delete_temp_split_files(target_path, label_count, split)
        _delete_temp_folder(target_path)