Example #1
    def __init__(self, comm=None, debug=False, loadbalance=False, comms=None):
        self.comm = MPI.COMM_WORLD if comm is None else comm
        self.rank = self.comm.Get_rank()
        # Rank 0 acts as the master, so only size - 1 worker processes are available.
        self.size = self.comm.Get_size() - 1
        self.debug = debug
        self.function = _error_function
        self.loadbalance = loadbalance
        self.comms = comms
        '''
        if not self.is_master() and (comms is None or len(self.comms) != 1):
            raise RuntimeError("Invalid number of group:" + str(len(comms)))
        '''
        self.node_name_to_tag = {}

        # The master assigns each hostname (one group per host) a numeric tag
        # and broadcasts the mapping so every worker can look up its own tag.
        if self.is_master():
            for i, s in enumerate(comms.keys()):
                self.node_name_to_tag[s] = i

        self.node_name_to_tag = self.comm.bcast(self.node_name_to_tag, root=0)
        self.tag_to_node_name = {v: k for k, v in self.node_name_to_tag.items()}
        self.node_name_to_rank = mpi_utility.mpi_gather_hostnames(comm=self.comm, include_root=False)
        if not self.is_master() and helper_functions.get_hostname() not in self.node_name_to_tag:
            raise RuntimeError("Node " + helper_functions.get_hostname() + " not in node_name_to_tag")
        if self.size == 0:
            raise ValueError("Tried to create an MPI pool, but there "
                             "was only one MPI process available. "
                             "Need at least two.")
        # num_groups is presumably defined elsewhere on the class (e.g., as a
        # property derived from comms); it is not set in this constructor.
        if self.num_groups == 0:
            raise ValueError("Tried to create an MPI pool, but there were no groups available")
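The constructor above expects comms to map each hostname to that host's sub-communicator. Below is a minimal sketch of one way such a mapping could be built with mpi4py's Comm.Split; the helper name build_host_comms and the use of socket.gethostname() instead of the project's helper_functions are assumptions for illustration, not part of the original module:

import socket
from mpi4py import MPI

def build_host_comms(comm=MPI.COMM_WORLD):
    # Group ranks by hostname: all ranks on the same host get the same color,
    # so Split() yields one sub-communicator per host.
    hostname = socket.gethostname()
    all_hostnames = comm.allgather(hostname)
    colors = {name: i for i, name in enumerate(sorted(set(all_hostnames)))}
    local_comm = comm.Split(colors[hostname], comm.Get_rank())
    # Each process ends up holding only its own host's communicator.
    return {hostname: local_comm}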
Example #2
def mpi_run_main_args(args):
    args = list(args)
    # When more than one host group exists (or cross-validation is
    # parallelized), pass this host's sub-communicator along as an extra
    # argument for the main entry point.
    if len(mpi_comms) > 1 or parallelize_cv:
        my_comm = mpi_comms[helper_functions.get_hostname()]
        args.append(my_comm)
    main.run_main_args(args)
Example #3
def mpi_rollcall():
    # Each rank prints its hostname and rank in turn; the barrier inside the
    # loop keeps the output from interleaving across processes.
    comm = MPI.COMM_WORLD
    s = comm.Get_size()
    rank = comm.Get_rank()
    for i in range(s):
        if rank == i:
            hostname = helper_functions.get_hostname()
            print('(' + hostname + '): ' + str(rank) + ' of ' + str(s))
        comm.Barrier()
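The roll call only produces meaningful output when the script runs under an MPI launcher with several processes; a minimal, assumed entry point (not part of the original module) might look like:

if __name__ == '__main__':
    # Launch with, e.g.:  mpiexec -n 4 python rollcall_script.py
    mpi_rollcall()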
Example #4
def test_mpi_func(i):
    # Look up this host's sub-communicator and let only the group's rank 0
    # report which group the task landed on.
    local_comm = mpi_comms[helper_functions.get_hostname()]

    local_comm.Barrier()
    if local_comm.Get_rank() == 0:
        mpi_utility.mpi_print('My Group: ' + str(i), local_comm)
    local_comm.Barrier()

    return 'Hello World'
Example #5
    def run_experiments(self):
        data_file = self.configs.data_file
        data_and_splits = helper_functions.load_object(data_file)
        data_and_splits.data.repair_data()
        assert self.configs.num_splits <= len(data_and_splits.splits)
        data_and_splits.labels_to_keep = self.configs.labels_to_keep
        data_and_splits.labels_to_not_sample = self.configs.labels_to_not_sample
        data_and_splits.target_labels = self.configs.target_labels
        data_and_splits.data.repair_data()
        results_file = self.configs.results_file
        comm = mpi_utility.get_comm()
        if os.path.isfile(results_file):
            if mpi_utility.is_group_master():
                print(results_file + ' already exists - skipping')
            return
        if mpi_utility.is_group_master():
            hostname = helper_functions.get_hostname()
            print('(' + hostname + ') Running experiments: ' + results_file)
        learner = self.configs.learner
        learner.run_pre_experiment_setup(data_and_splits)
        num_labels = len(self.configs.num_labels)
        num_splits = self.configs.num_splits
        #method_results = results.MethodResults(n_exp=num_labels, n_splits=num_splits)
        method_results = self.configs.method_results_class(n_exp=num_labels, n_splits=num_splits)
        for i, nl in enumerate(self.configs.num_labels):
            method_results.results_list[i].num_labels = nl

        split_idx = self.configs.split_idx
        if split_idx is not None:
            num_labels_list = list(itertools.product(range(num_labels), [split_idx]))
        else:
            num_labels_list = list(itertools.product(range(num_labels), range(num_splits)))

        shared_args = (self, results_file, data_and_splits, method_results)
        args = [shared_args + (i_labels, split) for i_labels,split in num_labels_list]
        if self.configs.use_pool:
            pool = multiprocessing_utility.LoggingPool(processes=self.configs.pool_size)
            all_results = pool.map(_run_experiment, args)
        else:
            all_results = [_run_experiment(a) for a in args]
        for curr_results,s in zip(all_results,num_labels_list):
            if curr_results is None:
                continue
            i_labels, split = s
            method_results.set(curr_results, i_labels, split)

        method_results.configs = self.configs
        if self.configs.should_load_temp_data:
            helper_functions.save_object(results_file,method_results)
            for i_labels, split in num_labels_list:
                num_labels = self.configs.num_labels[i_labels]
                _delete_temp_split_files(results_file, num_labels, split)
            _delete_temp_folder(results_file)
Example #6
def mpi_gather_hostnames(comm=MPI.COMM_WORLD, include_root=False):
    # Collect every rank's hostname on the root, then broadcast the full list
    # so all ranks can build the same hostname -> {ranks} mapping.
    hostname = helper_functions.get_hostname()
    all_hostnames = comm.gather(hostname, root=0)
    all_hostnames = comm.bcast(all_hostnames, root=0)
    host_to_rank = {}
    for i, s in enumerate(all_hostnames):
        # Optionally leave the root rank (0) out of its host's group.
        if not include_root and i == 0:
            continue
        if s not in host_to_rank:
            host_to_rank[s] = set()
        host_to_rank[s].add(i)
    host_to_rank = comm.bcast(host_to_rank, root=0)
    return host_to_rank
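A hedged usage sketch of the mapping this function returns, relying on the definition above and the project's helper_functions module; the hostnames in the comment are made up, and the script is assumed to run under an MPI launcher:

from mpi4py import MPI

# With two hosts holding two ranks each and include_root=False, the mapping
# looks roughly like {'node-a': {1}, 'node-b': {2, 3}}.
host_to_rank = mpi_gather_hostnames(comm=MPI.COMM_WORLD, include_root=False)
local_ranks = host_to_rank.get(helper_functions.get_hostname(), set())
print('ranks on this host: ' + str(sorted(local_ranks)))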
Example #7
    def __init__(self, comm=None, debug=False, loadbalance=False, comms=None):
        self.comm = MPI.COMM_WORLD if comm is None else comm
        self.rank = self.comm.Get_rank()
        self.size = self.comm.Get_size() - 1
        self.debug = debug
        self.function = _error_function
        self.loadbalance = loadbalance
        self.comms = comms
        '''
        if not self.is_master() and (comms is None or len(self.comms) != 1):
            raise RuntimeError("Invalid number of group:" + str(len(comms)))
        '''
        self.node_name_to_tag = {}

        if self.is_master():
            for i, s in enumerate(comms.keys()):
                self.node_name_to_tag[s] = i

        self.node_name_to_tag = self.comm.bcast(self.node_name_to_tag, root=0)
        self.tag_to_node_name = {
            v: k
            for k, v in self.node_name_to_tag.items()
        }
        self.node_name_to_rank = mpi_utility.mpi_gather_hostnames(
            comm=self.comm, include_root=False)
        if (not self.is_master()
                and helper_functions.get_hostname() not in self.node_name_to_tag):
            raise RuntimeError("Node " + helper_functions.get_hostname() +
                               " not in node_name_to_tag")
        if self.size == 0:
            raise ValueError("Tried to create an MPI pool, but there "
                             "was only one MPI process available. "
                             "Need at least two.")
        if self.num_groups == 0:
            raise ValueError(
                "Tried to create an MPI pool, but there were no groups available"
            )
Example #8
    def run_experiments(self):
        data_file = self.configs.data_file
        data_and_splits = self.load_data_and_splits(data_file)
        results_file = self.configs.results_file
        comm = mpi_utility.get_comm()
        if os.path.isfile(results_file):
            if mpi_utility.is_group_master():
                print(results_file + ' already exists - skipping')
            return
        if mpi_utility.is_group_master():
            hostname = helper_functions.get_hostname()
            print('(' + hostname + ') Running experiments: ' + results_file)
        learner = self.configs.learner
        learner.run_pre_experiment_setup(data_and_splits)
        num_labels = len(self.configs.num_labels)
        num_splits = self.configs.num_splits
        #method_results = results.MethodResults(n_exp=num_labels, n_splits=num_splits)
        method_results = self.configs.method_results_class(n_exp=num_labels, n_splits=num_splits)
        for i, nl in enumerate(self.configs.num_labels):
            method_results.results_list[i].num_labels = nl

        split_idx = self.configs.split_idx
        if split_idx is not None:
            num_labels_list = list(itertools.product(range(num_labels), [split_idx]))
        else:
            num_labels_list = list(itertools.product(range(num_labels), range(num_splits)))

        shared_args = (self, results_file, data_and_splits, method_results)
        args = [shared_args + (i_labels, split) for i_labels,split in num_labels_list]
        if self.configs.use_pool:
            pool = multiprocessing_utility.LoggingPool(processes=self.configs.pool_size)
            all_results = pool.map(_run_experiment, args)
        else:
            all_results = [_run_experiment(a) for a in args]
        for curr_results,s in zip(all_results,num_labels_list):
            if curr_results is None:
                continue
            i_labels, split = s
            method_results.set(curr_results, i_labels, split)

        method_results.configs = self.configs
        if self.configs.should_load_temp_data:
            helper_functions.save_object(results_file,method_results)
            for i_labels, split in num_labels_list:
                num_labels = self.configs.num_labels[i_labels]
                _delete_temp_split_files(results_file, num_labels, split)
            _delete_temp_folder(results_file)
Example #9
    def get_tag(self):
        # Workers look up the tag assigned to their hostname; the master owns
        # no group and therefore has no tag.
        if self.is_master():
            raise RuntimeError("Master node doesn't have a tag.")
        return self.node_name_to_tag[helper_functions.get_hostname()]
Example #10
    def is_group_root(self):
        # Copy the rank set so removing the master rank (0) does not mutate the
        # shared node_name_to_rank mapping.
        group_members = set(self.node_name_to_rank[helper_functions.get_hostname()])
        if 0 in group_members:
            group_members.remove(0)
        # The lowest remaining rank on this host acts as the group root.
        return self.rank == min(group_members)
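A hedged sketch of how these two accessors might be used together on a worker process, assuming pool is an instance of the pool class from Example #1 (the variable name and the surrounding script are hypothetical):

# Only the lowest non-master rank on each host reports its group's tag.
if not pool.is_master() and pool.is_group_root():
    print('group ' + str(pool.get_tag()) + ' root is rank ' + str(pool.rank))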