Пример #1
0
def distribute(mat):
    """Scatter the rows of a matrix held by the root across all nodes.

    The matrix is cut along its first axis at the boundaries returned by
    get_segments(mat.shape[0]); node i receives rows
    segments[i]:segments[i + 1]. Only the root reads mat, the other nodes
    learn the trailing shape, dtype and boundaries through broadcasts.

    Input:
        mat: the matrix to distribute (only used on the root). It should be
            C-contiguous; if it is not, a warning is logged and a contiguous
            copy is made before sending.
    Output:
        the local chunk of rows. When SIZE == 1 the input object itself is
        returned without copying.
    """
    if SIZE == 1:
        # single process: nothing to scatter
        return mat
    # Layout information starts out known on the root only.
    row_shape, bounds, elem_type = None, None, None
    if is_root():
        row_shape = mat.shape[1:]
        bounds = get_segments(mat.shape[0])
        elem_type = mat.dtype
    row_shape = COMM.bcast(row_shape)
    elem_type = COMM.bcast(elem_type)
    bounds = COMM.bcast(bounds)
    if not is_root():
        # Allocate a buffer of exactly the size the root is about to send.
        local_rows = bounds[RANK + 1] - bounds[RANK]
        local = np.empty((local_rows, ) + row_shape, dtype=elem_type)
        safe_recv_matrix(local)
        return local
    if not mat.flags['C_CONTIGUOUS']:
        logging.warning('Warning: mat is not contiguous.')
        mat = np.ascontiguousarray(mat)
    for dest in range(1, SIZE):
        lo, hi = bounds[dest], bounds[dest + 1]
        safe_send_matrix(mat[lo:hi], dest=dest)
    # The root keeps the leading chunk (as a copy, not a view of mat).
    return mat[:bounds[1]].copy()
Пример #2
0
def distribute(mat):
    """Splits a matrix on the root by rows and hands one piece to each node.

    Row boundaries come from get_segments(mat.shape[0]); the piece for node
    i is mat[segments[i]:segments[i+1]]. Nodes other than the root never
    touch mat: they receive the per-row shape, the dtype and the boundaries
    via broadcast and then receive their piece into a fresh buffer.

    Input:
        mat: the matrix to split (read on the root only). A non C-contiguous
            matrix triggers a logged warning and is converted first.
    Output:
        this node's piece of the matrix. If SIZE == 1, mat itself is
        returned as is.
    """
    if SIZE == 1:
        # nothing to do when there is only one node
        return mat
    if is_root():
        tail_shape, cuts, kind = (mat.shape[1:],
                                  get_segments(mat.shape[0]),
                                  mat.dtype)
    else:
        tail_shape = cuts = kind = None
    # Same broadcast order on every node: shape, dtype, boundaries.
    tail_shape = COMM.bcast(tail_shape)
    kind = COMM.bcast(kind)
    cuts = COMM.bcast(cuts)
    if is_root():
        is_contiguous = mat.flags['C_CONTIGUOUS']
        if not is_contiguous:
            logging.warning('Warning: mat is not contiguous.')
            mat = np.ascontiguousarray(mat)
        for node in range(1, SIZE):
            piece = mat[cuts[node]:cuts[node+1]]
            safe_send_matrix(piece, dest=node)
        # the root's own piece is copied so it does not alias mat
        result = mat[:cuts[1]].copy()
    else:
        n_rows = cuts[RANK+1] - cuts[RANK]
        result = np.empty((n_rows,) + tail_shape, dtype=kind)
        safe_recv_matrix(result)
    return result
Пример #3
0
def load_matrix_multi(filename, N = None):
    """Loads the matrix previously dumped by dump_matrix_multi.

    The parts are stored as files named filename-xxxxx-of-xxxxx.npy. The MPI
    size at load time may differ from the number of parts: the rows of all
    parts are treated as one long matrix, re-split with get_segments(), and
    every node reads only the files that overlap its own row range.

    Input:
        filename: the common prefix of the part files.
        N: (optional) the number of parts the matrix is separated into. If
            None, the number is inferred by globbing for
            filename-?????-of-?????.npy.
    Output:
        rows segments[RANK]:segments[RANK+1] of the full matrix, using the
        dtype of the stored parts.
    Raises:
        IOError: if there is no part to load.
    """
    if N is None:
        N = len(glob.glob('%s-?????-of-?????.npy' % (filename)))
    if N <= 0:
        # Fail on every node instead of letting only the root crash in
        # np.load while the others wait in the barrier below.
        raise IOError('No matrix parts found for %s' % (filename))
    logging.debug("Loading the matrix from %d parts", N)
    part_names = ['%s-%05d-of-%05d.npy' % (filename, i, N) for i in range(N)]
    # we will load the length of the data, and then try to distribute them
    # as even as possible.
    if RANK == 0:
        # the root will first taste each file; memory mapping avoids
        # reading the actual contents.
        sizes = []
        for i, part_name in enumerate(part_names):
            header = np.load(part_name, mmap_mode='r')
            sizes.append(header.shape[0])
            if i == 0:
                shape = header.shape[1:]
                dtype = header.dtype
        sizes = np.array(sizes)
    else:
        sizes = None
        shape = None
        dtype = None
    barrier()
    sizes = COMM.bcast(sizes)
    shape = COMM.bcast(shape)
    dtype = COMM.bcast(dtype)
    total = sizes.sum()
    segments = get_segments(total)
    # now, each node opens the file that overlaps with its data, and reads
    # the contents.
    my_start = segments[RANK]
    my_end = segments[RANK+1]
    my_size = my_end - my_start
    # Allocate once, keeping the dtype of the stored data.
    mat = np.empty((my_size,) + shape, dtype = dtype)
    f_start = 0
    for part_name, size in zip(part_names, sizes):
        # [f_start, f_end) is the global row range covered by this part
        f_end = f_start + size
        if f_start < my_end and f_end > my_start:
            file_mat = np.load(part_name, mmap_mode='r')
            mat[max(f_start - my_start, 0):\
                min(f_end - my_start, my_size)] = \
                    file_mat[max(my_start - f_start, 0):\
                             min(my_end - f_start, size)]
        f_start = f_end
    return mat
Пример #4
0
def load_matrix_multi(filename, N=None):
    """Loads the matrix previously dumped by dump_matrix_multi.

    The stored files are in the format filename-xxxxx-of-xxxxx.npy. The MPI
    size might be different from the number of stored parts: the row counts
    of all parts are summed, the total is split with get_segments(), and
    each node copies its own row range out of the overlapping files.

    Input:
        filename: the common prefix of the part files.
        N: (optional) if given, specifies the number of parts the matrix is
            separated into. Otherwise, the number is inferred by listing the
            files matching filename-?????-of-?????.npy with glob.
    Output:
        the local rows segments[RANK]:segments[RANK + 1] of the matrix, in
        the dtype found in the stored parts.
    Raises:
        IOError: if no part is available.
    """
    if N is None:
        files = glob.glob('%s-?????-of-?????.npy' % (filename))
        N = len(files)
    if N <= 0:
        # Raise everywhere: otherwise only the root would fail (in np.load)
        # and the remaining nodes would block in the collective calls.
        raise IOError('No matrix parts found for %s' % (filename))
    logging.debug("Loading the matrix from %d parts", N)
    paths = ['%s-%05d-of-%05d.npy' % (filename, i, N) for i in range(N)]
    # we will load the length of the data, and then try to distribute them
    # as even as possible.
    if RANK == 0:
        # the root will first taste each file (memory mapped, so only the
        # header is needed)
        first = np.load(paths[0], mmap_mode='r')
        shape = first.shape[1:]
        dtype = first.dtype
        sizes = np.array(
            [np.load(path, mmap_mode='r').shape[0] for path in paths])
    else:
        sizes = None
        shape = None
        dtype = None
    barrier()
    sizes = COMM.bcast(sizes)
    shape = COMM.bcast(shape)
    dtype = COMM.bcast(dtype)
    total = sizes.sum()
    segments = get_segments(total)
    # now, each node opens the file that overlaps with its data, and reads
    # the contents.
    my_start = segments[RANK]
    my_end = segments[RANK + 1]
    my_size = my_end - my_start
    # Single allocation that preserves the stored dtype.
    mat = np.empty((my_size, ) + shape, dtype=dtype)
    f_start = 0
    for path, size in zip(paths, sizes):
        # global rows [f_start, f_end) live in this file
        f_end = f_start + size
        if f_start < my_end and f_end > my_start:
            file_mat = np.load(path, mmap_mode='r')
            dst_lo = max(f_start - my_start, 0)
            dst_hi = min(f_end - my_start, my_size)
            src_lo = max(my_start - f_start, 0)
            src_hi = min(my_end - f_start, size)
            mat[dst_lo:dst_hi] = file_mat[src_lo:src_hi]
        f_start = f_end
    return mat
Пример #5
0
def distribute_list(source):
    """Scatter a list held by the root across all nodes.

    The root broadcasts the list length, every node derives the same
    boundaries with get_segments(length), and node i gets
    source[segments[i]:segments[i + 1]].

    Input:
        source: the list to distribute (only read on the root).
    Output:
        this node's slice of the list. With SIZE == 1 the input itself is
        returned; if the root's list is empty every node returns [] (and
        the root logs a warning).
    """
    if SIZE == 1:
        # single process: hand the list back untouched
        return source
    length = 0
    if is_root():
        length = len(source)
        if length == 0:
            logging.warning("Warning: List has length 0")
    length = COMM.bcast(length)
    if length == 0:
        return []
    segments = get_segments(length)
    if not is_root():
        return COMM.recv()
    for dest in range(1, SIZE):
        chunk = source[segments[dest]:segments[dest + 1]]
        COMM.send(chunk, dest=dest)
    # the root keeps the leading slice for itself
    return source[:segments[1]]
Пример #6
0
def distribute_list(source):
    """Splits the root's list into per-node slices and delivers them.

    All nodes agree on the list length through a broadcast and compute the
    slice boundaries locally with get_segments(length). The root then sends
    source[segments[i]:segments[i+1]] to node i and keeps the first slice.

    Input:
        source: the list to split; ignored on nodes other than the root.
    Output:
        the slice owned by this node. If SIZE == 1 the original list is
        returned; if the list on the root is empty, a warning is logged on
        the root and [] is returned everywhere.
    """
    if SIZE == 1:
        # no other node to share with
        return source
    if is_root():
        n_items = len(source)
        if n_items == 0:
            logging.warning("Warning: List has length 0")
    else:
        n_items = 0
    n_items = COMM.bcast(n_items)
    if n_items == 0:
        return []
    cuts = get_segments(n_items)
    if is_root():
        node = 1
        while node < SIZE:
            COMM.send(source[cuts[node]:cuts[node+1]], dest=node)
            node += 1
        mine = source[:cuts[1]]
    else:
        mine = COMM.recv()
    return mine
Пример #7
0
def elect():
    '''Randomly picks one node as the president and tells everyone.

    Every node draws its own candidate rank in [0, SIZE), and the broadcast
    then makes all nodes return one common value (presumably the root's
    draw; this assumes COMM.bcast uses its default root).
    Input:
        None
    Output:
        the rank of the president
    '''
    candidate = np.random.randint(SIZE)
    winner = COMM.bcast(candidate)
    return winner
Пример #8
0
def elect():
    '''Chooses a president among the nodes at random.

    A rank between 0 and SIZE - 1 is drawn locally and passed through
    COMM.bcast, so that all nodes end up with the same rank.
    Input:
        None
    Output:
        the rank of the president
    '''
    local_pick = np.random.randint(SIZE)
    return COMM.bcast(local_pick)
Пример #9
0
def agree(decision):
    """Makes a decision consistent across all nodes.

    The decision of the root is broadcast to everyone, and the broadcast
    value is returned on every node.
    """
    agreed = COMM.bcast(decision)
    return agreed
Пример #10
0
def load_matrix_multi(filename, N=None, name=None):
    """Loads the matrix previously dumped by dump_matrix_multi.

    The MPI size might be different from the number of stored files. The row
    counts of all files are summed, the total is split with get_segments(),
    and each node copies its own row range out of the files overlapping it.

    Input:
        filename: either a string prefix, in which case the files
            filename-xxxxx-of-xxxxx.npy are collected with glob (in sorted
            order), or an iterable of file names ending in 'npy' or 'mat'.
        N: ignored; the number of parts is always the number of files. Kept
            for backward compatibility.
        name: if the input is a hdf5 mat file, specify the name here.
    Output:
        rows segments[RANK]:segments[RANK + 1] of the full matrix, using the
        dtype of the stored data.
    Raises:
        ValueError: if there is no file to load, or a file name does not
            end in 'npy' or 'mat'.
    """
    if isinstance(filename, str):
        # we use our default format
        files = glob.glob('%s-?????-of-?????.npy' % (filename))
        files.sort()
    else:
        files = list(filename)
    # Validate on every node before any collective call, so a bad input
    # fails everywhere instead of crashing the root and blocking the rest.
    if not files:
        raise ValueError('No matrix files to load.')
    for f in files:
        if not f.endswith(('npy', 'mat')):
            raise ValueError('Unsupported matrix file type: %s' % f)
    N = len(files)
    logging.debug("Loading the matrix from %d parts", N)
    # we will load the length of the data, and then try to distribute them
    # as even as possible.
    if RANK == 0:
        # the root will first taste each file; the format of the first file
        # decides how all files are inspected.
        if files[0].endswith('npy'):
            sizes = np.array(
                [np.load(f, mmap_mode='r').shape[0] for f in files])
            temp = np.load(files[0], mmap_mode='r')
            shape = temp.shape[1:]
            dtype = temp.dtype
        else:
            sizes = []
            for f in files:
                # the context manager closes the file even on errors
                with h5py.File(f, 'r') as fid:
                    dataset = fid[name]
                    sizes.append(dataset.shape[0])
                    shape = dataset.shape[1:]
                    dtype = dataset.dtype
            sizes = np.array(sizes)
    else:
        sizes = None
        shape = None
        dtype = None
    barrier()
    sizes = COMM.bcast(sizes)
    shape = COMM.bcast(shape)
    dtype = COMM.bcast(dtype)
    total = sizes.sum()
    segments = get_segments(total)
    # now, each node opens the file that overlaps with its data, and reads
    # the contents.
    my_start = segments[RANK]
    my_end = segments[RANK + 1]
    my_size = my_end - my_start
    # Allocate once, keeping the dtype of the stored data.
    mat = np.empty((my_size, ) + shape, dtype=dtype)
    f_start = 0
    for f, size in zip(files, sizes):
        # [f_start, f_end) is the global row range covered by this file
        f_end = f_start + size
        if f_start < my_end and f_end > my_start:
            dst = slice(max(f_start - my_start, 0),
                        min(f_end - my_start, my_size))
            src = slice(max(my_start - f_start, 0),
                        min(my_end - f_start, size))
            if f.endswith('npy'):
                mat[dst] = np.load(f, mmap_mode='r')[src]
            else:
                with h5py.File(f, 'r') as fid:
                    mat[dst] = fid[name][src]
        f_start = f_end
    return mat
Пример #11
0
def agree(decision):
    """Propagates the root's decision to every node.

    Whatever each node passes in, the value returned is the one obtained
    from COMM.bcast, so all nodes continue with the same decision.
    """
    consensus = COMM.bcast(decision)
    return consensus
Пример #12
0
def load_matrix_multi(filename, N = None, name=None):
    """Loads the matrix previously dumped by dump_matrix_multi.

    The MPI size might be different from the number of stored files: the
    files are treated as one long matrix stacked along the first axis, which
    is re-split with get_segments() so that each node only reads the rows
    that belong to it.

    Input:
        filename: a string prefix (the files filename-xxxxx-of-xxxxx.npy are
            then found with glob and sorted), or an iterable of file names
            ending in 'npy' or 'mat'.
        N: ignored, the number of parts is taken from the file list. Kept
            for backward compatibility.
        name: if the input is a hdf5 mat file, specify the name here.
    Output:
        the local rows segments[RANK]:segments[RANK+1] of the matrix, in the
        dtype of the stored data.
    Raises:
        ValueError: if the file list is empty, or contains a file whose name
            does not end in 'npy' or 'mat'.
    """
    if isinstance(filename, str):
        # we use our default format
        files = sorted(glob.glob('%s-?????-of-?????.npy' % (filename)))
    else:
        files = list(filename)
    # Check the input on all nodes up front: a failure on the root alone
    # would leave the other nodes waiting in the barrier / broadcasts.
    if len(files) == 0:
        raise ValueError('No matrix files to load.')
    unsupported = [f for f in files if not f.endswith(('npy', 'mat'))]
    if unsupported:
        raise ValueError('Unsupported matrix file type: %s' % unsupported[0])
    N = len(files)
    logging.debug("Loading the matrix from %d parts", N)
    # we will load the length of the data, and then try to distribute them
    # as even as possible.
    if RANK == 0:
        # the root will first taste each file; the first file's format
        # selects how every file is inspected.
        if files[0].endswith('npy'):
            sizes = np.array([np.load(f, mmap_mode='r').shape[0]
                              for f in files])
            temp = np.load(files[0], mmap_mode='r')
            shape = temp.shape[1:]
            dtype = temp.dtype
        else:
            sizes = []
            for f in files:
                # closed automatically, also when reading fails
                with h5py.File(f, 'r') as fid:
                    sizes.append(fid[name].shape[0])
                    shape = fid[name].shape[1:]
                    dtype = fid[name].dtype
            sizes = np.array(sizes)
    else:
        sizes = None
        shape = None
        dtype = None
    barrier()
    sizes = COMM.bcast(sizes)
    shape = COMM.bcast(shape)
    dtype = COMM.bcast(dtype)
    total = sizes.sum()
    segments = get_segments(total)
    # now, each node opens the file that overlaps with its data, and reads
    # the contents.
    my_start = segments[RANK]
    my_end = segments[RANK+1]
    my_size = my_end - my_start
    # One allocation only, so the stored dtype is preserved.
    mat = np.empty((my_size,) + shape, dtype = dtype)
    f_start = 0
    for i, size in enumerate(sizes):
        # global rows [f_start, f_end) are stored in files[i]
        f_end = f_start + size
        if f_start < my_end and f_end > my_start:
            dst_lo = max(f_start - my_start, 0)
            dst_hi = min(f_end - my_start, my_size)
            src_lo = max(my_start - f_start, 0)
            src_hi = min(my_end - f_start, size)
            if files[i].endswith('npy'):
                file_mat = np.load(files[i], mmap_mode='r')
                mat[dst_lo:dst_hi] = file_mat[src_lo:src_hi]
            else:
                with h5py.File(files[i], 'r') as fid:
                    mat[dst_lo:dst_hi] = fid[name][src_lo:src_hi]
        f_start = f_end
    return mat