Example no. 1
    def slave(self):  #{{{
        #set up persistent receive requests
        #messages.STATUS {{{
        self.status_reqs = array(0, dtype=int)
        self.status_prequest = comm.Recv_init([self.status_reqs, MPI.INT],
                                              source=0,
                                              tag=messages.STATUS)
        self.control_reqlist += [self.status_prequest]
        self.control_reqhandlers += [self.status]
        #}}}
        #messages.ABORT {{{
        self.abort_reqs = array(0, dtype=int)
        self.abort_prequest = comm.Recv_init([self.abort_reqs, MPI.INT],
                                             source=0,
                                             tag=messages.ABORT)
        self.control_reqlist += [self.abort_prequest]
        self.control_reqhandlers += [self.abort]
        #}}}
        #messages.STATE {{{
        self.state_req = array([0] * 300,
                               dtype='uint8')  #XXX, calculate exact state size
        self.state_prequest = comm.Recv_init([self.state_req, MPI.BYTE],
                                             source=MPI.ANY_SOURCE,
                                             tag=messages.STATE)
        self.computation_reqlist += [self.state_prequest]
        self.computation_reqhandlers += [self.state_received]
        #}}}

        self.setupGoalMessageHandler()
        self.termination_detection.slave_init()

        self.reqlist = self.control_reqlist + self.computation_reqlist
        self.reqhandlers = self.control_reqhandlers + self.computation_reqhandlers

        #print self.reqlist, self.reqhandlers

        #MPI buffer send space
        MPI.Attach_buffer(MPI.Alloc_mem(1024 * 1024))  #1 MiB

        MPI.Prequest.Startall(self.reqlist)
        print "Slave", rank, "awaiting your command"

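        # Main slave loop: service any pending control or computation messages,
        # advance the local computation, and block on the request list when
        # there is no more local work.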
        self.running = True
        while self.running:
            #Do control comm, if any
            (i, comm_todo) = MPI.Prequest.Testany(self.control_reqlist)
            if comm_todo:
                self.control_reqhandlers[i]()
                self.control_reqlist[i].Start()
            #Do computation comm, if any
            (i, comm_todo) = MPI.Prequest.Testany(self.computation_reqlist)
            if comm_todo:
                #print "calling ", self.computation_reqhandlers[i]
                self.computation_reqhandlers[i]()
                self.computation_reqlist[i].Start()

            #Do a computation
            try:
                self.mcReachability.compute()
            except NoMoreStatesException:
                #No more work...
                #print "Rank %d has no more work, len(pwlist) = %d" % (rank, len(self.pwlist.waiting))
                self.termination_detection.noMoreWork()
                #... wait for something to arrive
                i = MPI.Prequest.Waitany(self.reqlist)
                #print "calling ", self.reqhandlers[i]
                self.reqhandlers[i]()
                self.reqlist[i].Start()
            except GoalFoundException:
                #goal state found: Profit!
                comm.Bsend([None, MPI.INT], dest=0, tag=messages.GOAL_REACHED)
Example no. 2
    all_compounds = []
    all_com_beta = []
    all_com_gap = []
    all_com_lumo = []
    wave_compounds = []
    com_gap = []
    com_lumo = []
    depth = []
    result = []
    """
    start distributing jobs to all ranks
    """
    #comm = MPI.COMM_WORLD
    #rank = comm.Get_rank()
    mem = np.zeros(8192)
    MPI.Attach_buffer(mem)
    num_cores = 12
    hsm = HashTable(num_cores)
    q1 = Queue()
    num_job = 3 * num_cores
    #print ("check node:",rootnode.state)
    print("root rank:", hsm.hashing(root))
    #if rank==0:
    #if rank==2:
    print("f**k the code")
    if rank == 0:
        for i in range(num_job):
            print("root rank:", hsm.hashing(root))

            comm.bsend([root, reward, child_char],
                       dest=int(hsm.hashing(root)),
Example no. 3
from mpi4py import MPI
try:
    from numpy import empty
except ImportError:
    from array import array

    def empty(size, dtype):
        return array(dtype, [0] * size)


# --------------------------------------------------------------------

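# Attach a buffer for buffered-mode sends, detach it, re-attach the returned
# buffer, and detach again; the detached buffer should keep its original size.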
BUFSIZE = 10000 + MPI.BSEND_OVERHEAD

buff = empty(BUFSIZE, dtype='b')

MPI.Attach_buffer(buff)

buff2 = MPI.Detach_buffer()

MPI.Attach_buffer(buff2)

MPI.Detach_buffer()

# --------------------------------------------------------------------

assert len(buff2) == BUFSIZE

# --------------------------------------------------------------------
Example no. 4
 def testPersistent(self):
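     # Exercise persistent requests: standard, synchronous, buffered, and ready
     # sends (Send_init/Ssend_init/Bsend_init/Rsend_init) paired with Recv_init,
     # checking the request lifecycle and the received data.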
     size = self.COMM.Get_size()
     rank = self.COMM.Get_rank()
     dest = (rank + 1) % size
     source = (rank - 1) % size
     for array in arrayimpl.ArrayTypes:
         for typecode in arrayimpl.TypeMap:
             for s in range(size):
                 for xs in range(3):
                     #
                     sbuf = array(s, typecode, s)
                     rbuf = array(-1, typecode, s + xs)
                     sendreq = self.COMM.Send_init(sbuf.as_mpi(), dest, 0)
                     recvreq = self.COMM.Recv_init(rbuf.as_mpi(), source, 0)
                     sendreq.Start()
                     recvreq.Start()
                     sendreq.Wait()
                     recvreq.Wait()
                     self.assertNotEqual(sendreq, MPI.REQUEST_NULL)
                     self.assertNotEqual(recvreq, MPI.REQUEST_NULL)
                     sendreq.Free()
                     recvreq.Free()
                     self.assertEqual(sendreq, MPI.REQUEST_NULL)
                     self.assertEqual(recvreq, MPI.REQUEST_NULL)
                     for value in rbuf[:s]:
                         self.assertEqual(value, s)
                     for value in rbuf[s:]:
                         self.assertEqual(value, -1)
                     #
                     sbuf = array(s, typecode, s)
                     rbuf = array(-1, typecode, s + xs)
                     sendreq = self.COMM.Send_init(sbuf.as_mpi(), dest, 0)
                     recvreq = self.COMM.Recv_init(rbuf.as_mpi(), source, 0)
                     reqlist = [sendreq, recvreq]
                     MPI.Prequest.Startall(reqlist)
                     index1 = MPI.Prequest.Waitany(reqlist)
                     self.assertTrue(index1 in [0, 1])
                     self.assertNotEqual(reqlist[index1], MPI.REQUEST_NULL)
                     index2 = MPI.Prequest.Waitany(reqlist)
                     self.assertTrue(index2 in [0, 1])
                     self.assertNotEqual(reqlist[index2], MPI.REQUEST_NULL)
                     self.assertTrue(index1 != index2)
                     index3 = MPI.Prequest.Waitany(reqlist)
                     self.assertEqual(index3, MPI.UNDEFINED)
                     for preq in reqlist:
                         self.assertNotEqual(preq, MPI.REQUEST_NULL)
                         preq.Free()
                         self.assertEqual(preq, MPI.REQUEST_NULL)
                     for value in rbuf[:s]:
                         self.assertEqual(value, s)
                     for value in rbuf[s:]:
                         self.assertEqual(value, -1)
                     #
                     sbuf = array(s, typecode, s)
                     rbuf = array(-1, typecode, s + xs)
                     sendreq = self.COMM.Ssend_init(sbuf.as_mpi(), dest, 0)
                     recvreq = self.COMM.Recv_init(rbuf.as_mpi(), source, 0)
                     sendreq.Start()
                     recvreq.Start()
                     sendreq.Wait()
                     recvreq.Wait()
                     self.assertNotEqual(sendreq, MPI.REQUEST_NULL)
                     self.assertNotEqual(recvreq, MPI.REQUEST_NULL)
                     sendreq.Free()
                     recvreq.Free()
                     self.assertEqual(sendreq, MPI.REQUEST_NULL)
                     self.assertEqual(recvreq, MPI.REQUEST_NULL)
                     for value in rbuf[:s]:
                         self.assertEqual(value, s)
                     for value in rbuf[s:]:
                         self.assertEqual(value, -1)
                     #
                     mem = array(0, typecode,
                                 s + MPI.BSEND_OVERHEAD).as_raw()
                     sbuf = array(s, typecode, s)
                     rbuf = array(-1, typecode, s + xs)
                     MPI.Attach_buffer(mem)
                     sendreq = self.COMM.Bsend_init(sbuf.as_mpi(), dest, 0)
                     recvreq = self.COMM.Recv_init(rbuf.as_mpi(), source, 0)
                     sendreq.Start()
                     recvreq.Start()
                     sendreq.Wait()
                     recvreq.Wait()
                     MPI.Detach_buffer()
                     self.assertNotEqual(sendreq, MPI.REQUEST_NULL)
                     self.assertNotEqual(recvreq, MPI.REQUEST_NULL)
                     sendreq.Free()
                     recvreq.Free()
                     self.assertEqual(sendreq, MPI.REQUEST_NULL)
                     self.assertEqual(recvreq, MPI.REQUEST_NULL)
                     for value in rbuf[:s]:
                         self.assertEqual(value, s)
                     for value in rbuf[s:]:
                         self.assertEqual(value, -1)
                     #
                     rank = self.COMM.Get_rank()
                     sbuf = array(s, typecode, s)
                     rbuf = array(-1, typecode, s + xs)
                     recvreq = self.COMM.Recv_init(rbuf.as_mpi(), rank, 0)
                     sendreq = self.COMM.Rsend_init(sbuf.as_mpi(), rank, 0)
                     recvreq.Start()
                     sendreq.Start()
                     recvreq.Wait()
                     sendreq.Wait()
                     self.assertNotEqual(sendreq, MPI.REQUEST_NULL)
                     self.assertNotEqual(recvreq, MPI.REQUEST_NULL)
                     sendreq.Free()
                     recvreq.Free()
                     self.assertEqual(sendreq, MPI.REQUEST_NULL)
                     self.assertEqual(recvreq, MPI.REQUEST_NULL)
                     for value in rbuf[:s]:
                         self.assertEqual(value, s)
                     for value in rbuf[s:]:
                         self.assertEqual(value, -1)
Example no. 5
        mpi_thread_safe_info_msg = "Provided MPI implementation (%s) is not thread safe configured, " % (
            mpi_vendor_str)
        mpi_thread_safe_info_msg = mpi_thread_safe_info_msg + "maximum thread safe level supported is: %s" % (
            mpi_thread_safe_level_str)
        mpi_thread_safe_info_msg = mpi_thread_safe_info_msg + "\nNOTE: In most MPI implementations thread-safety "
        mpi_thread_safe_info_msg = mpi_thread_safe_info_msg + "can be enabled at pre-compile, "
        mpi_thread_safe_info_msg = mpi_thread_safe_info_msg + "by setting explicit thread-safe configuration options, "
        mpi_thread_safe_info_msg = mpi_thread_safe_info_msg + "\n      e.g. (MPI 1.6.5) --enable-mpi-thread-multiple"

    # Allocate memory for buffered sends
    if mpi_initialized and mpi_world_size > 1 and is_mpi_thread_safe:
        try:
            mpi_buffer_size_in_mb = 100
            __mpi_buffer = __mpi_factory.Alloc_mem(mpi_buffer_size_in_mb *
                                                   1024 * 1024)
            __mpi_factory.Attach_buffer(__mpi_buffer)
            mpi_buffer_allocated = True
        except Exception as instance:
            mpi_buffer_allocated = False
            mpi_buffer_allocation_error_msg = traceback.format_exc()

    # Check if MPI is effectively enabled
    if mpi_initialized and mpi_world_size > 1 and is_mpi_thread_safe and mpi_buffer_allocated:
        is_mpi_enabled = True
    else:
        is_mpi_enabled = False
        if not mpi_initialized:
            mpi_error_msg = mpi_initialization_error_msg
        elif mpi_world_size < 2:
            mpi_error_msg = "Only 1 MPI process found"
        elif not is_mpi_thread_safe:
Example no. 6
comm.Barrier()
comm.Reduce(opt_local_obj, opt_global_obj, op=MPI.SUM, root=0)
opt_global_obj /= size


#initial state: x0, y0
local_x = np.random.normal(0, 4, (fea_size, ))
grad = lr_model.compute_grad(local_x)
last_grad = np.zeros((fea_size, ))
local_y = grad

iter_num = 300
lr = 8e-3

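# Scratch space for MPI buffered-mode sends: room for one (x, y) message to
# each out-neighbor, with a few extra floats of margin per message.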
buf = np.empty((fea_size * 2 + 3) * out_deg, dtype=np.float64)
MPI.Attach_buffer(buf)

sync = 1
for iter in range(iter_num):
    #send local x and y to out neighbors
    for i, outp in enumerate(out_peers):
        send_buff = np.stack((local_x - lr * local_y, local_y / (out_deg + 1)), axis=1)
        comm.Bsend([send_buff, MPI.DOUBLE], dest=outp)  #send_buff is float64

    #receive local x and y from in neighbors
    #clear receive buffer and flag
    x = np.zeros((fea_size, size))
    y = np.zeros((fea_size, size))
    recv_flag = np.zeros((size, ), dtype=int)

    if sync == 1:
Example no. 7
# attach_detach_buf.py

import numpy as np
from mpi4py import MPI


comm = MPI.COMM_WORLD
rank = comm.Get_rank()

max_msg_size = 2**10
BUFSIZE = 32 * max_msg_size
mpi_buf = bytearray(BUFSIZE)

# Attach a big user-provided buffer for sending in buffered mode
MPI.Attach_buffer(mpi_buf)

recv_buf = np.empty((max_msg_size,), np.float64)

if rank == 0:
    print('-' * 80)
    print('With an attached big buffer:')
    print()

msg_size = 1
tag = 0
while msg_size <= max_msg_size:
    msg = np.random.random((msg_size,))
    if rank == 0:
        print('Trying with size: ', msg_size)

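    # Bsend copies the message into the attached buffer, so the call returns
    # without waiting for a matching receive to be posted.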
    comm.Bsend(msg, (rank+1)%2, tag)
Example no. 8
def worker(comm, whole_comm, args):
    #distributed setting
    rank = comm.Get_rank()
    size = comm.Get_size()

    #assign out/in neighbors
    neighbors = 1
    out_peers = [(rank + 1 + i) % size for i in range(3)]
    #[(rank + 1) % size, (rank + size - 1) % size]
    in_peers = [(rank - 1 - i + size) % size for i in range(3)]
    print(rank, out_peers, in_peers)
    in_deg = len(in_peers)
    out_deg = len(out_peers)

    #load data and model
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    gpu_ind = rank % torch.cuda.device_count()
    print(rank, "gpu", gpu_ind)
    device = torch.device("cuda:" + str(gpu_ind) if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    #prepare training dataset
    global_dataset = datasets.MNIST('./data',
                                    train=True,
                                    download=True,
                                    transform=transforms.Compose([
                                        transforms.ToTensor(),
                                        transforms.Normalize((0.1307, ),
                                                             (0.3081, ))
                                    ]))
    data_size = len(global_dataset)
    seg = data_size // size
    rmd = data_size % size
    if rank + 1 <= rmd:
        seg += 1
        local_a = rank * seg
    else:
        local_a = rank * seg + rmd
    ind = np.arange(local_a, local_a + seg, dtype=int)
    local_dataset = torch.utils.data.Subset(global_dataset, ind)

    #move training data to gpu
    data_tensor = []
    target_tensor = []
    for i in range(local_dataset.__len__()):
        data, target = local_dataset.__getitem__(i)
        data_tensor.append(data)
        target_tensor.append(torch.tensor(target))
    data_tensor = torch.stack(data_tensor).to(device)
    target_tensor = torch.stack(target_tensor).to(device)
    tensor_local_dataset = torch.utils.data.TensorDataset(
        data_tensor, target_tensor)

    train_loader = torch.utils.data.DataLoader(tensor_local_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True)  #, **kwargs)

    #sample a batch
    trainloader_iter = iter(train_loader)
    try:
        batch, target = next(trainloader_iter)
    except StopIteration:
        trainloader_iter = iter(train_loader)
        batch, target = next(trainloader_iter)

    #start time
    start_t = time.time()

    #initial state: x0, y0
    Net = Model().to(device)
    Net.train()
    local_x = []
    for param in Net.parameters():
        local_x.append(param.data.view(-1).clone())
    local_x = torch.cat(local_x).detach()
    net_size = local_x.size()[0]

    optimizer = optim.SGD(Net.parameters(), lr=args.lr)
    obj = worker_grad(Net, device, batch, target, optimizer)
    grad = []
    for param in Net.parameters():
        grad.append(param.grad.view(-1).clone())
    grad = torch.cat(grad).detach()
    local_y = grad.clone().detach()
    last_grad = torch.zeros_like(local_y, device=device, requires_grad=False)

    iter_num = args.iter_num
    lr = args.lr
    decay = 1

    #time series
    t_seq = np.zeros([
        iter_num,
    ], dtype=np.float32)
    loss_seq = np.zeros([
        iter_num,
    ], dtype=np.float32)

    #the number of accumulated received iterates
    acc_recv = 0

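    # Buffer space for MPI buffered-mode sends, sized for several rounds of
    # (x, y) messages to every out-neighbor.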
    buf = np.empty(10 * out_deg * (net_size * 2 + 10), dtype=np.float64)
    MPI.Attach_buffer(buf)

    #record if an neighbor has exited
    stop_flags = np.zeros([
        size,
    ], dtype=int)

    echo_interval = 100
    asyn = args.asyn
    send_req = []
    recv_deg = out_deg
    for i in range(iter_num):
        if not asyn:
            comm.Barrier()

        #send local x and y to out neighbors
        send_complete = MPI.Request.Testall(send_req)
        if recv_deg > 0 and send_complete == True:
            send_x = local_x - lr * local_y
            send_y = local_y / (out_deg + 1)
            send_num = np.minimum(recv_deg, out_deg)
            for outp in out_peers:  #np.random.choice(out_peers, size=send_num, replace=False):
                send_buf = torch.stack((send_x, send_y), dim=0).cpu().numpy()
                tag = 0
                send_req.append(
                    comm.Isend([send_buf, MPI.FLOAT], dest=outp, tag=tag))

        #receive local x and y from in neighbors
        #clear receive buffer and flag
        recv_flag = np.zeros((size, ), dtype=int)

        buf_x = torch.zeros(local_x.size(), requires_grad=False)
        buf_y = torch.zeros(local_y.size(), requires_grad=False)

        recv_deg = 0
        buf_size = [2, net_size]
        if asyn:
            wait_time = 5
            s_time = time.time()
            while in_deg > 0 and recv_deg == 0:
                info = MPI.Status()
                while comm.Iprobe(source=MPI.ANY_SOURCE, status=info):
                    recv_rank = info.source
                    recv_tag = info.tag
                    #print(i, rank, "receiving", recv_rank)
                    recvbuf = np.zeros(buf_size, dtype=np.float32)
                    comm.Recv([recvbuf, MPI.FLOAT], source=recv_rank)
                    if recv_tag == 1:
                        stop_flags[recv_rank] = 1
                        if recv_rank in out_peers:
                            out_peers.remove(recv_rank)
                            out_deg -= 1
                        if recv_rank in in_peers:
                            in_peers.remove(recv_rank)
                            in_deg -= 1
                    else:
                        buf_x += torch.from_numpy(recvbuf[0, :])
                        buf_y += torch.from_numpy(recvbuf[1, :])
                        recv_flag[recv_rank] += 1
                        recv_deg += 1
                        acc_recv += 1
                    info = MPI.Status()
                    if recv_deg > in_deg:
                        break
                if time.time() - s_time > wait_time:
                    break
        else:
            for j, inp in enumerate(in_peers):
                recvbuf = np.zeros(buf_size, dtype=send_buf.dtype)
                comm.Recv([recvbuf, MPI.FLOAT], source=inp)
                buf_x += torch.from_numpy(recvbuf[0, :])
                buf_y += torch.from_numpy(recvbuf[1, :])
                recv_deg += 1
                recv_flag[inp] += 1
        buf_x = buf_x.to(device)
        buf_y = buf_y.to(device)

        if recv_deg > 0:
            # local update
            local_x -= lr * local_y
            local_y /= (out_deg + 1)

            #average consensus and update local x
            buf_x += local_x
            local_x = buf_x / (recv_deg + 1)
            buf_y += local_y

            #update net parameters
            assign_array_to_net(local_x, Net)

            #compute gradient and update localy
            last_grad.copy_(grad)
            try:
                batch, target = next(trainloader_iter)
            except StopIteration:
                trainloader_iter = iter(train_loader)
                batch, target = next(trainloader_iter)
            obj = worker_grad(Net, device, batch, target, optimizer)
            grad = []
            for param in Net.parameters():
                grad.append(param.grad.view(-1).clone())
            grad = torch.cat(grad).detach()
            local_y = buf_y + grad - last_grad
            lr *= decay

        t_seq[i] = time.time() - start_t
        loss_seq[i] = obj
        if i % echo_interval == 0:
            print('rank', rank, 'iter: ', i, 'time', t_seq[i], 'local obj',
                  obj)

    send_list = list(range(size))
    send_list.remove(rank)
    for node in send_list:
        sendbuf = np.zeros([
            1,
        ], )
        comm.Send(sendbuf, dest=node, tag=1)

    #receive the left iterates from neighbors
    '''
    info = MPI.Status()
    buffer = np.empty(buf_size, dtype=np.float32)
    while acc_recv < iter_num * in_deg:
        while comm.Iprobe(source=MPI.ANY_SOURCE, status=info):
            recv_rank = info.source
            comm.Recv([buffer, MPI.FLOAT], source=recv_rank)
            acc_recv += 1
            info = MPI.Status()
    '''
    whole_comm.send(np.stack((t_seq, loss_seq), axis=1).tolist(),
                    dest=whole_comm.Get_size() - 1)
    whole_comm.Send([local_x.cpu().numpy(), MPI.FLOAT],
                    dest=whole_comm.Get_size() - 1)