Example #1
    def execTask(self, task):
        """Wrapper function calling mapping/reducing/finalizing phase tasks,
        dispatch tasks to workers until all finished and collect feedback. 
        Faulty workers are removed from active duty work list.
        """
        atimer = Timer(task)
        print( 'Entering {0:s} phase...'.format(task) )

        taskDict = { 'Map':(self.mapIn, MAP_START, MAP_FINISH), \
                'Init':(self.mapIn, INIT_START, MAP_FINISH), \
                'Reduce':(self.reduceIn, REDUCE_START, REDUCE_FINISH) }

        # line up jobs and workers into priority queues
        jobs = taskDict[task][0][:]
        heapq.heapify(jobs)
        running = {}
        heapq.heapify(self.workers)

        while (jobs or running) and self.nActive > 0:
            # dispatch all jobs to all free workers
            while jobs and self.workers[0].isFree():
                job = heapq.heappop(jobs)
                worker = heapq.heappop(self.workers)
                world.send(job, dest=worker.id, tag=taskDict[task][1])
                worker.setBusy()
                heapq.heappush(self.workers, worker)
                running[job] = (time(), worker)
                if self.config['verbosity'] >= 6:
                    print('Dispatching file '+os.path.basename(job)+' to worker '+str(worker.id))
                # if no more free workers, break
                if not self.workers[0].isFree(): break

            # wait for finishing workers as well as do cleaning
            self.wait(running, taskDict[task][2])
            self.clean(running, jobs)

        print( '{0:s} phase completed'.format(task) )
Example #2
    def map(self, tag):
        """
        Execute the supplied mapfn on each key-value pair read from the
        file assigned by the master node.
        """
        atimer = Timer('Worker_Map')

        # load key-value pairs from filename
        filename = world.recv(source=0, tag=tag)
        data = self.read(filename, tag)

        buffer = [ [] for ii in range(self.config['nReduce']) ]
        for key, val in data.items():
            for newKey, newVal in self.config['mapfn'](key, val):
                idx = self.config['hashfn'](newKey) % self.config['nReduce']
                buffer[idx].append( (newKey, newVal) )

        # write out new key-value pairs in scattered files
        for ii in range(self.config['nReduce']):
            tmpfile = self.reduceIn[ii]+'-tmp'+str(world.rank)
            # dump in append mode
            with open(tmpfile, 'a+b') as fout:
                pickle.dump(buffer[ii], fout, pickle.HIGHEST_PROTOCOL)

        # report back as successful completion of task
        world.send(filename, dest=0, tag=MAP_FINISH)
Example #3
    def map(self, f, tasks):
        N = len(tasks)
        P = self.P
        Pless1 = P - 1
        if self.rank != 0:
            self.wait()
            return

        if f is not self.f:
            self.f = f
            requests = []
            for p in range(1, self.P):
                r = COMM_WORLD.isend(f, dest=p)
                requests.append(r)
            MPI.Request.waitall(requests)

        requests = []
        for i, task in enumerate(tasks):
            r = COMM_WORLD.isend(task, dest=(i%Pless1)+1, tag=i)
            requests.append(r)
        MPI.Request.waitall(requests)

        results = []
        for i in range(N):
            result = COMM_WORLD.recv(source=(i%Pless1)+1, tag=i)
            results.append(result)
        return results
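A usage sketch for this master/worker pair, mirroring Example #24 below (Pool is the assumed class holding these methods, launched under e.g. mpiexec -n 4):

from mpi4py.MPI import COMM_WORLD

def double(x):                  # first use ships this function to the workers
    return 2 * x

pool = Pool()                   # assumed wrapper exposing map()/wait()
if COMM_WORLD.rank == 0:
    print(pool.map(double, list(range(8))))   # [0, 2, 4, ..., 14]
else:
    pool.wait()                 # workers loop until a falsy sentinel arrives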
Example #4
    def test_padded_kernel(self):
        """ Implement a simple padded kernel. """
        for case in self.cases:
            # Form data to work on.
            space.initialize_space(case['shape'])
            x_np = comm.allreduce(np.random.randn(*case['shape']).astype(case['dtype']))
            x = Grid(x_np, x_overlap=1)
            s_np = comm.allreduce(np.random.randn(1).astype(case['dtype']))
            s = Const(s_np)
            z = Out(case['dtype'])

            # Make a kernel.
            code = Template("""
                            if (_in_local && _in_global) {
                                x(0,0,0) = s(0) * x(0,0,0);
                                z += a * x(0,0,0);
                            }
                            """).render()
            fun = Kernel(code, \
                        ('a', 'number', case['dtype']), \
                        ('x', 'grid', x.dtype), \
                        ('s', 'const', s.dtype, s.data.size), \
                        ('z', 'out', z.dtype), \
                        padding=(1,1,1,1))

            # Execute and check the result.
            fun(case['dtype'](2), x, s, z)
            gpu_sum = z.get()
            cpu_sum = np.sum(2.0 * s_np * x_np)
            err = abs(gpu_sum - cpu_sum) / abs(cpu_sum)
            # print case, err
            if case['dtype'] in (np.float32, np.complex64):
                self.assertTrue(err < 1e-2, (case, err))
            else:
                self.assertTrue(err < 1e-6, (case, err))
Example #5
    def sync(self, key):
        """Synchronize dataType with all worker nodes.
        """
        tagDict = { 'Map': (UPDATE_MAP, self.mapIn),\
                'Reduce': (UPDATE_REDUCE, self.reduceIn),\
                'Config': (UPDATE_CONFIG, self.config) }

        for ii in range(1, len(self.workers)+1):
            world.send(tagDict[key][1], dest=ii, tag=tagDict[key][0])
Example #6
 def wait(self):
     if self.rank == 0:
         raise RuntimeError("Proc 0 cannot wait!")
     status = MPI.Status()
     while True:
         task = COMM_WORLD.recv(source=0, tag=MPI.ANY_TAG, status=status)
         if not task:
             break
         if isinstance(task, FunctionType):
             self.f = task
             continue
         result = self.f(task)
         COMM_WORLD.isend(result, dest=0, tag=status.tag)
Example #7
    def update(self, tag):
        """Update file list and global configurations
        """
        atimer = Timer('Worker_Update')

        if tag == UPDATE_MAP:
            self.mapIn = world.recv(source=0, tag=tag)
        elif tag == UPDATE_REDUCE:
            self.reduceIn = world.recv(source=0, tag=tag)
            self.reduceOut = [ os.path.splitext(file)[0]+'.red' for file in self.reduceIn ]
        elif tag == UPDATE_CONFIG:
            self.config = world.recv(source=0, tag=tag)
        else:
            raise ValueError('Wrong tag specified.')
Example #8
def get_cpu_raw(cpu_data, k):
    # Make sure overlapped data is accurate as well.
    xr = space.get_space_info()['x_range']
    if comm.Get_rank() == 0:
        pad_back = cpu_data[-k:, :, :]
    else:
        pad_back = cpu_data[xr[0] - k:xr[0], :, :]

    if comm.Get_rank() == comm.Get_size() - 1:
        pad_front = cpu_data[:k, :, :]
    else:
        pad_front = cpu_data[xr[1]:xr[1] + k, :, :]

    return np.concatenate((pad_back, cpu_data[xr[0]:xr[1],:,:], \
                                pad_front), axis=0)
Example #9
    def wait(self, running, tag):
        """Test if any worker has finished its job.
        If so, decrease its key and make it available
        """
        atimer = Timer('Wait')

        inittime = time()
        status = MPI.Status()
        while time() - inittime < self.config['jobwait']:
            if world.Iprobe(source=MPI.ANY_SOURCE,tag=tag,status=status):
                jobf = world.recv(source=status.source, tag=tag)
                idx = 0
                for ii, worker in enumerate(self.workers):
                    if worker.id == status.source:
                        idx = ii
                        break
                if self.config['verbosity'] >= 8:
                    print('Freeing worker '+str(self.workers[idx].id))
                worker = self.workers[idx]

                # faulty worker's job has already been cleaned
                if not worker.isFaulty():
                    del running[jobf]
                else:
                    self.nActive += 1
                worker.setFree()
                heapq._siftup(self.workers, idx)
Example #10
def _init_gpu(comm):
    """ Chooses a gpu and creates a context on it. """
    # Find out how many GPUs are available to us on this node.
    driver.init()
    num_gpus = driver.Device.count()

    # Figure out the names of the other hosts.
    rank = comm.Get_rank()  # Find out which process I am.
    name = MPI.Get_processor_name()  # The name of my node.
    hosts = comm.allgather(name)  # Get the names of all the other hosts

    # Find out which GPU to take (by precedence).
    gpu_id = hosts[0:rank].count(name)
    if gpu_id >= num_gpus:
        raise TypeError("No GPU available.")

    # Create a context on the appropriate device.
    for k in range(num_gpus):
        try:
            device = driver.Device((gpu_id + k) % num_gpus)
            context = device.make_context()
        except Exception:
            continue
        else:
            #             print "On %s: process %d taking gpu %d of %d.\n" % \
            #                 (name, rank, gpu_id+k, num_gpus)
            break

    return device, context  # Return device and context.
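A usage sketch: one CUDA context per MPI rank, released when the rank finishes (pycuda contexts must be popped explicitly):

from mpi4py import MPI

device, context = _init_gpu(MPI.COMM_WORLD)   # one GPU per rank, by precedence
try:
    pass                                      # ... launch kernels here ...
finally:
    context.pop()                             # release the CUDA context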
Example #11
    def test_batch_sum(self):
        """ Make sure batch summing works. """
        num_outs = 3
        for case in self.cases:
            space.initialize_space(case['shape'])
            x = [Out(case['dtype'], op='sum') for k in range(num_outs)]
            x_cpu_data = [np.random.randn(*case['shape'][1:])\
                            .astype(case['dtype']) for k in range(num_outs)]
                    
            if case['dtype'] in (np.complex64, np.complex128):
                for k in range(num_outs):
                    x_cpu_data[k] = (1 + 1j) * x_cpu_data[k]

            res_gold = []
            for k in range(num_outs):
                x[k].data.set(x_cpu_data[k])
                res_gold.append(comm.allreduce(np.sum(x_cpu_data[k].flatten())))

            batch_reduce(*x)
            res_gpu = [x_indiv.get() for x_indiv in x]

            for k in range(num_outs):
                err = abs(res_gold[k] - res_gpu[k]) / abs(res_gold[k])

                if case['dtype'] in (np.float32, np.complex64):
                    self.assertTrue(err < 1e-3)
                else:
                    self.assertTrue(err < 1e-10)
Example #12
    def update(self, tag):
        """Update file list and global configurations
        """
        atimer = Timer('Worker_Update')

        if tag == UPDATE_MAP:
            self.mapIn = world.recv(source=0, tag=tag)
        elif tag == UPDATE_REDUCE:
            self.reduceIn = world.recv(source=0, tag=tag)
            self.reduceOut = [
                os.path.splitext(file)[0] + '.red' for file in self.reduceIn
            ]
        elif tag == UPDATE_CONFIG:
            self.config = world.recv(source=0, tag=tag)
        else:
            raise ValueError('Wrong tag specified.')
Example #13
    def get(self):
        """ Redefined so that we don't get overlap data. """
        # Get our section of the grid (excluding overlap).
        if self._xlap == 0:
            data = self.data.get()
        else:
            data = self.data.get()[self._xlap:-self._xlap,:,:]
        
#         return np.concatenate(comm.allgather(data), axis=0) # Super-simple.

        result = comm.gather(data) # Gather all pieces to root.
        if comm.Get_rank() == 0:
            # Root node glues everything together.
            return np.concatenate(result, axis=0) 
        else: 
            return None
Example #14
def debug(*s):
    import sys
    from mpi4py.MPI import COMM_WORLD
    print('[rank:{}]'.format(COMM_WORLD.Get_rank()),
          *s,
          file=sys.stderr,
          flush=True)
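For example, every rank prefixes its own output (ordering across ranks is not guaranteed):

debug('loaded', 42, 'items')    # on rank 1 prints: [rank:1] loaded 42 items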
Example #15
    def run(self):
        """Receiving job instructions from the master node until
        TERMINATE signal received. Allowed tasks are defined in taskDict
        """
        atimer = Timer('Worker')

        # tasks define signal-behavior in the run function
        taskDict = { MAP_START: self.map, REDUCE_START: self.reduce,\
                INIT_START: self.map,\
                UPDATE_MAP: self.update, UPDATE_REDUCE: self.update,\
                UPDATE_CONFIG: self.update }

        status = MPI.Status()
        while True:
            # ping input
            if not world.Iprobe(source=0, tag=MPI.ANY_TAG, status=status):
                sleep(self.config['delay'])

            # entire calculation finished
            elif status.tag == TERMINATE:
                term = world.recv(source=0, tag=TERMINATE)
                break

            # check allowed tasks
            elif status.tag in taskDict:
                taskDict[status.tag](status.tag)

            # no instruction found, looping
            else:
                sleep(self.config['delay'])
Example #16
def main():
    args = parse_args()
    assert args.pretrained_model_path is None or args.pretrained_model_path.endswith(
        ".ckpt")
    os.makedirs(args.save_dir, exist_ok=True)
    save_args(args, args.save_dir)
    set_seed(args.seed + COMM_WORLD.Get_rank() * 100)
    nprocs = COMM_WORLD.Get_size()

    # Initialize model and agent policy
    aurora = Aurora(args.seed + COMM_WORLD.Get_rank() * 100, args.save_dir,
                    int(7200 / nprocs), args.pretrained_model_path,
                    tensorboard_log=args.tensorboard_log)
    # training_traces, validation_traces,
    training_traces = []
    val_traces = []
    if args.train_trace_file:
        with open(args.train_trace_file, 'r') as f:
            for line in f:
                line = line.strip()
                training_traces.append(Trace.load_from_file(line))

    if args.val_trace_file:
        with open(args.val_trace_file, 'r') as f:
            for line in f:
                line = line.strip()
                if args.dataset == 'pantheon':
                    queue = 100  # dummy value
                    # if "ethernet" in line:
                    #     queue = 500
                    # elif "cellular" in line:
                    #     queue = 50
                    # else:
                    #     queue = 100
                    val_traces.append(Trace.load_from_pantheon_file(
                        line, queue=queue, loss=0))
                elif args.dataset == 'synthetic':
                    val_traces.append(Trace.load_from_file(line))
                else:
                    raise ValueError

    aurora.train(args.randomization_range_file,
                 args.total_timesteps, tot_trace_cnt=args.total_trace_count,
                 tb_log_name=args.exp_name, validation_flag=args.validation,
                 training_traces=training_traces,
                 validation_traces=val_traces,
                 real_trace_prob=args.real_trace_prob)
Example #17
def main(argv=None):

    args = process_command_line(argv)

    # note that in MPI mode, lengths will be global, whereas data will
    # be local (i.e. only this node's data).
    lengths, data = load_trjs_or_features(args)

    kwargs = {}
    if args.cluster_iterations is not None:
        kwargs['kmedoids_updates'] = int(args.cluster_iterations)

    clustering = args.Clusterer(metric=args.cluster_distance,
                                n_clusters=args.cluster_number,
                                cluster_radius=args.cluster_radius,
                                mpi_mode=mpi_mode,
                                **kwargs)

    clustering.fit(data)
    # release the RAM held by the trajectories (we don't need it anymore)
    del data

    logger.info("Clustered %s frames into %s clusters in %s seconds.",
                sum(lengths), len(clustering.centers_), clustering.runtime_)

    result = clustering.result_
    if mpi_mode:
        local_ctr_inds, local_dists, local_assigs = \
            result.center_indices, result.distances, result.assignments

        with timed("Reassembled dist and assign arrays in %.2f sec",
                   logging.info):
            all_dists = mpi.ops.assemble_striped_ragged_array(
                local_dists, lengths)
            all_assigs = mpi.ops.assemble_striped_ragged_array(
                local_assigs, lengths)
            ctr_inds = mpi.ops.convert_local_indices(local_ctr_inds, lengths)

        result = ClusterResult(center_indices=ctr_inds,
                               distances=all_dists,
                               assignments=all_assigs,
                               centers=result.centers)
    result = result.partition(lengths)

    if mpi.rank() == 0:
        with timed("Wrote center indices in %.2f sec.", logger.info):
            write_centers_indices(args.center_indices,
                                  [(t, f * args.subsample)
                                   for t, f in result.center_indices])
        with timed("Wrote center structures in %.2f sec.", logger.info):
            write_centers(result, args)
        write_assignments_and_distances_with_reassign(result, args)

    mpi.comm.barrier()

    logger.info("Success! Data can be found in %s.",
                os.path.dirname(args.distances))

    return 0
Example #18
def RandomSize(N_lower, N_upper):
    if COMM_WORLD.rank == 0:
        size = randint(N_lower, N_upper)
    else:
        size = None
    size = COMM_WORLD.bcast(size, root=0)
    assert size is not None
    return size
Example #19
def RandomNumber():
    if COMM_WORLD.rank == 0:
        number = _rand(1)[0]
    else:
        number = None
    number = COMM_WORLD.bcast(number, root=0)
    assert number is not None
    return number
Example #20
def RandomTuple(Q):
    if COMM_WORLD.rank == 0:
        tuple_ = tuple(float(v) for v in _rand(Q))
    else:
        tuple_ = None
    tuple_ = COMM_WORLD.bcast(tuple_, root=0)
    assert tuple_ is not None
    return tuple_
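Examples #18-#20 share one pattern: rank 0 generates a value and bcast distributes it, so every rank ends up with identical data. A generic sketch of the pattern (the helper name is an assumption):

from mpi4py.MPI import COMM_WORLD

def bcast_from_root(make_value, root=0):
    # Generate on the root rank only, then broadcast to all ranks.
    value = make_value() if COMM_WORLD.rank == root else None
    value = COMM_WORLD.bcast(value, root=root)
    assert value is not None
    return value

# e.g. size = bcast_from_root(lambda: randint(N_lower, N_upper))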
Example #21
    def __init__(self):
        """Obtain configurations and all filelists from the master node
        """
        assert world.rank >= 1

        # synchronize configuration file and file lists with the master node
        self.config = {}
        self.mapIn = []
        self.reduceIn = []
        self.reduceOut = []
        self.config = world.bcast(self.config, root=0)
Example #22
def simulate(name, check_success_only=False):
    """ Read simulation from input file, simulate, and write out results. """

    # Reset the environment variables pointing to the temporary directory.
    tempfile.tempdir = '/tmp'

    # Create the reporter function.
    write_status = lambda msg: open(name + '.status', 'a').write(msg)
    if comm.Get_rank() == 0:
        # write_status('EXEC initializing\n')
        def rep(err):
            write_status('%e\n' % err)
    else:  # No reporting needed for non-root nodes.

        def rep(err):
            pass

    # Get input parameters.
    params = get_parameters(name)

    # Define operations needed for the lumped bicg operation.
    b, x, ops, post_cond = maxwell_ops_lumped.ops(params)

    # Solve!
    start_time = time.time()
    rep.stime = start_time
    x, err, success = bicg.solve_symm_lumped(b, x=x, \
                                            max_iters=params['max_iters'], \
                                            reporter=rep, \
                                            err_thresh=params['err_thresh'], \
                                            **ops)

    if check_success_only:  # Don't write output, just see if we got a success.
        return success

    # Gather results onto root's host memory.
    result = {  'E': [E.get() for E in x], \
                'err': err, \
                'success': success}

    # Write results to output file.
    if comm.Get_rank() == 0:
        result['E'] = post_cond(result['E'])  # Apply postconditioner.
        write_results(name, result)

    return success
Example #23
 def array(self, N=0, filename=None, component=None, root=0):
     """Dump data to numpy format on root processor."""
     assert(N == 0 or N == 1)
     is_root = comm.Get_rank() == root
     size = self.get_total_number_probes() if is_root else len(self)
     comp = self.value_size() if component is None else 1        
     z  = zeros((size, comp))
     
     # Retrieve all values
     if len(self) > 0: 
         for k in range(comp):
             if is_root:
                 ids = self.get_probe_ids()
                 z[ids, k] = self.get_probes_component_and_snapshot(k, N)
             else:
                 z[:, k] = self.get_probes_component_and_snapshot(k, N)
                  
     # Collect on root
     recvfrom = comm.gather(len(self), root=root)
     if is_root:
         for j, k in enumerate(recvfrom):                
             if comm.Get_rank() != j:
                 ids = comm.recv(source=j, tag=101)
                 z0 = comm.recv(source=j, tag=102)
                 z[ids, :] = z0[:, :]
     else:
         ids = self.get_probe_ids()
         comm.send(ids, dest=root, tag=101)
         comm.send(z, dest=root, tag=102)
         
     if is_root:
         if filename:
             z.dump(filename+"_statistics.probes")
         return squeeze(z)
Example #24
def simulate(N, D, S, G, dt):
    x0, v0, m = initial_cond(N, D)
    pool = Pool()
    if COMM_WORLD.Get_rank() == 0:
        for s in range(S):
            x1, v1 = timestep(x0, v0, G, m, dt, pool)
            x0, v0 = x1, v1
    else:
        pool.wait()
Example #25
    def array(self, N=None, filename=None, component=None, root=0):
        """Dump data to numpy format on root processor for all or one snapshot."""
        is_root = comm.Get_rank() == root
        size = self.get_total_number_probes() if is_root else len(self)
        comp = self.value_size() if component is None else 1
        if N is not None:
            z = zeros((size, comp))
        else:
            z = zeros((size, comp, self.number_of_evaluations()))

        # Get all values
        if len(self) > 0:
            if N is not None:
                for k in range(comp):
                    if is_root:
                        ids = self.get_probe_ids()
                        z[ids,
                          k] = self.get_probes_component_and_snapshot(k, N)
                    else:
                        z[:, k] = self.get_probes_component_and_snapshot(k, N)
            else:
                for i, (index, probe) in enumerate(self):
                    j = index if is_root else i
                    if N is not None:
                        z[j, :] = probe.get_probe_at_snapshot(N)
                    else:
                        for k in range(self.value_size()):
                            z[j, k, :] = probe.get_probe_sub(k)

        # Collect values on root
        recvfrom = comm.gather(len(self), root=root)
        if is_root:
            for j, k in enumerate(recvfrom):
                if comm.Get_rank() != j:
                    ids = comm.recv(source=j, tag=101)
                    z0 = comm.recv(source=j, tag=102)
                    z[ids, :] = z0[:, :]
        else:
            ids = self.get_probe_ids()
            comm.send(ids, dest=root, tag=101)
            comm.send(z, dest=root, tag=102)

        if is_root:
            if filename:
                if N is not None:
                    save(filename + "_snapshot_" + str(N), z)
                else:
                    save(filename + "_all", z)
            return squeeze(z)
Example #26
def exchange_guard_cells( physical_F_left, physical_F_right ):
    # MPI exchanges of guard cells
    # Send physical cell to left proc
    req1 = mpi_comm.isend( physical_F_left,
                dest=(mpi_comm.rank-1)%mpi_comm.size )
    # Send physical cell to right proc
    req2 = mpi_comm.isend( physical_F_right,
                dest=(mpi_comm.rank+1)%mpi_comm.size )
    # Receive value from right proc
    req3 = mpi_comm.irecv( source=(mpi_comm.rank+1)%mpi_comm.size )
    # Receive value from left proc
    req4 = mpi_comm.irecv( source=(mpi_comm.rank-1)%mpi_comm.size )
    # Wait for the processors to finish sending/receiving
    req1.wait()
    req2.wait()
    F_from_right = req3.wait()
    F_from_left = req4.wait()

    return F_from_left, F_from_right
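The modulo arithmetic closes the ranks into a ring, giving periodic boundaries: with 4 processes, rank 0 sends its left cell to rank 3 and receives from rank 1. A minimal call sketch, assuming a field F with one guard cell on each side (as in Example #34, which strips F[1:-1]):

F[0], F[-1] = exchange_guard_cells(F[1], F[-2])   # refresh both guard cells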
Example #27
def all_reduce(tensor: torch.Tensor,
               op=ReduceOp.SUM,
               comm: MPI.Comm = None) -> torch.Tensor:
    param_numpy = tensor.numpy()
    param_output = np.empty(param_numpy.shape, dtype=param_numpy.dtype)
    if comm is None:
        comm = _get_comm()
    comm.Allreduce(param_numpy, param_output, op=op.value)
    tensor = torch.from_numpy(param_output)
    return tensor
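A usage sketch (ReduceOp wrapping the MPI operation and _get_comm are assumed from the snippet's own imports):

import torch

t = torch.ones(4)          # identical shape on every rank
t = all_reduce(t)          # element-wise sum across ranks
# with 3 ranks: tensor([3., 3., 3., 3.])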
Example #28
def _build_cat_distributed(comm, name, path):
    # Control flow explanation:
    # * `build_err` starts out as `None`
    # * Rank 1 to N wait for a broadcast from rank 0 to receive the new value
    #   for `build_err`
    # * Rank 0 splits off from the others and executes the build.
    #   * If it builds correctly it finishes the collective `build_err`
    #     broadcast with the initial value `None`: all nodes continue.
    #   * If it errors, it finishes the collective broadcast with the caught err
    #
    # All MPI ranks either continue or raise the same err. (prevents stalling)
    build_err = None
    if not comm.Get_rank():
        try:
            _build_cat_local(name, path)
        except Exception as e:
            build_err = e
    build_err = comm.bcast(build_err, root=0)
    if build_err:
        raise build_err
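The same broadcast-the-exception idiom fits any root-only side effect. A sketch for creating a shared directory (comm is an MPI communicator as above; outdir is a hypothetical path):

import os

err = None
if not comm.Get_rank():
    try:
        os.makedirs(outdir, exist_ok=True)   # root does the work
    except OSError as e:
        err = e                              # capture instead of raising
err = comm.bcast(err, root=0)                # all ranks learn the outcome
if err:
    raise err                                # everyone raises, nobody stalls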
Example #29
 def test_partition(self):
     """ Make sure the x_ranges span the entire space without any gaps. """
     shapes = ((200,30,10), (33,10,10), (130,5,5), (111,2,2))
     for shape in shapes:
         space.initialize_space(shape)
         x = comm.gather(space.get_space_info()['x_range'])
         if comm.Get_rank() == 0:
             self.assertEqual(x[0][0], 0)
             self.assertEqual(x[-1][-1], space.get_space_info()['shape'][0])
             for k in range(len(x)-1):
                 self.assertEqual(x[k][1], x[k+1][0])
Example #30
    def __init__(self):
        """Obtain configurations and all filelists from the master node
        """
        assert world.rank >= 1

        # synchronize configuration file and file lists with the master node
        self.config = {}
        self.mapIn = []
        self.reduceIn = []
        self.reduceOut = []
        self.config = world.bcast(self.config, root=0)
Example #31
    def run(self):
        """Running sequence of the master node
        """
        atimer = Timer('Master')

        while self.iterCtrl():
            # map phase
            if self.init:
                self.execTask('Init')
                self.init = False
            else:
                self.execTask('Map')
            # reduce phase
            self.execTask('Reduce')

        # terminate all workers
        for ii in range(1, len(self.workers)+1):
            world.send(True, dest=ii, tag=TERMINATE)

        # final output, serial execution only
        self.finalize()
Example #32
    def run(self):
        """Running sequence of the master node
        """
        atimer = Timer('Master')

        while self.iterCtrl():
            # map phase
            if self.init:
                self.execTask('Init')
                self.init = False
            else:
                self.execTask('Map')
            # reduce phase
            self.execTask('Reduce')

        # terminate all workers
        for ii in range(1, len(self.workers) + 1):
            world.send(True, dest=ii, tag=TERMINATE)

        # final output, serial execution only
        self.finalize()
Example #33
    def test_recover(self):
        """ Make sure we can store and retrieve information from the GPU. """
        for case in self.cases:
            space.initialize_space(case['shape'])
            data = np.random.randn(*case['shape']).astype(case['dtype'])
            cpu_data = np.empty_like(data)
            comm.Allreduce(data, cpu_data)
            g = Grid(cpu_data)
            gpu_data = g.get()
            if comm.Get_rank() == 0:
                self.assertTrue((cpu_data == gpu_data).all())

            # Test with-overlap cases as well.
            for k in range(1, 3):
                g = Grid(cpu_data, x_overlap=k)
                gpu_data = g.get()
                if comm.Get_rank() == 0:
                    self.assertTrue((cpu_data == gpu_data).all())

                cpu_raw = get_cpu_raw(cpu_data, k)
                self.assertTrue((cpu_raw == g._get_raw()).all())
Example #34
    def plot_fields( self, save_figure=False ):
        """
        Plot the Ex and By field using matplotlib
        If save_figure is True, the plots are saved as PNG files,
        in a folder named `diagnostics`
        """
        # PLOTTING: NEW LINES RELATED TO MPI
        Ex_from_all_procs = mpi_comm.gather( self.Ex[1:-1] )
        By_from_all_procs = mpi_comm.gather( self.By[1:-1] )

        if mpi_comm.rank == 0:
            print( 'Plotting the fields at iteration %d' %self.n )
            
            global_Ex = np.concatenate( Ex_from_all_procs )
            global_By = np.concatenate( By_from_all_procs )

            plt.clf()
            plt.suptitle('Fields at iteration %d' %self.n)
            # Plot of Ex
            plt.subplot(211)
            z = self.dz*np.arange( self.Nz_global )
            plt.plot( z, global_Ex, '-' )
            plt.ylim(-1.1, 1.1)
            plt.xlim(0, self.Lz)
            plt.ylabel('$E_x^n$')
            plt.xlabel('z')
            # Plot of By
            plt.subplot(212)
            z = self.dz*np.arange( self.Nz_global ) + 0.5*self.dz
            plt.plot( z, global_By, '-' )
            plt.ylim(-1.1/c, 1.1/c)
            plt.xlim(0, self.Lz)
            plt.ylabel('$B_y^{n-1/2}$')
            plt.xlabel('z')

            if save_figure:
                # Check that the diagnostics folder exists
                if not os.path.exists('diagnostics'):
                    os.mkdir('diagnostics')
                plt.savefig( "diagnostics/iteration%03d.png" %self.n)
Example #35
def all_reduce(tensor: EagerTensor,
               op=ReduceOp.SUM,
               comm: MPI.Comm = None) -> EagerTensor:
    param_numpy = tensor.numpy()
    original_shape = param_numpy.shape
    param_numpy_flatten = param_numpy.flatten()
    param_output = np.empty(param_numpy_flatten.shape, dtype=param_numpy.dtype)
    if comm is None:
        comm = _get_comm()
    comm.Allreduce(param_numpy_flatten, param_output, op=op.value)
    param_output = np.reshape(param_output, original_shape)
    tensor = tutils.to_tensor(param_output)
    return tensor
Example #36
    def test_simple_kernel(self):
        """ Implement a simple kernel. """
        for case in self.cases:
            # Form data to work on.
            space.initialize_space(case['shape'])
            x_np = comm.allreduce(np.random.randn(*case['shape']).astype(case['dtype']))
            x = Grid(x_np, x_overlap=2)
            s_np = comm.allreduce(np.random.randn(case['shape'][0],1,1).astype(case['dtype']))
            s = Const(s_np)
            z = Out(case['dtype'])

            # Make a kernel.
            code = Template("""
                            if (_in_local && _in_global) {
                                z += a * s(_X) * x(0,0,0);
                                // z += a * x(0,0,0);
                            }
                            """).render()
            fun = Kernel(code, \
                        ('a', 'number', case['dtype']), \
                        ('x', 'grid', x.dtype), \
                        ('s', 'const', s.dtype), \
                        ('z', 'out', z.dtype), \
                        shape_filter='all')

            # Execute and check the result.
            # fun()
            while fun.exec_configs:
            # for k in range(40):
                fun(case['dtype'](2.0), x, s, z)
                # fun(case['dtype'](2.0), x, z)
                gpu_sum = z.get()
                cpu_sum = np.sum(2 * s_np * x_np)
                # cpu_sum = np.sum(2 * x_np)
                err = abs(gpu_sum - cpu_sum) / abs(cpu_sum)
                if case['dtype'] in (np.float32, np.complex64):
                    self.assertTrue(err < 1e-2, (case, err))
                else:
                    self.assertTrue(err < 1e-6, (case, err))
Example #37
def plot(x, psi, psi_0, nt, v):
    ###
    rcParams["figure.figsize"] = [8 / mpi.Get_size(), 5]
    ###
    pyplot.step(x, psi_0(x), label='initial', where='mid')
    pyplot.step(x, psi_0(x - v * nt), label='analytical', where='mid')
    pyplot.step(x, psi, label='numerical', where='mid')
    pyplot.grid()
    pyplot.gca().set_ylim([0, 12])
    pyplot.legend()
    ###
    # pyplot.savefig("out.svg")
    pyplot.savefig(f"out.{mpi.Get_rank()}.svg")
Example #38
    def __init__(self, config):
        """Read in user created config module and initialize the master node
        """
        # set default values and update according to user input (config)
        # NOTE: input files should be prepared in the user module
        # (e.g. split the BIG file into smaller chunks using 'split')
        # each file is fed into a mapper, supposing it can fit into mapper's memory

        assert hasattr(config, 'mapfn') and hasattr(config, 'reducefn')
        self.config = {'nReduce':1, 'nMap':1, 'maxLoop':1, 'appendReduce':True,\
                'scratchFolder':'./', 'readPickle':False, 'writePickle':False,\
                'verbosity':6, 'timeout':60, 'delay':0.2, 'jobwait':1,\
                'mapfn':config.mapfn, 'reducefn':config.reducefn, 'ctrlfn':None,\
                'finalfn':None, 'readfn':None, 'hashfn':hash }

        if world.size == 1:
            raise AttributeError('Parallel mode only! At least one worker node is required.')

        # number of mapping tasks by default equals number of initial files
        # it can be overridden by user input
        assert isinstance(config.initFiles, list)
        self.config['nMap'] = len(config.initFiles)
        self.initFiles = config.initFiles

        # read in user defined configurations
        for key, val in self.config.items():
            if hasattr(config, key): self.config[key] = getattr(config, key)

        # sync config with all nodes
        self.config = world.bcast(self.config, root=0)

        # setup workers into a priority queue
        self.workers = [ State(ii) for ii in range(1, world.size) ]
        heapq.heapify(self.workers)
        self.nActive =  world.size - 1

        # assign map / reduce / finalize file list
        tmpList = [ config.__name__+'_'+str(ii).zfill(len(str(self.config['nMap'])))\
                +'.map' for ii in range(1, self.config['nMap']+1) ]
        self.mapIn = [ os.path.join(self.config['scratchFolder'], file) for file in tmpList ]

        tmpList = [ config.__name__+'_'+str(ii).zfill(len(str(self.config['nReduce'])))\
                +'.int' for ii in range(1, self.config['nReduce']+1) ]
        self.reduceIn = [ os.path.join(self.config['scratchFolder'], file) for file in tmpList ]
        self.reduceOut = [ os.path.splitext(file)[0]+'.red' for file in self.reduceIn ]

        # Currently only support single output file
        self.finalOut = [ config.__name__+'.out' ]

        # count number of iterations
        self.nLoop = 0
        self.init = True
Example #39
    def execTask(self, task):
        """Wrapper function calling mapping/reducing/finalizing phase tasks,
        dispatch tasks to workers until all finished and collect feedback. 
        Faulty workers are removed from active duty work list.
        """
        atimer = Timer(task)
        print('Entering {0:s} phase...'.format(task))

        taskDict = { 'Map':(self.mapIn, MAP_START, MAP_FINISH), \
                'Init':(self.mapIn, INIT_START, MAP_FINISH), \
                'Reduce':(self.reduceIn, REDUCE_START, REDUCE_FINISH) }

        # line up jobs and workers into priority queues
        jobs = taskDict[task][0][:]
        heapq.heapify(jobs)
        running = {}
        heapq.heapify(self.workers)

        while (jobs or running) and self.nActive > 0:
            # dispatch all jobs to all free workers
            while jobs and self.workers[0].isFree():
                job = heapq.heappop(jobs)
                worker = heapq.heappop(self.workers)
                world.send(job, dest=worker.id, tag=taskDict[task][1])
                worker.setBusy()
                heapq.heappush(self.workers, worker)
                running[job] = (time(), worker)
                if self.config['verbosity'] >= 6:
                    print('Dispatching file ' + os.path.basename(job) +
                          ' to worker ' + str(worker.id))
                # if no more free workers, break
                if not self.workers[0].isFree(): break

            # wait for finishing workers as well as do cleaning
            self.wait(running, taskDict[task][2])
            self.clean(running, jobs)

        print('{0:s} phase completed'.format(task))
Example #40
    def reduce(self, tag):
        """Use supplied reducefn to operate on
        a list of values from a given key, generated by self.map()
        """
        atimer = Timer('Worker_Reduce')

        filename = world.recv(source=0, tag=tag)
        files = glob.glob(filename + '-tmp*')
        dataList = []
        for file in files:
            with open(file, 'rb') as fin:
                try:
                    while True:
                        dataList.extend(pickle.load(fin))
                except EOFError:  # read in every instance of pickle dump
                    pass
        data = {}
        for key, val in dataList:
            if key in data:
                data[key].append(val)
            else:
                data[key] = [val]
        results = []
        for key, values in data.items():
            results.append((key, self.config['reducefn'](key, values)))
        results.sort(key=itemgetter(0))

        # write out in dictionary format
        idx = self.reduceIn.index(filename)
        if self.config['appendReduce']:
            with open(self.reduceOut[idx], 'a+b') as fout:  # pickle needs binary mode
                pickle.dump(dict(results), fout, pickle.HIGHEST_PROTOCOL)
        else:
            with open(self.reduceOut[idx], 'w+b') as fout:  # pickle needs binary mode
                pickle.dump(dict(results), fout, pickle.HIGHEST_PROTOCOL)

        world.send(filename, dest=0, tag=REDUCE_FINISH)
Example #41
def process_dir(indir, outdir):
    main_text_files = glob.glob("{0}/main/*.txt".format(indir))
    rank = world.Get_rank()
    size = world.Get_size()
    main_text_files_2 = []
    for m in main_text_files:
        tilename = find_tilename(m)
        out_main = "{0}/main/{1}.fits".format(outdir, tilename)
        out_epoch = "{0}/epoch/{1}.fits".format(outdir, tilename)
        if not (os.path.exists(out_main) and os.path.exists(out_epoch)):
            main_text_files_2.append(m)
    main_text_files = main_text_files_2
    print "{0} files left to do".format(len(main_text_files))

    for i, main_text_file in enumerate(main_text_files):
        if i % size != rank:
            continue
        print(rank, main_text_file)
        tilename = find_tilename(main_text_file)
        epoch_text_file = "{0}/epoch/{1}.epoch.txt".format(indir, tilename)
        out_main = "{0}/main/{1}.fits".format(outdir, tilename)
        out_epoch = "{0}/epoch/{1}.fits".format(outdir, tilename)
        if os.path.exists(out_main) and os.path.exists(out_epoch):
            continue
        try:
            process_text(main_text_file,
                         epoch_text_file,
                         out_main,
                         out_epoch,
                         "r",
                         blind=False,
                         quiet=False,
                         report=report)
        except Exception:
            print("{} did not work".format(out_main))
        if report:
            return
Example #42
    def test_synchronize(self):
        """ Make sure that we can make the overlap spaces accurate. """
        for case in self.cases:
            space.initialize_space(case['shape'])
            data = np.random.randn(*case['shape']).astype(case['dtype'])
            cpu_data = np.empty_like(data)
            comm.Allreduce(data, cpu_data)
            g = Grid(case['dtype'])
            self.assertRaises(TypeError, g.synchronize)  # No overlap.
            # Test with-overlap cases as well.
            for k in range(1, 4):
                g = Grid(case['dtype'], x_overlap=k)

                # Overwrite entire grid
                data = np.random.randn(*case['shape']).astype(case['dtype'])
                cpu_data = np.empty_like(data)
                comm.Allreduce(data, cpu_data)
                cpu_raw_bad = get_cpu_raw(cpu_data, k)
                cpu_raw_bad[:k, :, :] += 1  # Mess up padding areas.
                cpu_raw_bad[-k:, :, :] += 1
                drv.memcpy_htod(g.data.ptr, cpu_raw_bad)

                # Prove that the data is not synchronized at this time.
                cpu_raw = get_cpu_raw(cpu_data, k)
                xx = case['shape'][0]
                gd = g._get_raw()
                self.assertTrue((gd[:k, :, :] != cpu_raw[:k, :, :]).all())
                self.assertTrue((gd[-k:, :, :] != cpu_raw[-k:, :, :]).all())

                g.synchronize()  # Synchronize the overlapping data.

                # Make sure that the overlap data is accurate.
                gd = g._get_raw()
                self.assertTrue((gd[:k, :, :] == cpu_raw[:k, :, :]).all())
                self.assertTrue((gd[-k:, :, :] == cpu_raw[-k:, :, :]).all())

                comm.Barrier()  # Wait for other mpi nodes to finish.
Example #43
def main(nt, nx, dt, C, x_min, x_max):
    dx = (x_max - x_min) / nx

    ###
    size = mpi.Get_size()
    rank = mpi.Get_rank()

    # for nx=5 and size=3: better 2+2+1 than 1+1+3
    import math
    nx_max = math.ceil(nx / size)
    nx = nx_max if (rank + 1) * nx_max <= nx else nx - rank * nx_max
    assert nx > 0

    x_min += dx * nx_max * rank
    x_max = min(x_max, x_min + dx * nx_max)
    #print(rank, '/', size, ':', nx, x_min, x_max)
    ###

    x = np.linspace(x_min - halo * dx,
                    x_max + halo * dx,
                    num=nx + 2 * halo,
                    endpoint=False)
    psi = calc(psi_0(x), nt, C)
    plot(x[halo:-halo], psi[halo:-halo], psi_0, nt, v=C / dt * dx)
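Tracing the split for the commented case nx=5, size=3:

# nx_max = ceil(5 / 3) = 2
# rank 0: (0 + 1) * 2 <= 5  ->  nx = 2
# rank 1: (1 + 1) * 2 <= 5  ->  nx = 2
# rank 2: (2 + 1) * 2 >  5  ->  nx = 5 - 2 * 2 = 1   (the 2 + 2 + 1 split)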
Example #44
    def reduce(self, tag):
        """Use supplied reducefn to operate on
        a list of values from a given key, generated by self.map()
        """
        atimer = Timer('Worker_Reduce')

        filename = world.recv(source=0, tag=tag)
        files = glob.glob(filename+'-tmp*')
        dataList = []
        for file in files:
            with open(file, 'rb') as fin:
                try:
                    while True:
                        dataList.extend( pickle.load(fin) )
                except EOFError: # read in every instance of pickle dump
                    pass
        data = {}
        for key, val in dataList:
            if key in data:
                data[key].append(val)
            else:
                data[key] = [val]
        results = []
        for key, values in data.items():
            results.append( ( key, self.config['reducefn'](key, values) ) )
        results.sort(key=itemgetter(0))

        # write out in dictionary format
        idx = self.reduceIn.index(filename)
        if self.config['appendReduce']:
            with open(self.reduceOut[idx], 'a+b') as fout:  # pickle needs binary mode
                pickle.dump(dict(results), fout, pickle.HIGHEST_PROTOCOL)
        else:
            with open(self.reduceOut[idx], 'w+b') as fout:  # pickle needs binary mode
                pickle.dump(dict(results), fout, pickle.HIGHEST_PROTOCOL)

        world.send(filename, dest=0, tag=REDUCE_FINISH)
Example #45
 def array(self, N=None, filename=None, component=None, root=0):
     """Dump data to numpy format on root processor for all or one snapshot."""
     is_root = comm.Get_rank() == root
     size = self.get_total_number_probes() if is_root else len(self)
     comp = self.value_size() if component is None else 1
      if N is not None:
         z  = zeros((size, comp))
     else:
         z  = zeros((size, comp, self.number_of_evaluations()))
     
     # Get all values
     if len(self) > 0: 
          if N is not None:
             for k in range(comp):
                 if is_root:
                     ids = self.get_probe_ids()
                     z[ids, k] = self.get_probes_component_and_snapshot(k, N)
                 else:
                     z[:, k] = self.get_probes_component_and_snapshot(k, N)
         else:                
             for i, (index, probe) in enumerate(self):
                 j = index if is_root else i
                  if N is not None:
                     z[j, :] = probe.get_probe_at_snapshot(N)
                 else:
                     for k in range(self.value_size()):
                         z[j, k, :] = probe.get_probe_sub(k)
                     
     # Collect values on root
     recvfrom = comm.gather(len(self), root=root)
     if is_root:
         for j, k in enumerate(recvfrom):                
             if comm.Get_rank() != j:
                 ids = comm.recv(source=j, tag=101)
                 z0 = comm.recv(source=j, tag=102)
                 z[ids, :] = z0[:, :]
     else:
         ids = self.get_probe_ids()
         comm.send(ids, dest=root, tag=101)
         comm.send(z, dest=root, tag=102)
         
     if is_root:
         if filename:
              if N is not None:
                 z.dump(filename+"_snapshot_"+str(N)+".probes")
             else:
                 z.dump(filename+"_all.probes")
         return squeeze(z)
Example #46
    def __init__(self, N=(256, ), x0=(0.0, ), x1=(1.0, )):
        assert len(N) == len(x0) == len(x1)
        try:
            from mpi4py.MPI import COMM_WORLD, Compute_dims
        except ImportError:
            print "Error! DistributedDomain requires the mpi4py package."
            exit()

        mpi_sizes = Compute_dims(COMM_WORLD.size, len(N))
        cart = COMM_WORLD.Create_cart(mpi_sizes, periods=[True for n in N])
        mpi_coord = cart.Get_coords(COMM_WORLD.rank)

        global_shape = [n for n in N]
        global_start = [0 for n in N]

        local_shape = []
        X0, X1 = [], []
        dx = [float(l1 - l0) / n for n, l0, l1 in zip(N, x0, x1)]

        for i in range(len(N)):

            R = N[i] % mpi_sizes[i]
            normal_size = N[i] // mpi_sizes[i]  # integer division: sizes must be ints
            augmnt_size = normal_size + 1
            thisdm_size = augmnt_size if mpi_coord[i] < R else normal_size

            for j in range(mpi_coord[i]):
                global_start[i] += augmnt_size if j < R else normal_size

            local_shape.append(thisdm_size)

            X0.append(x0[i] + dx[i] * global_start[i])
            X1.append(x0[i] + dx[i] * (global_start[i] + thisdm_size))

        self.N = local_shape
        self.dx = dx
        self.x0 = X0
        self.x1 = X1
        self.cart = cart
        self.rank = COMM_WORLD.rank
        self.mpi_coord = mpi_coord
        self.mpi_sizes = mpi_sizes
        self.global_start = global_start
        self.global_shape = global_shape
        self.is_distributed = True
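Tracing the remainder logic for, say, N=(10,) on 3 ranks:

# R = 10 % 3 = 1, normal_size = 10 // 3 = 3, augmnt_size = 4
# rank 0 (mpi_coord 0 < R): 4 cells, global_start = 0
# rank 1:                   3 cells, global_start = 4
# rank 2:                   3 cells, global_start = 7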
Example #47
    def test_sum(self):
        """ Make sure summing works. """
        for case in self.cases:
            space.initialize_space(case['shape'])
            x = Out(case['dtype'], op='sum')
            x_cpu_data = np.random.randn(*case['shape'][1:]).astype(case['dtype'])
            if case['dtype'] in (np.complex64, np.complex128):
                x_cpu_data = (1 + 1j) * x_cpu_data

            x.data.set(x_cpu_data)
            res_gold = comm.allreduce(np.sum(x_cpu_data.flatten()))

            x.reduce()
            err = abs(res_gold - x.get()) / abs(res_gold)

            if case['dtype'] in (np.float32, np.complex64):
                self.assertTrue(err < 1e-3)
            else:
                self.assertTrue(err < 1e-10)
Example #48
    def test_sum(self):
        """ Make sure summing works. """
        for case in self.cases:
            space.initialize_space(case['shape'])
            x = Out(case['dtype'], op='sum')
            x_cpu_data = np.random.randn(*case['shape'][1:]).astype(
                case['dtype'])
            if case['dtype'] in (np.complex64, np.complex128):
                x_cpu_data = (1 + 1j) * x_cpu_data

            x.data.set(x_cpu_data)
            res_gold = comm.allreduce(np.sum(x_cpu_data.flatten()))

            x.reduce()
            err = abs(res_gold - x.get()) / abs(res_gold)

            if case['dtype'] in (np.float32, np.complex64):
                self.assertTrue(err < 1e-3)
            else:
                self.assertTrue(err < 1e-10)
Example #49
def set_periodic_checkpoint(sim, period):
    """
    Set up periodic checkpoints of the simulation

    The checkpoints are saved in openPMD format, in the directory
    `./checkpoints`, with one subdirectory per process.
    All the field and particle information of each processor is saved.

    NB: Checkpoints are registered among the list of diagnostics
    `diags` of the Simulation object `sim`.

    Parameters
    ----------
    sim: a Simulation object
       The simulation that is to be saved in checkpoints

    period: integer
       The number of PIC iterations between each checkpoint.
    """
    # Only processor 0 creates a directory where checkpoints will be stored
    # Make sure that all processors wait until this directory is created
    # (Use the global MPI communicator instead of the `BoundaryCommunicator`
    # so that this still works in the case `use_all_ranks=False`)
    if comm.rank == 0:
        if not os.path.exists('./checkpoints'):
            os.mkdir('./checkpoints')
    comm.Barrier()

    # Choose the name of the directory: one directory per processor
    write_dir = 'checkpoints/proc%d/' % comm.rank

    # Register a periodic FieldDiagnostic in the diagnostics of the simulation
    sim.diags.append(FieldDiagnostic(period, sim.fld, write_dir=write_dir))

    # Register a periodic ParticleDiagnostic, which contains all
    # the particles which are present in the simulation
    particle_dict = {}
    for i in range(len(sim.ptcl)):
        particle_dict['species %d' % i] = sim.ptcl[i]
    sim.diags.append(
        ParticleDiagnostic(period, particle_dict, write_dir=write_dir))

Example #50
maf.bcast = bcast

def compare(msg, mpi_func, maf_func, count=100):
    mpi_times = numpy.zeros(count)
    maf_times = numpy.zeros(count)
    for i in range(count):
        start = timeit.default_timer()
        mpi_func()
        mpi_times[i] = timeit.default_timer() - start
        start = timeit.default_timer()
        maf_func()
        maf_times[i] = timeit.default_timer() - start
    maf.log('{:<20} {:.4e} {:.4e} {:>6.2f}'.format(msg, mpi_times.mean(), maf_times.mean(), 
            100*((maf_times - mpi_times) / mpi_times).mean()))

compare('bcast(s)', lambda: COMM_WORLD.bcast('s'), lambda: maf.bcast('s'))
compare('bcast(1)', lambda: COMM_WORLD.bcast(1), lambda: maf.bcast(1))
compare('bcast(s*100)', lambda: COMM_WORLD.bcast('s'*100), lambda: maf.bcast('s'*100))
compare('bcast(s*1000)', lambda: COMM_WORLD.bcast('s'*1000), lambda: maf.bcast('s'*1000))

compare('bcast(range(1000))',
        lambda: COMM_WORLD.bcast(range(1000)),
        lambda: maf.bcast(range(1000)))

compare('bcast(range(10000))',
        lambda: COMM_WORLD.bcast(range(10000)),
        lambda: maf.bcast(range(10000)))

Example #51
import pickle
import matplotlib.patches
import collections.abc
import sys
from scipy import stats
from mpi4py.MPI import COMM_WORLD as CW


def flatten(x):
    if isinstance(x, collections.abc.Iterable):
        return [a for i in x for a in flatten(i)]
    else:
        return [x]


rank = CW.Get_rank()
size = CW.Get_size()

two_col_width = 7.20472  #inches
single_col_width = 3.50394  #inches
page_height = 10.62472
font_size = 10

sys.stdout.flush()
CW.Barrier()

pickle_file = sys.argv[1]
true_birth_con_pickle = sys.argv[2]
plot_gradient = False
read_pickle = bool(sys.argv[3])
baseline_yr = float(sys.argv[4])
Example #52
 def reduce(self):
     """ Compute the result. """
     self.result = comm.allreduce(ga.sum(self.data).get())
Example #53
def batch_reduce(*outs):
    """ Optimal (compared to self.reduce) when communication cost is latency bound. """
    results = comm.allreduce(np.array([ga.sum(out.data).get() for out in outs]))
    for k in range(len(outs)):
        outs[k].result = results[k]
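A usage sketch mirroring Example #11: a single allreduce of a length-n array replaces n separate allreduce calls, which is the win when latency rather than bandwidth dominates:

outs = [Out(np.float32, op='sum') for _ in range(3)]   # as in Example #11
batch_reduce(*outs)                # one allreduce covers all three outputs
totals = [out.get() for out in outs]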
Example #54
    def __init__(self, array_or_dtype, x_overlap=0):
        """ Create a spatial grid on the GPU(s).

        Input variables
        array_or_dtype -- can either be a numpy array of the same shape as
            the global space, or a numpy dtype. If a valid array is passed, 
            it will be loaded on to the GPU. If a dtype is passed, then
            an array of zeros, of that dtype will be loaded onto the GPU.

        Optional variables
        x_overlap -- the number of adjacent cells in either the negative or
            positive x-direction that need to simultaneously be accessed along
            with the current cell. Must be a non-negative integer. Default
            value is 0.

        """

        shape = get_space_info()['shape'] # Get the shape of the space.
        xr = get_space_info()['x_range'] # Get the local x_range.
        all_x_ranges = get_space_info()['all_x_ranges'] # Get all x_ranges.
        local_shape = (xr[1]-xr[0], shape[1], shape[2])

        self._set_gce_type('grid') # Set the gce type to grid.

        # Make sure overlap option is valid.
        if type(x_overlap) is not int:
            raise TypeError('x_overlap must be an integer.')
        elif x_overlap < 0:
            raise TypeError('x_overlap must be a non-negative integer.')

        if comm.rank == 0:
            # Process the array_or_dtype input variable.
            if type(array_or_dtype) is np.ndarray: # Input is an array.
                array = array_or_dtype

                # Make sure the array is of the correct shape.
                if array.shape != shape:
                    raise TypeError('Shape of array does not match shape of space.')

                # Make sure the array is of a valid datatype.
                self._get_dtype(array.dtype.type)


            elif type(array_or_dtype) is type: # Input is a datatype.
                self._get_dtype(array_or_dtype) # Validate the dtype.
                array = np.zeros(shape, dtype=self.dtype) # Make a zeros array.

            else: # Invalid input.
                raise TypeError('Input variable must be a numpy array or dtype')

            # Prepare array to be scattered.
            array = [array[r[0]:r[1],:,:] for r in all_x_ranges]

        else:
            array = None

        array = comm.scatter(array)
        self._get_dtype(array.dtype.type)

#         # Narrow down the array to local x_range.
#         array = array[xr[0]:xr[1],:,:]

        # Add padding to array, if needed.
        self._xlap = x_overlap
        if self._xlap != 0:
            padding = np.empty((self._xlap,) + shape[1:3], dtype=array.dtype)
            array = np.concatenate((padding, array, padding), axis=0)

        self.to_gpu(array) # Load onto device.

        # Determine information needed for synchronization.
        if self._xlap != 0:
            # Calculates the pointer to the x offset in a grid.
            ptr_dx = lambda x_pos: self.data.ptr + self.data.dtype.itemsize * \
                                        x_pos * shape[1] * shape[2]
            
            # Pointers to different sections of the grid that are relevant
            # for synchronization.
            self._sync_ptrs = { 'forw_src': ptr_dx(xr[1]-xr[0]), \
                                'back_dest': ptr_dx(0), \
                                'back_src': ptr_dx(self._xlap), \
                                'forw_dest': ptr_dx(xr[1]-xr[0] + self._xlap)}

            # Buffers used during synchronization.
            self._sync_buffers = [drv.pagelocked_empty( \
                                    (self._xlap, shape[1], shape[2]), \
                                    self.dtype) for k in range(4)]

            # Streams used during synchronization.
            self._sync_streams = [drv.Stream() for k in range(4)]

            # Used to identify neighboring MPI nodes with whom to synchronize.
            self._sync_adj = get_space_info()['mpi_adj']

            # Offset in bytes to the true start of the grid.
            # This is used to "hide" overlap areas from the kernel.
            self._xlap_offset = self.data.dtype.itemsize * \
                                self._xlap * shape[1] * shape[2]

            self.synchronize() # Synchronize the grid.
            comm.Barrier() # Wait for all grids to synchronize before proceeding.
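A construction sketch mirroring the tests in Examples #33 and #42 (space, Grid, and np come from the snippet's own module):

space.initialize_space((64, 32, 32))     # define the global space first
g = Grid(np.float32, x_overlap=1)        # zero-filled grid with a 1-cell halo
g.synchronize()                          # refresh halo cells from neighbors
full = g.get()                           # root gathers the global array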
Example #55
 def __del__(self):
     if self.rank == 0:
         for p in range(1, self.P):
             COMM_WORLD.isend(False, dest=p)
Example #56
def get_parameters(name):
    """ Reads the simulation parameters from the input hdf5 file. """

    if comm.rank == 0:
        f = h5py.File(name + '.grid', 'r')
        files_to_delete = [name + '.grid']

        omega = np.complex128(f['omega_r'][0] + 1j * f['omega_i'][0])
        shape = tuple([int(s) for s in f['shape'][:]])

        # bound_conds = f['bound_conds'][:]

        # Function used to read in a 1D complex vector fields.
        get_1D_fields = lambda a: [(f[a+'_'+u+'r'][:] + 1j * f[a+'_'+u+'i'][:]).\
                                astype(np.complex128) for u in 'xyz']

        # Read in s and t vectors.
        s = get_1D_fields('sp')
        t = get_1D_fields('sd')

        # Read in max_iters and err_thresh.
        max_iters = int(f['max_iters'][0])
        # max_iters = 100
        err_thresh = float(f['err_thresh'][0])


        f.close() # Close file.

        # Function used to read in 3D complex vector fields.
        def get_3D_fields(a):
            field = []
            for k in range(3):
                key = name + '.' + a + '_' + 'xyz'[k]
                field.append((h5py.File(key + 'r')['data'][:] + \
                        1j * h5py.File(key + 'i')['data'][:]).astype(np.complex128))
                files_to_delete.append(key + 'r')
                files_to_delete.append(key + 'i')
            return field

#         # Read in m, e, and j fields.
#         for name in 'eJmE':
#             print comm.rank, name
#             params[name] = get_3D_fields(name)
        e = get_3D_fields('e')
        j = get_3D_fields('J')
        m = get_3D_fields('m')
        x = get_3D_fields('E')

        # Delete input files.
        for filename in files_to_delete:
            os.remove(filename)

        # Do some simple pre-computation.
        for k in range(3):
            m[k] = m[k]**-1
            e[k] = omega**2 * e[k]
            j[k] = -1j * omega * j[k]

        params = {'omega': omega, 'shape': shape, \
                'max_iters': max_iters, 'err_thresh': err_thresh, \
                's': s, 't': t}
                # 'e': e, 'm': m, 'j': j, 'x': x}
    else:
        params = None

    params = comm.bcast(params)

    if comm.rank == 0:
        params['e'] = e
        params['m'] = m
        params['j'] = j
        params['x'] = x
        
    else:
        for field_name in 'emjx':
            params[field_name] = [None] * 3

    return params