Example #1
def fstack_mp(img, fmap):
    img_stacked = shmem.empty(img.shape[0:2], dtype='uint16')

    # This implementation is faster than breaking each image plane up for parallel processing
    def do_work(x):
        index = ne.evaluate("fmap==x")
        img_stacked[index] = img[:, :, x][index]
        index = ne.evaluate("(fmap > x) & (fmap < x+1)")
        A = fmap[index]
        B = img[:, :, x+1][index]
        C = img[:, :, x][index]
        img_stacked[index] = ne.evaluate("(A-x) * B + (x+1-A) * C")

    with shmem.MapReduce(np=img.shape[2]-1) as pool:
        pool.map(do_work, range(img.shape[2]-1))

    last_ind = img.shape[2]-1
    index = ne.evaluate("fmap == last_ind")
    num_proc = shmem.cpu_count()
    edges = get_edges(img, num_proc)

    def mp_assignment(x):
        img_stacked[edges[x]:edges[x+1],:][index[edges[x]:edges[x+1],:]] = img[edges[x]:edges[x+1], :, -1]\
                                                                        [index[edges[x]:edges[x+1], :]]
    with shmem.MapReduce(np=num_proc) as pool:
        pool.map(mp_assignment, range(num_proc))

    return img_stacked
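A note on the comment above: the function parallelizes across focus planes instead of splitting each plane into strips. A minimal, self-contained sketch of that per-plane decomposition, using only numpy and sharedmem on toy data (names and sizes are illustrative, not part of the original code):

import numpy as np
import sharedmem as shmem

stack = np.random.rand(64, 64, 8)                  # toy image stack: (rows, cols, planes)
plane_max = shmem.empty(stack.shape[2], dtype='f8')

with shmem.MapReduce(np=stack.shape[2]) as pool:
    def do_work(x):
        plane_max[x] = stack[:, :, x].max()        # one worker per plane writes one element

    pool.map(do_work, range(stack.shape[2]))

assert np.allclose(plane_max, stack.max(axis=(0, 1)))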
Example #2
    def __call__(self, left, right):
        inter_shape = self.intermediate_shape_func(left.shape, right.shape)
        inter_dtype = self.intermediate_dtype_func(left.dtype, right.dtype)
        out_shape = self.reduce_shape_func(inter_shape)
        out_dtype = self.reduce_dtype_func(inter_dtype)

        # vstack results from processes, or share result and use pool.critical?
        # a vstack seems most foolproof, though it uses more mem.
        with sharedmem.MapReduce(np=self.n_jobs) as pool:
            m_left = sharedmem.full_like(left, left)
            m_right = sharedmem.full_like(right, right)
            result = sharedmem.empty(out_shape, out_dtype)

            chunksize = self.get_chunk_size(inter_shape, inter_dtype)
            #print("chunksize {}".format(chunksize))
            n = m_left.shape[0]

            def op(i):
                ix = slice(i, min(i + chunksize, n))
                res = self.reduce_op(self.binary_op(m_left[ix], m_right))
                return ix, res

            def insert(ix_slice, res):
                result[ix_slice] = res

            pool.map(op, range(0, m_left.shape[0], chunksize), reduce=insert)

        return np.array(result)
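The comment in this example weighs stacking per-process results against sharing the output array; the code settles on a preallocated sharedmem array filled through the reduce callback. A stripped-down sketch of that same pattern, assuming only numpy and sharedmem (an element-wise square, chunked; array sizes are illustrative):

import numpy as np
import sharedmem

x = np.arange(20, dtype='f8')
out = sharedmem.empty(x.shape, dtype='f8')         # preallocated shared output array
chunksize = 6

with sharedmem.MapReduce(np=2) as pool:
    def op(i):
        ix = slice(i, min(i + chunksize, len(x)))
        return ix, x[ix] ** 2                      # ship the slice and its chunk result back

    def insert(ix, res):
        out[ix] = res                              # reduce callback writes each chunk into place

    pool.map(op, range(0, len(x), chunksize), reduce=insert)

assert np.allclose(out, x ** 2)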
Example #3
def test_critical():
    t = sharedmem.empty((), dtype='i8')
    t[...] = 0
    # FIXME: if the system has only one core then this will never fail,
    # even if the critical section is not actually enforced.
    with sharedmem.MapReduce(np=8) as pool:

        def work(i):
            with pool.critical:
                t[...] = 1
                if i != 30:
                    time.sleep(0.01)
                assert_equal(t, 1)
                t[...] = 0

        pool.map(work, range(16))

        def work(i):
            t[...] = 1
            if i != 30:
                time.sleep(0.01)
            assert_equal(t, 1)
            t[...] = 0

        try:
            pool.map(work, range(16))
        except sharedmem.SlaveException as e:
            assert isinstance(e.reason, AssertionError)
            return
    raise AssertionError("Shall not reach here.")
Example #4
    def make_predictions(self, angles, eccentricities, sizes, n_jobs=4):
        self.angles, self.eccentricities, self.sizes = np.meshgrid(
            angles, eccentricities, sizes)
        #self.predictions = np.zeros(list(self.angles.shape) + [self.stimulus.run_length])
        #self.predictions = self.predictions.reshape(-1, self.predictions.shape[-1]).T

        self.xs = np.cos(self.angles) * self.eccentricities
        self.ys = np.sin(self.angles) * self.eccentricities

        print(self.xs.shape)
        print(len(self.xs.ravel()))

        with sharedmem.MapReduce(np=n_jobs) as pool:

            def make_predictions(args):
                x, y, s = args
                return self.model_func.generate_prediction(x, y, s, 1, 0)

            pb = tqdm(total=self.angles.size)

            def reduce(r):
                pb.update()
                return r

            args = list(
                zip(self.xs.ravel(), self.ys.ravel(), self.sizes.ravel()))
            self.predictions = np.array(
                pool.map(make_predictions, args, reduce=reduce)).T
Example #5
def generate_roadmap_parallel(samples, env, max_dist, leafsize, knn):
    """Parallelized roadmap generator """

    n_sample = len(samples)
    leafsize = knn
    if len(samples) < leafsize: leafsize = len(samples) - 1

    import sharedmem
    sample_ids = np.arange(n_sample, dtype='i')
    roadmap = sharedmem.full((n_sample, knn), 0)

    # Start multi processing over samples
    with sharedmem.MapReduce() as pool:
        if n_sample % sharedmem.cpu_count() == 0:
            chunksize = n_sample // sharedmem.cpu_count()
        else:
            chunksize = n_sample // sharedmem.cpu_count() + 1

        def work(i):
            skdtree = KDTree(samples, leafsize=leafsize)
            sub_sample_ids = sample_ids[slice(i, i + chunksize)]

            for j, sub_sample_id in enumerate(sub_sample_ids):
                x = samples[sub_sample_id]
                try:
                    inds, dists = skdtree.search(x, k=leafsize)
                except:
                    print "skdtree search failed"
                    sys.exit()

                edge_id = []
                append = edge_id.append
                for ii, (ind, dist) in enumerate(zip(inds, dists)):
                    if dist > max_dist: break  # undirected
                    if len(edge_id) >= knn: break  # directed?
                    append(ind)

                # pad the edge list so every node ends up with exactly knn edges, as required by the vectorized value iteration
                if len(edge_id) < knn:
                    for ii in range(0, len(inds)):
                        #for ind in edge_id:
                        #    edge_id.append(ind)
                        #    if len(edge_id) >= knn: break
                        append(inds[0])
                        if len(edge_id) >= knn: break

                assert len(
                    edge_id
                ) <= leafsize, "fewer leaves than edges {} (dists={})".format(
                    len(edge_id), dists[:len(edge_id)])

                for k in range(len(edge_id)):
                    roadmap[sub_sample_id][k] = edge_id[k]

        pool.map(work, range(0, n_sample, chunksize))  #, reduce=reduce)

    # convert sharedmem array to list
    roadmap = np.array(roadmap).astype(int)
    skdtree = None  #KDTree(samples, leafsize=leafsize)
    return roadmap.tolist(), skdtree
Example #6
def argsort(ar):
    min = minimum.reduce(ar)
    max = maximum.reduce(ar)
    nchunk = sharedmem.cpu_count() * 2
    #bins = numpy.linspace(min, max, nchunk, endpoint=True)
    step = 1.0 * (max - min) / nchunk
    bins = numpy.array(
        1.0 * numpy.arange(nchunk + 1) * (max - min) / nchunk + min, min.dtype)

    dig = digitize(ar, bins)
    binlength = bincount(dig, minlength=len(bins) + 1)
    binoffset = numpy.cumsum(binlength)
    out = sharedmem.empty(len(ar), dtype='intp')

    with sharedmem.MapReduce() as pool:

        def work(i):
            # this could be made faster, but the speed is already reasonable:
            # sort the members of each bin and place them at the bin's offset.
            ind = numpy.nonzero(dig == i + 1)[0]
            myar = ar[ind]
            out[binoffset[i]:binoffset[i + 1]] = ind[myar.argsort()]

        pool.map(work, range(nchunk))

    return out
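The parallel argsort above decomposes the problem by value range: digitize the input into bins, sort each bin independently, and write each sorted bin into a disjoint output slice given by the cumulative bin counts. A serial sketch of that decomposition in plain numpy (the clipping of the top bin is an assumption added here so the maximum element lands in the last bin):

import numpy

ar = numpy.random.uniform(size=1000)
nchunk = 8
bins = numpy.linspace(ar.min(), ar.max(), nchunk + 1)

dig = numpy.clip(numpy.digitize(ar, bins), 1, nchunk)   # bin index per element, max folded into last bin
binlength = numpy.bincount(dig, minlength=nchunk + 1)
binoffset = numpy.cumsum(binlength)
out = numpy.empty(len(ar), dtype='intp')

for i in range(nchunk):                                  # each iteration could run in its own worker
    ind = numpy.nonzero(dig == i + 1)[0]
    out[binoffset[i]:binoffset[i + 1]] = ind[ar[ind].argsort()]

assert numpy.all(ar[out][1:] >= ar[out][:-1])            # globally sorted order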
Example #7
    def ztree(self,
              zkey=None,
              scale=None,
              minthresh=10,
              maxthresh=20,
              np=None):
        if scale is None:
            scale = fc.scale(self['locations'].min(axis=0),
                             self['locations'].ptp(axis=0))
        zkey = sharedmem.empty(self.numpoints, dtype=fc.fckeytype)

        with sharedmem.MapReduce(np=np) as pool:
            chunksize = 1024 * 1024

            def work(i):
                X, Y, Z = self['locations'][i:i + chunksize].T
                fc.encode(X, Y, Z, scale=scale, out=zkey[i:i + chunksize])

            pool.map(work, range(0, len(zkey), chunksize))

        # use sharemem.argsort, because it is faster
        arg = sharedmem.argsort(zkey, np=np)

        return zt.Tree(zkey=zkey,
                       scale=scale,
                       arg=arg,
                       minthresh=minthresh,
                       maxthresh=maxthresh)
Example #8
def paint(pos, sml, data, shape, mask=None, np=0):
    """ paint on 
          paint (pos, sml, data, image)

          data is a list for quantities to paint per channel
          pos[0] : 0 .. height
          pos[1] : 0 .. width
          so remember to transpose using imshow.

        returns (nchan, shape[0], shape[1])
    """
    with sharedmem.MapReduce(np=np) as pool:
        if np > 0: nbuf = np
        else: nbuf = 1
        buf = sharedmem.empty((nbuf, len(data)) + shape, dtype='f4')
        buf[:] = 0
        chunksize = 1024 * 8

        def work(i):
            sl = slice(i, i + chunksize)
            datas = [d[sl] for d in data]
            if mask is not None: masks = mask[sl]
            else: masks = None
            _painter.paint(pos[sl], sml[sl], numpy.array(datas),
                           buf[pool.local.rank], masks)

        pool.map(work, range(0, len(pos), chunksize))
    return numpy.sum(buf, axis=0)
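The paint routine depends on the module's _painter extension, but its accumulation pattern is general: give every worker its own buffer row, indexed by pool.local.rank, and sum the rows afterwards, so no locking is needed while painting. A self-contained sketch of that pattern, assuming only numpy and sharedmem (histogramming stands in for painting):

import numpy
import sharedmem

values = numpy.random.randint(0, 10, size=100000)
nbuf = 4                                           # one private accumulator per worker
buf = sharedmem.empty((nbuf, 10), dtype='i8')
buf[:] = 0
chunksize = 10000

with sharedmem.MapReduce(np=nbuf) as pool:
    def work(i):
        counts = numpy.bincount(values[i:i + chunksize], minlength=10)
        buf[pool.local.rank] += counts             # each rank touches only its own row

    pool.map(work, range(0, len(values), chunksize))

total = buf.sum(axis=0)                            # combine the per-rank buffers at the end
assert total.sum() == len(values)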
Example #9
def test_warnings():
    import warnings
    with sharedmem.MapReduce(np=8) as pool:

        def work(i):
            warnings.warn("This shall not be printed")

        pool.map(work, range(8))
Example #10
def sharedmem_pool(total_cores, numexpr=True):
    # see https://stackoverflow.com/questions/15639779
    global AFFINITY_FLAG
    if not AFFINITY_FLAG:
        AFFINITY_FLAG = True
        os.system("taskset -p 0xfff %d" % os.getpid())
    if total_cores is None:
        total_cores = sm.cpu_count()
    return sm.MapReduce(np=good_process_number(total_cores, numexpr))
Example #11
def get_fmap(img):
    num_proc = shmem.cpu_count()
    log_kernel = get_log_kernel(11, 2)
    se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (17, 17))

    def mp_imgproc(x):
        bound_in = (edges[x]-(edges[x] > 0)*50, edges[x+1] + (edges[x+1] < img.shape[0]) *50 )
        bound_out = (50 if edges[x] > 0 else 0, None if edges[x+1] == img.shape[0] else -50)
        part_img = cv2.filter2D(img[bound_in[0]:bound_in[1], :, ii].astype('single'), -1, log_kernel)
        part_img = cv2.dilate(part_img, se)
        img_filtered[edges[x]:edges[x+1], :] = part_img[bound_out[0]:bound_out[1], :]

    def mp_gaussblur(x):
        bound_in = (edges[x]-(edges[x] > 0)*50, edges[x+1] + (edges[x+1] < img.shape[0]) *50 )
        bound_out = (50 if edges[x] > 0 else 0, None if edges[x+1] == img.shape[0] else -50)
        part_img = cv2.GaussianBlur(fmap[bound_in[0]:bound_in[1], :], (31, 31), 6)
        fmap[edges[x]:edges[x+1], :] = part_img[bound_out[0]:bound_out[1], :]

    log_response = shmem.empty(img.shape[0:2], dtype='single')
    fmap = shmem.empty(img.shape[0:2], dtype='single')
    edges = get_edges(img, num_proc)

    def mp_assignment_1(x):
        log_response[edges[x]:edges[x+1],:] = img_filtered[edges[x]:edges[x+1],:]

    def mp_assignment_2(x):
        fmap[index[edges[x]:edges[x+1],:]] = ii

    for ii in range(img.shape[2]):
        img_filtered = shmem.empty((img.shape[0], img.shape[1]), dtype='single')
        with shmem.MapReduce(np=num_proc) as pool:
            pool.map(mp_imgproc, range(num_proc))

        index = ne.evaluate("img_filtered > log_response")

        with shmem.MapReduce(np=num_proc) as pool:
            pool.map(mp_assignment_1, range(num_proc))
        # log_response[index] = img_filtered[index]
        with shmem.MapReduce(np=num_proc) as pool:
            pool.map(mp_assignment_2, range(num_proc))

    with shmem.MapReduce(np=num_proc) as pool:
        pool.map(mp_gaussblur, range(num_proc))
    return fmap
Example #12
def test_local():
    t = sharedmem.empty(800)
    with sharedmem.MapReduce(np=4) as pool:

        def work(i):
            time.sleep(0.1 * numpy.random.uniform())
            with pool.ordered:
                t[i] = pool.local.rank

        pool.map(work, range(800))
    assert_equal(numpy.unique(t), range(4))
Example #13
def fstack_mp_new(img, fmap):
    img_stacked = shmem.empty(img.shape[0:2], dtype='uint16')
    indexl = shmem.empty(img.shape[0:2], dtype='bool')

    edges = get_edges(img, 16)
    # This implementation is faster than breaking each image plane up for parallel processing
    def do_work(x):

        if x!=img.shape[2]-1:

            def mt_assignment(input, y):
                return input[index[edges[y]:edges[y+1],:]]
            index = ne.evaluate("fmap==x")
            img_stacked[index] = img[:, :, x][index]
            index = ne.evaluate("(fmap > x) & (fmap < x+1)")
            with ThreadPoolExecutor(max_workers=16) as pool:
                A = np.concatenate([(pool.submit(mt_assignment, fmap, y)).result() for y in range(16)], axis=0)
                B = np.concatenate([(pool.submit(mt_assignment, img[:, :, x+1], y)).result() for y in range(16)], axis=0)
                C = np.concatenate([(pool.submit(mt_assignment, img[:, :, x], y)).result() for y in range(16)], axis=0)
            print('A Shape is : ', A.shape)
            print('A content is: ', A)

            img_stacked[index] = ne.evaluate("(A-x) * B + (x+1-A) * C")
        else:
            last_ind = img.shape[2]-1
            indexl[...] = ne.evaluate("fmap == last_ind")  # write into the shared array; rebinding the name locally would be lost when the worker exits

    with shmem.MapReduce(np=img.shape[2]) as pool:
        pool.map(do_work, range(img.shape[2]))

    num_proc = shmem.cpu_count()
    edges = get_edges(img, num_proc)


    def mp_assignment(x):
        img_stacked[edges[x]:edges[x+1],:][indexl[edges[x]:edges[x+1],:]] = img[edges[x]:edges[x+1], :, -1]\
                                                                        [indexl[edges[x]:edges[x+1], :]]
    with shmem.MapReduce(np=num_proc) as pool:
        pool.map(mp_assignment, range(num_proc))

    return img_stacked
Example #14
def test_scalar():
    s = sharedmem.empty((), dtype='f8')
    s[...] = 1.0
    assert_equal(s, 1.0)

    with sharedmem.MapReduce() as pool:

        def work(i):
            with pool.ordered:
                s[...] = i

        pool.map(work, range(10))

    assert_equal(s, 9)
Example #15
def test_picklable_raise():
    with sharedmem.MapReduce() as pool:

        def work(i):
            time.sleep(0.1 * numpy.random.uniform())
            if i == 10:
                raise PicklableException("Raise an exception")

        try:
            pool.map(work, range(100))
        except sharedmem.SlaveException as e:
            assert isinstance(e.reason, PicklableException)
            return
    raise AssertionError("Shall not reach here")
Example #16
def process(snapid):
    snapdir = readsubhalo.SnapDir(snapid, '../')

    sfr = snapdir.load(0, 'sfr')
    chunksize = 64 * 1024

    def work(i):
        return sfr[i:i + chunksize].sum(dtype='f8')

    with sharedmem.MapReduce() as pool:
        sfrsum = numpy.sum(pool.map(work, range(0, len(sfr), chunksize)))

    bhmdot = snapdir.load(5, 'bhmdot').copy()
    # fix the ugly things
    bhmdot[bhmdot > 1e3] = 0
    print(snapid, snapdir.redshift, sfrsum, bhmdot.sum(dtype='f8'), len(bhmdot))
Example #17
    def reconstruct_cls_parallel(self):
        """
        Calculates the power spectra for different surveys from Gaussian
        realisations of input power spectra. Depending on the choices, this
        creates mocks of multi-probe surveys taking all the cross-correlations
        into account.
        :return cls: 4D array of cls for all the realisations and all the probes;
        the 0th and 1st axes index the power spectrum, the 2nd axis the realisation
        number, and the 3rd axis the cls belonging to this configuration
        :return tempells: array of ell values, identical for all the probes
        """

        realisations = np.arange(self.params['nrealiz'])
        ncpus = multiprocessing.cpu_count()

        # ncpus = 1
        # Limit the number of processes, to avoid running out of memory
        ncpus = min(ncpus, 32)
        logger.info('Number of available CPUs {}.'.format(ncpus))

        # Manu: use sharedmem for multiprocessing here,
        # by forking rather than pickling
        tStart = time.time()
        with sharedmem.MapReduce(np=ncpus) as pool:
            reslist = pool.map(self, realisations)
#        reslist = map(self, realisations)
        tStop = time.time()

        logger.info('Done generating all maps and spectra.')
        logger.info('Took ' + str((tStop - tStart) / 60.) + ' min')

        # Concatenate the cl lists into 4D arrays. The arrays are expanded and concatenated along the
        # 2nd axis
        cls = np.concatenate([res[0][..., np.newaxis, :] for res in reslist],
                             axis=2)
        noisecls = np.concatenate(
            [res[1][..., np.newaxis, :] for res in reslist], axis=2)
        tempells = reslist[0][2]

        # Remove the noise bias from the auto power spectra
        if self.params['noise']:
            logger.info('Removing noise bias.')
            cls = self.remove_noise(cls, noisecls)

        # Replaced wsps with self.wsps
        #return cls, noisecls, tempells, wsps
        return cls, noisecls, tempells, self.wsps
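The comment about "forking rather than pickling" is the key point of this example: because sharedmem workers are forked, pool.map can dispatch a callable object such as self without the object ever being pickled; only the per-item argument and the returned arrays cross process boundaries. A minimal sketch of that pattern on a POSIX (fork-based) system; the Simulator class and its shapes are illustrative only:

import numpy as np
import sharedmem

class Simulator(object):
    def __init__(self, scale):
        self.scale = scale                          # plain attribute, visible to workers via fork

    def __call__(self, seed):
        rng = np.random.RandomState(seed)
        return self.scale * rng.standard_normal(4)  # one realisation per call

sim = Simulator(scale=2.0)
realisations = np.arange(8)

with sharedmem.MapReduce(np=4) as pool:
    reslist = pool.map(sim, realisations)           # the callable itself is never pickled

result = np.concatenate([r[np.newaxis, :] for r in reslist], axis=0)
assert result.shape == (8, 4)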
Example #18
def test_killed():
    import os
    import signal

    with sharedmem.MapReduce() as pool:

        def work(i):
            time.sleep(0.1 * numpy.random.uniform())
            if i == 10:
                os.kill(os.getpid(), signal.SIGKILL)

        try:
            pool.map(work, range(100))
        except sharedmem.SlaveException:
            return

    raise AssertionError("Shall not reach here")
Example #19
def test_unpicklable_raise():
    with sharedmem.MapReduce() as pool:

        def work(i):
            time.sleep(0.1 * numpy.random.uniform())
            if i == 10:
                raise UnpicklableException("Raise an exception")

        try:
            with warnings.catch_warnings(record=True) as w:
                pool.map(work, range(100))
            # expect a warning here
            assert len(w) == 1
        except Exception as e:
            assert not isinstance(e.reason, UnpicklableException)
            return
    raise AssertionError("Shall not reach here")
Example #20
def test_wordcount():
    """ 
        An example word counting program. The parallelism is per line.

        In reality, the parallelism shall be at least on a file level to
        benefit from sharedmem / multiprocessing.
        
    """
    word_count = {
        'sharedmem': 0,
        'pool': 0,
    }

    with sharedmem.MapReduce() as pool:

        def work(line):
            # create a fresh local counter dictionary
            my_word_count = dict([(word, 0) for word in word_count])

            for word in line.replace('.', ' ').split():
                if word in word_count:
                    my_word_count[word] += 1

            return my_word_count

        def reduce(her_word_count):
            for word in word_count:
                word_count[word] += her_word_count[word]

        pool.map(work, open(__file__, 'r').readlines(), reduce=reduce)

        parallel_result = dict(word_count)

        # establish the ground truth from the sequential counter
        sharedmem.set_debug(True)

        for word in word_count:
            word_count[word] = 0

        pool.map(work, open(__file__, 'r').readlines(), reduce=reduce)
        sharedmem.set_debug(False)

    for word in word_count:
        assert word_count[word] == parallel_result[word]
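As the docstring notes, per-line parallelism is too fine-grained to pay off; a file-level variant of the same map/reduce structure might look like the following sketch (the file list here is just this file repeated, as a placeholder):

import sharedmem

word_count = {'sharedmem': 0, 'pool': 0}
filenames = [__file__] * 4                 # placeholder; real use would list many distinct files

with sharedmem.MapReduce() as pool:
    def work(filename):
        my_count = dict((word, 0) for word in word_count)
        with open(filename, 'r') as f:
            for line in f:
                for word in line.replace('.', ' ').split():
                    if word in my_count:
                        my_count[word] += 1
        return my_count                    # one dictionary per file

    def reduce(partial):
        for word in word_count:
            word_count[word] += partial[word]

    pool.map(work, filenames, reduce=reduce)

print(word_count)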
Example #21
def test_sum():
    """ 
        Integrate [0, ... 1.0) with rectangle rule. 
        Compare results from 
        1. direct sum of 'xdx' (filled by subprocesses)
        2. 'shmsum', accumulated from partial sums on each process
        3. sum of partial sums from each process.

    """
    xdx = sharedmem.empty(1024 * 1024 * 128, dtype='f8')
    shmsum = sharedmem.empty((), dtype='f8')

    shmsum[...] = 0.0

    with sharedmem.MapReduce() as pool:

        def work(i):
            s = slice(i, i + chunksize)
            start, end, step = s.indices(len(xdx))

            dx = 1.0 / len(xdx)

            myxdx = numpy.arange(start, end, step) \
                    * 1.0 / len(xdx) * dx

            xdx[s] = myxdx

            a = xdx[s].sum(dtype='f8')

            with pool.critical:
                shmsum[...] += a

            return i, a

        def reduce(i, a):
            # print('chunk', i, 'done', 'local sum', a)
            return a

        chunksize = 1024 * 1024

        r = pool.map(work, range(0, len(xdx), chunksize), reduce=reduce)

    assert_almost_equal(numpy.sum(r, dtype='f8'), shmsum)
    assert_almost_equal(numpy.sum(xdx, dtype='f8'), shmsum)
Example #22
    def smooth(self, ftype, ngb=32):
        gas = self.F[ftype]
        tree = self.T[ftype]
        from gaepsi.compiledbase.ngbquery import NGBQueryN
        q = NGBQueryN(tree, ngb)
        gas['sml'] = sharedmem.empty(len(gas), dtype='f8')
        with sharedmem.MapReduce(np=self.np) as pool:
            chunksize = 1024 * 64

            def work(i):
                sl = slice(i, i + chunksize)
                x, y, z = gas['pos'][sl].T
                arr = q(x, y, z)[0]['weights']
                arr = arr.reshape(-1, ngb)
                dist = arr[:, 0]**0.5
                gas['sml'][sl] = dist
                print(i, len(gas))

            pool.map(work, range(0, len(gas), chunksize))
Example #23
    def filterAndCalculateActivitySlice(self):
        if not self.sliceImported:
            raise WillowProcessingError('Import slice before filtering it.')
        with sharedmem.MapReduce() as pool:
            nchan = self.slice_nchans // self.ncpu

            def work(i):
                chans = slice(i * nchan, (i + 1) * nchan)
                self.slice_filtered[chans] = dsp.lfilter(FILTER_B,
                                                         FILTER_A,
                                                         self.slice_uv[chans],
                                                         axis=1)
                threshold = np.broadcast_to(
                    np.median(np.abs(self.slice_filtered[chans]), axis=1) *
                    THRESH_SCALE, (self.slice_nsamples, nchan)).transpose()
                self.slice_activity[chans] = np.sum(
                    (self.slice_filtered[chans] < threshold),
                    axis=1) * ACTIVITY_SCALE / float(self.slice_nsamples)

            pool.map(work, range(0, self.ncpu))
        self.sliceBeenFiltered = True
Example #24
def todevice(pos2d, extent, np=None):
    """
        convert to device coordinate
    """
    l, r, b, t = extent

    chunksize = 1024 * 32
    out = sharedmem.empty_like(pos2d)

    def work(i):
        tmp = (pos2d[i:i + chunksize] + 1.0)
        tmp *= 0.5
        tmp[..., 0] *= (r - l)
        tmp[..., 0] += l
        tmp[..., 1] *= (t - b)
        tmp[..., 1] += b
        out[i:i + chunksize] = tmp

    with sharedmem.MapReduce(np=np) as pool:
        pool.map(work, range(0, len(pos2d), chunksize))
    return out
Example #25
def test_ordered():
    t = sharedmem.empty(800)
    with sharedmem.MapReduce(np=32) as pool:

        def work(i):
            time.sleep(0.1 * numpy.random.uniform())
            with pool.ordered:
                t[i] = time.time()

        pool.map(work, range(800))

        # with pool.ordered, the recorded times are in increasing order
        assert (t[1:] > t[:-1]).all()

        def work(i):
            time.sleep(0.1 * numpy.random.uniform())
            t[i] = time.time()

        pool.map(work, range(800))
        # without ordered, the ordering is messy
        assert not (t[1:] > t[:-1]).all()
Example #26
def shm_chunk_gaukernop_at(x, xp, y, data):
    nthread = int(os.environ["TENSIGA_NUM_THREADS"])

    chunk_size = x.shape[0]//nthread
    last_chunk_size = chunk_size + x.shape[0] % nthread

    indices_start = [ chunk_size*k for k in range(nthread-1) ]
    indices_start.append(chunk_size*(nthread-1))
    indices_start = shm.copy(np.array(indices_start))

    indices_stop = [ chunk_size*(k+1) for k in range(nthread-1) ]
    indices_stop.append(chunk_size*(nthread-1) + last_chunk_size)
    indices_stop = shm.copy(np.array(indices_stop))
    
    y = np.ascontiguousarray(y)
    x = shm.copy(x)
    xp = shm.copy(xp)
    y = shm.copy(y)
    data = shm.copy(data)

    result = shm.empty((y.shape[0], 1), np.float64)

    with shm.MapReduce(np=nthread) as pool:
        @jit(fastmath=True)
        def row(k):
            xslice = x[slice(indices_start[k], indices_stop[k]),:]
            res = np.empty((xslice.shape[0],1)) 
            for l in range(xslice.shape[0]):
                d = xslice[l,:] - xp
                norm = np.sqrt(np.sum(d**2, axis=1))
                res[l] = ((data[0]**2) * np.exp(-(norm/(data[1]*data[2]))**2)) @ y

            return k, res 

        def reduce(k, coeff):
            result[slice(indices_start[k], indices_stop[k])] = coeff

        r = pool.map(row, np.arange(nthread), reduce=reduce)

    return result
Example #27
def haloextraMain(args):
    snap = args.snap
    g = snap.readsubhalo()
    try:
        os.makedirs(snap.subhalodir + '/subhalo')
    except OSError:
        pass
    for ptype, field in [(None, 'type'), (0, 'sfr'), (5, 'bhmdot'),
                         (5, 'bhmass')]:
        dtype = extradtype[field]
        try:
            #     wrong_file_or_die(snap.filename('subhalo', field),
            #         dtype.itemsize * len(g))
            pass
        except:
            continue
        target = numpy.memmap(snap.filename('subhalo', field),
                              shape=len(g),
                              dtype=dtype,
                              mode='w+')
        if ptype is not None:
            input = snap.load(ptype, field, g)
            target[:] = 0
            ind = (g['lenbytype'][:, ptype] > 0).nonzero()[0]
            print(len(ind))
            with sharedmem.MapReduce() as pool:
                chunksize = 1024

                def work(s):
                    for i in ind[s:s + chunksize]:
                        target[i] = input[i].sum()

                if len(ind) > 0:
                    pool.map(work, range(0, len(ind), chunksize))
        else:
            print(numpy.isnan(g['mass']).sum())
            target[:] = numpy.isnan(g['mass'])
            lg = snap.readgroup()
            assert (target[:] == 1).sum() == len(lg)
        target.flush()
Example #28
def shm_gaukernop_at(x, xp, y, data):
    y = np.ascontiguousarray(y)
    x_shm = shm.copy(x)
    xp_shm = shm.copy(xp)
    y_shm = shm.copy(y)
    data_shm = shm.copy(data)

    nthread = int(os.environ["TENSIGA_NUM_THREADS"])

    result = shm.empty(y.shape, np.float64)
    with shm.MapReduce(np=nthread) as pool:
        def row(k):
            d = x_shm[k,:] - xp_shm
            norm = np.sqrt(np.sum(d**2, axis=1))
            return k, ((data_shm[0]**2) * np.exp(-(norm/(data_shm[1]*data_shm[2]))**2)) @ y_shm

        def reduce(k, coeff):
            result[k] = coeff

        r = pool.map(row, np.arange(x_shm.shape[0]), reduce=reduce)

    return result
Example #29
def makegigapan(snaps, camera, CCD):
    x = []
    y = []
    z = []
    T = []
    sml = []
    mass = []
    print('reading')
    Len = numpy.array([snap.C['N'][0] for snap in snaps], dtype='intp')
    End = Len.cumsum()
    Start = End.copy()
    Start[1:] = End[:-1]
    Start[0] = 0
    N = Len.sum()

    x = sharedmem.empty(N, dtype='f4')
    y = sharedmem.empty(N, dtype='f4')
    z = sharedmem.empty(N, dtype='f4')
    T = sharedmem.empty(N, dtype='f4')
    sml = sharedmem.empty(N, dtype='f4')
    mass = sharedmem.empty(N, dtype='f4')

    with sharedmem.MapReduce() as pool:
        def work(i):
            sl = slice(Start[i], End[i])
            snap = snaps[i]
            x[sl] = snap[0, 'pos'][:, 0]
            y[sl] = snap[0, 'pos'][:, 1]
            z[sl] = snap[0, 'pos'][:, 2]
            cub.apply(x[sl], y[sl], z[sl])
            ie = snap[0, 'ie']
            ye = snap[0, 'ye']
            T[sl] = cosmology.ie2T(ie=ie, ye=ye, Xh=0.76)
            sml[sl] = snap[0, 'sml']
            mass[sl] = snap[0, 'mass']
        pool.map(work, range(len(snaps)))

    print('painting')
    paint((x, y, z), T, mass, sml, camera, CCD, normalize=False, direct_write=True)
Example #30
def get_features_from_states(env, states, feature_fn):
    import sharedmem
    n_states = len(states)
    feat_len = len(feature_fn(env, states[0]))
    state_ids = np.arange(n_states, dtype='i')

    features = sharedmem.full((n_states, feat_len), 0.)

    # Start multi processing over support states
    with sharedmem.MapReduce() as pool:
        if n_states % sharedmem.cpu_count() == 0:
            chunksize = n_states // sharedmem.cpu_count()
        else:
            chunksize = n_states // sharedmem.cpu_count() + 1

        def work(i):
            s_ids = state_ids[slice(i, i + chunksize)]
            for j, s_id in enumerate(s_ids):
                s = states[s_id]  # state id in states
                features[s_id] = feature_fn(env, s)

        pool.map(work, range(0, n_states, chunksize))  #, reduce=reduce)
    return np.array(features)