def fstack_mp(img, fmap):
    img_stacked = shmem.empty(img.shape[0:2], dtype='uint16')

    # This implementation is faster than breaking each image plane up
    # for parallel processing.
    def do_work(x):
        index = ne.evaluate("fmap==x")
        img_stacked[index] = img[:, :, x][index]
        index = ne.evaluate("(fmap > x) & (fmap < x+1)")
        A = fmap[index]
        B = img[:, :, x+1][index]
        C = img[:, :, x][index]
        img_stacked[index] = ne.evaluate("(A-x) * B + (x+1-A) * C")

    with shmem.MapReduce(np=img.shape[2]-1) as pool:
        pool.map(do_work, range(img.shape[2]-1))

    last_ind = img.shape[2]-1
    index = ne.evaluate("fmap == last_ind")
    num_proc = shmem.cpu_count()
    edges = get_edges(img, num_proc)

    def mp_assignment(x):
        img_stacked[edges[x]:edges[x+1], :][index[edges[x]:edges[x+1], :]] = \
            img[edges[x]:edges[x+1], :, -1][index[edges[x]:edges[x+1], :]]

    with shmem.MapReduce(np=num_proc) as pool:
        pool.map(mp_assignment, range(num_proc))
    return img_stacked
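# A minimal sketch of the shared-array pattern used throughout these
# snippets (the names below are illustrative, not from the code above):
# MapReduce forks worker processes, so in-place writes into an array
# allocated with sharedmem.empty are visible to the parent without any
# explicit return value.
import numpy as np
import sharedmem

def _sharedmem_pattern_demo():
    out = sharedmem.empty(8, dtype='f8')
    with sharedmem.MapReduce(np=4) as pool:
        def work(i):
            out[i] = i * i   # in-place write to shared memory
        pool.map(work, range(8))
    assert np.allclose(out, np.arange(8) ** 2)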
def __call__(self, left, right):
    inter_shape = self.intermediate_shape_func(left.shape, right.shape)
    inter_dtype = self.intermediate_dtype_func(left.dtype, right.dtype)
    out_shape = self.reduce_shape_func(inter_shape)
    out_dtype = self.reduce_dtype_func(inter_dtype)
    # vstack results from processes, or share result and use pool.critical?
    # a vstack seems most foolproof, though it uses more mem.
    with sharedmem.MapReduce(np=self.n_jobs) as pool:
        m_left = sharedmem.full_like(left, left)
        m_right = sharedmem.full_like(right, right)
        result = sharedmem.empty(out_shape, out_dtype)
        chunksize = self.get_chunk_size(inter_shape, inter_dtype)
        #print("chunksize {}".format(chunksize))
        n = m_left.shape[0]

        def op(i):
            ix = slice(i, min(i + chunksize, n))
            res = self.reduce_op(self.binary_op(m_left[ix], m_right))
            return ix, res

        def insert(ix_slice, res):
            result[ix_slice] = res

        pool.map(op, range(0, m_left.shape[0], chunksize), reduce=insert)
    return np.array(result)
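# The reduce= callback used above is worth isolating: when the work
# function returns a tuple, sharedmem unpacks it into the reducer's
# positional arguments, and the reducer runs in the parent process, so
# it may safely fill an ordinary (non-shared) buffer. A sketch,
# assuming only numpy and sharedmem:
import numpy as np
import sharedmem

def _reduce_callback_demo():
    data = np.arange(10, dtype='f8')
    result = np.empty_like(data)  # plain array; only the parent writes it
    with sharedmem.MapReduce(np=4) as pool:
        def op(i):
            ix = slice(i, i + 2)
            return ix, data[ix] * 2      # tuple return ...

        def insert(ix, chunk):
            result[ix] = chunk           # ... unpacked here, in the parent
        pool.map(op, range(0, len(data), 2), reduce=insert)
    assert np.allclose(result, data * 2)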
def test_critical():
    t = sharedmem.empty((), dtype='i8')
    t[...] = 0
    # FIXME: if the system has one core then this will never fail,
    # even if the critical section is not actually enforced.
    with sharedmem.MapReduce(np=8) as pool:
        def work(i):
            with pool.critical:
                t[...] = 1
                if i != 30:
                    time.sleep(0.01)
                assert_equal(t, 1)
                t[...] = 0
        pool.map(work, range(16))

        def work(i):
            t[...] = 1
            if i != 30:
                time.sleep(0.01)
            assert_equal(t, 1)
            t[...] = 0
        try:
            pool.map(work, range(16))
        except sharedmem.SlaveException as e:
            assert isinstance(e.reason, AssertionError)
            return
    raise AssertionError("Shall not reach here.")
def make_predictions(self, angles, eccentricities, sizes, n_jobs=4):
    self.angles, self.eccentricities, self.sizes = np.meshgrid(
        angles, eccentricities, sizes)
    #self.predictions = np.zeros(list(self.angles.shape) + [self.stimulus.run_length])
    #self.predictions = self.predictions.reshape(-1, self.predictions.shape[-1]).T
    self.xs = np.cos(self.angles) * self.eccentricities
    self.ys = np.sin(self.angles) * self.eccentricities
    print(self.xs.shape)
    print(len(self.xs.ravel()))

    with sharedmem.MapReduce(np=n_jobs) as pool:
        def make_predictions(args):
            x, y, s = args
            return self.model_func.generate_prediction(x, y, s, 1, 0)

        pb = tqdm(total=self.angles.size)

        def reduce(r):
            pb.update()
            return r

        args = list(
            zip(self.xs.ravel(), self.ys.ravel(), self.sizes.ravel()))
        self.predictions = np.array(
            pool.map(make_predictions, args, reduce=reduce)).T
def generate_roadmap_parallel(samples, env, max_dist, leafsize, knn):
    """Parallelized roadmap generator"""
    n_sample = len(samples)
    leafsize = knn
    if len(samples) < leafsize:
        leafsize = len(samples) - 1

    import sharedmem
    sample_ids = np.arange(n_sample, dtype='i')
    roadmap = sharedmem.full((n_sample, knn), 0)

    # Start multi processing over samples
    with sharedmem.MapReduce() as pool:
        # integer chunk size, rounded up so all samples are covered
        if n_sample % sharedmem.cpu_count() == 0:
            chunksize = n_sample // sharedmem.cpu_count()
        else:
            chunksize = n_sample // sharedmem.cpu_count() + 1

        def work(i):
            skdtree = KDTree(samples, leafsize=leafsize)
            sub_sample_ids = sample_ids[slice(i, i + chunksize)]
            for j, sub_sample_id in enumerate(sub_sample_ids):
                x = samples[sub_sample_id]
                try:
                    inds, dists = skdtree.search(x, k=leafsize)
                except Exception:
                    print("skdtree search failed")
                    sys.exit()

                edge_id = []
                append = edge_id.append
                for ii, (ind, dist) in enumerate(zip(inds, dists)):
                    if dist > max_dist:
                        break  # undirected
                    if len(edge_id) >= knn:
                        break  # directed?
                    append(ind)

                # to complement fewer number of edges for vectorized value iteration
                if len(edge_id) < knn:
                    for ii in range(0, len(inds)):
                        #for ind in edge_id:
                        #    edge_id.append(ind)
                        #    if len(edge_id) >= knn: break
                        append(inds[0])
                        if len(edge_id) >= knn:
                            break
                assert len(edge_id) <= leafsize, \
                    "fewer leaves than edges {} (dists={})".format(
                        len(edge_id), dists[:len(edge_id)])
                for k in range(len(edge_id)):
                    roadmap[sub_sample_id][k] = edge_id[k]

        pool.map(work, range(0, n_sample, chunksize))  #, reduce=reduce)

    # convert sharedmem array to list
    roadmap = np.array(roadmap).astype(int)
    skdtree = None  #KDTree(samples, leafsize=leafsize)
    return roadmap.tolist(), skdtree
def argsort(ar):
    min = minimum.reduce(ar)
    max = maximum.reduce(ar)
    nchunk = sharedmem.cpu_count() * 2
    #bins = numpy.linspace(min, max, nchunk, endpoint=True)
    step = 1.0 * (max - min) / nchunk
    bins = numpy.array(
        1.0 * numpy.arange(nchunk + 1) * (max - min) / nchunk + min,
        min.dtype)
    dig = digitize(ar, bins)
    binlength = bincount(dig, minlength=len(bins) + 1)
    binoffset = numpy.cumsum(binlength)
    out = sharedmem.empty(len(ar), dtype='intp')

    with sharedmem.MapReduce() as pool:
        def work(i):
            # we can do this a lot faster
            # but already having pretty good speed.
            ind = numpy.nonzero(dig == i + 1)[0]
            myar = ar[ind]
            out[binoffset[i]:binoffset[i + 1]] = ind[myar.argsort()]
        # range(nchunk + 1), so the top bin is included: digitize assigns
        # elements equal to the maximum to bin nchunk + 1.
        pool.map(work, range(nchunk + 1))
    return out
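# A quick sanity check for the binned argsort above; a sketch assuming
# the module-level names it relies on (minimum, maximum, digitize,
# bincount from numpy, plus sharedmem) are in scope:
def _argsort_demo():
    ar = numpy.random.uniform(size=100000)
    ind = argsort(ar)
    # sorting by bins, then within each bin, must agree with numpy
    assert (ar[ind] == numpy.sort(ar)).all()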
def ztree(self, zkey=None, scale=None, minthresh=10, maxthresh=20, np=None):
    if scale is None:
        scale = fc.scale(self['locations'].min(axis=0),
                         self['locations'].ptp(axis=0))
    if zkey is None:
        zkey = sharedmem.empty(self.numpoints, dtype=fc.fckeytype)

    with sharedmem.MapReduce(np=np) as pool:
        chunksize = 1024 * 1024

        def work(i):
            X, Y, Z = self['locations'][i:i + chunksize].T
            fc.encode(X, Y, Z, scale=scale, out=zkey[i:i + chunksize])
        pool.map(work, range(0, len(zkey), chunksize))

    # use sharedmem.argsort, because it is faster
    arg = sharedmem.argsort(zkey, np=np)

    return zt.Tree(zkey=zkey, scale=scale, arg=arg,
                   minthresh=minthresh, maxthresh=maxthresh)
def paint(pos, sml, data, shape, mask=None, np=0):
    """ paint (pos, sml, data) onto an image of the given shape.

        data is a list of quantities to paint, one per channel.

        pos[0] : 0 .. height
        pos[1] : 0 .. width

        so remember to transpose when using imshow.

        returns (nchan, shape[0], shape[1])
    """
    with sharedmem.MapReduce(np=np) as pool:
        if np > 0:
            nbuf = np
        else:
            nbuf = 1
        # one accumulation buffer per process, summed at the end
        buf = sharedmem.empty((nbuf, len(data)) + shape, dtype='f4')
        buf[:] = 0
        chunksize = 1024 * 8

        def work(i):
            sl = slice(i, i + chunksize)
            datas = [d[sl] for d in data]
            if mask is not None:
                masks = mask[sl]
            else:
                masks = None
            _painter.paint(pos[sl], sml[sl], numpy.array(datas),
                           buf[pool.local.rank], masks)
        pool.map(work, range(0, len(pos), chunksize))
    return numpy.sum(buf, axis=0)
def test_warnings():
    import warnings
    with sharedmem.MapReduce(np=8) as pool:
        def work(i):
            warnings.warn("This shall not be printed")
        pool.map(work, range(8))
def sharedmem_pool(total_cores, numexpr=True):
    # see https://stackoverflow.com/questions/15639779
    global AFFINITY_FLAG
    if not AFFINITY_FLAG:
        AFFINITY_FLAG = True
        # reset CPU affinity once: some BLAS builds imported via numpy pin
        # the process to a single core, which would serialize the pool
        os.system("taskset -p 0xfff %d" % os.getpid())
    if total_cores is None:
        total_cores = sm.cpu_count()
    return sm.MapReduce(np=good_process_number(total_cores, numexpr))
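# Hypothetical usage of sharedmem_pool, assuming AFFINITY_FLAG and
# good_process_number are defined at module level alongside it:
def _pool_demo():
    with sharedmem_pool(total_cores=4) as pool:
        pool.map(lambda i: i * i, range(16))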
def get_fmap(img):
    num_proc = shmem.cpu_count()
    log_kernel = get_log_kernel(11, 2)
    se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (17, 17))

    def mp_imgproc(x):
        # pad each chunk by 50 rows on interior boundaries so the filter
        # and dilation show no seams, then crop the padding away again
        bound_in = (edges[x] - (edges[x] > 0) * 50,
                    edges[x+1] + (edges[x+1] < img.shape[0]) * 50)
        bound_out = (50 if edges[x] > 0 else 0,
                     None if edges[x+1] == img.shape[0] else -50)
        part_img = cv2.filter2D(
            img[bound_in[0]:bound_in[1], :, ii].astype('single'),
            -1, log_kernel)
        part_img = cv2.dilate(part_img, se)
        img_filtered[edges[x]:edges[x+1], :] = part_img[bound_out[0]:bound_out[1], :]

    def mp_gaussblur(x):
        bound_in = (edges[x] - (edges[x] > 0) * 50,
                    edges[x+1] + (edges[x+1] < img.shape[0]) * 50)
        bound_out = (50 if edges[x] > 0 else 0,
                     None if edges[x+1] == img.shape[0] else -50)
        part_img = cv2.GaussianBlur(fmap[bound_in[0]:bound_in[1], :], (31, 31), 6)
        fmap[edges[x]:edges[x+1], :] = part_img[bound_out[0]:bound_out[1], :]

    log_response = shmem.empty(img.shape[0:2], dtype='single')
    fmap = shmem.empty(img.shape[0:2], dtype='single')
    edges = get_edges(img, num_proc)

    def mp_assignment_1(x):
        log_response[edges[x]:edges[x+1], :] = img_filtered[edges[x]:edges[x+1], :]

    def mp_assignment_2(x):
        # slice the chunk first; the mask has chunk shape, so indexing the
        # full-size fmap with it directly would be a shape mismatch
        fmap[edges[x]:edges[x+1], :][index[edges[x]:edges[x+1], :]] = ii

    for ii in range(img.shape[2]):
        img_filtered = shmem.empty((img.shape[0], img.shape[1]), dtype='single')
        with shmem.MapReduce(np=num_proc) as pool:
            pool.map(mp_imgproc, range(num_proc))
        index = ne.evaluate("img_filtered > log_response")
        with shmem.MapReduce(np=num_proc) as pool:
            pool.map(mp_assignment_1, range(num_proc))
            # log_response[index] = img_filtered[index]
        with shmem.MapReduce(np=num_proc) as pool:
            pool.map(mp_assignment_2, range(num_proc))
    with shmem.MapReduce(np=num_proc) as pool:
        pool.map(mp_gaussblur, range(num_proc))
    return fmap
def test_local():
    t = sharedmem.empty(800)
    with sharedmem.MapReduce(np=4) as pool:
        def work(i):
            time.sleep(0.1 * numpy.random.uniform())
            with pool.ordered:
                t[i] = pool.local.rank
        pool.map(work, range(800))
    assert_equal(numpy.unique(t), range(4))
def fstack_mp_new(img, fmap):
    img_stacked = shmem.empty(img.shape[0:2], dtype='uint16')
    indexl = shmem.empty(img.shape[0:2], dtype='bool')
    edges = get_edges(img, 16)

    # This implementation is faster than breaking each image plane up
    # for parallel processing.
    def do_work(x):
        if x != img.shape[2]-1:
            def mt_assignment(arr, y):
                # slice the chunk first; the mask has chunk shape, so
                # indexing the full array with it would be a shape mismatch
                sl = slice(edges[y], edges[y+1])
                return arr[sl, :][index[sl, :]]

            index = ne.evaluate("fmap==x")
            img_stacked[index] = img[:, :, x][index]
            index = ne.evaluate("(fmap > x) & (fmap < x+1)")
            with ThreadPoolExecutor(max_workers=16) as pool:
                A = np.concatenate([pool.submit(mt_assignment, fmap, y).result()
                                    for y in range(16)], axis=0)
                B = np.concatenate([pool.submit(mt_assignment, img[:, :, x+1], y).result()
                                    for y in range(16)], axis=0)
                C = np.concatenate([pool.submit(mt_assignment, img[:, :, x], y).result()
                                    for y in range(16)], axis=0)
            print('A Shape is : ', A.shape)
            print('A content is: ', A)
            img_stacked[index] = ne.evaluate("(A-x) * B + (x+1-A) * C")
        else:
            last_ind = img.shape[2]-1
            # write into the shared array in place; a plain assignment would
            # only rebind a worker-local name and the parent would not see it
            indexl[...] = ne.evaluate("fmap == last_ind")

    with shmem.MapReduce(np=img.shape[2]) as pool:
        pool.map(do_work, range(img.shape[2]))

    num_proc = shmem.cpu_count()
    edges = get_edges(img, num_proc)

    def mp_assignment(x):
        img_stacked[edges[x]:edges[x+1], :][indexl[edges[x]:edges[x+1], :]] = \
            img[edges[x]:edges[x+1], :, -1][indexl[edges[x]:edges[x+1], :]]

    with shmem.MapReduce(np=num_proc) as pool:
        pool.map(mp_assignment, range(num_proc))
    return img_stacked
def test_scalar():
    s = sharedmem.empty((), dtype='f8')
    s[...] = 1.0
    assert_equal(s, 1.0)
    with sharedmem.MapReduce() as pool:
        def work(i):
            with pool.ordered:
                s[...] = i
        pool.map(work, range(10))
    assert_equal(s, 9)
def test_picklable_raise():
    with sharedmem.MapReduce() as pool:
        def work(i):
            time.sleep(0.1 * numpy.random.uniform())
            if i == 10:
                raise PicklableException("Raise an exception")
        try:
            pool.map(work, range(100))
        except sharedmem.SlaveException as e:
            assert isinstance(e.reason, PicklableException)
            return
    raise AssertionError("Shall not reach here")
def process(snapid):
    snapdir = readsubhalo.SnapDir(snapid, '../')
    sfr = snapdir.load(0, 'sfr')
    chunksize = 64 * 1024

    def work(i):
        return sfr[i:i + chunksize].sum(dtype='f8')

    with sharedmem.MapReduce() as pool:
        sfrsum = numpy.sum(pool.map(work, range(0, len(sfr), chunksize)))
    bhmdot = snapdir.load(5, 'bhmdot').copy()
    # fix the ugly things: zero out unphysically large accretion rates
    bhmdot[bhmdot > 1e3] = 0
    print(snapid, snapdir.redshift, sfrsum, bhmdot.sum(dtype='f8'), len(bhmdot))
def reconstruct_cls_parallel(self):
    """
    Calculates the power spectra for different surveys from Gaussian
    realisations of input power spectra. Depending on the choices, this
    creates mocks of multi-probe surveys taking all the cross-correlations
    into account.
    :return cls: 4D array of cls for all the realisations and all the
        probes; the 0th and 1st axes index the power spectrum, the 2nd
        axis denotes the realisation number and the 3rd axis gives the
        cls belonging to this configuration
    :return tempells: array of ell values which is equal for all the probes
    """
    realisations = np.arange(self.params['nrealiz'])
    ncpus = multiprocessing.cpu_count()
    # ncpus = 1
    # Limit the number of processes, to avoid running out of memory
    ncpus = min(ncpus, 32)
    logger.info('Number of available CPUs {}.'.format(ncpus))

    # Manu: use sharedmem for multiprocessing here,
    # by forking rather than pickling
    tStart = time.time()
    with sharedmem.MapReduce(np=ncpus) as pool:
        reslist = pool.map(self, realisations)
        # reslist = map(self, realisations)
    tStop = time.time()
    logger.info('Done generating all maps and spectra.')
    logger.info('Took ' + str((tStop - tStart) / 60.) + ' min')

    # Concatenate the cl lists into 4D arrays. The arrays are expanded and
    # concatenated along the 2nd axis.
    cls = np.concatenate([res[0][..., np.newaxis, :] for res in reslist],
                         axis=2)
    noisecls = np.concatenate([res[1][..., np.newaxis, :] for res in reslist],
                              axis=2)
    tempells = reslist[0][2]

    # Remove the noise bias from the auto power spectra
    if self.params['noise']:
        logger.info('Removing noise bias.')
        cls = self.remove_noise(cls, noisecls)

    # Replaced wsps with self.wsps
    #return cls, noisecls, tempells, wsps
    return cls, noisecls, tempells, self.wsps
def test_killed():
    import os
    import signal
    with sharedmem.MapReduce() as pool:
        def work(i):
            time.sleep(0.1 * numpy.random.uniform())
            if i == 10:
                os.kill(os.getpid(), signal.SIGKILL)
        try:
            pool.map(work, range(100))
        except sharedmem.SlaveException:
            return
    raise AssertionError("Shall not reach here")
def test_unpicklable_raise():
    with sharedmem.MapReduce() as pool:
        def work(i):
            time.sleep(0.1 * numpy.random.uniform())
            if i == 10:
                raise UnpicklableException("Raise an exception")
        try:
            with warnings.catch_warnings(record=True) as w:
                pool.map(work, range(100))
                # expect a warning here
                assert len(w) == 1
        except Exception as e:
            assert not isinstance(e.reason, UnpicklableException)
            return
    raise AssertionError("Shall not reach here")
def test_wordcount():
    """
        An example word counting program. The parallelism is per line.

        In reality, the parallelism shall be at least on a file level to
        benefit from sharedmem / multiprocessing.
    """
    word_count = {
        'sharedmem': 0,
        'pool': 0,
    }

    with sharedmem.MapReduce() as pool:
        def work(line):
            # create a fresh local counter dictionary
            my_word_count = dict([(word, 0) for word in word_count])
            for word in line.replace('.', ' ').split():
                if word in word_count:
                    my_word_count[word] += 1
            return my_word_count

        def reduce(her_word_count):
            for word in word_count:
                word_count[word] += her_word_count[word]

        pool.map(work, open(__file__, 'r').readlines(), reduce=reduce)
        parallel_result = dict(word_count)

        # establish the ground truth from the sequential counter
        sharedmem.set_debug(True)
        for word in word_count:
            word_count[word] = 0
        pool.map(work, open(__file__, 'r').readlines(), reduce=reduce)
        sharedmem.set_debug(False)

    for word in word_count:
        assert word_count[word] == parallel_result[word]
def test_sum():
    """
        Integrate [0, ... 1.0) with rectangle rule.
        Compare results from
        1. direct sum of 'xdx' (filled by subprocesses)
        2. 'shmsum', accumulated by partial sums on each process
        3. sum of partial sums from each process.
    """
    xdx = sharedmem.empty(1024 * 1024 * 128, dtype='f8')
    shmsum = sharedmem.empty((), dtype='f8')
    shmsum[...] = 0.0

    with sharedmem.MapReduce() as pool:
        def work(i):
            s = slice(i, i + chunksize)
            start, end, step = s.indices(len(xdx))
            dx = 1.0 / len(xdx)
            myxdx = numpy.arange(start, end, step) \
                    * 1.0 / len(xdx) * dx
            xdx[s] = myxdx
            a = xdx[s].sum(dtype='f8')
            with pool.critical:
                shmsum[...] += a
            return i, a

        def reduce(i, a):
            # print('chunk', i, 'done', 'local sum', a)
            return a

        chunksize = 1024 * 1024
        r = pool.map(work, range(0, len(xdx), chunksize), reduce=reduce)

    assert_almost_equal(numpy.sum(r, dtype='f8'), shmsum)
    assert_almost_equal(numpy.sum(xdx, dtype='f8'), shmsum)
def smooth(self, ftype, ngb=32):
    gas = self.F[ftype]
    tree = self.T[ftype]
    from gaepsi.compiledbase.ngbquery import NGBQueryN
    q = NGBQueryN(tree, ngb)
    gas['sml'] = sharedmem.empty(len(gas), dtype='f8')

    with sharedmem.MapReduce(np=self.np) as pool:
        chunksize = 1024 * 64

        def work(i):
            sl = slice(i, i + chunksize)
            x, y, z = gas['pos'][sl].T
            arr = q(x, y, z)[0]['weights']
            arr = arr.reshape(-1, ngb)
            # the query returns squared distances; take the square root
            dist = arr[:, 0] ** 0.5
            gas['sml'][sl] = dist
            print(i, len(gas))
        pool.map(work, range(0, len(gas), chunksize))
def filterAndCalculateActivitySlice(self):
    if not self.sliceImported:
        raise WillowProcessingError('Import slice before filtering it.')
    with sharedmem.MapReduce() as pool:
        nchan = self.slice_nchans // self.ncpu

        def work(i):
            chans = slice(i * nchan, (i + 1) * nchan)
            self.slice_filtered[chans] = dsp.lfilter(
                FILTER_B, FILTER_A, self.slice_uv[chans], axis=1)
            threshold = np.broadcast_to(
                np.median(np.abs(self.slice_filtered[chans]), axis=1)
                * THRESH_SCALE,
                (self.slice_nsamples, nchan)).transpose()
            self.slice_activity[chans] = np.sum(
                (self.slice_filtered[chans] < threshold),
                axis=1) * ACTIVITY_SCALE / float(self.slice_nsamples)
        pool.map(work, range(0, self.ncpu))
    self.sliceBeenFiltered = True
def todevice(pos2d, extent, np=None):
    """ convert normalized coordinates in [-1, 1] to device coordinates """
    l, r, b, t = extent
    chunksize = 1024 * 32
    out = sharedmem.empty_like(pos2d)

    def work(i):
        tmp = (pos2d[i:i + chunksize] + 1.0)
        tmp *= 0.5
        tmp[..., 0] *= (r - l)
        tmp[..., 0] += l
        tmp[..., 1] *= (t - b)
        tmp[..., 1] += b
        out[i:i + chunksize] = tmp

    with sharedmem.MapReduce(np=np) as pool:
        pool.map(work, range(0, len(pos2d), chunksize))
    return out
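# A small worked example of the mapping, assuming the usual
# matplotlib-style extent ordering (left, right, bottom, top):
# (x + 1) * 0.5 rescales [-1, 1] to [0, 1], which is then stretched
# onto the extent, so the corners and center land where expected.
def _todevice_demo():
    pos2d = numpy.array([[-1.0, -1.0], [1.0, 1.0], [0.0, 0.0]])
    dev = todevice(pos2d, extent=(0.0, 100.0, 0.0, 50.0))
    assert numpy.allclose(dev, [[0.0, 0.0], [100.0, 50.0], [50.0, 25.0]])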
def test_ordered():
    t = sharedmem.empty(800)
    with sharedmem.MapReduce(np=32) as pool:
        def work(i):
            time.sleep(0.1 * numpy.random.uniform())
            with pool.ordered:
                t[i] = time.time()
        pool.map(work, range(800))
        # with ordered, the times are in increasing order
        assert (t[1:] > t[:-1]).all()

        def work(i):
            time.sleep(0.1 * numpy.random.uniform())
            t[i] = time.time()
        pool.map(work, range(800))
        # without ordered, the ordering is messy
        assert not (t[1:] > t[:-1]).all()
def shm_chunk_gaukernop_at(x, xp, y, data):
    nthread = int(os.environ["TENSIGA_NUM_THREADS"])
    chunk_size = x.shape[0] // nthread
    last_chunk_size = chunk_size + x.shape[0] % nthread

    indices_start = [chunk_size * k for k in range(nthread - 1)]
    indices_start.append(chunk_size * (nthread - 1))
    indices_start = shm.copy(np.array(indices_start))

    indices_stop = [chunk_size * (k + 1) for k in range(nthread - 1)]
    indices_stop.append(chunk_size * (nthread - 1) + last_chunk_size)
    indices_stop = shm.copy(np.array(indices_stop))

    y = np.ascontiguousarray(y)

    x = shm.copy(x)
    xp = shm.copy(xp)
    y = shm.copy(y)
    data = shm.copy(data)

    result = shm.empty((y.shape[0], 1), np.float64)
    with shm.MapReduce(np=nthread) as pool:
        @jit(fastmath=True)
        def row(k):
            xslice = x[slice(indices_start[k], indices_stop[k]), :]
            res = np.empty((xslice.shape[0], 1))
            for l in range(xslice.shape[0]):
                d = xslice[l, :] - xp
                norm = np.sqrt(np.sum(d**2, axis=1))
                res[l] = ((data[0]**2) * np.exp(-(norm/(data[1]*data[2]))**2)) @ y
            return k, res

        def reduce(k, coeff):
            result[slice(indices_start[k], indices_stop[k])] = coeff
        r = pool.map(row, np.arange(nthread), reduce=reduce)

    return result
def haloextraMain(args):
    snap = args.snap
    g = snap.readsubhalo()
    try:
        os.makedirs(snap.subhalodir + '/subhalo')
    except OSError:
        pass
    for ptype, field in [(None, 'type'), (0, 'sfr'), (5, 'bhmdot'),
                         (5, 'bhmass')]:
        dtype = extradtype[field]
        try:
            # wrong_file_or_die(snap.filename('subhalo', field),
            #                   dtype.itemsize * len(g))
            pass
        except:
            continue
        target = numpy.memmap(snap.filename('subhalo', field),
                              shape=len(g), dtype=dtype, mode='w+')
        if ptype is not None:
            input = snap.load(ptype, field, g)
            target[:] = 0
            ind = (g['lenbytype'][:, ptype] > 0).nonzero()[0]
            print(len(ind))
            with sharedmem.MapReduce() as pool:
                chunksize = 1024

                def work(s):
                    for i in ind[s:s + chunksize]:
                        target[i] = input[i].sum()
                if len(ind) > 0:
                    pool.map(work, range(0, len(ind), chunksize))
        else:
            print(numpy.isnan(g['mass']).sum())
            target[:] = numpy.isnan(g['mass'])
            lg = snap.readgroup()
            assert (target[:] == 1).sum() == len(lg)
        target.flush()
def shm_gaukernop_at(x, xp, y, data):
    y = np.ascontiguousarray(y)

    x_shm = shm.copy(x)
    xp_shm = shm.copy(xp)
    y_shm = shm.copy(y)
    data_shm = shm.copy(data)

    nthread = int(os.environ["TENSIGA_NUM_THREADS"])
    result = shm.empty(y.shape, np.float64)
    with shm.MapReduce(np=nthread) as pool:
        def row(k):
            d = x_shm[k, :] - xp_shm
            norm = np.sqrt(np.sum(d**2, axis=1))
            return k, ((data_shm[0]**2)
                       * np.exp(-(norm/(data_shm[1]*data_shm[2]))**2)) @ y_shm

        def reduce(k, coeff):
            result[k] = coeff
        r = pool.map(row, np.arange(x_shm.shape[0]), reduce=reduce)

    return result
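# A dense sequential reference for validating both kernel operators
# above; a sketch assuming scipy is available. It forms the same
# squared-exponential kernel (variance data[0]**2, length scale
# data[1]*data[2]) without shared memory:
from scipy.spatial.distance import cdist

def gaukernop_at_reference(x, xp, y, data):
    # K[i, j] = data[0]**2 * exp(-(|x_i - xp_j| / (data[1]*data[2]))**2)
    K = (data[0]**2) * np.exp(-(cdist(x, xp) / (data[1]*data[2]))**2)
    return K @ y

# e.g. np.allclose(shm_gaukernop_at(x, xp, y, data),
#                  gaukernop_at_reference(x, xp, y, data))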
def makegigapan(snaps, camera, CCD):
    print('reading')
    Len = numpy.array([snap.C['N'][0] for snap in snaps], dtype='intp')
    End = Len.cumsum()
    Start = End.copy()
    Start[1:] = End[:-1]
    Start[0] = 0
    N = Len.sum()
    x = sharedmem.empty(N, dtype='f4')
    y = sharedmem.empty(N, dtype='f4')
    z = sharedmem.empty(N, dtype='f4')
    T = sharedmem.empty(N, dtype='f4')
    sml = sharedmem.empty(N, dtype='f4')
    mass = sharedmem.empty(N, dtype='f4')

    with sharedmem.MapReduce() as pool:
        def work(i):
            sl = slice(Start[i], End[i])
            snap = snaps[i]
            x[sl] = snap[0, 'pos'][:, 0]
            y[sl] = snap[0, 'pos'][:, 1]
            z[sl] = snap[0, 'pos'][:, 2]
            cub.apply(x[sl], y[sl], z[sl])
            ie = snap[0, 'ie']
            ye = snap[0, 'ye']
            T[sl] = cosmology.ie2T(ie=ie, ye=ye, Xh=0.76)
            sml[sl] = snap[0, 'sml']
            mass[sl] = snap[0, 'mass']
        pool.map(work, range(len(snaps)))

    print('painting')
    paint((x, y, z), T, mass, sml, camera, CCD,
          normalize=False, direct_write=True)
def get_features_from_states(env, states, feature_fn):
    import sharedmem
    n_states = len(states)
    feat_len = len(feature_fn(env, states[0]))

    state_ids = np.arange(n_states, dtype='i')
    features = sharedmem.full((n_states, feat_len), 0.)

    # Start multi processing over support states
    with sharedmem.MapReduce() as pool:
        # integer chunk size, rounded up so all states are covered
        if n_states % sharedmem.cpu_count() == 0:
            chunksize = n_states // sharedmem.cpu_count()
        else:
            chunksize = n_states // sharedmem.cpu_count() + 1

        def work(i):
            s_ids = state_ids[slice(i, i + chunksize)]
            for j, s_id in enumerate(s_ids):
                s = states[s_id]  # state id in states
                features[s_id] = feature_fn(env, s)
        pool.map(work, range(0, n_states, chunksize))  #, reduce=reduce)

    return np.array(features)
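# A toy invocation of get_features_from_states with a hypothetical
# feature function; env is unused here and passed as None purely for
# illustration:
def _features_demo():
    def feature_fn(env, state):
        return np.array([state[0], state[1], state[0] * state[1]])

    states = [np.array([0.0, 1.0]), np.array([2.0, 3.0])]
    feats = get_features_from_states(None, states, feature_fn)
    # feats -> [[0., 1., 0.], [2., 3., 6.]]
    assert np.allclose(feats, [[0., 1., 0.], [2., 3., 6.]])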