Example #1
  def _featurize_complexes(self, df, featurizer, parallel=True,
                           worker_pool=None):
    """Generates circular fingerprints for dataset."""
    protein_pdbs = list(df["protein_pdb"])
    ligand_pdbs = list(df["ligand_pdb"])
    complexes = zip(ligand_pdbs, protein_pdbs)

    def featurize_wrapper(ligand_protein_pdb_tuple):
      ligand_pdb, protein_pdb = ligand_protein_pdb_tuple
      print("Featurizing %s" % ligand_pdb[0:2])
      molecule_features = featurizer.featurize_complexes([ligand_pdb], [protein_pdb])
      return molecule_features

    if worker_pool is None:
      features = []
      for ligand_protein_pdb_tuple in zip(ligand_pdbs, protein_pdbs):
        features.append(featurize_wrapper(ligand_protein_pdb_tuple))
    else:
      if worker_pool is None:
        worker_pool = ProcessingPool(mp.cpu_count())
        features = worker_pool.map(featurize_wrapper, 
                                   zip(ligand_pdbs, protein_pdbs))
      else:
        features = worker_pool.map_sync(featurize_wrapper, 
                                        zip(ligand_pdbs, protein_pdbs))
      #features = featurize_wrapper(zip(ligand_pdbs, protein_pdbs))
    df[featurizer.__class__.__name__] = list(features)
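
Example #1 maps a closure (featurize_wrapper) defined inside a method, which works because pathos serializes callables with dill rather than pickle. Below is a minimal, self-contained sketch of that pattern, using a hypothetical work function and toy data instead of the deepchem featurizer above:

from pathos.multiprocessing import ProcessingPool

def demo(pairs, scale=2):
    # closure over `scale`; the stdlib multiprocessing module could not pickle this
    def work(pair):
        a, b = pair
        return (a + b) * scale
    pool = ProcessingPool(4)
    return pool.map(work, pairs)

if __name__ == '__main__':
    print(demo([(1, 2), (3, 4), (5, 6)]))  # -> [6, 14, 22]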
Example #2
    def map(self, f, seq):
        """
        Parallel implementation of map.

        Parameters
        ----------
        f : callable
            A function to map to all the values in 'seq'

        seq : iterable
            An iterable of values to process with 'f'

        Returns
        -------
        results : list, shape=[len(seq)]
            The evaluated values
        """
        if self.n_jobs < 1:
            n_jobs = multiprocessing.cpu_count()
        elif self.n_jobs == 1:
            return list(map(f, seq))
        else:
            n_jobs = self.n_jobs

        pool = Pool(n_jobs)
        results = list(pool.map(f, seq))
        # Closing/joining is not really allowed because pathos sees pools as
        # lasting for the duration of the program.
        return results
Example #3
    def image_division(self):
        image_rows, image_cols = self.__image.shape[:2]
        print self.__image.shape[:2]
        grid_indices = [
            np.array([x, y])
            for x in xrange(0, image_cols - self.__GRID_SIZE, self.__GRID_SIZE)
            for y in xrange(0, image_rows - self.__GRID_SIZE, self.__GRID_SIZE)
        ]
        pool = Pool()
        output = pool.map(self.grid_division, grid_indices)
        threshod_sucess_sample = 6
        ransacGrouper = RansacLine(1, threshod_sucess_sample, 25, 2)
        for index, edgels in enumerate(output):
            if len(edgels) > threshod_sucess_sample:
                ransacGrouper.edgels = edgels
                ransac_groups = ransacGrouper.applay_parallel_ransac()
                self.line_segment(ransac_groups)

        # print len(self.__lines)
        # for line in self.__lines:
        #     print (line.slope, line.intercept)
        #     coefficients = np.array([line.slope, line.intercept])
        #     # print "cof: ", coefficients
        #     x = np.array([20, 50], dtype=np.int32)
        #     polynomial = np.poly1d(coefficients)
        #     # print "Poly: ", polynomial
        #     y = polynomial(x)
        #     y = [int(e) for e in y]
        #     print "x: ", x, "y: ", y
        #     cv2.line(self.__image, (x[0], y[0]), (x[1], y[1]), (0, 255, 0), 1)

        cv2.imshow("image", self.__image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
Example #4
 def apply(values):
     pool = Pool()
     # result = []
     result = pool.map(func, values)
         # result.append(ret)
     # pool.close()
     # pool.join()
     return result
Example #5
    def compute_importance(self, alpha):
        """

        """
        pool = ProcessingPool(self._numJobs)
        errors = pool.map(self._computeImportanceOfTree,
                          [alpha] * self._numTree, range(self._numTree))
        return np.array(errors).mean(axis=0)
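
Example #5 passes two sequences to map, one per argument of _computeImportanceOfTree. A minimal sketch of that multi-iterable form with a hypothetical weighted function, assuming pathos is installed:

from pathos.multiprocessing import ProcessingPool

def weighted(alpha, i):
    return alpha * i

if __name__ == '__main__':
    pool = ProcessingPool(2)
    # pathos map accepts several sequences and zips them, like the builtin map
    print(pool.map(weighted, [0.5] * 4, range(4)))  # -> [0.0, 0.5, 1.0, 1.5]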
Example #6
    def alignAllShapes( self ):
        import pathos.multiprocessing as mp
        start = time.time()
        pool = Pool()
        self.allShapes = pool.map( self.alignOneShape, self.allShapes )
#        for sh in self.allShapes:
#          self.alignOneShape( sh )
        print 'alignAllShapes: %f' % (time.time() - start  )
        return 
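
Mapping a bound method such as self.alignOneShape works because dill can serialize instance methods together with their instance. A minimal sketch with a hypothetical Shapes class:

from pathos.multiprocessing import ProcessingPool as Pool

class Shapes:  # hypothetical stand-in for the class above
    def __init__(self, data):
        self.data = data

    def align_one(self, x):  # bound method; dill serializes it along with self
        return x - min(self.data)

    def align_all(self, n_procs=2):
        pool = Pool(n_procs)
        return pool.map(self.align_one, self.data)

if __name__ == '__main__':
    print(Shapes([3, 1, 2]).align_all())  # -> [2, 0, 1]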
Example #7
 def run_all_control_analysis(self):
     dirs = dir_walker(self.encode_root)
     control_dir = None
     for d in dirs:
         if 'control' in d.lower():
             control_dir = d
     assert control_dir is not None
     replicates = dir_walker(control_dir, level=1)
     pool = ProcessingPool(nodes=14)
     pool.map(self.control_analysis, tuple(replicates))
     return replicates
Example #8
def register_stack_to_template(frames, template, regfn, njobs=4, **fnargs):
    """
    Given stack of frames (or a FSeq obj) and a template image, 
    align every frame to template and return a list of functions,
    which take an image and return warped image, aligned to template.
    """
    if njobs > 1:
        pool = ProcessingPool(nodes=njobs) 
        out = pool.map(partial(regfn, template=template, **fnargs), frames)
    else:
        out = np.array([regfn(img, template, **fnargs) for img in frames])
    return out
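
Example #8 fixes the template and keyword arguments with functools.partial and maps only over the frames. A minimal sketch of that idiom with a hypothetical register stub:

from functools import partial
from pathos.multiprocessing import ProcessingPool

def register(frame, template, order=1):  # hypothetical registration stub
    return frame - template * order

if __name__ == '__main__':
    pool = ProcessingPool(nodes=2)
    frames = [10, 20, 30]
    print(pool.map(partial(register, template=5, order=2), frames))  # -> [0, 10, 20]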
Example #9
def main():
	from hyperopt import fmin,tpe,hp,Trials
	from hyperopt.mongoexp import MongoTrials
	import os 

	fit_params=eval(open('fit_parameters.txt').read())
	fit_params['root']=os.getcwd()
	directory=init_directory(fit_params)
	if fit_params['optimization']=='hyperopt':
		space=search_space(fit_params)
		trials=Trials()
		best=fmin(run,space=space,algo=tpe.suggest,max_evals=fit_params['max_evals'],trials=trials)
		plot_results(trials.trials)

	#https://github.com/hyperopt/hyperopt/wiki/Parallelizing-Evaluations-During-Search-via-MongoDB
	''' commands for MongoDB
	mongod --dbpath . --port 1234
	export PYTHONPATH=$PYTHONPATH:/home/pduggins/influence_susceptibility_conformity
	hyperopt-mongo-worker --mongo=localhost:1234/foo_db --poll-interval=0.1
	'''
	if fit_params['optimization']=='mongodb':
		space=search_space(fit_params)
		space['directory']=directory
		trials=MongoTrials('mongo://localhost:1234/foo_db/jobs', exp_key='exp4')
		best=fmin(run,space=space,algo=tpe.suggest,max_evals=fit_params['max_evals'],trials=trials)
		plot_results(trials.trials)

	if fit_params['optimization']=='evolve':
		from pathos.multiprocessing import ProcessingPool as Pool
		from pathos.helpers import freeze_support #for Windows
		import numpy as np
		import pandas as pd
		# freeze_support()
		evo_pop=init_evo_pop(fit_params)
		pool = Pool(nodes=fit_params['threads'])

		for g in range(fit_params['generations']):
			exp_params=[value['P'] for value in evo_pop.itervalues()]
			fitness_list=pool.map(run, exp_params)
			# new_gen_list=tournament_selection(fitness_list,fit_params)
			new_gen_list=rank_proportional_selection(fitness_list)
			remade_pop=remake(evo_pop,new_gen_list)
			mutated_pop=mutate(remade_pop,evo_pop,fit_params)
			evo_pop=mutated_pop
			# crossed_pop=crossover(mutated_pop)
			# evo_pop=crossed_pop
			mean_F=np.average([evo_pop[ind]['F'] for ind in evo_pop.iterkeys()])
			std_F=np.std([evo_pop[ind]['F'] for ind in evo_pop.iterkeys()])
			print '\nGeneration %s: mean_F=%s, std F=%s' %(g+1,mean_F,std_F) 

		out_pop=pd.DataFrame([evo_pop])
		out_pop.reset_index().to_json('evo_pop.json',orient='records')
Example #10
def apply_warps(warps, frames, njobs=4):
    """
    returns result of applying warps for given frames (one warp per frame)
    """
    if njobs > 1 :
        pool = ProcessingPool(nodes=njobs)
        out = np.array(pool.map(parametric_warp, frames, warps))
    else:
        out = np.array([parametric_warp(f,w) for f,w in itt.izip(frames, warps)])
    if isinstance(frames, fseq.FrameSequence):
        out = fseq.open_seq(out)
        out.meta = frames.meta
    return out
Example #11
	def multi_ray_sim(self, sources, procs=8):
		self.minener = 1e-10 # minimum energy threshold
		self.itmax = 1000 # stop iteration after this many ray bundles were generated (i.e. 
					# after the original rays intersected some surface this many times).
		# The multiprocessing raytracing method to call from the original engine.
		if len(sources) != procs:
			raise Exception('Number of sources and processors do not agree')

		# Creates a pool of processes and makes them raytrace one different source each. The resm list returned is a list of copies of the original engine post raytrace.
		pool = Pool(processes=procs)
		resm = pool.map(self.trace, sources)

		# New tree container and length evaluation to redimension it.
		tree_len = N.zeros(len(resm), dtype=N.int)
		trees = []

		for eng in xrange(len(resm)):
			# Get and regroup results in one tree and assembly only:
			S = resm[eng]._asm.get_surfaces()
			tree_len[eng] = len(resm[eng].tree._bunds)
			trees.append(resm[eng].tree)
			# Next loop is to get the optics callable objects and copy-regroup their values without assumptions about what they are.
			for s in xrange(len(S)):
				part_res = S[s]._opt.__dict__
				keys = S[s]._opt.__dict__.keys()
				for k in xrange(len(keys)):
					if (keys[k] == '_opt') or (keys[k] == '_abs'):
						continue
					if len(self._asm.get_surfaces()[s]._opt.__dict__[keys[k]]) < 1:
						self._asm.get_surfaces()[s]._opt.__dict__[keys[k]] = part_res[keys[k]]
					elif len(part_res[keys[k]]) < 1:
						continue
					else:
						self._asm.get_surfaces()[s]._opt.__dict__[keys[k]][0] = N.append(self._asm.get_surfaces()[s]._opt.__dict__[keys[k]][0], part_res[keys[k]][0], axis=1)

		# Regroup trees:
		self.tree = RayTree() # Create a new tree for all
		for t in xrange(N.amax(tree_len)): # Browse through general tree levels up to the maximum length that has been raytraced
			for eng in xrange(len(resm)): # Browse through bundles of each parallel engine.
				if t<(tree_len[eng]): # to not go over the length of the present parallel tree.
					if t==len(self.tree._bunds): # if the index is greater than the actual length of the general tree, add a new bundle to the general tree with the present parallel bundle to initialise it.
						bundt = trees[eng]._bunds[t]
					else:	
						if t>0: # adapt parents indexing prior to concatenation
							trees[eng]._bunds[t].set_parents(trees[eng]._bunds[t].get_parents()+len(self.tree._bunds[t].get_parents()))
						bundt = concatenate_rays([bundt, trees[eng]._bunds[t]])
			self.tree.append(bundt)
		
		trees = 0
Example #12
def launch_simulation_parallel(simulation_config,
                               max_iterations,
                               parallel_blocks=gtconfig.parallel_blocks,
                               show_progress=True):
    """
    Parallel version of the simulation launch, to maximize CPU utilization.

    :param simulation_config: Simulation configuration passed to each worker.
    :param max_iterations: Total number of replications, split across workers.
    :param parallel_blocks: Number of parallel worker blocks.
    :param show_progress: Show a progress bar for the first block only.
    :return: Consolidated SimulationMetrics instance.
    """
    pool = Pool(processes=parallel_blocks)
    samples_per_worker = max_iterations / parallel_blocks

    logger.info("Launching " + str(max_iterations) + " replications IN PARALLEL. Using " + str(parallel_blocks) +
                " workers with " + str(samples_per_worker) + " samples each.")

    worker_inputs = []

    for block_id in range(parallel_blocks):
        worker_input = {'simulation_config': simulation_config,
                        'max_iterations': samples_per_worker,
                        'block_id': block_id,
                        'show_progress': False}

        worker_inputs.append(worker_input)

    # Showing progress bar of first batch
    worker_inputs[0]['show_progress'] = show_progress
    worker_outputs = pool.map(launch_simulation_wrapper, worker_inputs)

    logger.info(str(max_iterations) + " replications finished. Starting output consolidation.")
    simulation_metrics = SimulationMetrics()

    for output in worker_outputs:
        simulation_metrics.append_results(output)

    return simulation_metrics
Example #13
    def preprocess_docs(self, docs):
        """
        Preprocess string or list of strings
        """
        if isinstance(docs, string_types):
            docs = [docs]

        if self.stemming is True:
            if not self.parallel:
                logger.info('preprocess %i documents without multiprocessing' % len(docs))
                docs_preprocess = list(map(self.preprocess, docs))
            else:
                if sys.version_info[0] == 3:
                    from multiprocessing import Pool
                    pool = Pool()
                    n_processes = pool._processes
                else:
                    logger.info('use pathos for multiprocessing')
                    from pathos.multiprocessing import ProcessingPool
                    pool = ProcessingPool()
                    n_processes = pool.nodes
                logger.info('preprocess %i documents with %i workers' % (len(docs), n_processes))
                docs_preprocess = pool.map(self.preprocess, docs)
        else:
            logger.info('no preprocess function applied')
            docs_preprocess = docs
        return docs_preprocess
Example #14
    def __init__(self, dynamics):

        # dynamics
        self.dynamics = dynamics

        # parallel
        self.pool = Pool(8)
Example #15
def sample_function(function,
                    value_range=(-1, 1),
                    resolution=(1000, 1000),
                    grid=True,
                    parallel=True,
                    **params):
    """
    Sample a function over an xy plane with the given value range and resolution.
    Function is called with ((x,y), **params)
    Returns an array of shape (resolution_x, resolution_y, *function_shape),
        e.g. (1000,1000,3) if f(p)=[a,b,c]
        e.g. (1000,1000,3,3) if f(p).shape=(3,3)
    """
    # TODO make over any number of dimensions?

    xy = xy_plane(value_range, resolution, grid=grid)
    if parallel:
        # Flatten into array of 2d points [(x,y), ...]
        points = xy.reshape(-1, xy.shape[-1])
        with ProcessingPool() as pool:
            values = pool.map(lambda p: function(p, **params), points)
        sampled = np.resize(
            values,
            xy.shape[:-1])  # TODO Doesn't work for non-scalar functions
    else:
        sampled = np.apply_along_axis(lambda p: function(p, **params), 2, xy)

    # returns shape: (resolution_x, resolution_y, *function_shape)
    return sampled
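
Example #15 uses the pool as a context manager and maps a lambda that closes over extra parameters; both work under pathos. A minimal sketch with a hypothetical scalar field f:

import numpy as np
from pathos.multiprocessing import ProcessingPool

def f(p, scale=1.0):  # hypothetical scalar field over 2D points
    return scale * float(p[0] ** 2 + p[1] ** 2)

if __name__ == '__main__':
    points = np.array([[0.0, 0.0], [1.0, 2.0], [3.0, 1.0]])
    with ProcessingPool() as pool:  # the pool supports the with-statement
        values = pool.map(lambda p: f(p, scale=0.5), points)
    print(values)  # -> [0.0, 2.5, 5.0]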
Example #16
def main(argv):
    logging.info('Building assignee features')

    config = configparser.ConfigParser()
    config.read([
        'config/database_config.ini', 'config/database_tables.ini',
        'config/inventor/build_assignee_features_sql.ini'
    ])

    # create output folder if it doesn't exist
    logging.info(
        'writing results to folder: %s',
        os.path.dirname(config['INVENTOR_BUILD_ASSIGNEE_FEAT']['feature_out']))
    os.makedirs(os.path.dirname(
        config['INVENTOR_BUILD_ASSIGNEE_FEAT']['feature_out']),
                exist_ok=True)

    feats = [n for n in ProcessingPool().imap(run, ['granted', 'pregranted'])]
    with open(config['INVENTOR_BUILD_ASSIGNEE_FEAT']['base_assignee_features'],
              'rb') as fin:
        features = pickle.load(fin)

    for i in range(0, len(feats)):
        features.update(feats[i])

    with open(
            config['INVENTOR_BUILD_ASSIGNEE_FEAT']['feature_out'] +
            '.%s.pkl' % 'both', 'wb') as fout:
        pickle.dump(features, fout)
Example #17
 def __init__(self,
              n_features,
              n_nodes,
              embedding_dim,
              rnn_dim,
              bidirectional=True,
              sinkhorn_iters=5,
              sinkhorn_tau=1,
              num_workers=4,
              cuda=True):
     super(SPGSequentialActor, self).__init__()
     self.use_cuda = cuda
     self.n_nodes = n_nodes
     self.embedding_dim = embedding_dim
     self.rnn_dim = rnn_dim
     self.num_workers = num_workers
     self.embedding = nn.Linear(n_features, embedding_dim)
     self.gru = nn.GRU(embedding_dim, rnn_dim, bidirectional=bidirectional)
     scale = 2 if bidirectional else 1
     self.fc2 = nn.Linear(scale * self.rnn_dim, n_nodes)
     self.sinkhorn = Sinkhorn(n_nodes, sinkhorn_iters, sinkhorn_tau)
     self.round = linear_assignment
     init_hx = torch.zeros(scale, self.rnn_dim)
     if cuda:
         init_hx = init_hx.cuda()
     self.init_hx = Variable(init_hx, requires_grad=False)
     if num_workers > 0:
         self.pool = Pool(num_workers)
Example #18
def get_signal_bg_many_parallel(runList, detid, **kwargs):
    """
    Return the averaged signal and background (based on blank frames) over the given runs
    """
    def mapfunc(run_number):
        return get_signal_bg_one_run(run_number, detid, **kwargs)

    MAXNODES = 14
    pool = ProcessingPool(nodes=min(MAXNODES, len(runList)))
    bg = np.zeros(DIMENSIONS_DICT[detid])
    signal = np.zeros(DIMENSIONS_DICT[detid]) 
    run_data = pool.map(mapfunc, runList)
    for signal_increment, bg_increment in run_data:
        signal += (signal_increment / len(runList))
        bg += (bg_increment / len(runList))
    return signal, bg
Example #19
 def __init__(self,
              n_features,
              n_nodes,
              embedding_dim,
              rnn_dim,
              sinkhorn_iters=5,
              sinkhorn_tau=1.,
              num_workers=4,
              cuda=True):
     super(SPGMatchingActor, self).__init__()
     self.use_cuda = cuda
     self.n_nodes = n_nodes
     self.rnn_dim = rnn_dim
     self.num_workers = num_workers
     self.embedding = nn.Linear(n_features, embedding_dim)
     self.gru = nn.GRU(n_nodes, rnn_dim)
     self.fc1 = nn.Linear(self.rnn_dim, n_nodes)
     self.sinkhorn = Sinkhorn(n_nodes, sinkhorn_iters, sinkhorn_tau)
     self.round = linear_assignment
     init_hx = torch.zeros(1, self.rnn_dim)
     if cuda:
         init_hx = init_hx.cuda()
     self.init_hx = Variable(init_hx, requires_grad=False)
     if num_workers > 0:
         self.pool = Pool(num_workers)
Example #20
 def __init__(self):
     """
         Initializes DataProcessing class with utilities and parallel processing
         
     """
     self.res = Res()
     self.pool = Pool()
Example #21
        def closure(rolling_groupby, func, *args, **kwargs):
            groups = list(rolling_groupby._groupby.groups.items())
            chunks = chunk(len(groups), nb_workers)
            object_id = plasma_client.put(rolling_groupby.obj)
            groups_id = plasma_client.put(groups)

            attribute2value = {
                attribute: getattr(rolling_groupby, attribute)
                for attribute in rolling_groupby._attributes
            }

            worker_args = [
                (
                    plasma_store_name,
                    object_id,
                    groups_id,
                    attribute2value,
                    chunk,
                    func,
                    args,
                    kwargs,
                )
                for chunk in chunks
            ]

            with ProcessingPool(nb_workers) as pool:
                result_workers = pool.map(RollingGroupby.worker, worker_args)

            result = pd.concat(
                [plasma_client.get(result_worker) for result_worker in result_workers],
                copy=False,
            )

            return result
Example #22
def climByAveragingPeriods(urls,              # list of (daily) granule URLs for a long time period (e.g. a year)
                    nEpochs,                  # compute a climatology for every N epochs (days) by 'averaging'
                    nWindow,                  # number of epochs in window needed for averaging
                    variable,                 # name of primary variable in file
                    mask,                     # name of mask variable
                    coordinates,              # names of coordinate arrays to read and pass on (e.g. 'lat' and 'lon')
                    maskFn=qcMask,            # mask function to compute mask from mask variable
                    averager='pixelAverage',  # averaging function to use, one of ['pixelAverage', 'gaussInterp']
                    mode='sequential',        # Map across time periods of N-days for concurrent work, executed by:
                                              # 'sequential' map, 'multicore' using pool.map(), 'cluster' using pathos pool.map(),
                                              # or 'spark' using PySpark
                    numNodes=1,               # number of cluster nodes to use
                    nWorkers=4,               # number of parallel workers per node
                    averagingFunctions=AveragingFunctions,    # dict of possible averaging functions
                    legalModes=ExecutionModes  # list of possiblel execution modes
                   ):
    '''Compute a climatology every N days by applying a mask and averaging function.
Writes the averaged variable grid, attributes of the primary variable, and the coordinate arrays in a dictionary.
***Assumption:  This routine assumes that the N grids will fit in memory.***
    '''
    try:
        averageFn = averagingFunctions[averager]
    except :
        averageFn = average
        print >>sys.stderr, 'climatology: Error, Averaging function must be one of: %s' % str(averagingFunctions)

    urlSplits = [s for s in fixedSplit(urls, nEpochs)]
    if VERBOSE: print >>sys.stderr, urlSplits

    def climsContoured(urls):
        n = len(urls)
        var = climByAveraging(urls, variable, mask, coordinates, maskFn, averageFn)
        return contourMap(var, variable, coordinates, n, urls[0])

    if mode == 'sequential':
        plots = map(climsContoured, urlSplits)
    elif mode == 'multicore':
        pool = Pool(nWorkers)
        plots = pool.map(climsContoured, urlSplits)        
    elif mode == 'cluster':
        pass
    elif mode == 'spark':
        pass

    plots = map(climsContoured, urlSplits)
    print plots
    return plots
Example #23
def calculate_bleu(sess, trainable_model, data_loader):
    # bleu score implementation
    # used for performance evaluation for pre-training & adv. training
    # separate true dataset to the valid set
    # conditionally generate samples from the start token of the valid set
    # measure similarity with nltk corpus BLEU
    smoother = SmoothingFunction()

    data_loader.reset_pointer()
    bleu_avg = 0

    references = []
    hypotheses = []

    for it in xrange(data_loader.num_batch):
        batch = data_loader.next_batch()
        # predict from the batch
        # TODO: which start tokens?
        # start_tokens = batch[:, 0]
        start_tokens = np.array([START_TOKEN] * BATCH_SIZE, dtype=np.int64)
        prediction = trainable_model.predict(sess, batch, start_tokens)

        # argmax to convert to vocab
        #prediction = np.argmax(prediction, axis=2)

        # cast batch and prediction to 2d list of strings
        batch_list = batch.astype(np.str).tolist()
        pred_list = prediction.astype(np.str).tolist()
        references.extend(batch_list)
        hypotheses.extend(pred_list)

    bleu = 0.

    # calculate bleu for each predicted seq
    # compare each predicted seq with the entire references
    # this is slow, use multiprocess
    def calc_sentence_bleu(hypothesis):
        return sentence_bleu(references,
                             hypothesis,
                             smoothing_function=smoother.method4)

    if __name__ == '__main__':
        p = Pool()
        result = (p.map(calc_sentence_bleu, hypotheses))
    bleu = np.mean(result)

    return bleu
Example #24
def makePower():
    global c
    pMin, pMax = d["power"]["pMin"], d["power"]["pMax"]

    pPath = np.linspace(pMin, pMax, frameCount)

    pool = Pool(4)

    # Get interesting c
    while True:
        subIm = JuliaTools.subImage(c=c,
                                    n=10,
                                    iters=iters / 2,
                                    r=r,
                                    p=pMin,
                                    split=split,
                                    save=False,
                                    aura=False)
        isBlackList = pool.map(subIm, coords)
        if not all(isBlackList):
            break
        else:
            c *= 0.975

    for frame in xrange(frameCount):
        subIm = JuliaTools.subImage(c=c,
                                    r=r,
                                    n=n,
                                    p=pPath[frame],
                                    iters=iters / 2,
                                    split=split)
        isBlackList = pool.map(subIm, coords)
        allBlack = all(isBlackList)

        if not allBlack:
            JuliaTools.makeFrame(frame, n, split, coords)

    pool.close()

    JuliaTools.prepareForFFmpeg(frameCount=frameCount, loop=True)

    with open("tweet.txt", "w") as out:
        out.write("woooooooooooooooooooo")

    stop = timeit.default_timer()

    print stop - start
Example #25
def run(non_iter_args, do_multiprocessing):
    [
        weightcalcdata,
        weightcalculator,
        box,
        startindex,
        size,
        newconnectionmatrix,
        method,
        boxindex,
        filename,
        headerline,
        writeoutput,
    ] = non_iter_args

    partial_gaincalc_oneset = partial(
        calc_weights_oneset,
        weightcalcdata,
        weightcalculator,
        box,
        startindex,
        size,
        newconnectionmatrix,
        method,
        boxindex,
        filename,
        headerline,
        writeoutput,
    )

    if do_multiprocessing:
        pool = Pool(processes=pathos.multiprocessing.cpu_count())
        pool.map(partial_gaincalc_oneset, weightcalcdata.causevarindexes)

        # Current solution to no close and join methods on ProcessingPool
        # https://github.com/uqfoundation/pathos/issues/46

        s = pathos.multiprocessing.__STATE["pool"]
        s.close()
        s.join()
        pathos.multiprocessing.__STATE["pool"] = None

    else:
        for causevarindex in weightcalcdata.causevarindexes:
            partial_gaincalc_oneset(causevarindex)

    return None
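
The __STATE workaround in Example #25 dates from a time when ProcessingPool hid its close and join methods; recent pathos releases expose close, join and clear on the pool itself. A minimal sketch of the supported shutdown sequence, assuming a recent pathos:

from pathos.multiprocessing import ProcessingPool as Pool

def square(x):
    return x * x

if __name__ == '__main__':
    pool = Pool(2)
    print(pool.map(square, range(5)))  # -> [0, 1, 4, 9, 16]
    pool.close()   # stop accepting work
    pool.join()    # wait for the workers to finish
    pool.clear()   # drop the cached pool so a fresh one can be created later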
Example #26
def test_multiprocess():
    x_list = [1,2,3,4,5,6,7,]
    y_list = ['1','2','3','4','5','6','7']
    epoch = 8
    pool = Pool(epoch)
    res = pool.amap(test_task,x_list,y_list)
    pool.pipe(test_task,'22','222')
    pool.close()
    pool.join()
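
Example #26 starts an asynchronous map with amap and a single blocking call with pipe, but never collects the amap result. A minimal sketch showing both calls end to end, with a hypothetical test_task:

from pathos.multiprocessing import ProcessingPool as Pool

def test_task(x, y):  # hypothetical stand-in for the task above
    return '{}-{}'.format(x, y)

if __name__ == '__main__':
    pool = Pool(2)
    res = pool.amap(test_task, [1, 2, 3], ['a', 'b', 'c'])  # non-blocking map
    print(pool.pipe(test_task, 22, '222'))                  # blocking single call -> 22-222
    print(res.get())                                        # -> ['1-a', '2-b', '3-c']
    pool.close()
    pool.join()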
Example #27
    def _process_set_reads_library(self, input_object_info, genome_index_base,
                                   result_directory, cli_option_params):
        """
        _process_set_reads_library: process set reads library
        """

        reads_refs = self.fetch_reads_refs_from_sampleset(
            input_object_info['ref'], input_object_info['info'])

        set_object_name = input_object_info['info'][1]
        alignment_set_name = set_object_name + cli_option_params[
            'alignment_set_suffix']

        arg_1 = []
        arg_2 = [genome_index_base] * len(reads_refs)
        arg_3 = [result_directory] * len(reads_refs)
        arg_4 = []
        conditions = []
        for reads_ref in reads_refs:
            reads_input_object_info = self._get_input_object_info(
                reads_ref['ref'])
            option_params = cli_option_params.copy()
            option_params['reads_condition'] = reads_ref['condition']
            conditions.append(reads_ref['condition'])
            arg_1.append(reads_input_object_info)
            arg_4.append(option_params)

        cpus = min(cli_option_params.get('num_threads'),
                   multiprocessing.cpu_count())
        pool = Pool(ncpus=cpus)
        log('running _process_alignment_object with {} cpus'.format(cpus))

        reads_alignment_object_refs = pool.map(
            self._process_single_reads_library, arg_1, arg_2, arg_3, arg_4)

        for reads_alignment_object_ref in reads_alignment_object_refs:
            if reads_alignment_object_ref.startswith('ERROR'):
                error_msg = 'Caught exception in worker\n'
                error_msg += '{}'.format(reads_alignment_object_ref)
                raise ValueError(error_msg)

        workspace_name = cli_option_params['workspace_name']
        reads_alignment_set_object_ref = self._save_alignment_set(
            reads_alignment_object_refs, workspace_name, alignment_set_name,
            conditions)

        return reads_alignment_set_object_ref
Example #28
def pad_pdf(path, ratio, output_path=None):
    """Pad PDF with a <ratio>% white margin increase on the right.

  Takes a path to the original PDF file, converts them to PIL images,
  and pads them with the appropriate whitespace. Returns a path to the
  padded PDF.

  If a valid output_path is given, it will move the PDF to the given path
  and return the path.
  """

    images = pdf2image.convert_from_path(path)

    p = Pool(4)

    def overlay_and_store(img):
        """Pad the individual images by overlaying it on a white background.

    Passed to a multiprocessing pool as each individual PDF page is
    independent of each other. Saves the image in a temp path as a JPEG,
    and returns the absolute file path.
    """

        w, h = img.size
        padded_img = Image.new("RGB", (int(w * (1.0 + ratio)), h), "white")
        padded_img.paste(img, (0, 0))

        tmp_path = _generate_tmp_path(ext='.jpeg')
        padded_img.save(tmp_path, "JPEG")
        return tmp_path

    padded_images = p.map(overlay_and_store, images)

    # Output as PDF.
    output = _generate_tmp_path(ext='.pdf')
    with open(output, 'wb') as f:
        f.write(img2pdf.convert(padded_images))

    # Clean up temp image files used.
    for tmp_img in padded_images:
        os.remove(tmp_img)

    if output_path:
        os.rename(output, output_path)
        return output_path

    return output
Example #29
def main(args):
    """Main function for calculating BD shift.

    Parameters
    ----------
    args : dict
        See ``BD_shift`` subcommand
    """
    sys.stderr.write('Loading KDE objects...\n')
    kde1 = Utils.load_kde(args['<kde1>'])
    kde2 = Utils.load_kde(args['<kde2>'])

    # adding top-level library ID if not present
    kde1 = kde_add_lib(kde1)
    kde2 = kde_add_lib(kde2)

    sys.stderr.write('Calculating BD shifts...\n')
    print '\t'.join(['lib1','lib2','taxon','BD_shift'])
    for libID1,d1 in kde1.items():
        for libID2,d2 in kde2.items():
            msg = '  Comparing libraries: "{}", "{}"\n'
            sys.stderr.write(msg.format(libID1, libID2))

            # overlap of taxa btw libraries
            taxa = taxon_overlap(d1, d2)            

            # calculating BD shift (in parallel)
            pfunc = partial(kde_intersect, 
                            start=float(args['--start']),
                            end=float(args['--end']),
                            step=float(args['--step']))

            pool = ProcessingPool(nodes=int(args['--np']))
            if args['--debug']:
                res = map(pfunc, [(taxon, d1[taxon], d2[taxon])
                                  for taxon in taxa])
            else:
                res = pool.amap(pfunc, [(taxon, d1[taxon], d2[taxon])
                                        for taxon in taxa])
                while not res.ready():
                    time.sleep(2)
                res = res.get()        
                            
            # writing out table
            for line in res:
                print '\t'.join([libID1, libID2] + \
                                [str(x) for x in line])                            
Example #30
def run(non_iter_args, do_multiprocessing):
    [
        weightcalcdata,
        weightcalculator,
        box,
        startindex,
        size,
        newconnectionmatrix,
        method,
        boxindex,
        filename,
        headerline,
        writeoutput,
    ] = non_iter_args

    partial_gaincalc_oneset = partial(
        calc_weights_oneset,
        weightcalcdata,
        weightcalculator,
        box,
        startindex,
        size,
        newconnectionmatrix,
        method,
        boxindex,
        filename,
        headerline,
        writeoutput,
    )

    if do_multiprocessing:
        pool = Pool(processes=pathos.multiprocessing.cpu_count())
        pool.map(partial_gaincalc_oneset, weightcalcdata.causevarindexes)

        # Current solution to no close and join methods on ProcessingPool
        # https://github.com/uqfoundation/pathos/issues/46

        s = pathos.multiprocessing.__STATE["pool"]
        s.close()
        s.join()
        pathos.multiprocessing.__STATE["pool"] = None

    else:
        for causevarindex in weightcalcdata.causevarindexes:
            partial_gaincalc_oneset(causevarindex)

    return None
Example #31
    def __init__(
        self,
        func,
        bounds,
        niter=500,
        population=10,
        ftol=0.001,
        workers=-1,
        restart=False,
        vec_dump=10,
        seed=None,
        aggressive_parasite=False
    ):
        """ 
        Initialise a symbiotic organisms search instance
        
        Args:
            func (callable): Function to be minimised. f(x, *args) - x is the argument to be minimised, args is a tuple of any additional  fixed parameters to specify the function
            bounds (list(Double)): list of pairs of (min,max) bounds for x
            niter (Int): number of iterations for optimiser
            population (Int): number of members in population
            ftol (Double) : convergence criteria for function
            workers (Int): number of multiprocessing workers to use. -1 sets workers to mp.cpu_count()
            vec_dump (Int): write a restart file every vec_dump steps
            restart (Bool): restart the run from a restart file
            seed (Int): seed for random number generator, useful for tests

        """

        self.function = func
        self.niter = niter
        self.population = population
        self.particles = []
        self.best_global_vec = None
        self.best_global_fit = math.inf
        self.ftol = ftol
        self.bounds = np.asarray(bounds)
        self.restart = restart
        self.vector_restart = VectorInOut(bounds, "sos.rst")
        self.vec_dump = vec_dump
        self.seed = seed
        self.aggressive_parasite = aggressive_parasite

        if workers == -1:
            self.pool = Pool(mp.cpu_count())
        else:
            self.pool = Pool(workers)
Example #32
    def __init__(self,
                 enable_compression=True,
                 enable_s3=True,
                 file_path=None,
                 num_workers=30):
        """Initialise the S3 array IO interface.

        :param bool enable_s3: Flag to store objects in s3 or disk.
            True: store in S3
            False: store on disk (for testing purposes)
        :param str file_path: The root directory for the emulated s3 buckets when enable_s3 is set to False.
        :param int num_workers: The number of workers for parallel IO.
        """
        self.s3io = S3IO(enable_s3, file_path, num_workers)

        self.pool = ProcessingPool(num_workers)
        self.enable_compression = enable_compression
Example #33
    def fit(self, dataset):
        """
        Runs dataset through the designated pipeline, extracts features, and fits a conditional random field.

        :param dataset: Instance of Dataset.
        :return model: a trained instance of a sklearn_crfsuite.CRF model.
        """

        if not isinstance(dataset, Dataset):
            raise TypeError(
                "Must pass in an instance of Dataset containing your training files"
            )
        if not isinstance(self.pipeline, BasePipeline):
            raise TypeError(
                "Model object must contain a medacy pipeline to pre-process data"
            )

        pool = Pool(nodes=self.n_jobs)

        results = [
            pool.apipe(self._extract_features, data_file, self.pipeline,
                       dataset.is_metamapped())
            for data_file in dataset.get_data_files()
        ]

        while any([i.ready() is False for i in results]):
            time.sleep(1)

        for idx, i in enumerate(results):
            X, y = i.get()
            self.X_data += X
            self.y_data += y

        logging.info("Currently Waiting")

        learner_name, learner = self.pipeline.get_learner()
        logging.info("Training: %s", learner_name)

        assert self.X_data, "Training data is empty."

        train_data = [x[0] for x in self.X_data]
        learner.fit(train_data, self.y_data)
        logging.info("Successfully Trained: %s", learner_name)

        self.model = learner
        return self.model
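
Example #33 submits one apipe call per file and polls ready() before calling get(). A minimal sketch of that pattern with a hypothetical extract function:

import time
from pathos.multiprocessing import ProcessingPool as Pool

def extract(n):  # hypothetical stand-in for _extract_features
    return list(range(n)), [n]

if __name__ == '__main__':
    pool = Pool(2)
    jobs = [pool.apipe(extract, n) for n in (3, 5)]  # one async call per input
    while not all(job.ready() for job in jobs):
        time.sleep(0.1)
    print([job.get() for job in jobs])  # -> [([0, 1, 2], [3]), ([0, 1, 2, 3, 4], [5])]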
Example #34
    def delete_resources(self, id_list):
        """
        Batch Deletes stale resources from IBM topology service via DELETE
        Expects list of _ids that need to be deleted
        """
        def delete_single_resource(resource_id):
            """
            Multiprocess Worker Method to delete single resource from IBM topology service
            """
            r = requests.delete(self.api_prefix + 'resources/' + resource_id,
                                headers=self.headers,
                                verify=False)

        sys.stdout.write('Deleting ' + str(len(id_list)) +
                         ' resources from IBM Topology Service\n')
        pool = Pool(NUM_PROC)
        pool.map(delete_single_resource, id_list)
Example #35
    def __init__(self, conf, bodies, frame=None, n_procs=1):
        self.conf = conf
        self.bodies = bodies
        self.n = len(bodies)
        self.frame = frame

        self.n_procs = n_procs
        self.pool = Pool(self.n_procs)
Example #36
def run_service3(service,
                 iterable,
                 iterable_arguments,
                 iterable_argument_names,
                 worker_count,
                 log_function=print):

    start = timer()
    args = list(iterable_arguments.keys())
    args.extend(iterable_argument_names)
    if log_function is not None:
        log_function("[run_service] running service {} with {} workers".format(
            service, worker_count))

    # add everything to work queue
    all_args = []
    for x in iterable:
        if type(x) is not tuple:
            x = [x]
        args = dict(dict(zip(iterable_argument_names, x)),
                    **iterable_arguments)
        all_args.append(args)

    pool = Pool(worker_count)
    results = pool.amap(service, all_args)
    final_results = results.get()
    # if example service model is used, metrics can be gathered in this way
    messages = []
    total = len(final_results)
    failure = 0
    for error, mem_usage in final_results:
        if error is not False:
            failure += 1
            if type(error) is str:
                messages.append(error)

    # if we should be logging and if there is material to be logged
    if log_function is not None and (total + failure + len(messages)) > 0:
        log_function(
            "[run_service] Summary {}:\n[run_service]\tTime: {}s\n[run_service]\tTotal: {}\n[run_service]\tFailure: {}"
            .format(service, int(timer() - start), total, failure))
        log_function("[run_service]\tMessages:\n[run_service]\t\t{}".format(
            "\n[run_service]\t\t".join(messages)))

    # return relevant info
    return total, failure, messages
Example #37
    def __enter__(self):
        """Runs the RPKI Validator"""

        utils.kill_port(self.port)
        # Must remove these to ensure a clean run
        utils.clean_paths(self.rpki_db_paths)
        cmds = [f"cd {self.rpki_package_path}",
                f"chown -R root:root {self.rpki_package_path}"]
        utils.run_cmds(cmds)
        # Writes validator file and serves it
        # Can't use a context manager here since it returns it
        self._rpki_file = RPKI_File(self._table_input)
        self._rpki_file.spawn_process()
        self._process = ProcessingPool()
        self._process.apipe(self._start_validator)
        self.total_prefix_origin_pairs = self._rpki_file.total_lines
        return self
Example #38
def run(nodes=1, filename="config.xml"):
    """
    Dispatch wrapper to run the xml by dispatching each login to a multiprocess pool
    :param nodes: sys argument 1
    :param filename: determined by sys argument 2
    :return: array of statuses from the pool
    """
    file_xml = ""
    with open(filename, "r") as fd:
        lines = fd.readlines()
        for line in lines:
            file_xml += line
    xml = " ".join(file_xml.rsplit())

    pool = ProcessingPool(nodes)
    run_list = [[xml, value] for value in PATTERN_SEL.findall(xml)]
    return pool.map(helper, run_list)
Example #39
    def produce_classic(self, processes=1):
        self.create_output_file()
        self.produced = True
        if processes == 1:
            for i in range(len(self.root_objects)):
                self.create_result(i)
        else:
            from pathos.multiprocessing import ProcessingPool as Pool
            pool = Pool(processes=processes)
            self.root_objects = pool.map(self.create_result,
                                         range(len(self.root_objects)))

        for h in self.root_objects:  # write sequentially to prevent race conditions
            h.save(self.output_tree)
        logger.debug("Produced root objects %s",
                     [h.get_name() for h in self.root_objects])
        return self
Example #40
 def get_stats(self):
     """Get stats for all genomes. Concat the results into a DataFrame"""
     # pool.map needs one sequence per argument of the mapped function
     dmx_mean = [self.dmx.mean()] * len(self.genome_paths)
     with ProcessingPool() as pool:
         results = pool.map(genome.mp_stats, self.genome_paths, dmx_mean)
     self.stats = pd.concat(results)
     self.stats.to_csv(self.stats_path)
Example #41
    def __init__(self, n_obj=2, aggregation='WS', n_point=5, n_job=1, *argv, **kwargs):
        """
        Arguments
        ---------
        n_point : int,
            the number of evaluated points in each iteration
        aggregation: str or callable,
            the scalarization method/function. Supported options are:
                'WS' : weighted sum
                'Tchebycheff' : Tchebycheff scalarization
        """
        super(MOBO_D, self).__init__(*argv, **kwargs)
        self.n_point = int(n_point)
        # TODO: perhaps leave this an input parameter
        self.mu = 2 * self.n_point   # the number of generated points
        self.n_obj = int(n_obj)
        assert self.n_obj > 1

        if isinstance(self.minimize, bool):
            self.minimize = [self.minimize] * self.n_obj
        elif hasattr(self.minimize, '__iter__'):
            assert len(self.minimize) == self.n_obj

        self.minimize = np.asarray(self.minimize)

        if hasattr(self.obj_func, '__iter__'):
            assert self.n_obj == len(self.obj_func)
        
        assert self.n_obj == len(self.surrogate)
        self.n_job = min(MOBO_D.__max_procs__, self.mu, n_job)

        # TODO: implement the Tchebycheff approach
        if isinstance(aggregation, str):
            assert aggregation in ['WS', 'Tchebycheff']
        else:
            assert hasattr(aggregation, '__call__')
        self.aggregation = aggregation

        # generate weights
        self.weights = np.random.rand(self.mu, self.n_obj)
        self.weights /= np.sum(self.weights, axis=1).reshape(self.mu, 1)
        self.labels_ = KMeans(n_clusters=self.n_point).fit(self.weights).labels_
        self.frange = np.zeros(self.n_obj)

        if self.n_job > 1:
            self.p = ProcessingPool(ncpus=self.n_job)
Example #42
    def run(self):

        files = os.listdir(self.folder)
        outfile_bed = self.outfile.replace('.txt', '.bed')

        output_file = open(self.outfile, 'w')
        output_file.write('circle_id\ttranscript_id\tskipped_exon\tintron\tread_names\tsplice_reads\texon_reads\n')
        output_file.close()

        output_file = open(outfile_bed, 'w')
        output_file.write('# bed12 format\n')
        output_file.close()

        from pathos.multiprocessing import ProcessingPool as Pool

        p = Pool(self.cpus)
        p.map(self.run_parallel, files)
Example #43
def main(argv):
    logging.info('Building coinventor features')
    feats = [n for n in ProcessingPool().imap(run, ['granted', 'pregranted'])]
    features = feats[0]
    for i in range(1, len(feats)):
        features.update(feats[i])
    with open(FLAGS.feature_out + '.%s.pkl' % 'both', 'wb') as fout:
        pickle.dump(features, fout)
Example #44
    def collect_significances(self):
        with open(self.filename, 'w') as f:
            f.write(
                "Higgsino mass,Bino mass,Discovery Significance,Exclusion Limit\n"
            )

        def get_disc_sig(signal, classifier, bdt_cut):
            try:
                table = BDTCutFlowTable(signal, classifier, bdt_cut)
                calc = table.initialize_significance_calculator()
                sig = calc.calculate_discovery_significance('bdt')
                return sig
            except:
                pass

        def get_excl_lim(signal, classifier, bdt_cut):
            try:
                table = BDTCutFlowTable(signal, classifier, bdt_cut)
                calc = table.initialize_significance_calculator()
                lim = calc.calculate_exclusion_limit('bdt')
                return lim
            except:
                pass

        mySignals = self.signals

        pbar = tqdm(total=len(mySignals) / 8)

        def write_sigs(signal):
            try:
                classifier = Classifier(signal.mass_combination_tuple)
                discs = map(lambda x: get_disc_sig(signal, classifier, x),
                            np.arange(-10, 10, 0.1))
                excls = map(lambda x: get_excl_lim(signal, classifier, x),
                            np.arange(-10, 10, 0.1))

                with open(self.filename, 'a') as f:
                    f.write("{},{},{},{}\n".format(signal.higgsino_mass,
                                                   signal.bino_mass,
                                                   max(discs), max(excls)))
                pbar.update(1)
            except:
                pass

        p = Pool(8)
        p.map(write_sigs, mySignals)
Example #45
    def _calculate_s_powder_over_atoms_core(self, q_indx=None):
        """
        Helper function for _calculate_s_powder_1d.
        :returns: Python dictionary with S data
        """
        atoms_items = {}
        atoms = range(self._num_atoms)
        self._prepare_data(k_point=q_indx)

        if PATHOS_FOUND:
            p_local = ProcessingPool(nodes=AbinsModules.AbinsParameters.threads)
            result = p_local.map(self._calculate_s_powder_one_atom, atoms)
        else:
            result = [self._calculate_s_powder_one_atom(atom=atom) for atom in atoms]

        for atom in range(self._num_atoms):
            atoms_items["atom_%s" % atom] = {"s": result[atoms.index(atom)]}
            self._report_progress(msg="S for atom %s" % atom + " has been calculated.")
        return atoms_items
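
Example #45 falls back to a plain list comprehension when pathos is not importable. A minimal sketch of that optional-dependency pattern with a hypothetical s_for_atom function:

try:
    from pathos.multiprocessing import ProcessingPool
    PATHOS_FOUND = True
except ImportError:
    PATHOS_FOUND = False

def s_for_atom(atom):  # hypothetical per-atom computation
    return atom * 0.5

if __name__ == '__main__':
    atoms = range(4)
    if PATHOS_FOUND:
        result = ProcessingPool(nodes=2).map(s_for_atom, atoms)
    else:
        result = [s_for_atom(atom) for atom in atoms]
    print(result)  # -> [0.0, 0.5, 1.0, 1.5]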
Example #46
    def _exec_sample(X):
        from pathos.multiprocessing import ProcessingPool
        try:
            p = ProcessingPool(n_cpus)
            X = np.array(X)
            x = np.array_split(X, n_cpus)
            pipe = []
            for i in range(n_cpus):
                pipe.append(p.apipe(func, x[i]))

            rs = []
            for i in range(n_cpus):
                rs.append(pipe[i].get())
    
            rs = [item for sublist in rs for item in sublist]

            return ot.NumericalSample(rs)
        except ValueError:
            # We get here if the chunk split left some single evaluations
            return func(X)
Example #47
    def transpose_index(self):  # WORKS ONLY FOR TEST DATA
        """Transpose the data according to the index."""

        data = self.data
        indexes = list(set(data.index))

        names, datasets = [], []
        for name in indexes:
            names.append(name)
            datasets.append(data[[name in i for i in data.index]])

        plotSets = zip(names, datasets)

        pool = ProcessingPool()
        plots = []
        for name, dataset in plotSets:
            plots.append(pool.map(self.create_transposed_plot, [name], [dataset]))

        logging.debug('Index transposed')

        return plots
Example #48
  def _featurize_compounds(self, df, featurizer, parallel=True,
                           worker_pool=None):    
    """Featurize individual compounds.

       Given a featurizer that operates on individual chemical compounds 
       or macromolecules, compute & add features for that compound to the 
       features dataframe
    """
    sample_smiles = df["smiles"].tolist()

    if worker_pool is None:
      features = []
      for ind, smiles in enumerate(sample_smiles):
        if ind % self.log_every_n == 0:
          log("Featurizing sample %d" % ind, self.verbose)
        mol = Chem.MolFromSmiles(smiles)
        features.append(featurizer.featurize([mol]))
    else:
      def featurize_wrapper(smiles, dilled_featurizer):
        print("Featurizing %s" % smiles)
        mol = Chem.MolFromSmiles(smiles)
        featurizer = dill.loads(dilled_featurizer)
        feature = featurizer.featurize([mol])
        return feature

      if worker_pool is None:
        dilled_featurizer = dill.dumps(featurizer)
        worker_pool = ProcessingPool(mp.cpu_count())
        featurize_wrapper_partial = partial(featurize_wrapper,
                                            dilled_featurizer=dilled_featurizer)
        features = []
        for smiles in sample_smiles:
          features.append(featurize_wrapper_partial(smiles))
      else:
        features = worker_pool.map_sync(featurize_wrapper, 
                                        sample_smiles)

    df[featurizer.__class__.__name__] = features
Example #49
    def run(self):


        if self.args.jumpdists:
            n_bins=100.
            bin_width = 1/n_bins
            bins = np.arange(0,1+bin_width,1/n_bins)

            if self.args.file:
                user,vals = self.artist_jump_distributions(self.args.file,bins=bins,self_jumps=False)
                with open(self.args.resultdir+user,'w') as fout:
                    fout.write(','.join(vals.astype(str))+'\n')



            else:
                raise('not implemented!')
                self.pool = Pool(self.args.n)
                self.rootLogger.info("Pool started")

                self.rootLogger.info("Starting jump distance analysis")

                func_partial = partial(self.artist_jump_distributions,bins=bins,self_jumps=False)
                with open(self.args.resultdir+'jumpdists','w') as fout:
                    for user,vals in self.pool.imap(func_partial,self.listen_files):
                        fout.write(user+'\t'+','.join(vals.astype(str))+'\n')

                self.pool.close()
                self.rootLogger.info("Pool closed")

        if self.args.blockdists:
            #self.rootLogger.info("Starting block distance analysis")
            self.mean_block_distances(self.args.file)

        if self.args.diversity_dists:
            bins = np.arange(0,1.01,.01)
            self.diversity_distributions(self.args.file,bins=bins)

        if self.args.clustering:
            self.clustering(self.args.file)

        if self.args.values:
            self.patch_values(self.args.file)

        if self.args.exp:
            self.explore_exploit(self.args.file)

        if self.args.patch_len_dists:
            self.patch_len_dists(self.args.file)
Example #50
def parallelmap(func, data, nodes = None):
    """
    Return the averaged signal and background (based on blank frames) over the given runs
    """
    if not nodes:
        nodes = multiprocessing.cpu_count() - 2
    pool = ProcessingPool(nodes=nodes)
    try:
        return pool.map(func, data)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
Example #51
class C(object):
    def __init__(self,files):
        self.pool = Pool(4)
        self.files = files

    def raw_processor(self, fi,prefix,somedict):
        df = pd.read_table(
                fi,
                header=None,
                names=['artist_id','ts'],
                parse_dates=['ts'])\
            .sort_values(by='ts')
        user = fi.split('/')[-1][:-4]
        df.to_pickle('/Users/jaredlorince/git/MusicForaging/testData/scrobbles_test/{}_{}.pkl'.format(prefix,user))
        rootLogger.info('preprocessing complete for user {} ({})'.format(user,fi))

    def run_p(self):
        func_partial = partial(self.raw_processor,prefix='blah',somedict=d)
        result = self.pool.amap(func_partial, self.files)
Example #52
def parallelmap(func, lst, nodes = None):
    """
    Return the averaged signal and background (based on blank frames) over the given runs using
    multiprocessing (as opposed to MPI).
    """
    from pathos.multiprocessing import ProcessingPool
    from pathos import multiprocessing
    if not nodes:
        nodes = multiprocessing.cpu_count() - 2
    pool = ProcessingPool(nodes=nodes)
    try:
        return pool.map(func, lst)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
Example #53
while True:
	try:
		get_states = requests.get(nation_url, timeout=(1,60)).text
		break
	except:
		sleep(1.5**wait)
		wait += 1

parsed = BeautifulSoup(get_states, 'html.parser')
state_urls = [a['href'] for a in parsed.find('div', class_='newLocUSListArea').find_all('a')]

################
#Get town links#
################
print "Getting town URLs..."
pool = Pool(10)
result_iter = pool.imap(get_town_urls, state_urls)

town_urls = []
for result in result_iter:
	town_urls += result

#Clean up town URLs
town_urls = [re.sub("st\.-","st-",url) for url in town_urls]

#################
#Get paper links#
#################
print "Getting paper URLs..."
result_iter = pool.imap(get_paper_urls, town_urls)
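
Example #53 consumes pool.imap, which yields results lazily and in order instead of building the whole list up front. A minimal sketch with a hypothetical fetch function in place of the scrapers above:

from pathos.multiprocessing import ProcessingPool as Pool

def fetch(url):  # hypothetical stand-in for get_town_urls / get_paper_urls
    return [url + '/a', url + '/b']

if __name__ == '__main__':
    pool = Pool(4)
    links = []
    for chunk in pool.imap(fetch, ['x', 'y']):  # results arrive lazily, in order
        links += chunk
    print(links)  # -> ['x/a', 'x/b', 'y/a', 'y/b']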
Example #54
 def parallel_motif_analysis(self, samples_dirs):
     pool = ProcessingPool(nodes=16)
     pool.map(self.sample_motif_analysis, tuple(samples_dirs))
Example #55
 def analyse_samples_parallely(self, samples_dirs):
     pool = ProcessingPool(nodes=15)
     pool.map(self.sample_analysis, tuple(samples_dirs))
Example #56

def genseq(idx):

    first = np.where(np.random.multinomial(1,pvals=pops)==1)[0][0]
    last = first
    last_ts = datetime.now()
    result = {'artist_idx':[first],'ts':[last_ts]}
    for i in xrange(seq_length-1):
        next_listen = draw(last)
        last = next_listen
        gap_bin = 120*np.where(np.random.multinomial(1,pvals=td)==1)[0][0]
        gap = np.random.randint(gap_bin,gap_bin+120)
        result['artist_idx'].append(next_listen)
        new_ts = last_ts+timedelta(0,gap)
        result['ts'].append(new_ts)
        last_ts = new_ts

    df = pd.DataFrame(result)
    df['block'] = ((df['artist_idx'].shift(1) != df['artist_idx']).astype(int).cumsum())-1
    df.to_pickle(str(idx)+'.pkl')
    logging.info('idx {} complete'.format(idx))

pool = Pool(cpu_count())
indices = range(n)
pool.map(genseq,indices)
pool.close()



Example #57
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2014 California Institute of Technology.
# License: 3-clause BSD.  The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE

from pathos.multiprocessing import ProcessingPool as Pool
from pathos.multiprocessing import ThreadingPool as TPool
pool = Pool()
tpool = TPool()

# pickle fails for nested functions
def adder(augend):
  zero = [0]
  def inner(addend):
    return addend+augend+zero[0]
  return inner

# build from inner function
add_me = adder(5)

# build from lambda functions
squ = lambda x:x**2

# test 'dilled' multiprocessing for inner
print "Evaluate 10 items on 2 proc:"
pool.ncpus = 2
print pool
print pool.map(add_me, range(10))
print ''
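
The tpool built in Example #57 is never exercised. For completeness, a short sketch mapping the same lambda on both the process pool and the thread pool (the call is identical), assuming only pathos:

from pathos.multiprocessing import ProcessingPool as Pool
from pathos.multiprocessing import ThreadingPool as TPool

squ = lambda x: x**2  # lambdas are fine: pathos serializes with dill

if __name__ == '__main__':
    pool = Pool(2)
    tpool = TPool(4)
    print(pool.map(squ, range(10)))   # process pool
    print(tpool.map(squ, range(10)))  # same call on the thread pool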
Example #58
 def applay_parallel_ransac(self):
     sample_indices = [i for i in xrange(25)]
     pool = Pool()
     output = pool.map(self.calculate_distance, sample_indices)
     return output
Example #59
0
    
    # Creates a worker pool from the given command-line parameter. If the
    # parameter is too large, all detectable CPUs will be utilised. If the
    # parameter is nonsense, only 1 core will be utilised.
    workers = 1
    if len(sys.argv) >= 2 and sys.argv[1].isdigit() and int(sys.argv[1]) > 0:
        workers = cpu_count()
        if int(sys.argv[1]) <= workers:
            workers = int(sys.argv[1])
    
    print 'N:  ' + str(N)
    print 'PW: ' + str(workers)
    sleep(3) # just 3 seconds pause to read the input again.

    # All the magic happens here:
    pool = ProcessingPool(workers)
    Ys = pool.map(steadyState,y0)   

    clock = time()-clock # elapsed time
    print 'Seconds: ' + str(clock) # Not essential but useful.

    # Serialisation of results and stats:
    ss = {'STrange': STrange, 'PFDrange': PFDrange, 'Ys': Ys, 'Sec': clock, 'PoolWorkers': workers}
    output = open('steadyStateAnalysisFixedST_MC_N' + str(N) + '.pkl', 'wb')
    dill.dump(ss,output,2)
    output.close()

else:
    print('Well, something went wrong.')

#================================================================= #
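For reference, the worker-count clamping described in the comment above can be written as a small stand-alone helper. This is a sketch, not part of the original script, and it uses the standard-library cpu_count:

from multiprocessing import cpu_count

def clamp_workers(argument):
    # argument is expected to be a command-line string such as sys.argv[1]
    if not (argument.isdigit() and int(argument) > 0):
        return 1                                # nonsense input: fall back to one core
    return min(int(argument), cpu_count())      # too-large requests use all detectable CPUs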
Example #60
0
class analyze(setup.setup):

    def __init__(self,args,logging_level=logging.INFO):

        super(analyze, self).__init__(args,logging_level)


    # set up processing pool and run all analyses specified in args
    def run(self):


        if self.args.jumpdists:
            n_bins=100.
            bin_width = 1/n_bins
            bins = np.arange(0,1+bin_width,1/n_bins)

            if self.args.file:
                user,vals = self.artist_jump_distributions(self.args.file,bins=bins,self_jumps=False)
                with open(self.args.resultdir+user,'w') as fout:
                    fout.write(','.join(vals.astype(str))+'\n')



            else:
                raise NotImplementedError('not implemented!')
                self.pool = Pool(self.args.n)
                self.rootLogger.info("Pool started")

                self.rootLogger.info("Starting jump distance analysis")

                func_partial = partial(self.artist_jump_distributions,bins=bins,self_jumps=False)
                with open(self.args.resultdir+'jumpdists','w') as fout:
                    for user,vals in self.pool.imap(func_partial,self.listen_files):
                        fout.write(user+'\t'+','.join(vals.astype(str))+'\n')

                self.pool.close()
                self.rootLogger.info("Pool closed")

        if self.args.blockdists:
            #self.rootLogger.info("Starting block distance analysis")
            self.mean_block_distances(self.args.file)

        if self.args.diversity_dists:
            bins = np.arange(0,1.01,.01)
            self.diversity_distributions(self.args.file,bins=bins)

        if self.args.clustering:
            self.clustering(self.args.file)

        if self.args.values:
            self.patch_values(self.args.file)

        if self.args.exp:
            self.explore_exploit(self.args.file)

        if self.args.patch_len_dists:
            self.patch_len_dists(self.args.file)


    # calculate distribution (using histogram with specified bins)
    # of sequential artist-to-artist distances
    def artist_jump_distributions(self,fi,bins,self_jumps=False):
        user = fi.split('/')[-1][:-4]
        df = pd.read_pickle(fi)
        if self_jumps:
            vals = np.histogram(df['dist'].dropna(),bins=bins)[0]
        else:
            vals = np.histogram(df['dist'][df['dist']>0],bins=bins)[0]
        self.rootLogger.info('artist jump distances done for user {} ({})'.format(user,fi))
        return user,vals

    # calculate distribution (using histogram with specified bins)
    # of patch diversity for each user

    # awk 'FNR==1' * > diversity_dists_zeros
    # awk 'FNR==2' * > diversity_dists_nozeros
    def diversity_distributions(self,fi,bins):
        if 'patches' not in fi:
            raise ValueError('WRONG DATATYPE')
        user = fi.split('/')[-1].split('_')[0]
        df = pd.read_pickle(fi).dropna(subset=['diversity'])
        zeros = np.histogram(df[df['n']>=5]['diversity'],bins=bins)[0]
        nozeros = np.histogram(df[(df['n']>=5)&(df['diversity']>0)]['diversity'],bins=bins)[0]

        zeros = zeros/float(zeros.sum())
        nozeros = nozeros/float(nozeros.sum())

        with open(self.args.resultdir+user,'w') as fout:
            fout.write(user+'\t'+'zeros'+'\t'+','.join(zeros.astype(str))+'\n')
            fout.write(user+'\t'+'nozeros'+'\t'+','.join(nozeros.astype(str))+'\n')
        self.rootLogger.info('diversity distributions done for user {} ({})'.format(user,fi))


    def mean_block_distances(self,fi,n=100):

        def cos_nan(arr1,arr2):
            if np.any(np.isnan(arr1)) or np.any(np.isnan(arr2)):
                return np.nan
            else:
                return cosine(arr1,arr2)


        user = fi.split('/')[-1].split('_')[0]
        df = pd.read_pickle(fi)
        blocks = df[df['n']>=5].dropna()

        result = []
        for i in xrange(len(blocks)-n):
            first = blocks['centroid'].iloc[i]
            result.append(np.array(blocks['centroid'][i+1:i+n+1].apply(lambda val: cos_nan(val,first))))
        result = np.nanmean(np.vstack(result),0)

        with open(self.args.resultdir+user,'w') as fout:
            fout.write('\t'.join([user,'patch',','.join(result.astype(str))])+'\n')

        self.rootLogger.info('Block distances for user {} processed successfully ({})'.format(user,fi))


        # now shuffled
        # idx = np.array(blocks.index)
        # np.random.shuffle(idx)
        # blocks = blocks.reindex(idx)

        # result_random = []
        # for i in xrange(len(blocks)-n):
        #     first = blocks['centroid'].iloc[i]
        #     result_random.append(np.array(blocks['centroid'][i+1:i+n+1].apply(lambda val: cos_nan(val,first))))
        # result_random = np.nanmean(np.vstack(result_random),0)

        # with open(self.args.resultdir+user,'w') as fout:
        #     fout.write('\t'.join([user,'patch',','.join(result.astype(str))])+'\n')
        #     fout.write('\t'.join([user,'patch_random',','.join(result_random.astype(str))])+'\n')
        # self.rootLogger.info('Block distances for user {} processed successfully ({})'.format(user,fi))

    def clustering(self,fi):
        df = pd.read_pickle(fi)
        user = fi.split('/')[-1].split('_')[0]

        mask = (df['centroid'].apply(lambda arr: ~np.any(np.isnan(arr))).values)&(df['n']>=5)&(df['diversity']<=0.2)
        clust_data = df[mask].reset_index()
        arr =  np.vstack(clust_data['centroid'])
        Z = linkage(arr, 'complete')
        clusters = fcluster(Z,t=0.2,criterion='distance')
        assignments = np.repeat(np.nan,len(df))
        assignments[np.where(mask)] = clusters
        df['patch_clust'] = assignments
        df.to_pickle('{}{}.pkl'.format(self.args.resultdir,user))
        self.rootLogger.info('Patch clusters for user {} processed successfully ({})'.format(user,fi))

    def patch_len_dists(self,fi):
        df = pd.read_pickle(fi)
        user = fi.split('/')[-1][:-4]

        explore = df[np.isnan(df['patch_clust'])]
        result_explore = explore['n'].value_counts()

        df['explore'] = np.isnan(df['patch_clust']).astype(int)
        df['explore-idx'] = df['explore'].cumsum()

        result_exploit =  df.groupby('explore-idx').apply(lambda df: df.dropna()['n'].sum()).value_counts()

        result_explore = result_explore.reindex(xrange(1,max(result_explore.index)+1),fill_value=0.).values
        result_exploit = result_exploit.reindex(xrange(1,max(result_exploit.index)+1),fill_value=0.).values

        result_explore = sparse.csr_matrix(result_explore)
        result_exploit = sparse.csr_matrix(result_exploit)


        with open(self.args.resultdir+user,'w') as fout:
            fout.write(user+'\t'+'explore'+'\t'+':'.join([','.join(a.astype(str)) for a in (result_explore.data, result_explore.indices, result_explore.indptr)])+'\n')
            fout.write(user+'\t'+'exploit'+'\t'+':'.join([','.join(a.astype(str)) for a in (result_exploit.data, result_exploit.indices, result_exploit.indptr)])+'\n')
        self.rootLogger.info('User {} processed successfully ({})'.format(user,fi))



    def explore_exploit(self,fi):

        user = fi.split('/')[-1][:-4]

        df_patches_raw = pd.read_pickle(fi)

        # add time in next bout
        df_patches_raw['next_n'] = df_patches_raw['n'].shift(-1)

        # add patch values
        # listensPerPatch = df_patches_raw.groupby('patch_clust')['n'].sum()
        # overall_prop = listensPerPatch/float(df_patches_raw['n'].sum())
        # overall_prop.name = 'final_value'
        # df_patches_raw = df_patches_raw.join(overall_prop,on='patch_clust')


        """
        # time in next exploit patch as function of exploration time
        result = df_patches_raw[np.isnan(df_patches_raw['patch_clust'])].groupby('n')['next_n'].mean()

        fout.write(user+'\t'+'next-exploit-vs-explore'+'\t'+','.join(["{}:{}".format(a,b) for a,b in result.iteritems()])+'\n')
        """
        # total time exploiting as a function of time exploring
        df_patches_raw['explore'] = np.isnan(df_patches_raw['patch_clust']).astype(int)
        df_patches_raw['explore-idx'] = df_patches_raw['explore'].cumsum()

        # combine all exploit listens
        #grp_explore = df_patches_raw.groupby('explore-idx').apply(lambda df: pd.DataFrame({'n':[df['n'].iloc[0]],'n-exploit':[df['n'][1:].sum()]}))

        # only last exploit bout
        grp_explore = df_patches_raw.groupby('explore-idx').apply(lambda df: pd.DataFrame({'n':[df['n'].iloc[0]],'n-exploit':[df['n'].iloc[-1]]}))

        #result = grp_explore.groupby('n')['n-exploit'].mean()
        #fout.write(user+'\t'+'total-exploit-vs-explore'+'\t'+','.join(["{}:{}".format(a,b) for a,b in result.iteritems()])+'\n')
        """
        # exploration time as a function of exploitation time
        grp_exploit = grp_explore.copy()
        grp_exploit['n-explore'] = grp_exploit['n'].shift(-1)

        result = grp_exploit.groupby('n-exploit')['n-explore'].mean()
        fout.write(user+'\t'+'explore-vs-exploit'+'\t'+','.join(["{}:{}".format(a,b) for a,b in result.iteritems()])+'\n')
        """

        # prob exploit given explore time - already done

        # explore_only = df_patches_raw[np.isnan(df_patches_raw['patch_clust'])]
        # result = explore_only['n'][:-1].value_counts()
        # arr = result.reindex(xrange(1,max(result.index)+1),fill_value=0.).values
        # final_result = arr/(np.cumsum(arr[::-1])[::-1])
        # final_result = sparse.csr_matrix(final_result)

        # with open(self.args.resultdir+user+'_exploit','w') as fout:
        #     fout.write(user+'\t'+':'.join([','.join(a.astype(str)) for a in final_result.data,final_result.indices,final_result.indptr])+'\n')


        # prob explore given exploit time
        result = grp_explore['n-exploit'][grp_explore['n-exploit']>0].value_counts()
        arr = result.reindex(xrange(1,max(result.index)+1),fill_value=0.).values
        final_result = arr/np.cumsum(arr[::-1])[::-1]
        final_result = sparse.csr_matrix(final_result)

        with open(self.args.resultdir+user+'_explore','w') as fout:
            fout.write(user+'\t'+':'.join([','.join(a.astype(str)) for a in (final_result.data, final_result.indices, final_result.indptr)])+'\n')


        #fout.write(user+'\t'+'prob-explore-given-exploit'+'\t'+','.join(["{}:{}".format(a,b) for a,b in result.iteritems()])+'\n')

        """
        # patch value as a function of exploration time
        df_patches_raw['final_value_next'] = df_patches_raw['final_value'].shift(-1)
        result = df_patches_raw[np.isnan(df_patches_raw['patch_clust'])].groupby('n')['final_value_next'].mean()
        fout.write(user+'\t'+'exploit-value-vs-explore'+'\t'+','.join(["{}:{}".format(a,b) for a,b in result.iteritems()])+'\n')
        """

        self.rootLogger.info('User {} processed successfully ({})'.format(user,fi))
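The expression arr/np.cumsum(arr[::-1])[::-1] used above turns a histogram of exploit-bout lengths into an empirical hazard rate: for each length k, the fraction of bouts that reached k listens and ended there. A small worked sketch with illustrative values:

import numpy as np

# counts of exploit bouts of length 1, 2, 3, 4 (illustrative values only)
arr = np.array([10., 6., 3., 1.])

# number of bouts lasting at least k listens
at_least_k = np.cumsum(arr[::-1])[::-1]   # [20., 10., 4., 1.]

# P(bout ends at exactly k | it reached k)
hazard = arr / at_least_k                 # [0.5, 0.6, 0.75, 1.0]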