Example #1
def test_wrap():
    x = [(-1, -2), (3, -4)]
    y = [3, 1]
    assert list(map(addabs, x)) == y

    from pathos.pools import ProcessPool as Pool
    assert Pool().map(addabs, x) == y

    from pathos.pools import ParallelPool as Pool
    assert Pool().map(addabs, x) == y
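The test relies on an addabs helper defined elsewhere in the source module. A minimal stand-in consistent with the asserted values (hypothetical; the original module may define it differently, e.g. via a decorator):

def addabs(pair):
    # hypothetical helper: absolute value of the sum of a 2-tuple,
    # so [(-1, -2), (3, -4)] maps to [3, 1] as asserted above
    return abs(pair[0] + pair[1])
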
Example #2
    def find_transcript_mutations(self, paths, processes=1):
        """Create a `DataFrame` of mutation counts, where the row indices are
        cell names and the column indices are gene names."""
        def init_process(aa_mutation_finder):
            global current_process_aa_mutation_finder
            current_process_aa_mutation_finder = aa_mutation_finder

        def process_cell(path):
            return (
                Path(path).stem,
                current_process_aa_mutation_finder.find_cell_gene_aa_mutations(
                    path=path))

        if processes > 1:
            with Pool(processes, initializer=init_process,
                      initargs=(self, )) as pool:
                results = list(
                    tqdm(pool.imap(process_cell, paths),
                         total=len(paths),
                         smoothing=0.01))

        else:
            init_process(self)
            results = list(map(process_cell, tqdm(paths)))

        return self._make_mutation_counts_df(results)
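The initializer/global pattern above hands each worker process its own copy of the (potentially large) finder object once, instead of re-pickling it for every task. A minimal, self-contained sketch of the same pattern with the standard library pool (all names are illustrative, not from the source):

from multiprocessing import Pool

def init_worker(shared_obj):
    # runs once per worker process; stash the object in a module-level global
    global _worker_obj
    _worker_obj = shared_obj

def scale_item(item):
    # each task reads the per-process copy installed by the initializer
    return _worker_obj['scale'] * item

if __name__ == '__main__':
    with Pool(2, initializer=init_worker, initargs=({'scale': 10},)) as pool:
        print(pool.map(scale_item, [1, 2, 3]))  # [10, 20, 30]
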
Example #3
    def _meanfield_sgdstep_components(self,
                                      y,
                                      x,
                                      scores,
                                      prob,
                                      stepsize,
                                      nb_threads=4):

        if nb_threads == 1:
            for idx, (b, m) in enumerate(zip(self.basis, self.models)):
                b.meanfield_sgdstep(x, [_score[:, idx] for _score in scores],
                                    prob, stepsize)
                m.meanfield_sgdstep(y, x,
                                    [_score[:, idx] for _score in scores],
                                    prob, stepsize)
        else:

            def _loop(idx):
                self.basis[idx].meanfield_sgdstep(
                    x, [_score[:, idx] for _score in scores], prob, stepsize)
                self.models[idx].meanfield_sgdstep(
                    y, x, [_score[:, idx] for _score in scores], prob,
                    stepsize)

            with Pool(threads=nb_threads) as p:
                p.map(_loop, range(self.likelihood.size))
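Mapping the locally defined _loop closure only works with a pool that serializes via dill (as the pathos pools do); the standard pickle module cannot serialize nested functions or lambdas. A tiny illustration of that capability, assuming pathos is installed:

from pathos.pools import ProcessPool

def run(offset):
    shift = lambda v: v + offset   # a closure over offset; plain pickle would reject this
    pool = ProcessPool(nodes=2)
    try:
        return pool.map(shift, [1, 2, 3])
    finally:
        pool.close()
        pool.join()

if __name__ == '__main__':
    print(run(10))  # expected: [11, 12, 13]
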
Example #4
def run_count(keywords_list, url_path, use_cashed=True, process_num=8):

    if len(keywords_list) == 0: return None
    keywords_list = [
        clean_text(i)
        for i in sorted(set(keywords_list), key=keywords_list.index)
    ]
    res = [['Name', 'Org', 'Year']]
    res[0].extend(keywords_list)

    with open(url_path, 'r') as f:
        pdf_tuples_list = [i for i in yield_tuples(json.load(f))]

    keywords_list = [keywords_list for _ in pdf_tuples_list]

    errs = []
    with tqdm(total=len(pdf_tuples_list), ncols=80) as pbar:
        with Pool(process_num) as pool:
            pool_iter = pool.imap(count_keywords_in_one_pdf, pdf_tuples_list,
                                  keywords_list)
            for r in pool_iter:
                # print(r)
                if 'ERROR' in str(r): errs.append(r)
                elif r: res.append(r)  # not None
                pbar.update()

    if len(errs) != 0:
        time_now = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
        with open(f'FIND ERR_{time_now}.log', 'w',
                  encoding='utf8') as err_file:
            for i in errs:
                err_file.write(f'{i}\n')
    return res
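Note that pool.imap above receives two iterables; pathos pools follow the built-in map() convention and zip multiple argument lists together, which is why keywords_list is repeated once per PDF tuple. A minimal sketch of that calling convention, assuming a pathos pool:

from pathos.pools import ProcessPool

def combine(a, b):
    return a + b

if __name__ == '__main__':
    pool = ProcessPool(nodes=2)
    # imap accepts several iterables, like the built-in map()
    print(list(pool.imap(combine, [1, 2, 3], [10, 20, 30])))  # [11, 22, 33]
    pool.close()
    pool.join()
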
Example #5
    def test01_bgsave_stress(self):
        n_nodes = 1000
        n_iterations = 10
        conn = self.env.getConnection()
        graphs[0].query("CREATE INDEX ON :Node(val)")

        pool = Pool(nodes=5)

        t1 = pool.apipe(create_nodes, graphs[0], n_iterations, n_nodes)

        t2 = pool.apipe(delete_nodes, graphs[1], n_iterations, n_nodes / 2)

        t3 = pool.apipe(read_nodes, graphs[2], n_iterations)

        t4 = pool.apipe(update_nodes, graphs[3], n_iterations)

        t5 = pool.apipe(BGSAVE_loop, self.env, conn, 3)

        # wait for processes to join
        t1.wait()
        t2.wait()
        t3.wait()
        t4.wait()
        t5.wait()

        # make sure we did not crash
        conn.ping()
        conn.close()
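apipe submits a single asynchronous call and returns a handle exposing wait(), ready() and get(), which is how the test fans out five independent workloads and then blocks on all of them. A stripped-down sketch of the same pattern, assuming pathos:

from pathos.pools import ProcessPool

def slow_square(n):
    return n * n

if __name__ == '__main__':
    pool = ProcessPool(nodes=2)
    handles = [pool.apipe(slow_square, n) for n in range(4)]
    for h in handles:
        h.wait()                       # block until each task finishes
    print([h.get() for h in handles])  # [0, 1, 4, 9]
    pool.close()
    pool.join()
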
Example #6
    def find_mutation_counts(self, paths, processes=4):
        """Create a `DataFrame` of mutation counts, where the row indices are
        cell names and the column indices are gene names."""
        def init_process(mutation_counter):
            global current_process_mutation_counter
            current_process_mutation_counter = mutation_counter

        def process_cell(path):
            cell_name = Path(path).stem
            return (cell_name,
                    current_process_mutation_counter.find_cell_gene_mut_counts(
                        path=path))

        if processes > 1:
            with Pool(processes, initializer=init_process,
                      initargs=(self, )) as pool:
                results = list(
                    tqdm(pool.imap(process_cell, paths),
                         total=len(paths),
                         smoothing=0.01))
        else:
            results = list(map(process_cell, tqdm(paths)))

        cell_genemuts_pairs, filtered_cell_genmuts_pairs = [], []

        for cell_name, (mutation_counts, filtered_mutation_counts) in results:
            cell_genemuts_pairs.append((cell_name, mutation_counts))
            filtered_cell_genmuts_pairs.append(
                (cell_name, filtered_mutation_counts))

        return (
            self._make_mutation_counts_df(cell_genemuts_pairs),
            self._make_mutation_counts_df(filtered_cell_genmuts_pairs),
        )
Example #7
    def clear_stack(cls):

        cpu = cpu_count()
        pool = Pool(cpu)

        def classify_set(c, pd, pc):

            pd = Directory.classify_product_auto(config, pd)
#           if product.part_id:
            pc.product = pd
            Directory.set_price(pc)
#           else:
#               manuals.append((config, product, price))

#       manuals = []

        for config, product, price in cls.STACK:

            pool.apply_async(classify_set, args=(config, product, price))

#       for config, product, price in manuals:
#           product = Directory.classify_product_manual(config, product)
#           if product.part_id:
#               price.product = product
#               Directory.set_price(price)
#           else:
#               Directory.set_product(product)

        cls.STACK = []
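As written, the method dispatches apply_async calls but never waits on the results or closes the pool, so the classification work is not guaranteed to have finished by the time the method returns; with a pickle-based pool the nested classify_set would also fail to serialize. A hedged sketch of the usual collect-and-join shape for apply_async, using the standard library pool and an illustrative worker rather than the example's classify_set:

from multiprocessing import Pool, cpu_count

def classify(n):
    # illustrative stand-in for per-item work
    return n * 2

if __name__ == '__main__':
    pool = Pool(cpu_count())
    async_results = [pool.apply_async(classify, args=(n,)) for n in range(5)]
    pool.close()
    pool.join()                              # wait for all asynchronous tasks to finish
    print([r.get() for r in async_results])  # [0, 2, 4, 6, 8]
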
Example #8
    def test_calc_uncertainty_pool_pass(self):
        """Test parallel computation of the uncertainty distribution for an impact"""

        exp_unc, impf_unc, _ = make_input_vars()
        haz = haz_dem()
        unc_calc = CalcImpact(exp_unc, impf_unc, haz)
        unc_data = unc_calc.make_sample(N=2)

        pool = Pool(nodes=2)
        try:
            unc_data = unc_calc.uncertainty(unc_data,
                                            calc_eai_exp=False,
                                            calc_at_event=False,
                                            pool=pool)
        finally:
            pool.close()
            pool.join()
            pool.clear()

        self.assertEqual(unc_data.unit, exp_dem().value_unit)
        self.assertListEqual(unc_calc.rp, [5, 10, 20, 50, 100, 250])
        self.assertEqual(unc_calc.calc_eai_exp, False)
        self.assertEqual(unc_calc.calc_at_event, False)

        self.assertEqual(unc_data.aai_agg_unc_df.size, unc_data.n_samples)
        self.assertEqual(unc_data.tot_value_unc_df.size, unc_data.n_samples)

        self.assertEqual(unc_data.freq_curve_unc_df.size,
                         unc_data.n_samples * len(unc_calc.rp))
        self.assertTrue(unc_data.eai_exp_unc_df.empty)
        self.assertTrue(unc_data.at_event_unc_df.empty)
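pathos caches its pools internally, so the test releases resources with close()/join() and then clear(), which drops the cached pool state so a later test can build a fresh one. A minimal illustration of that lifecycle, assuming pathos:

from pathos.pools import ProcessPool

def double(n):
    return 2 * n

if __name__ == '__main__':
    pool = ProcessPool(nodes=2)
    try:
        print(pool.map(double, range(5)))  # [0, 2, 4, 6, 8]
    finally:
        pool.close()   # stop accepting new tasks
        pool.join()    # wait for the workers to exit
        pool.clear()   # remove the cached pool object from pathos' registry
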
Example #9
    def test_set_one_file_pass(self):
        """Test the set function set_from_tracks with one input."""

        pool = Pool()

        tc_track = TCTracks(pool)

        tc_track.read_processed_ibtracs_csv(TEST_TRACK)
        tc_track.calc_random_walk()
        tc_track.equal_timestep()

        tc_haz = TropCyclone(pool)
        tc_haz.set_from_tracks(tc_track, CENTR_TEST_BRB)
        tc_haz.check()

        pool.close()
        pool.join()

        self.assertEqual(tc_haz.tag.haz_type, 'TC')
        self.assertEqual(tc_haz.tag.description, '')
        self.assertEqual(tc_haz.units, 'm/s')
        self.assertEqual(tc_haz.centroids.size, 296)
        self.assertEqual(tc_haz.event_id.size, 10)
        self.assertTrue(isinstance(tc_haz.intensity, sparse.csr.csr_matrix))
        self.assertTrue(isinstance(tc_haz.fraction, sparse.csr.csr_matrix))
        self.assertEqual(tc_haz.intensity.shape, (10, 296))
        self.assertEqual(tc_haz.fraction.shape, (10, 296))
Example #10
def UQ(start, end, lower, upper):
    #from pathos.pools import ProcessPool as Pool
    from pathos.pools import ThreadPool as Pool
    #from pool_helper import func_pickle  # if fails to pickle, try using a helper

    # run optimizer for each subdiameter
    lb = [lower + [lower[i]] for i in range(start, end + 1)]
    ub = [upper + [upper[i]] for i in range(start, end + 1)]
    nb = [nbins[:] for i in range(start, end + 1)]
    for i in range(len(nb)):
        nb[i][-1] = nb[i][i]
    cf = [costFactory(i) for i in range(start, end + 1)]
    #cf = [func_pickle(i)     for i in cf]
    #cf = [cost.name          for cost in cf]
    nnodes = len(lb)

    #construct cost function and run optimizer
    results = Pool(nnodes).map(optimize, cf, lb, ub, nb)
    #print("results = %s" % results)

    results = list(zip(*results))
    diameters = list(results[0])
    function_evaluations = list(results[1])

    total_func_evals = sum(function_evaluations)
    total_diameter = sum(diameters)

    print("subdiameters (squared): %s" % diameters)
    print("diameter (squared): %s" % total_diameter)
    print("func_evals: %s => %s" % (function_evaluations, total_func_evals))

    return total_diameter
Example #11
def optimize(cost,lb,ub):
  from pathos.pools import ProcessPool as Pool
  from mystic.solvers import DifferentialEvolutionSolver2
  from mystic.termination import CandidateRelativeTolerance as CRT
  from mystic.strategy import Best1Exp
  from mystic.monitors import VerboseMonitor, Monitor
  from mystic.tools import random_seed

  random_seed(123)

 #stepmon = VerboseMonitor(100)
  stepmon = Monitor()
  evalmon = Monitor()

  ndim = len(lb) # [(1 + RVend) - RVstart] + 1

  solver = DifferentialEvolutionSolver2(ndim,npop)
  solver.SetRandomInitialPoints(min=lb,max=ub)
  solver.SetStrictRanges(min=lb,max=ub)
  solver.SetEvaluationLimits(maxiter,maxfun)
  solver.SetEvaluationMonitor(evalmon)
  solver.SetGenerationMonitor(stepmon)
  solver.SetMapper(Pool().map)

  tol = convergence_tol
  solver.Solve(cost,termination=CRT(tol,tol),strategy=Best1Exp, \
               CrossProbability=crossover,ScalingFactor=percent_change)

  print("solved: %s" % solver.bestSolution)
  scale = 1.0
  diameter_squared = -solver.bestEnergy / scale  #XXX: scale != 0
  func_evals = solver.evaluations
  return diameter_squared, func_evals
Example #12
    def test_09_concurrent_multiple_readers_after_big_write(self):
        # Test issue #890
        redis_graphs = []
        for i in range(0, CLIENT_COUNT):
            redis_con = self.env.getConnection()
            redis_graphs.append(Graph("G890", redis_con))
        redis_graphs[0].query(
            """UNWIND(range(0,999)) as x CREATE()-[:R]->()""")
        read_query = """MATCH (n)-[r:R]->(m) RETURN n, r, m"""

        queries = [read_query] * CLIENT_COUNT
        pool = Pool(nodes=CLIENT_COUNT)

        # invoke queries
        m = pool.amap(thread_run_query, redis_graphs, queries)

        # wait for processes to return
        m.wait()

        # get the results
        result = m.get()

        for i in range(CLIENT_COUNT):
            if isinstance(result[i], str):
                self.env.assertIsNone(result[i])
            else:
                self.env.assertEquals(1000, len(result[i].result_set))
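amap is the asynchronous counterpart of map: it returns a handle immediately, and the results are collected later with wait()/get(), as the test does above. A minimal sketch of that flow, assuming pathos:

from pathos.pools import ProcessPool

def add(a, b):
    return a + b

if __name__ == '__main__':
    pool = ProcessPool(nodes=2)
    handle = pool.amap(add, [1, 2, 3], [10, 20, 30])  # returns immediately
    handle.wait()                                     # block until every result is ready
    print(handle.get())                               # [11, 22, 33]
    pool.close()
    pool.join()
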
Example #13
    def test01_bgsave_stress(self):
        n_reads = 50000
        n_creations = 50000
        n_updates = n_creations / 10
        n_deletions = n_creations / 2

        conn = self.env.getConnection()
        graphs[0].query("CREATE INDEX FOR (n:Node) ON (n.v)")

        pool = Pool(nodes=5)

        t1 = pool.apipe(create_nodes, graphs[0], n_creations)

        t2 = pool.apipe(delete_nodes, graphs[1], n_deletions)

        t3 = pool.apipe(read_nodes, graphs[2], n_reads)

        t4 = pool.apipe(update_nodes, graphs[3], n_updates)

        t5 = pool.apipe(BGSAVE_loop, self.env, conn, 10000)

        # wait for processes to join
        t1.wait()
        t2.wait()
        t3.wait()
        t4.wait()
        t5.wait()

        # make sure we did not crash
        conn.ping()
        conn.close()
Example #14
    def test_07_concurrent_write_rename(self):
        # Test setup - validate that graph exists and possible results are None
        graphs[0].query("MATCH (n) RETURN n")

        pool = Pool(nodes=1)
        redis_con = self.env.getConnection()
        new_graph = GRAPH_ID + "2"
        # Create new empty graph with id GRAPH_ID + "2"
        redis_con.execute_command("GRAPH.QUERY", new_graph,
                                  """MATCH (n) return n""", "--compact")
        heavy_write_query = """UNWIND(range(0,999999)) as x CREATE(n)"""
        writer = pool.apipe(thread_run_query, graphs[0], heavy_write_query)
        redis_con.rename(GRAPH_ID, new_graph)
        writer.wait()
        # Possible scenarios:
        # 1. The rename completes before the query is sent. The name in the graph context is new_graph, so upon commit, when trying to open the new_graph key, the query encounters an empty key, since new_graph is not a valid key.
        #    Note: as of https://github.com/RedisGraph/RedisGraph/pull/820 this may no longer apply, since the rename event handler might rename the graph key before the query executes.
        # 2. The rename happens while the query is executing, so when committing and comparing the stored graph context name (GRAPH_ID) to the retrieved graph context name (new_graph), the identifiers differ, since the new_graph value is now stored under the GRAPH_ID key.

        possible_exceptions = [
            "Encountered different graph value when opened key " + GRAPH_ID,
            "Encountered an empty key when opened key " + new_graph
        ]

        result = writer.get()
        if isinstance(result, str):
            self.env.assertContains(result, possible_exceptions)
        else:
            self.env.assertEquals(1000000, result.nodes_created)
Example #15
def test_pp():
    from pathos.pools import ParallelPool as Pool
    pool = Pool(nodes=4)
    check_sanity(pool)
    check_maps(pool, items, delay)
    check_dill(pool)
    check_ready(pool, maxtries, delay, verbose=False)
Example #16
def main(argv):
    print('Input/Output data folder: ', DATA_DIR)

    # set parallel processing
    pool = Pool()

    # exposures
    expo_dict = calc_exposure(DATA_DIR)

    # tracks
    sel_tr = calc_tracks(DATA_DIR, pool)

    # dictionary of tc per island
    tc_dict = calc_tc(expo_dict, sel_tr, DATA_DIR, pool)

    # damage per isl
    imp_dict = calc_imp(expo_dict, tc_dict, DATA_DIR)

    # damage irma
    get_irma_damage(imp_dict)

    # average annual impact
    aai_isl(imp_dict)

    # compute impact exceedance frequency
    get_efc_isl(imp_dict)

    # FIG03 and FIG04
    fig03_fig04(DATA_DIR, FIG_DIR)  # 5min

    # FIG 06
    fig06(DATA_DIR, FIG_DIR)

    pool.close()
    pool.join()
Example #17
 def run_N(self,
           nb_execution=10,
           loop=100,
           grphq=False,
           pas=10,
           duration_gif=0.5):
     """
     Runs N iterations of the k-means algorithm and keeps the centers that produce the least error.
     Each iteration starts from random initial centers, so the results differ each time.
     Returns this minimal error.
     The input parameters are the same as for run, with the addition of:
         nb_execution: an integer giving the number of k-means runs to perform.
     """
     f = partial(self.__k_run, loop=loop, grphq=grphq, pas=pas)
     pool = Pool(self.cpu)
     memory = list(pool.uimap(f, range(nb_execution)))
     pool.close()
     pool.join()
     ind = np.argmin(np.array([m[0] for m in memory]))
     means = memory[ind][1]
     self.means = means
     self.calc_grp()
     if grphq: self.grphq.create_gif(duration=duration_gif)
     del pool
     return memory[ind][0]
Example #18
    def test_calc_uncertainty_pool_pass(self):
        """Test computing the uncertainty distribution for an impact"""

        ent_iv, _ = make_costben_iv()
        _, _, haz_iv = make_input_vars()
        unc_calc = CalcCostBenefit(haz_iv, ent_iv)
        unc_data = unc_calc.make_sample(N=2)

        pool = Pool(n=2)
        try:
            unc_data = unc_calc.uncertainty(unc_data, pool=pool)
        finally:
            pool.close()
            pool.join()
            pool.clear()

        self.assertEqual(unc_data.unit, ent_dem().exposures.value_unit)

        self.assertEqual(unc_data.tot_climate_risk_unc_df.size,
                         unc_data.n_samples)
        self.assertEqual(
            unc_data.cost_ben_ratio_unc_df.size,
            unc_data.n_samples * 4  #number of measures
        )
        self.assertEqual(unc_data.imp_meas_present_unc_df.size, 0)
        self.assertEqual(
            unc_data.imp_meas_future_unc_df.size,
            unc_data.n_samples * 4 * 5  #All measures 4 and risks/benefits 5
        )
Example #19
    def test_08_concurrent_write_replace(self):
        # Test setup - validate that graph exists and possible results are None
        self.graph.query("MATCH (n) RETURN n")

        pool = Pool(nodes=1)
        heavy_write_query = """UNWIND(range(0,999999)) as x CREATE(n) RETURN count(n)"""
        writer = pool.apipe(thread_run_query, heavy_write_query, None)
        set_result = self.conn.set(GRAPH_ID, "1")
        writer.wait()
        possible_exceptions = [
            "Encountered a non-graph value type when opened key " + GRAPH_ID,
            "WRONGTYPE Operation against a key holding the wrong kind of value"
        ]

        result = writer.get()
        if isinstance(result, str):
            # If the SET command attempted to execute while the CREATE query was running,
            # an exception should have been issued.
            self.env.assertContains(result, possible_exceptions)
        else:
            # Otherwise, both the CREATE query and the SET command should have succeeded.
            self.env.assertEquals(1000000, result.result_set[0][0])
            self.env.assertEquals(set_result, True)

        # Delete the key
        self.conn.delete(GRAPH_ID)
Example #20
def run_concurrent(env, queries, f):
    pool = Pool(nodes=CLIENT_COUNT)

    # invoke queries
    result = pool.map(f, graphs, queries)

    # validate that all processes return True
    env.assertTrue(all(result))
Example #21
    def test_04_concurrent_delete(self):
        pool = Pool(nodes=CLIENT_COUNT)

        # invoke queries
        assertions = pool.map(delete_graph, graphs)

        # Exactly one thread should have successfully deleted the graph.
        self.env.assertEquals(assertions.count(True), 1)
Example #22
def run_concurrent(queries, f):
    pool = Pool(nodes=CLIENT_COUNT)
    manager = pathos_multiprocess.Manager()

    barrier = manager.Barrier(CLIENT_COUNT)
    barriers = [barrier] * CLIENT_COUNT

    # invoke queries
    return pool.map(f, queries, barriers)
Example #23
def map_parallel_progress(func, items: List[Any],
                          processes: int = cpu_count()):
    with Pool(processes=processes) as pool:
        with tqdm.tqdm(total=len(items)) as pbar:
            results = []
            for result in pool.imap(func, items):
                pbar.update()
                results.append(result)
            return results
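The helper above omits its imports; a plausible, self-contained reconstruction with a usage example follows (the standard library pool, tqdm and typing are assumptions, and square is an illustrative worker):

from multiprocessing import Pool, cpu_count
from typing import Any, List

import tqdm

def map_parallel_progress(func, items: List[Any],
                          processes: int = cpu_count()):
    # same body as above: stream results with imap and tick a progress bar
    with Pool(processes=processes) as pool:
        with tqdm.tqdm(total=len(items)) as pbar:
            results = []
            for result in pool.imap(func, items):
                pbar.update()
                results.append(result)
            return results

def square(n):
    return n * n

if __name__ == '__main__':
    print(map_parallel_progress(square, list(range(100))))
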
Example #24
    def test_10_write_starvation(self):
        # make sure the write query does not starve
        # when issuing a large number of read queries
        # alongside a single write query
        # we don't want the write query to have to wait
        # too long; consider the following sequence:
        # R, W, R, R, R, R, R, R, R...
        # if writes are starved, our write query might have to wait
        # for all queued read queries to complete while holding
        # the Redis global lock, which will hurt performance
        #
        # this test issues a similar sequence of queries and
        # validates that the write query wasn't delayed too much

        self.graph = Graph(self.conn, GRAPH_ID)
        pool = Pool(nodes=CLIENT_COUNT)

        Rq = "UNWIND range(0, 10000) AS x WITH x WHERE x = 9999 RETURN 'R', timestamp()"
        Wq = "UNWIND range(0, 1000) AS x WITH x WHERE x = 27 CREATE ({v:1}) RETURN 'W', timestamp()"
        Slowq = "UNWIND range(0, 100000) AS x WITH x WHERE (x % 73) = 0 RETURN count(1)"

        # issue a number of slow queries, this will give us time to fill up
        # RedisGraph internal threadpool queue
        queries = [Slowq] * CLIENT_COUNT * 5
        nulls = [None] * CLIENT_COUNT * 5

        # issue queries asynchronously
        pool.imap(thread_run_query, queries, nulls)

        # create a long sequence of read queries
        queries = [Rq] * CLIENT_COUNT * 10
        nulls = [None] * CLIENT_COUNT * 10

        # inject a single write query close to the beginning of the sequence
        queries[CLIENT_COUNT] = Wq

        # invoke queries
        # execute queries in parallel
        results = pool.map(thread_run_query, queries, nulls)

        # count how many queries completed before the write query
        count = 0
        write_ts = results[CLIENT_COUNT]["result_set"][0][1]
        for result in results:
            row = result["result_set"][0]
            ts = row[1]
            if ts < write_ts:
                count += 1

        # make sure the write query wasn't starved
        self.env.assertLessEqual(count, len(queries) * 0.3)

        # delete the key
        self.conn.delete(GRAPH_ID)
Example #25
    def test00_stress(self):
        ids = range(self.client_count)
        pool = Pool(nodes=self.client_count)

        # invoke queries
        pool.map(query_crud, graphs, ids)

        # make sure we did not crash
        conn = self.env.getConnection()
        conn.ping()
        conn.close()
Example #26
def main(query=DEFAULT_QUERY,
         number_of_processes=DEFAULT_PROCESSES,
         base_dir=DEFAULT_BASE_DIR,
         file=None):
    saver = DocSaver(base_dir)
    pool = Pool(number_of_processes)
    if file:
        docs = docs_from_file(file)
    else:
        docs = docs_from_query(query)
    download(docs, pool, saver)
Example #27
    def test_read_raster_pool_pass(self):
        """Test from_raster constructor with pool"""
        from pathos.pools import ProcessPool as Pool
        pool = Pool()
        haz_fl = Hazard.from_raster([HAZ_DEMO_FL], haz_type='FL', pool=pool)
        haz_fl.check()

        self.assertEqual(haz_fl.intensity.shape, (1, 1032226))
        self.assertEqual(haz_fl.intensity.min(), -9999.0)
        self.assertAlmostEqual(haz_fl.intensity.max(), 4.662774085998535)
        pool.close()
        pool.join()
Example #28
def liste_machines_allumees(machines):
    machinesTemp = []
    pool = Pool(nodes=10)
    machinesTemp = pool.map(recherche, machines)

    # keep only the machines that are powered on
    machinesAllumees = []
    for val in machinesTemp:
        if val != None:
            machinesAllumees.append(val)

    return machinesAllumees
Example #29
def parallel_ilr_inference(nb_jobs=50, **kwargs):
    kwargs_list = []
    for n in range(nb_jobs):
        kwargs['seed'] = n
        kwargs_list.append(kwargs.copy())

    with Pool(processes=min(nb_jobs, nb_cores),
              initializer=tqdm.set_lock,
              initargs=(tqdm.get_lock(), )) as p:
        res = p.map(_job, kwargs_list)

    return res
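Passing tqdm.set_lock as the pool initializer (with the parent's lock in initargs) is the documented tqdm recipe for letting several worker processes draw progress bars without interleaving their output. A minimal sketch of the same idea with the standard library pool (the worker here is illustrative):

from multiprocessing import Pool, RLock

from tqdm import tqdm

def job(position):
    # each worker draws its own bar on a separate line
    for _ in tqdm(range(100), desc=f"worker {position}", position=position):
        pass
    return position

if __name__ == '__main__':
    tqdm.set_lock(RLock())
    with Pool(4, initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) as pool:
        pool.map(job, range(4))
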
Example #30
 def test05_index_delete(self):
     def create_drop_index(graph_id):
         env = Env(decodeResponses=True)
         redis_con = env.getConnection()
         for _ in range(1, 100):
             pipe = redis_con.pipeline()
             pipe.execute_command("GRAPH.QUERY", f"x{graph_id}", "CREATE (a:L), (n:L), (n)-[:T]->(a)")
             pipe.execute_command("GRAPH.QUERY", f"x{graph_id}", "CREATE INDEX FOR ()-[n:T]-() ON (n.p)")
             pipe.execute()
             redis_con.execute_command("GRAPH.DELETE", f"x{graph_id}")
     pool = Pool(nodes=10)
     pool.map(create_drop_index, range(1, 100))