Example No. 1
def main(output_fname):
    nparams = args.Nparams
    params = np.array((0.73, 14.09, 0.32, 13.27, 13.004))
    params = params * np.ones((nparams, 5))

    nproc = 55

    global model
    model = standard_hod_model()

    output_dict = collections.defaultdict(list)

    with Pool(nproc) as pool:
        for i, output_data in enumerate(pool.map(calc_all_observables,
                                                 params)):
            if i % 55 == 54:
                print(i)
                print(str(datetime.now()))
            for name, data in zip(output_names, output_data):
                output_dict[name].append(data)

    for name in output_names:
        output_dict[name] = np.array(output_dict[name])

    np.savez(output_fname, **output_dict)
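Note: like most snippets on this page, this one relies on module-level context that is not shown (args, standard_hod_model, calc_all_observables, output_names, and the Pool import). A minimal self-contained sketch of the same collect-into-arrays pattern, assuming Pool is concurrent.futures.ProcessPoolExecutor (as later examples import it) and using hypothetical stand-ins for the observables:

import collections
from concurrent.futures import ProcessPoolExecutor as Pool

import numpy as np

output_names = ['sum', 'mean']  # hypothetical observable names

def calc_all_observables(params):
    # hypothetical stand-in for the real observable calculation
    return params.sum(), params.mean()

def main(output_fname, nparams=10, nproc=4):
    params = np.array((0.73, 14.09, 0.32, 13.27, 13.004)) * np.ones((nparams, 5))
    output_dict = collections.defaultdict(list)
    with Pool(nproc) as pool:
        # each row of params is sent to one worker; results come back in order
        for output_data in pool.map(calc_all_observables, params):
            for name, data in zip(output_names, output_data):
                output_dict[name].append(data)
    np.savez(output_fname, **{name: np.array(vals) for name, vals in output_dict.items()})

if __name__ == '__main__':
    main('observables.npz')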
Example No. 2
def main(model_gen_func, fiducial, output_fname):
    global model
    model = model_gen_func()

    params = np.array(fiducial)*np.ones((args.Nparam,7))
    
    output_dict = collections.defaultdict(list)
    nproc = args.nproc
    
    global halocat
    
    with Pool(nproc) as pool:
        halocat = CachedHaloCatalog(simname=args.simname, version_name=args.version,
                                    redshift=args.redshift, halo_finder=args.halofinder)
        model.populate_mock(halocat)
        for i, output_data in enumerate(pool.map(calc_all_observables, params)):
            print(i)
            print(str(datetime.now()))
            for name, data in zip(output_names, output_data):
                output_dict[name].append(data)
    
    for name in output_names:
        output_dict[name] = np.array(output_dict[name])

    np.savez(output_fname, **output_dict)
Example No. 3
def get_rows(stack: List[Column], range_: range) -> np.ndarray:
    if _MULTIPROCESSING:
        with Pool(psutil.cpu_count(logical=False)) as pool:
            return np.column_stack(
                list(pool.map(get_col, [(col, range_) for col in stack])))
    else:
        return np.column_stack([col[range_] for col in stack])
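get_col receives the (col, range_) tuples built above as a single argument; a hypothetical sketch of a compatible worker, assuming a Column can be indexed by a range just as the sequential branch col[range_] does:

import numpy as np

def get_col(arg):
    # unpack the (column, range_) tuple passed through pool.map (hypothetical worker)
    col, range_ = arg
    return np.asarray(col[range_])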
Example No. 4
def get_trace():
    tunnel = np.loadtxt('../data/tunnel.txt', delimiter=',')
    n, m = tunnel.shape[:]
    mat = np.zeros([n, n])
    work_list = []

    MAXN = n
    args = []
    for i in range(MAXN):
        x0, y0, x1, y1 = tunnel[i][:]
        a_list = [[x0, y0], [x1, y1]]
        work_list.append(a_list)
        args.append(tunnel)
        a = xy_radian(x0, y0, x1, y1)
        # ret = proc(a_list, tunnel)
        # for j in range(len(ret)):
        #     mat[i][j] = ret[j]

    with Pool(max_workers=4) as pool:
        results = pool.map(proc, work_list, args)
        for i, ret in enumerate(results):
            for j in range(len(ret)):
                mat[i][j] = ret[j]

    np.savetxt("./data/dist.txt", mat, fmt="%.2f")
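pool.map(proc, work_list, args) relies on concurrent.futures.Executor.map accepting multiple iterables, so proc takes the segment endpoints and the full tunnel array as two positional arguments. A hypothetical sketch of such a worker returning one value per tunnel row:

import numpy as np

def proc(a_list, tunnel):
    # hypothetical worker: distance from this segment's midpoint to every segment's midpoint
    (x0, y0), (x1, y1) = a_list
    mid = np.array([(x0 + x1) / 2.0, (y0 + y1) / 2.0])
    mids = (tunnel[:, 0:2] + tunnel[:, 2:4]) / 2.0
    return np.linalg.norm(mids - mid, axis=1).tolist()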
Example No. 5
def do_socket_find_toll():
    # Scan range; to add a new network, list the first three octets of its IPs here
    # all_range = ['10.34.22'] #Wu Zhuang
    all_range = ['10.134.35']  #Chu Zhou Dong
    # default 0
    range_start = 0
    # default 255
    range_end = 255
    core_max_workers = 200
    find_result = []
    find_ips = []
    pool = Pool(max_workers=core_max_workers)
    for ip_statement in all_range:
        for find_ip in range(range_start, range_end):
            find_ips.append("{}.{}".format(ip_statement, find_ip))
    find_sum = len(find_ips)
    find_success = 0
    find_error = 0
    print("scan length:{}".format(find_sum))
    for result in pool.map(connect_toll, find_ips):
        if result is not None:
            find_success += 1
            find_result.append(result)
        else:
            find_error += 1
    print("需要扫描:{}个IP".format(find_sum))
    print("有效IP:{}个".format(find_success))
    print("无效IP:{}个".format(find_error))
    return find_result
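connect_toll is defined elsewhere; judging by the "result is not None" check above, it returns something truthy for reachable hosts. A hypothetical sketch of such a worker based on a plain TCP connection test (the port and timeout are assumptions):

import socket

def connect_toll(ip, port=80, timeout=1.0):
    # hypothetical worker: return the IP if the port accepts a TCP connection, else None
    try:
        with socket.create_connection((ip, port), timeout=timeout):
            return ip
    except OSError:
        return None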
Example No. 6
def add_spectra(analogue, spectra, temperature_parser):
    # Pre-download using multiple threads
    with Pool(max_workers=pool_size) as e:
        for spectrum in e.map(download, spectra):
            temperature = float(temperature_parser(spectrum))
            add_spectrum(analogue.id, spectrum, temperature)
    db.session.commit()
Example No. 7
def stat_multi_vcf(vcf_lst, target_genes, p_num=5):
    vcfs = list()
    with open(vcf_lst) as f:
        for line in f:
            vcfs.append(line.strip())

    results = list()
    with Pool(p_num) as executor:
        for vcf in vcfs:
            future = executor.submit(target_mutate_status, vcf, target_genes)
            results.append(future)

    merge_result = dict()
    for each in results:
        merge_result.update(each.result())

    df = pd.DataFrame(merge_result)
    df.index.name = 'Genes'
    gene_mutated_ratio = df.apply(
        lambda x: round(sum(bool(v) for v in x) / df.shape[0], 3), axis=0)
    df.loc['mutated_ratio'] = gene_mutated_ratio
    sample_mutated_ratio = df.apply(
        lambda x: round(sum(bool(v) for v in x) / df.shape[1], 3), axis=1)
    df['mutated_ratio'] = sample_mutated_ratio
    order = sorted(df.index)
    df = df.loc[order]
    df.to_csv('stats.csv')
Example No. 8
    def __init__(self, args):
        # current number of processes running
        self.n_workers = 0

        # index of the next dataset to run
        self.dataset_index = 0

        self.exp_args = args

        if self.exp_args.regression:
            if self.exp_args.debug:
                # self.datasets = ['1595_poker', '537_houses', '215_2dplanes', '1096_FacultySalaries']
                self.datasets = [
                    '1096_FacultySalaries', '192_vineyard',
                    '690_visualizing_galaxy', '665_sleuth_case2002',
                    '485_analcatdata_vehicle'
                ]
            else:
                # just using the ones with a small number of features for regression, since there are a lot anyway
                self.datasets = small_continuous_regression
        else:
            if self.exp_args.debug:
                self.datasets = [
                    'banana', 'appendicitis', 'diabetes', 'titanic'
                ]
            else:
                self.datasets = full_continuous

        self.n_datasets = len(self.datasets)

        if not os.path.exists(self.exp_args.folder):
            os.makedirs(self.exp_args.folder)

        self.pool = Pool(max_workers=self.exp_args.max_workers)
Example No. 9
def restore_data(target_path, threads=0):
    """
    对指定的路径中的所有文件进行还原提取
    :param target_path: such as s3://epionengs/80011001_HCC_Metastase/Methylation/
    :param threads: thread number, 为0时不使用多线程,默认不使用,因为命令执行很快完成,aws会自动并发还原,无需等待
    :return:
    """

    print('还原该目录: ')
    os.system(f'aws s3 ls --recursive {target_path} > all.files')
    paths = [re.split('\s+', x, 3)[3] for x in open('all.files')]

    with open('all.files.path', 'w') as f:
        _ = [f.write(x) for x in paths]

    with open('restore.cmd.list', 'w') as f:
        cmds = []
        for path in paths:
            cmd = 'aws s3api restore-object '
            cmd += '--bucket epionengs '
            cmd += '--key "{}" '.format(path.strip())
            cmd += """--restore-request '{"Days":2,"GlacierJobParameters":{"Tier":"Standard"}}' """
            cmds.append(cmd)
            f.write(cmd + '\n')

    if threads:
        with Pool(threads) as pool:
            pool.map(run_cmd, cmds)
    else:
        for each in cmds:
            subprocess.check_call(each, shell=True)
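run_cmd is not shown; given that the sequential fallback calls subprocess.check_call(each, shell=True), the worker is presumably just a thin wrapper around the same call (sketch):

import subprocess

def run_cmd(cmd):
    # presumed worker mirroring the sequential branch above
    subprocess.check_call(cmd, shell=True)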
Example No. 10
def test_concurrent_reads(tmp_path: Path, fuzz_cachew_impl):
    cache_path = tmp_path / 'cache.sqlite'
    from concurrent.futures import ProcessPoolExecutor as Pool

    count = 10
    # warm up
    _concurrent_helper(cache_path, count, sleep_s=0)

    processes = 4

    import time
    start = time.time()
    with Pool() as pool:
        futures = [
            pool.submit(_concurrent_helper, cache_path, count, 1)
            for _ in range(processes)
        ]

        for f in futures:
            print(f.result())
    end = time.time()

    taken = end - start
    # should be pretty instantaneous
    # if it takes more, most likely means that helper was called again
    assert taken < 5
Example No. 11
def plot_all(limit=None):
    from bokeh.io import export_png, export_svgs, save
    # todo add min/max date?
    # todo multiple threads?

    # todo extract this mock data into HPI
    # use some python library to generate it
    real = True
    if real:
        df = locations_dataframe()
    else:
        # todo move to hpi or something
        idf = pd.DataFrame([{
            'dt': datetime.strptime('20200101', '%Y%m%d') + timedelta(minutes=30 * x),
            'lat': max((0.01 * x) % 90, 0.1),
            'lon': max((0.01 * x) % 90, 0.1),
        } for x in range(1, 1000)])
        df = idf.set_index('dt')

    # todo make defensive, collect errors
    def process(day_and_grp):
        day, grp = day_and_grp
        # todo uhoh. chromedriver might die?
        days = day.strftime('%Y%m%d')

        fname = f'output/{days}.png'

        # TODO 20170402 -- float division by zero..
        if days <= '20170403':
            print(f'skipping {fname}')
            return
        p = plot(day=days, df=grp)

        print(f'saving {fname}')
        if True:
            export_png(p, filename=fname)
        else:
            # hmm, this doesn't produce the background (map). but faster to dump?
            p.output_backend = 'svg'
            export_svgs(p, filename=fname)

    inputs = [(day, grp) for day, grp in df.groupby(lambda x: x.date())]

    # todo ugh. pretty sure it's resulting in race conditions... (probably because of shared chromedriver?) need to test it properly
    from concurrent.futures import ThreadPoolExecutor as Pool
    # todo and process pool executor just gets stuck?

    parallel = False

    if parallel:
        with Pool() as pool:
            for _ in pool.map(process, inputs):
                pass
    else:
        for _ in map(process, inputs):
            pass
Example No. 12
def my_test_1():
    # step_list = np.pi * np.linspace(0.001, 0.5, 50)
    step_list = np.pi * np.linspace(0.001, 0.05, 30)

    pool = Pool(max_workers=3)
    infor_list = pool.map(run_many_times, step_list)  # sped up with multiple processes

    # infor_list = [run_many_times(step) for step in step_list]

    infor_list = sorted(list(infor_list))
    print(infor_list)

    with open('mydata2.csv', 'w') as f:
        f.write('{},{},{},{},{},{},{},{},{},{},{}\n'.format(
            'theta', 'avg', 'median', 'best', 'global_best', 'better_count',
            'first_better_generation', 'best_in_many_times',
            'best_x_in_many_times', 'best_y_in_many_times',
            'best_chrom_in_many_times'))

    for s, d in infor_list:
        for k, v in d.items():
            print(k, v)
        with open('mydata2.csv', 'a') as f:
            f.write("{},{},{},{},{},{},{},{},{},{},{}\n".format(
                s.replace('pi', ''), d['avg_fitness'], d['median_fitness'],
                d['best_fitness'], d['global_best_fitness'],
                d['better_fitness_count'],
                d['first_better_fitness_generation'],
                d['best_fitness_in_many_times'],
                d['best_fitness_solution_in_many_times'][0],
                d['best_fitness_solution_in_many_times'][1],
                d['best_fitness_chrom_in_many_times']))
Example No. 13
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('package', type=str, nargs='+')
    parser.add_argument('--target',
                        type=str,
                        choices=SEMVER.keys(),
                        default='major')
    parser.add_argument('--commit',
                        action='store_true',
                        help='Create a commit for each package update')

    args = parser.parse_args()
    target = args.target

    packages = list(map(os.path.abspath, args.package))

    logging.info("Updating packages...")

    # Use threads to update packages concurrently
    with Pool() as p:
        results = list(p.map(lambda pkg: _update(pkg, target), packages))

    logging.info("Finished updating packages.")

    # Commits are created sequentially.
    if args.commit:
        logging.info("Committing updates...")
        list(map(lambda x: _commit(**x), filter(bool, results)))
        logging.info("Finished committing updates")

    count = sum(map(bool, results))
    logging.info("{} package(s) updated".format(count))
Example No. 14
def batch_graphs(smis: Iterable[str], minibatch_size: int = 50,
                 n_workers: int = 1) -> Iterator[BatchMolGraph]:
    """Generate BatchMolGraphs from the SMILES strings

    Uses parallel processing to buffer a chunk of BatchMolGraphs into memory,
    where the chunksize is equal to the number of workers available. Only
    prepares one chunk at a time due to the exceedingly large memory footprint 
    of a BatchMolGraph

    Parameters
    ----------
    smis : Iterable[str]
        the SMILES strings from which to generate BatchMolGraphs
    minibatch_size : int
        the number of molecular graphs in each BatchMolGraph
    n_workers : int
        the number of workers to parallelize BatchMolGraph preparation over
    
    Yields
    ------
    BatchMolGraph
        a batch of molecular graphs of size <minibatch_size>
    """
    # need a dataset if we're going to use features
    # test_data = MoleculeDataset([
    #     MoleculeDatapoint(smiles=smi,) for smi in smis
    # ])
    chunksize = minibatch_size*n_workers
    with Pool(max_workers=n_workers) as pool:
        for chunk_smis in batches(smis, chunksize):
            smis_minibatches = list(batches(chunk_smis, minibatch_size))
            for batch_graph in pool.map(mol2graph, smis_minibatches):
                yield batch_graph
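batches is not shown; both calls above treat it as a helper that chunks an iterable into lists of at most a given size. A hypothetical sketch compatible with those calls:

from itertools import islice
from typing import Iterable, Iterator, List, TypeVar

T = TypeVar('T')

def batches(xs: Iterable[T], size: int) -> Iterator[List[T]]:
    # hypothetical chunking helper: yield consecutive lists of up to `size` items
    it = iter(xs)
    while chunk := list(islice(it, size)):
        yield chunk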
Example No. 15
def test_sqlite_read_with_wal(tmp_path: Path) -> None:
    db = tmp_path / 'db.sqlite'
    # write a bit
    with sqlite3.connect(str(db)) as conn:
        conn.execute('CREATE TABLE testtable (col)')
        for i in range(5):
            conn.execute('INSERT INTO testtable (col) VALUES (?)', str(i))

    # write more in WAL mode
    with sqlite3.connect(str(db)) as conn_db:
        conn_db.execute('PRAGMA journal_mode=wal;')
        for i in range(5, 10):
            conn_db.execute('INSERT INTO testtable (col) VALUES (?)', str(i))
        conn_db.execute('COMMIT')

        # make sure it has unflushed stuff in wal
        wals = list(db.parent.glob('*-wal'))
        assert len(wals) == 1

        ## now run the tests in separate process to ensure there is no potential for reusing sqlite connections or something
        from concurrent.futures import ProcessPoolExecutor as Pool
        with Pool(1) as pool:
            # merely using it for ctx manager..
            pool.submit(_test_do_copy         , db).result()
            pool.submit(_test_do_immutable    , db).result()
            pool.submit(_test_do_copy_and_open, db).result()
            pool.submit(_test_open_asis       , db).result()
Example No. 16
def calculateCorrelationsToCentroidInParallel(voxelIndices,
                                              allVoxelTs,
                                              centroidIndices,
                                              nCPUs=5):
    """
    A wrapper function for calculating the correlation to ROI centroid in parallel across
    ROIs.
    
    Parameters:
    -----------
    voxelIndices: list of np.arrays, each array containing indices of voxels of one ROI;
               these indices should refer to voxels' locations in the file containing voxel
               time series; note that the chunk must contain at least one voxel
    allVoxelTs: structured np.array with a field name 'roi_voxel_ts' (and possible additional 
                fields), this field contains voxel time series
    centroidIndices: np.array of indices of ROI centroids in allVoxelTs
    nCPUs: int, number of CPUs to be used for the parallel computing (default = 5)
    
    
    Returns:
    --------
    correlationsToCentroid: list of doubles, correlations of the voxel ts defined
                          by voxelIndices to the ROICentroids
    """
    cfg = {'allVoxelTs': allVoxelTs}
    paramSpace = [(cfg, {
        'voxelIndices': voxelInd,
        'centroidIndex': centroidIndex
    }) for voxelInd, centroidIndex in zip(voxelIndices, centroidIndices)]
    pool = Pool(max_workers=nCPUs)
    correlationsToCentroid = list(
        pool.map(calculateCorrelationToCentroid, paramSpace, chunksize=1))
    return correlationsToCentroid
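calculateCorrelationToCentroid receives the (cfg, params) tuples built above; a hypothetical sketch of a compatible worker, assuming allVoxelTs['roi_voxel_ts'] is a 2D array of voxel time series indexed consistently with voxelIndices and centroidIndex:

import numpy as np

def calculateCorrelationToCentroid(paramTuple):
    # hypothetical worker unpacking the (cfg, params) tuple built by the wrapper above
    cfg, params = paramTuple
    voxelTs = cfg['allVoxelTs']['roi_voxel_ts']
    centroidTs = voxelTs[params['centroidIndex']]
    return [np.corrcoef(voxelTs[i], centroidTs)[0, 1] for i in params['voxelIndices']]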
Example No. 17
def fetch_uniprot_data(in_csv, out_csv, protein_ids_to_ignore=None):
    protein_ids_to_ignore = (frozenset(protein_ids_to_ignore)
                             if protein_ids_to_ignore else frozenset())
    with open(in_csv, "r") as in_file:
        # write header
        with open(out_csv, "w") as out_file:
            w = csv.writer(out_file)
            w.writerow(["protein_id", "FASTA header", "FASTA seq"])
        # query uniprot
        with Pool(max_workers=8) as executor:
            for line in in_file.readlines():
                line = line.strip()
                if line:
                    # only one column
                    protein_ids = frozenset(x.strip() for x in line.split(
                        ";")).difference(protein_ids_to_ignore)
                    results = get_fastas(executor, protein_ids)
                    with open(out_csv, "a") as out_file:
                        w = csv.writer(out_file)
                        for protein_id, fasta in results:
                            if fasta:
                                fasta = fasta.split("\n")
                                f_head = fasta[0]
                                f_seq = "".join(fasta[1:])
                                w.writerow([protein_id, f_head, f_seq])
                            else:
                                w.writerow([protein_id, "", ""])
    def run_multithreaded(self):
        bodies = self.get_bodies()
        self.bodies_multithread(bodies)

        with Pool(self.threadcount) as executor:
            self.logger.info("Submitting concurrent tasks")
            futures = {}
            for body in bodies:
                future = executor.submit(self.list_caught, body.get_paper, self.paper)
                futures[future] = (body.get_short_name() or body.get_name()) + ": Paper"
                future = executor.submit(self.list_caught, body.get_person, self.person)
                futures[future] = (body.get_short_name() or body.get_name()) + ": Person"
                future = executor.submit(self.list_caught, body.get_organization, self.organization)
                futures[future] = (body.get_short_name() or body.get_name()) + ": Organization"
                future = executor.submit(self.list_caught, body.get_meeting, self.meeting)
                futures[future] = (body.get_short_name() or body.get_name()) + ": Meeting"
            self.logger.info("Finished submitting concurrent tasks")
            for future in concurrent.futures.as_completed(futures):
                err_count = future.result()
                if err_count == 0:
                    self.logger.info("Finished Successfully: {}".format(futures[future]))
                else:
                    self.logger.info("Finished with {} errors: {}".format(err_count, futures[future]))

        self.logger.info("Finished creating objects")
        self.add_missing_associations()

        for i in self.errorlist:
            self.logger.error(i)
Example No. 19
    def __init__(self, raw_args=None):
        self.args, self.unknowns = parser.parse_known_args(raw_args)
        os.environ["CLAIMER"] = self.args.claimer_id
        self.benchmark_downloader = DownloadBenchmarks(self.args, getLogger())
        self.adb = ADB(None, self.args.android_dir)
        setLoggerLevel(self.args.logger_level)
        if not self.args.benchmark_db_entry:
            assert (
                self.args.server_addr is not None
            ), "Either server_addr or benchmark_db_entry must be specified"
            while self.args.server_addr[-1] == "/":
                self.args.server_addr = self.args.server_addr[:-1]
            self.args.benchmark_db_entry = self.args.server_addr + "/benchmark/"
        self.db = DBDriver(
            self.args.benchmark_db,
            self.args.app_id,
            self.args.token,
            self.args.benchmark_table,
            self.args.job_queue,
            self.args.test,
            self.args.benchmark_db_entry,
        )
        self.device_manager = DeviceManager(self.args, self.db)
        self.devices = self.device_manager.getLabDevices()

        if self.args.platform.startswith("host"):
            numProcesses = 2
        else:
            numProcesses = multiprocessing.cpu_count() - 1
        self.pool = Pool(max_workers=numProcesses, initializer=hookSignals)
Example No. 20
 def post_processing(self, workspec, jobspec_list, map_type):
     # get logger
     tmpLog = core_utils.make_logger(_logger,
                                     'workerID={0}'.format(
                                         workspec.workerID),
                                     method_name='post_processing')
     try:
         for jobSpec in jobspec_list:
             # check if log is already there
             for fileSpec in jobSpec.outFiles:
                 if fileSpec.fileType == 'log':
                     continue
             logFileInfo = jobSpec.get_logfile_info()
             # make log.tar.gz
             accessPoint = self.get_access_point(workspec, jobSpec.PandaID)
             logFilePath = os.path.join(accessPoint, logFileInfo['lfn'])
             if map_type == WorkSpec.MT_MultiWorkers:
                 # append suffix
                 logFilePath += '._{0}'.format(workspec.workerID)
             tmpLog.debug('making {0}'.format(logFilePath))
             dirs = [
                 os.path.join(accessPoint, name)
                 for name in os.listdir(accessPoint)
                 if os.path.isdir(os.path.join(accessPoint, name))
             ]
             # tar sub dirs
             tmpLog.debug('tar for {0} sub dirs'.format(len(dirs)))
             with Pool(max_workers=multiprocessing.cpu_count()) as pool:
                 retValList = pool.map(tar_directory, dirs)
                 for dirName, (comStr, retCode, stdOut,
                               stdErr) in zip(dirs, retValList):
                     if retCode != 0:
                         tmpLog.warning(
                             'failed to sub-tar {0} with {1} -> {2}:{3}'.
                             format(dirName, comStr, stdOut, stdErr))
             # tar main dir
             tmpLog.debug('tar for main dir')
             comStr, retCode, stdOut, stdErr = tar_directory(
                 accessPoint, logFilePath, 1, ["*.subdir.tar.gz"])
             tmpLog.debug('used command : ' + comStr)
             if retCode != 0:
                 tmpLog.warning(
                     'failed to tar {0} with {1} -> {2}:{3}'.format(
                         accessPoint, comStr, stdOut, stdErr))
             # make json to stage-out the log file
             fileDict = dict()
             fileDict[jobSpec.PandaID] = []
             fileDict[jobSpec.PandaID].append({
                 'path': logFilePath,
                 'type': 'log',
                 'isZip': 0
             })
             jsonFilePath = os.path.join(accessPoint, jsonOutputsFileName)
             with open(jsonFilePath, 'w') as jsonFile:
                 json.dump(fileDict, jsonFile)
             tmpLog.debug('done')
         return True
     except Exception:
         core_utils.dump_error_message(tmpLog)
         return False
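tar_directory is defined elsewhere in the harvester code; both call sites above expect it to return (command, return code, stdout, stderr) and to accept an optional tarball path plus exclude patterns. A rough hypothetical sketch (the recursion-depth argument is accepted but not handled here):

import subprocess

def tar_directory(dir_name, tar_name=None, max_depth=None, excludes=None):
    # hypothetical helper returning (command, return code, stdout, stderr) as the call sites expect
    if tar_name is None:
        tar_name = dir_name.rstrip('/') + '.subdir.tar.gz'
    com = 'tar -C {0} -zcf {1}'.format(dir_name, tar_name)
    for pattern in excludes or []:
        com += ' --exclude "{0}"'.format(pattern)
    com += ' .'
    p = subprocess.Popen(com, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    std_out, std_err = p.communicate()
    return com, p.returncode, std_out, std_err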
Example No. 21
def add_optical_const(n_analogue, n_val, temperature=15):
    # Pre-download using multiple threads
    with Pool(max_workers=pool_size) as e:
        for n_spectrum in e.map(download_optc, n_val):
            temperature = temperature
            add_optc(n_analogue.id, n_spectrum, temperature)
    db.session.commit()
Example No. 22
    def start(self):
        logger.info("start archive upload statistic %s", self.archive_path)
        archive_stat = {
            "slave": {},
            "on": {},
            "dm_dn": {},
        }
        slave_statistic = archive_stat["slave"]
        dm_dn_statistic = archive_stat["dm_dn"]
        on_statistic = archive_stat["on"]
        logger.info("start statistic archive path %s", self.archive_path)

        if not os.path.exists(self.archive_path) or not os.path.isdir(
                self.archive_path):
            logger.error("archive path not exist %s", self.archive_path)
            return None

        last_date = datetime.datetime.now() - datetime.timedelta(days=1)
        last_date_format = last_date.strftime("%Y-%m-%d")

        future_tasks = []
        with Pool(max_workers=10) as executor:
            for archive in os.listdir(self.archive_path):
                last_archive_dir = os.path.join(self.archive_path, archive,
                                                last_date_format)

                if not os.path.exists(last_archive_dir) or not os.path.isdir(
                        last_archive_dir):
                    logging.error("archive dir no exist %s", last_archive_dir)
                    continue

                slave_statistic[archive] = {
                    "name": archive,
                    "num": len(os.listdir(last_archive_dir)) - 1,
                }

                future = executor.submit(self.statistic, last_archive_dir)
                future_tasks.append(future)

            wait(future_tasks)

            for f in future_tasks:
                res = f.result()
                ons = res["on"]
                dm_dns = res["dm_dn"]

                for on in ons:
                    if on in on_statistic:
                        on_statistic[on]["num"] += ons[on]["num"]
                    else:
                        on_statistic[on] = ons[on]

                for dm_dn in dm_dns:
                    if dm_dn in dm_dn_statistic:
                        dm_dn_statistic[dm_dn]["num"] += dm_dns[dm_dn]["num"]
                    else:
                        dm_dn_statistic[dm_dn] = dm_dns[dm_dn]

        self.save(archive_stat)
Example No. 23
 def check_workers(self, workspec_list):
     # make logger
     tmpLog = self.make_logger(baseLogger, method_name='check_workers')
     tmpLog.debug('start nWorkers={0}'.format(len(workspec_list)))
     with Pool() as pool:
         retList = pool.map(check_a_worker, workspec_list)
     tmpLog.debug('done')
     return True, retList
Example No. 24
def add_spec_cont(sc_analogue, sc_val, temperature=15, cont_model='test'):
    # Pre-download using multiple threads
    with Pool(max_workers=pool_size) as e:
        for sc_spectrum in e.map(download_sc, sc_val):
            temperature = temperature
            cont_model = cont_model
            add_sc_single(sc_analogue.id, sc_spectrum, temperature, cont_model)
    db.session.commit()
Example No. 25
 def simple_zip_output(self, jobspec, tmp_log):
     tmp_log.debug('start')
     self.zip_tmp_log = tmp_log
     self.zip_jobSpec = jobspec
     argDictList = []
     try:
         for fileSpec in jobspec.outFiles:
             if self.zipDir == "${SRCDIR}":
                 # the same directory as src
                 zipDir = os.path.dirname(
                     next(iter(fileSpec.associatedFiles)).path)
             elif self.zipDir == "${WORKDIR}":
                 # work dir
                 workSpec = jobspec.get_workspec_list()[0]
                 zipDir = workSpec.get_access_point()
             else:
                 zipDir = self.zipDir
             zipPath = os.path.join(zipDir, fileSpec.lfn)
             argDict = dict()
             argDict['zipPath'] = zipPath
             argDict['associatedFiles'] = []
             for assFileSpec in fileSpec.associatedFiles:
                 if os.path.exists(assFileSpec.path):
                     argDict['associatedFiles'].append(assFileSpec.path)
                 else:
                     assFileSpec.status = 'failed'
             argDictList.append(argDict)
         # parallel execution
         try:
             if hasattr(harvester_config, 'zipper'):
                 nThreadsForZip = harvester_config.zipper.nThreadsForZip
             else:
                 nThreadsForZip = harvester_config.stager.nThreadsForZip
         except Exception:
             nThreadsForZip = multiprocessing.cpu_count()
         with Pool(max_workers=nThreadsForZip) as pool:
             retValList = pool.map(self.make_one_zip, argDictList)
             # check returns
             for fileSpec, retVal in zip(jobspec.outFiles, retValList):
                 tmpRet, errMsg, fileInfo = retVal
                 if tmpRet is True:
                     # set path
                     fileSpec.path = fileInfo['path']
                     fileSpec.fsize = fileInfo['fsize']
                     fileSpec.chksum = fileInfo['chksum']
                     msgStr = 'fileSpec.path - {0}, fileSpec.fsize - {1}, fileSpec.chksum(adler32) - {2}' \
                         .format(fileSpec.path, fileSpec.fsize, fileSpec.chksum)
                     tmp_log.debug(msgStr)
                 else:
                     tmp_log.error(
                         'got {0} with {1} when zipping {2}'.format(
                             tmpRet, errMsg, fileSpec.lfn))
                     return tmpRet, 'failed to zip with {0}'.format(errMsg)
     except Exception:
         errMsg = core_utils.dump_error_message(tmp_log)
         return False, 'failed to zip with {0}'.format(errMsg)
     tmp_log.debug('done')
     return True, ''
Example No. 26
    def update_all(self) -> None:
        """Check every channel for new videos."""
        channels = self.database.get_channels()
        num_workers = unpack_optional(os.cpu_count(), lambda: 1) * 2

        with Pool(num_workers) as pool:
            videos = chain.from_iterable(pool.map(self._update_channel, channels))

        self.database.add_videos(videos)
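unpack_optional is assumed to fall back to the factory when the value is None (here: when os.cpu_count() cannot determine a count); a hypothetical sketch:

from typing import Callable, Optional, TypeVar

T = TypeVar('T')

def unpack_optional(value: Optional[T], default: Callable[[], T]) -> T:
    # hypothetical helper: return value unless it is None, otherwise call the default factory
    return value if value is not None else default()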
Example No. 27
    def start(self):
        logger.info("start image upload statistic %s", self.snapshots_dir)
        last_date = datetime.datetime.now() - datetime.timedelta(days=1)
        last_date_format = last_date.strftime("%Y-%m-%d")

        last_snapshots_dir = os.path.join(self.snapshots_dir, last_date_format)

        upload_statistic = {
            "company": {},
            "on": {},
            "on_dm_dn": {},
        }

        upload_company_statistic = upload_statistic["company"]
        upload_on_statistic = upload_statistic["on"]
        upload_on_dm_dn_statistic = upload_statistic["on_dm_dn"]

        future_tasks = []
        with Pool(max_workers=self.max_workers) as executor:
            for snapshot in os.listdir(last_snapshots_dir):
                snapshot_dir = os.path.join(last_snapshots_dir, snapshot)
                future = executor.submit(self.snapshots_statistic, snapshot_dir)
                future_tasks.append(future)

            wait(future_tasks)

            for f in future_tasks:
                res = f.result()
                for on in res:
                    dm_dn = res[on]["dm_dn"]
                    num = res[on]["num"]

                    if on not in upload_company_statistic:
                        upload_company_statistic[on] = []

                    if on in upload_on_statistic:
                        upload_on_statistic[on]["num"] += num
                    else:
                        upload_on_statistic[on] = {
                            "name": on,
                            "num": num,
                        }

                    for dm_dn_key in dm_dn:
                        on_dm_dn = "%s__%s" % (on, dm_dn_key)
                        if on_dm_dn in upload_on_dm_dn_statistic:
                            upload_on_dm_dn_statistic[on_dm_dn]["num"] += dm_dn[dm_dn_key]
                        else:
                            upload_on_dm_dn_statistic[on_dm_dn] = {
                                "on": on,
                                "name": dm_dn_key,
                                "num": dm_dn[dm_dn_key]
                            }

                        if dm_dn_key not in upload_company_statistic[on]:
                            upload_company_statistic[on].append(dm_dn_key)
        self.save(upload_statistic)
Example No. 28
        def build_extensions(self):
            self.check_extensions_list(self.extensions)

            ncpus = get_cpu_count()
            if ncpus > 0:
                with Pool(ncpus) as pool:
                    pool.map(self.build_extension, self.extensions)
            else:
                super().build_extensions()
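get_cpu_count is assumed to return 0 when the CPU count cannot be determined, which makes the ncpus > 0 guard fall back to the serial base-class build; a hypothetical sketch:

import os

def get_cpu_count():
    # hypothetical helper: number of CPUs, or 0 if it cannot be determined
    return os.cpu_count() or 0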
Example No. 29
 def __init__(self, ip='127.0.0.1', port=65432):
     self.ip = ip
     self.port = port
     self.MAX_CLIENTS = 3
     self.messages = Queue()  # synchronized message queue
     self.clients = []  # (client, client_id)
     self.addresses = []  # (ip, port)
     self.executor = Pool(max_workers=self.MAX_CLIENTS + 1)
     enable_logger(dir="logs/server", filename="server")
    def bodies_multithread(self, bodies):
        logging.info("Creating bodies")
        # Ensure all bodies exist when calling the other methods

        with Pool(self.threadcount) as executor:
            results = executor.map(self.body, bodies)

        # Raise those exceptions
        list(results)
        logging.info("Finished creating bodies")