Example #1
def check_org(path: Path):
    # TODO not sure about org?
    org_files = list(sorted(path.rglob('*.org')))

    from concurrent.futures import ProcessPoolExecutor as Pool
    with Pool() as pool:
        for f, res in zip(org_files, pool.map(check_aux, org_files)):
            for x in res:
                # TODO collect errors, report once?
                raise Failed(f, x)
Example #2
def download(files, outdir=os.getcwd(), bucket='epionengs', threads=3):
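    # 'files' may be a list of S3 keys or the path to a text file with one key per line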
    if isinstance(files, str):
        files = [x.strip() for x in open(files)]
    cmds = list()
    for each in files:
        cmd = 'aws s3 cp --only-show-errors s3://{bucket}/{each} {outdir}'.format(
            bucket=bucket, each=each, outdir=outdir)
        cmds.append(cmd)
    with Pool(threads) as pool:
        pool.map(run_cmd, cmds)
Example #3
async def bootstrap_server(app, loop):  # pragma: no cover
    '''
    Preload stuff
    '''

    with open('VERSION', 'r') as f:
        app.VERSION = f.read()

    app.model = Model()
    app.executor = Pool(max_workers=10)
Example #4
 def simple_zip_output(self, jobspec, tmp_log):
     tmp_log.debug('start')
     self.zip_tmp_log = tmp_log
     self.zip_jobSpec = jobspec
     argDictList = []
     try:
         for fileSpec in jobspec.outFiles:
             if self.zipDir == "${SRCDIR}":
                 # the same directory as src
                 zipDir = os.path.dirname(
                     next(iter(fileSpec.associatedFiles)).path)
             elif self.zipDir == "${WORKDIR}":
                 # work dir
                 workSpec = jobspec.get_workspec_list()[0]
                 zipDir = workSpec.get_access_point()
             else:
                 zipDir = self.zipDir
             zipPath = os.path.join(zipDir, fileSpec.lfn)
             argDict = dict()
             argDict['zipPath'] = zipPath
             argDict['associatedFiles'] = []
             for assFileSpec in fileSpec.associatedFiles:
                 if os.path.exists(assFileSpec.path):
                     argDict['associatedFiles'].append(assFileSpec.path)
                 else:
                     assFileSpec.status = 'failed'
             argDictList.append(argDict)
         # parallel execution
         try:
             nThreadsForZip = harvester_config.stager.nThreadsForZip
         except Exception:
             nThreadsForZip = multiprocessing.cpu_count()
         with Pool(max_workers=nThreadsForZip) as pool:
             retValList = pool.map(self.make_one_zip, argDictList)
             # check returns
             for fileSpec, retVal in zip(jobspec.outFiles, retValList):
                 tmpRet, errMsg, fileInfo = retVal
                 if tmpRet is True:
                     # set path
                     fileSpec.path = fileInfo['path']
                     fileSpec.fsize = fileInfo['fsize']
                     fileSpec.chksum = fileInfo['chksum']
                     msgStr = 'fileSpec.path - {0}, fileSpec.fsize - {1}, fileSpec.chksum(adler32) - {2}' \
                         .format(fileSpec.path, fileSpec.fsize, fileSpec.chksum)
                     tmp_log.debug(msgStr)
                 else:
                     tmp_log.error(
                         'got {0} with {1} when zipping {2}'.format(
                             tmpRet, errMsg, fileSpec.lfn))
                     return tmpRet, 'failed to zip with {0}'.format(errMsg)
     except Exception:
         errMsg = core_utils.dump_error_message(tmp_log)
         return False, 'failed to zip with {0}'.format(errMsg)
     tmp_log.debug('done')
     return True, ''
Example #5
def download(urls, dest_dir, jobs=PARALLEL_DOWNLOADS):
    with Pool(max_workers=jobs) as pool:
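        # submit one download task per URL and yield each wheel name as soon as its download finishes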
        futures = [pool.submit(download1, url, dest_dir) for url in urls]
        try:
            for future in as_completed(futures):
                wheel_name = future.result()
                yield wheel_name
        except KeyboardInterrupt:
            for future in futures:
                future.cancel()
            raise
Example #6
def _get_pool(pool_type: Type, max_workers: int) -> Pool:
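    # alias the executor's map as "imap" so callers can use the same interface for both pool types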
    if pool_type == ThreadPool:
        pool = ThreadPool(max_workers)
        setattr(pool, "imap", pool.map)
        return pool
    elif pool_type == Pool:
        pool = Pool(max_workers, mp_context=get_context("spawn"))
        setattr(pool, "imap", pool.map)
        return pool
    else:
        raise TypeError(f"Unknown pool type: {pool_type}")
Example #7
    def update_one(self, yt_channel_id) -> None:
        """Check every channel for new videos."""
        channels = self.database.get_channels_filter(
            yt_channel_id)  #for channel
        num_workers = unpack_optional(os.cpu_count(), lambda: 1) * 2

        with Pool(num_workers) as pool:
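            # update each channel in parallel and flatten the per-channel video lists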
            videos = chain.from_iterable(
                pool.map(self._update_channel, channels))

        self.database.add_videos(videos)
Example #8
def applybpe(codes, outdir, files):
    outdir = Path(outdir)
    bpe = create_subword_bpe(codes)

    with Pool() as pool:
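        # BPE-encode each input file line by line in worker processes and write the joined output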
        for path in files:
            path = Path(path)
            outfile = outdir.joinpath(path.name)
            with open(path, 'r') as fp:
                bped = pool.map(bpe.process_line, fp, chunksize=1024)
                outfile.write_text(''.join(bped))
Example #9
def main():
    output_dict = collections.defaultdict(list)
    nproc = args.nproc
    with Pool(nproc) as pool:
        for i, output_data in enumerate(pool.map(calc_vpf, range(args.Nvpf))):
            if i % nproc == nproc - 1:
                print(i)
                print(datetime.now())
            output_dict['vpf'].append(output_data)
    with Pool(nproc) as pool:
        for i, output_data in enumerate(pool.map(calc_ds, range(args.Nds))):
            if i % nproc == nproc - 1:
                print(i)
                print(datetime.now())
            output_dict['deltasigma'].append(output_data)

    for name in output_names:
        output_dict[name] = np.array(output_dict[name])

    np.savez(args.outfile, **output_dict)
Example #10
def _tokenize_mp(lines):
    length = len(lines)
    cpus = os.cpu_count()
    chunks = list(map('\n'.join, chunked(lines, 10000)))
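    # tokenize each 10000-line chunk in a separate worker, then split the results back into lines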
    with Pool(cpus) as pool:
        output = pool.map(_tokenize, chunks)

    output = list(chain.from_iterable(i.split('\n') for i in output))
    assert length == len(output), 'Input: {} lines, output: {} lines'.format(
        length, len(output))
    return output
Example #11
def feature_matrix(xs: Iterable[T], featurize: Callable[[T], np.ndarray],
                   ncpu: int = 0) -> np.ndarray:
    """Calculate the feature matrix of xs with the given featurization
    function"""
    if ncpu <= 1:
        X = [featurize(x) for x in tqdm(xs, desc='Featurizing', smoothing=0.)]
    else:
        with Pool(max_workers=ncpu) as pool:
            X = list(tqdm(pool.map(featurize, xs), desc='Featurizing'))
    
    return np.array(X)
Example #12
 def run(self):
     """多进程跑"""
     tasks = []
     ranges = self.cutRange(self.start, self.end, self.workers)
     start_time = time.time()
     with Pool(max_workers=self.workers) as executor:
         for start, end in ranges:
             print("processor start: %s, end: %s" % (start, end))
             tasks.append(executor.submit(self.singleProcess, start, end))
         for task in tasks:
             task.result()
     print("total time: %s" % (time.time() - start_time))
Example #13
    def run(self):
        """Runs emacsclient."""
        if self._lisp is not None:
            self._cmd.append('-e')
            self._cmd.append(self._lisp)
        elif len(self._files) > 0:
            for f in self._files:
                self._cmd.append(f)

        pool = Pool(max_workers=1)
        proc = pool.submit(subprocess.call, self._cmd, env=self._env)
        return proc
Example #14
def main(model_gen_func, fiducial, output_fname):
    global model
    model = model_gen_func()
    global fid
    fid = np.array(fiducial)
    params = fid * np.ones((7 * args.Nparam, 7))

    dp_range = np.array((0.11, 0.05, 0.225, 0.9, 0.12))

    for i in range(5):
        params[args.Nparam * i:args.Nparam * (i + 1),
               i] += (2. * np.random.random(args.Nparam) - 1) * min(
                   dp_range[i], fid[i])
    params[args.Nparam * 5:args.Nparam * 6,
           5] = 2. * np.random.random(args.Nparam) - 1
    params[args.Nparam * 6:args.Nparam * 7,
           6] = 2. * np.random.random(args.Nparam) - 1

    output_dict = collections.defaultdict(list)
    nproc = args.nproc

    global halocat

    with Pool(nproc) as pool:
        if args.simname == 'consuelo20' and args.version == 'all':
            for box in consuelo20_box_list:
                halocat = CachedHaloCatalog(simname = args.simname, version_name = box,redshift = args.redshift, \
                                halo_finder = args.halofinder)
                model.populate_mock(halocat)
                for i, output_data in enumerate(
                        pool.map(calc_all_observables, params)):
                    if i % nproc == nproc - 1:
                        print(i)
                        print(datetime.now())
                    for name, data in zip(output_names, output_data):
                        output_dict[name].append(data)
                print(box)
        else:
            halocat = CachedHaloCatalog(simname = args.simname, version_name = args.version,redshift = args.redshift, \
                                halo_finder = args.halofinder)
            model.populate_mock(halocat)
            for i, output_data in enumerate(
                    pool.map(calc_all_observables, params)):
                if i % nproc == nproc - 1:
                    print(i)
                    print(datetime.now())
                for name, data in zip(output_names, output_data):
                    output_dict[name].append(data)

    for name in output_names:
        output_dict[name] = np.array(output_dict[name])

    np.savez(output_fname, **output_dict)
Example #15
 def evolve(self, n_pop, env, data=None):
     models = [(self.clone().noise(std=100), i) for i in range(n_pop)]
     with Pool(8) as p:
         rewards = p.map(lambda mdl: env(data, *mdl), models)
     rewards = np.array(list(rewards))
     goods = sum(rewards >= 1)
     total = sum(rewards)
     for m, r in zip(models, rewards):
         m = m[0]
         for tp, mp in zip(self.parameters(), m.parameters()):
             tp.add_(mp * 0.1 * (int(r) / int(total)))
     return goods
Example #16
def main(max_workers):
    logger.info("begin")
    with Pool(max_workers=max_workers) as pool:
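        # submit one ffmpeg remux (flv -> mp4, streams copied) per file and report completion via the callback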
        for file in listdir('flv'):
            file = 'flv/' + file
            f = pool.submit(
                call,
                'ffmpeg -loglevel quiet -y -i {0} -vcodec copy -acodec copy {1}.mp4'
                .format(file,
                        splitext(file)[0]),
                shell=True)
            f.add_done_callback(callback)
Example #17
    def work(self):
        try:
            products = self.get_products()

            with Pool(32) as pl:
                pl.map(self.get_data_by_id, products)
        except Exception as why:
            self.logger.error('fail to push wish template  cause of {} '.format(why))
            name = os.path.basename(__file__).split(".")[0]
            raise Exception(f'fail to finish task of {name}')
        finally:
            self.close()
Example #18
def scrape_todos_links(index: Dict) -> Dict:
    index = deepcopy(index)

    def mapper(k: str, v: Dict) -> Tuple[str, Dict]:
        scraper = configuration['scrape_links'](k)  # type: ignore
        return k, scrape_links_topico(v, scraper=scraper)

    print('Scraping links de:\n\t' + "\n\t".join(index))
    with Pool(10) as executor:
        return dict(
            executor.map(mapper, *zip(*index.items()))
        )
Example #19
def main(model_gen_func, params_fname, params_usecols, output_fname):
    global model
    model = model_gen_func()

    median_w = np.median(np.loadtxt(params_fname, usecols=params_usecols),
                         axis=0)
    params = median_w * np.ones((500 + 7 * 1000, 7))  ##take medians

    dp_range = np.array((0.5, 0.5, 0.25, 0.5, 0.5, 1, 1))

    for i in params_usecols:
        params[1000 * i + 500:1000 * i + 1500,
               i] += (2. * np.random.random(1000) - 1) * dp_range[i]

    output_dict = collections.defaultdict(list)
    nproc = args.nproc

    global halocat
    global c

    with Pool(nproc) as pool:
        if args.simname == 'consuelo20' and args.version == 'all':
            for box in consuelo20_box_list:
                halocat = CachedHaloCatalog(simname = args.simname, version_name = box,redshift = args.redshift, \
                                halo_finder = args.halofinder)
                model.populate_mock(halocat)
                c = Ngal_estimate(halocat, 150000)
                for i, output_data in enumerate(
                        pool.map(calc_all_observables, params)):
                    if i % nproc == nproc - 1:
                        print(i)
                        print(datetime.now())
                    for name, data in zip(output_names, output_data):
                        output_dict[name].append(data)
                print(box)
        else:
            halocat = CachedHaloCatalog(simname = args.simname, version_name = args.version,redshift = args.redshift, \
                                halo_finder = args.halofinder)
            model.populate_mock(halocat)
            c = Ngal_estimate(halocat, 150000)
            for i, output_data in enumerate(
                    pool.map(calc_all_observables, params)):
                if i % nproc == nproc - 1:
                    print(i)
                    print(datetime.now())
                for name, data in zip(output_names, output_data):
                    output_dict[name].append(data)

    for name in output_names:
        output_dict[name] = np.array(output_dict[name])

    np.savez(output_fname, **output_dict)
Example #20
    def map_par(self, run_num):
        '''
        Maps out which stim and score-function pairs should be evaluated in parallel:
        first it finds the pairs with the highest weights, then maps them and adds up the score
        for each stim for every individual.

        Parameters
        --------------------
        run_num: the number of times neuroGPU has run for 8 stims

        Return
        --------------------
        2d list of scalar scores for each parameter set w/ shape (nindv, nstims)
        '''
        fxnsNStims = utils.top_SFs(
            run_num, score_function_ordered_list, self.weights,
            nGpus)  # 52 stim-sf combinations (stim#,sf#)
        with Pool(nCpus) as p:  # parallel mapping
            res = p.map(self.eval_stim_sf_pair, fxnsNStims)
        res = np.array(
            list(res)
        )  ########## important: map returns results with shape (# of sf stim pairs, nindv)
        res = res[:, :]
        prev_sf_idx = 0
        # look at key of each stim score pair to see how many stims to sum
        #num_selected_stims = len(set([pair[0] for pair in fxnsNStims])) # not always using 8 stims
        last_stim = (run_num +
                     1) * nGpus  # ie: 0th run last_stim = (0+1)*8 = 8
        first_stim = last_stim - nGpus  # on the last round this will be 24 - 8 = 16
        if last_stim > 18:
            last_stim = 18
        #print(last_stim, first_stim, "last and first")
        for i in range(first_stim, last_stim):  # iterate stims and sum
            num_sfs = sum([1 for pair in fxnsNStims if pair[0] == i
                           ])  #find how many sf indices for this stim
            #print([pair for pair in fxnsNStims if pair[0]==i], "pairs from : ", run_num)
            #print(fxnsNStims[prev_sf_idx:prev_sf_idx+num_sfs], "Currently evaluating")

            if i % nGpus == 0:
                weighted_sums = np.reshape(
                    np.sum(res[prev_sf_idx:prev_sf_idx + num_sfs, :], axis=0),
                    (-1, 1))
            else:
                #print(prev_sf_idx, "stim start idx", num_sfs, "stim end idx")
                curr_stim_sum = np.sum(res[prev_sf_idx:prev_sf_idx +
                                           num_sfs, :],
                                       axis=0)
                curr_stim_sum = np.reshape(curr_stim_sum, (-1, 1))
                weighted_sums = np.append(weighted_sums, curr_stim_sum, axis=1)
                #print(curr_stim_sum.shape," : cur stim sum SHAPE      ", weighted_sums.shape, ": weighted sums shape")
            prev_sf_idx = prev_sf_idx + num_sfs  # update score function tracking index
        return weighted_sums
Example #21
    def process_chunk(wr, chunk, pbar):
        with Pool(int(cpu_count()*args.cpu)) as p:
            chunksize = len(chunk)//10000 + 1
            d = list(set([x[0] for x in chunk]) | set([x[1] for x in chunk]))
            d = {k: v for k, v in zip(
                d, p.map(name_preprocessing, d, chunksize=chunksize))}
            a = [(x[0], d[x[0]]) for x in chunk]
            b = [(x[1], d[x[1]]) for x in chunk]

            for res in p.map(do, zip(a, b), chunksize=chunksize):
                pbar.update(1)
                if res:
                    wr.writerow(res)
Example #22
    def update_all(self) -> None:
        """Check every channel for new videos."""
        channels = self.database.get_channels()
        num_workers = unpack_optional(os.cpu_count(), lambda: 1) * 2

        with Pool(num_workers) as pool:
            videos = chain.from_iterable(
                pool.map(self._update_channel, channels))

        try:
            self.database.add_videos(videos)
        except sqlalchemy.exc.OperationalError as original:
            raise DatabaseOperationalError() from original
Example #23
    def work(self):
        try:
            tokens = self.get_ebay_token()
            with Pool(2) as pl:
                pl.map(self.update_inventory, tokens)

        except Exception as why:
            self.logger.error(
                'fail to update ebay inventory cause of {} '.format(why))
            name = os.path.basename(__file__).split(".")[0]
            raise Exception(f'fail to finish task of {name}')
        finally:
            self.close()
Example #24
def main():

    epilog = """
environment variables:
  GITHUB_API_TOKEN\tGitHub API token used when updating github packages
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, epilog=epilog)
    parser.add_argument('package', type=str, nargs='+')
    parser.add_argument('--target',
                        type=str,
                        choices=SEMVER.keys(),
                        default='major')
    parser.add_argument('--commit',
                        action='store_true',
                        help='Create a commit for each package update')
    parser.add_argument(
        '--use-pkgs-prefix',
        action='store_true',
        help=
        'Use python3Packages.${pname}: instead of python: ${pname}: when making commits'
    )

    args = parser.parse_args()
    target = args.target

    packages = list(map(os.path.abspath, args.package))

    logging.info("Updating packages...")

    # Use threads to update packages concurrently
    with Pool() as p:
        results = list(
            filter(bool, p.map(lambda pkg: _update(pkg, target), packages)))

    logging.info("Finished updating packages.")

    commit_options = {}
    if args.use_pkgs_prefix:
        logging.info("Using python3Packages. prefix for commits")
        commit_options["pkgs_prefix"] = "python3Packages."

    # Commits are created sequentially.
    if args.commit:
        logging.info("Committing updates...")
        # list forces evaluation
        list(map(lambda x: _commit(**x, **commit_options), results))
        logging.info("Finished committing updates")

    count = len(results)
    logging.info("{} package(s) updated".format(count))
Example #25
def build_extensions(self):
    """Function to monkey-patch
    distutils.command.build_ext.build_ext.build_extensions

    """
    self.check_extensions_list(self.extensions)

    try:
        num_jobs = os.cpu_count()
    except AttributeError:
        num_jobs = multiprocessing.cpu_count()

    with Pool(num_jobs) as pool:
        pool.map(self.build_extension, self.extensions)
Example #26
def main(model_gen_func, params_fname, params_usecols, output_fname):
    global model
    model = model_gen_func()

    nparams = args.Nreal * 77
    median_w = np.median(np.loadtxt(params_fname, usecols=params_usecols),
                         axis=0)
    params = median_w * np.ones((nparams, 7))  ##take medians

    for i in params_usecols:
        for j in range(11):
            params[11 * args.Nreal * i + args.Nreal * j:11 * args.Nreal * i +
                   args.Nreal * j + args.Nreal,
                   i] += args.stepsize[i] * (j - 5)

    output_dict = collections.defaultdict(list)
    nproc = args.nproc

    global halocat

    with Pool(nproc) as pool:
        if args.simname == 'consuelo20' and args.version == 'all':
            for box in consuelo20_box_list:
                halocat = CachedHaloCatalog(simname = args.simname, version_name = box,redshift = args.redshift, \
                                halo_finder = args.halofinder)
                model.populate_mock(halocat)
                for i, output_data in enumerate(
                        pool.map(calc_all_observables, params)):
                    if i % nproc == nproc - 1:
                        print(i)
                        print(datetime.now())
                    for name, data in zip(output_names, output_data):
                        output_dict[name].append(data)
                print(box)
        else:
            halocat = CachedHaloCatalog(simname = args.simname, version_name = args.version,redshift = args.redshift, \
                                halo_finder = args.halofinder)
            model.populate_mock(halocat)
            for i, output_data in enumerate(
                    pool.map(calc_all_observables, params)):
                if i % nproc == nproc - 1:
                    print(i)
                    print(datetime.now())
                for name, data in zip(output_names, output_data):
                    output_dict[name].append(data)

    for name in output_names:
        output_dict[name] = np.array(output_dict[name])

    np.savez(output_fname, **output_dict)
Example #27
 def __init__(self, server, nickname, port, channel, password):
     self.socket = socket.socket()
     self.server = server
     self.nickname = nickname
     self.port = port
     self.channel = channel
     self.password = password
     self.config = Config()
     self.osu = OsuApi(self.config.osu_api_key)
     self.pool = Pool(8)
     self.engine = create_engine(self.config.engine_str,
                                 **self.config.engine_args)
     self.Session = scoped_session(sessionmaker(bind=self.engine))
     self.senders = {}
Example #28
def fortran_execute():

    from concurrent.futures import ProcessPoolExecutor as Pool

    args = "sleep 2; echo complete"

    pool = Pool(max_workers=1)
    future = pool.submit(test)
    future.run_type = "run_type"
    future.jid = "jid"
    future.add_done_callback(fortran_callback)

    print("Fortran executed")
    return 111
Example #29
def QA_Fetcher(code, type_):
    with Pool(max_workers=40) as executor:
        future_tasks = [executor.submit(single_task, code, type_)]
        for f in future_tasks:
            if f.running():
                print('%s is running' % str(f))
        for f in as_completed(future_tasks):
            try:
                if f.done():
                    data = f.result()
                    return data
            except Exception as e:
                f.cancel()
                print(str(e))
Example #30
def run_predictor(input_folder: str):
    pool = Pool(max_workers=1)
    f = pool.submit(
        subprocess.call,
        f"""python predict.py \
            --sensitive --Transformation TPS --FeatureExtraction ResNet \
            --SequenceModeling BiLSTM --Prediction Attn \
            --saved_model /app/TPS-ResNet-BiLSTM-Attn-case-sensitive.pth \
            --workers 1 \
            --image_folder {input_folder}""",
        cwd="/app/deep-text-recognition-benchmark",
        shell=True,
    )
    f.add_done_callback(callback)
    pool.shutdown(wait=True)