Пример #1
0
async def test_flat_map_square_workers_async_5(nums: tp.List[int]):
    """Compare ``pl.task.flat_map`` fed by an async source with ``cz.mapcat``.

    The pypeln side squares the items produced by an async generator and
    expands each square with an async generator that awaits briefly before
    its last item, using ``workers=3``, ``timeout=0.1`` and ``maxsize=0``.
    Unless the reference result is empty, both results are compared after
    sorting, so ordering is not checked.
    """

    def _expand(x):
        # Reference expansion used on the pure-Python side.
        yield from (x, x + 1, x + 2)

    async def _expand_async(x):
        yield x
        yield x + 1
        # Pause before the final item.
        await asyncio.sleep(0.01)
        yield x + 2

    async def _source():
        # Expose ``nums`` as an async iterable.
        for item in nums:
            yield item

    expected = list(cz.mapcat(_expand, (x**2 for x in nums)))

    squared = pl.task.map(lambda x: x**2, _source())
    stage = pl.task.flat_map(
        _expand_async,
        squared,
        workers=3,
        timeout=0.1,
        maxsize=0,
    )
    actual = await stage

    assert expected == [] or sorted(actual) == sorted(expected)
Пример #2
0
def run(args):
    """Prune modules into regulons according to a configuration file.

    Reads the configuration named by ``args.config_filename``, loads the
    modules (from YAML when the file name ends in ``.yaml``, otherwise from a
    pickle), drops modules shorter than ``min_genes``, builds one
    ``RankingDatabase`` per file matched by the configured glob patterns,
    runs ``prune2df`` and writes the resulting frame with ``to_csv``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``config_filename``, ``input``, ``output`` and ``num_workers``.
            A truthy ``input`` / ``output`` overrides the corresponding
            path from the configuration file.
    """
    # Set logging level.
    logging_debug_opt = False
    LOGGER.addHandler(create_logging_handler(logging_debug_opt))
    LOGGER.setLevel(logging.DEBUG)

    LOGGER.info("Using configuration {}.".format(args.config_filename))
    cfg = ConfigParser()
    cfg.read(args.config_filename)

    in_fname = args.input or cfg['data']['modules']
    LOGGER.info("Loading modules from {}.".format(in_fname))
    # Loading from YAML is extremely slow. Therefore this is a potential performance improvement.
    # Potential improvements are switching to JSON or to use a CLoader:
    # https://stackoverflow.com/questions/27743711/can-i-speedup-yaml
    if in_fname.endswith('.yaml'):
        modules = load_from_yaml(in_fname)
    else:
        # NOTE(review): pickle.load executes arbitrary code embedded in the
        # file; only point this at module files from a trusted source.
        with open(in_fname, 'rb') as f:
            modules = pickle.load(f)
    # Filter out modules with too few genes.
    min_genes = int(cfg['parameters']['min_genes'])
    modules = [module for module in modules if len(module) >= min_genes]

    LOGGER.info("Loading databases.")

    def name(fname):
        # File name without directory and extension.
        return os.path.splitext(os.path.basename(fname))[0]

    # The configuration holds ';'-separated glob patterns.
    db_fnames = list(mapcat(glob.glob, cfg['data']['databases'].split(";")))
    dbs = [
        RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames
    ]

    LOGGER.info("Calculating regulons.")
    motif_annotations_fname = cfg['data']['motif_annotations']
    mode = cfg['parameters']['mode']
    with ProgressBar() if mode == "dask_multiprocessing" else NoProgressBar():
        df = prune2df(dbs,
                      modules,
                      motif_annotations_fname,
                      rank_threshold=int(cfg['parameters']['rank_threshold']),
                      auc_threshold=float(cfg['parameters']['auc_threshold']),
                      nes_threshold=float(cfg['parameters']['nes_threshold']),
                      client_or_address=mode,
                      # ConfigParser values are always strings; convert like
                      # the other numeric parameters above.
                      module_chunksize=int(cfg['parameters']['chunk_size']),
                      num_workers=args.num_workers)

    LOGGER.info("Writing results to file.")
    df.to_csv(args.output or cfg['parameters']['output'])
Пример #3
0
def test_flat_map_square_workers(nums):
    """Check ``pl.sync.flat_map`` with ``workers=3`` against ``cz.mapcat``.

    Both sides square every input and expand each square to
    ``x, x + 1, x + 2``. The results are compared after sorting, so ordering
    is not checked.
    """

    def _expand(x):
        yield from (x, x + 1, x + 2)

    expected = list(cz.mapcat(_expand, (x**2 for x in nums)))

    squared = pl.sync.map(lambda x: x**2, nums)
    actual = list(pl.sync.flat_map(_expand, squared, workers=3))

    assert sorted(actual) == sorted(expected)
Пример #4
0
def test_flat_map_square(nums):
    """Check ``pl.sync.flat_map`` with default settings against ``cz.mapcat``.

    Both sides square every input and expand each square to
    ``x, x + 1, x + 2``. The resulting lists must be equal, order included.
    """

    def _expand(x):
        yield from (x, x + 1, x + 2)

    expected = list(cz.mapcat(_expand, (x**2 for x in nums)))

    squared = pl.sync.map(lambda x: x**2, nums)
    actual = list(pl.sync.flat_map(_expand, squared))

    assert actual == expected
Пример #5
0
def test_flat_map_square_workers(nums: tp.List[int]):
    """Check ``pl.thread.flat_map`` with ``workers=3`` against ``cz.mapcat``.

    Both sides square every input and expand each square to
    ``x, x + 1, x + 2``. The results are compared after sorting, so ordering
    is not checked.
    """

    def _expand(x):
        yield from (x, x + 1, x + 2)

    expected = list(cz.mapcat(_expand, (x**2 for x in nums)))

    squared = pl.thread.map(lambda x: x**2, nums)
    actual = list(pl.thread.flat_map(_expand, squared, workers=3))

    assert sorted(actual) == sorted(expected)
Пример #6
0
def test_flat_map_square(nums: tp.List[int]):
    """Check ``pl.thread.flat_map`` with default settings against ``cz.mapcat``.

    Both sides square every input and expand each square to
    ``x, x + 1, x + 2``. The resulting lists must be equal, order included.
    """

    def _expand(x):
        yield from (x, x + 1, x + 2)

    expected = list(cz.mapcat(_expand, (x**2 for x in nums)))

    squared = pl.thread.map(lambda x: x**2, nums)
    actual = list(pl.thread.flat_map(_expand, squared))

    assert actual == expected
Пример #7
0
def test_flat_map_square_filter_workers(nums: tp.List[int]):
    """Check a ``pl.task`` map / flat_map / filter chain against ``cz``.

    Both sides square every input, expand each square to
    ``x, x + 1, x + 2`` and keep the values greater than 1. The pypeln
    ``flat_map`` stage uses ``workers=2``. The results are compared after
    sorting, so ordering is not checked.
    """

    def _expand(x):
        yield from (x, x + 1, x + 2)

    def _above_one(x):
        return x > 1

    reference = cz.mapcat(_expand, (x ** 2 for x in nums))
    expected = list(cz.filter(_above_one, reference))

    squared = pl.task.map(lambda x: x ** 2, nums)
    expanded = pl.task.flat_map(_expand, squared, workers=2)
    actual = list(pl.task.filter(lambda x: x > 1, expanded))

    assert sorted(actual) == sorted(expected)
Пример #8
0
def test_flat_map_square_filter_workers_pipe(nums):
    """Check the ``|`` pipe form of ``pl.sync`` stages against ``cz``.

    Both sides square every input, expand each square to
    ``x, x + 1, x + 2`` and keep the values greater than 1. The pypeln
    ``flat_map`` stage uses ``workers=3``. The results are compared after
    sorting, so ordering is not checked.
    """

    def _expand(x):
        yield from (x, x + 1, x + 2)

    def _above_one(x):
        return x > 1

    reference = cz.mapcat(_expand, (x**2 for x in nums))
    expected = list(cz.filter(_above_one, reference))

    # Build the pipeline one ``|`` step at a time, left to right.
    squared = nums | pl.sync.map(lambda x: x**2)
    expanded = squared | pl.sync.flat_map(_expand, workers=3)
    filtered = expanded | pl.sync.filter(lambda x: x > 1)
    actual = filtered | list

    assert sorted(actual) == sorted(expected)
Пример #9
0
def test_flat_map_square_filter_workers_pipe(nums: tp.List[int]):
    """Check the ``|`` pipe form of ``pl.thread`` stages against ``cz``.

    Both sides square every input, expand each square to
    ``x, x + 1, x + 2`` and keep the values greater than 1. The pypeln
    ``flat_map`` stage uses ``workers=3``. The results are compared after
    sorting, so ordering is not checked.
    """

    def _expand(x):
        yield from (x, x + 1, x + 2)

    def _above_one(x):
        return x > 1

    reference = cz.mapcat(_expand, (x**2 for x in nums))
    expected = list(cz.filter(_above_one, reference))

    # Build the pipeline one ``|`` step at a time, left to right.
    squared = nums | pl.thread.map(lambda x: x**2)
    expanded = squared | pl.thread.flat_map(_expand, workers=3)
    filtered = expanded | pl.thread.filter(lambda x: x > 1)
    actual = filtered | list

    assert sorted(actual) == sorted(expected)
Пример #10
0
async def test_flat_map_square_async_2(nums: tp.List[int]):
    """Check ``pl.task.flat_map`` with an async generator against ``cz.mapcat``.

    The pypeln stage is awaited to obtain its result, which must equal the
    reference list, order included.
    """

    def _expand(x):
        # Reference expansion used on the pure-Python side.
        yield from (x, x + 1, x + 2)

    async def _expand_async(x):
        yield x
        yield x + 1
        yield x + 2

    expected = list(cz.mapcat(_expand, (x**2 for x in nums)))

    squared = pl.task.map(lambda x: x**2, nums)
    actual = await pl.task.flat_map(_expand_async, squared)

    assert actual == expected
Пример #11
0
def test_flat_map_square_workers_async_1(nums: tp.List[int]):
    """Check ``pl.task.flat_map`` with an async generator, consumed via ``list``.

    The pypeln ``flat_map`` stage uses ``workers=3`` and is iterated
    synchronously. The results are compared after sorting, so ordering is
    not checked.
    """

    def _expand(x):
        # Reference expansion used on the pure-Python side.
        yield from (x, x + 1, x + 2)

    async def _expand_async(x):
        yield x
        yield x + 1
        yield x + 2

    expected = list(cz.mapcat(_expand, (x**2 for x in nums)))

    squared = pl.task.map(lambda x: x**2, nums)
    actual = list(pl.task.flat_map(_expand_async, squared, workers=3))

    assert sorted(actual) == sorted(expected)
Пример #12
0
async def test_flat_map_square_filter_workers_pipe_3(nums: tp.List[int]):
    """Check an awaited ``pl.task`` pipe with an async filter predicate.

    Both sides square every input, expand each square to
    ``x, x + 1, x + 2`` and keep the values greater than 1. The pypeln
    ``flat_map`` stage uses ``workers=3`` and the filter predicate is a
    coroutine function. The results are compared after sorting, so ordering
    is not checked.
    """

    def _expand(x):
        yield from (x, x + 1, x + 2)

    reference = cz.mapcat(_expand, (x ** 2 for x in nums))
    expected = list(cz.filter(lambda x: x > 1, reference))

    async def _above_one(x):
        return x > 1

    # Build the pipeline one ``|`` step at a time, then await the last stage.
    squared = nums | pl.task.map(lambda x: x ** 2)
    expanded = squared | pl.task.flat_map(_expand, workers=3)
    filtered = expanded | pl.task.filter(_above_one)
    actual = await filtered

    assert sorted(actual) == sorted(expected)
Пример #13
0
async def test_flat_map_square_async_4(nums: tp.List[int]):
    """Check ``pl.task.flat_map`` with a coroutine that returns a list.

    The pypeln side reads its input from an async generator and expands each
    square through a coroutine function returning ``[x, x + 1, x + 2]``. The
    awaited result must equal the reference list, order included.
    """

    def _expand(x):
        # Reference expansion used on the pure-Python side.
        yield from (x, x + 1, x + 2)

    async def _expand_async(x):
        # A plain coroutine (not an async generator): returns the whole batch.
        return [x, x + 1, x + 2]

    async def _source():
        # Expose ``nums`` as an async iterable.
        for item in nums:
            yield item

    expected = list(cz.mapcat(_expand, (x**2 for x in nums)))

    squared = pl.task.map(lambda x: x**2, _source())
    actual = await pl.task.flat_map(_expand_async, squared)

    assert actual == expected
Пример #14
0
 def _wrapped(total):
     # Partition ``total`` with ``partition_all(num, ...)``, run ``func`` over
     # the batches through ``mapcat`` and return the combined output as a list.
     return list(mapcat(func, partition_all(num, total)))
Пример #15
0
def systems_get(systems):
    """Get data for systems.

    ``systems`` is partitioned with ``partition_all(100, ...)``; each batch is
    passed to ``_systems_get`` through ``mapcat`` and the combined output is
    returned as a list.
    """
    return list(mapcat(_systems_get, partition_all(100, systems)))