def compute_up(expr, data, **kwargs):
    leaf = expr._leaves()[0]
    chunk = symbol(
        'chunk',
        DataShape(*(tuple(map(first, data.chunks)) + (leaf.dshape.measure,))))
    (chunk, chunk_expr), (agg, agg_expr) = split(expr._child, expr,
                                                 chunk=chunk)
    inds = tuple(range(ndim(leaf)))
    dtype = expr.dshape.measure.to_numpy_dtype()
    tmp = atop(
        curry(compute_it, chunk_expr, [chunk], **kwargs),
        inds,
        data,
        inds,
        dtype=dtype,
    )
    return atop(
        compose(
            curry(compute_it, agg_expr, [agg], **kwargs),
            curry(_concatenate2, axes=expr.axis),
        ),
        tuple(i for i in inds if i not in expr.axis),
        tmp,
        inds,
        dtype=dtype,
    )
def test_introspect_curry_py3():
    if not PY3:
        return
    f = toolz.curry(make_func(''))
    assert num_required_args(f) == 0
    assert is_arity(0, f)
    assert has_varargs(f) is False
    assert has_keywords(f) is False

    f = toolz.curry(make_func('x'))
    assert num_required_args(f) == 0
    assert is_arity(0, f) is False
    assert is_arity(1, f) is False
    assert has_varargs(f) is False
    assert has_keywords(f)  # A side-effect of being curried

    f = toolz.curry(make_func('x, y, z=0'))
    assert num_required_args(f) == 0
    assert is_arity(0, f) is False
    assert is_arity(1, f) is False
    assert is_arity(2, f) is False
    assert is_arity(3, f) is False
    assert has_varargs(f) is False
    assert has_keywords(f)

    f = toolz.curry(make_func('*args, **kwargs'))
    assert num_required_args(f) == 0
    assert has_varargs(f)
    assert has_keywords(f)
def load_objects_from_storage(metadata_filename: str,
                              class_factory: GenericObject,
                              root_directory: Path) -> dict:
    return excepting_pipe(
        list_files(root_directory, metadata_filename),
        curry(map)(load_json),
        curry(map)(class_factory.from_json),
        curry(map)(lambda _: (_.id, _)),
        dict)
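# Illustrative sketch (not from the source): the snippet above builds its
# pipeline out of curried stages.  `excepting_pipe` is project-specific, so
# this minimal, runnable analogue uses plain `toolz.pipe` to show how
# `curry(map)(fn)` produces a one-argument stage that a pipe can thread data
# through.  The record data below is hypothetical.
from toolz import curry, pipe

def _demo_curried_map_pipe():
    records = [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]
    # Each curry(map)(fn) call returns a callable awaiting only the iterable.
    return pipe(records,
                curry(map)(lambda r: (r['id'], r)),  # to (key, value) pairs
                dict)                                # collect into a dict

# _demo_curried_map_pipe() == {1: {'id': 1, 'name': 'a'},
#                              2: {'id': 2, 'name': 'b'}}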
def fold(self, binop, combine=None, initial=no_default, split_every=None):
    """ Parallelizable reduction

    Fold is like the builtin function ``reduce`` except that it works in
    parallel.  Fold takes two binary operator functions, one to reduce each
    partition of our dataset and another to combine results between
    partitions

    1.  ``binop``: Binary operator to reduce within each partition
    2.  ``combine``: Binary operator to combine results from binop

    Sequentially this would look like the following:

    >>> intermediates = [reduce(binop, part) for part in partitions]  # doctest: +SKIP
    >>> final = reduce(combine, intermediates)  # doctest: +SKIP

    If only one function is given then it is used for both functions
    ``binop`` and ``combine`` as in the following example to compute the sum:

    >>> def add(x, y):
    ...     return x + y

    >>> b = from_sequence(range(5))
    >>> b.fold(add).compute()  # doctest: +SKIP
    10

    In full form we provide both binary operators as well as their default
    arguments

    >>> b.fold(binop=add, combine=add, initial=0).compute()  # doctest: +SKIP
    10

    More complex binary operators are also doable

    >>> def add_to_set(acc, x):
    ...     ''' Add new element x to set acc '''
    ...     return acc | set([x])
    >>> b.fold(add_to_set, set.union, initial=set()).compute()  # doctest: +SKIP
    {1, 2, 3, 4, 5}

    See Also
    --------
    Bag.foldby
    """
    token = tokenize(self, binop, combine, initial)
    combine = combine or binop
    a = 'foldbinop-{0}-{1}'.format(funcname(binop), token)
    b = 'foldcombine-{0}-{1}'.format(funcname(combine), token)
    initial = quote(initial)
    if initial is not no_default:
        return self.reduction(curry(_reduce, binop, initial=initial),
                              curry(_reduce, combine),
                              split_every=split_every)
    else:
        from toolz.curried import reduce
        return self.reduction(reduce(binop), reduce(combine),
                              split_every=split_every)
def make_fold_vectorize(complexity=3, nbits=15, fold=None, boundaries=None):
    """Curry parameters in vectorizer."""
    vec = Vectorizer(complexity=complexity, nbits=nbits)
    vectorize = curry(lambda vec, graphs: vec.transform(graphs))(vec)
    cwindow_reweight = curry(_window_reweight)(boundaries)
    fold_vectorize = compose(vectorize, map(cwindow_reweight), fold)
    return fold_vectorize
def fold(self, binop, combine=None, initial=no_default, split_every=None):
    """ Parallelizable reduction

    Fold is like the builtin function ``reduce`` except that it works in
    parallel.  Fold takes two binary operator functions, one to reduce each
    partition of our dataset and another to combine results between
    partitions

    1.  ``binop``: Binary operator to reduce within each partition
    2.  ``combine``: Binary operator to combine results from binop

    Sequentially this would look like the following:

    >>> intermediates = [reduce(binop, part) for part in partitions]  # doctest: +SKIP
    >>> final = reduce(combine, intermediates)  # doctest: +SKIP

    If only one function is given then it is used for both functions
    ``binop`` and ``combine`` as in the following example to compute the sum:

    >>> def add(x, y):
    ...     return x + y

    >>> b = from_sequence(range(5))
    >>> b.fold(add).compute()  # doctest: +SKIP
    10

    In full form we provide both binary operators as well as their default
    arguments

    >>> b.fold(binop=add, combine=add, initial=0).compute()  # doctest: +SKIP
    10

    More complex binary operators are also doable

    >>> def add_to_set(acc, x):
    ...     ''' Add new element x to set acc '''
    ...     return acc | set([x])
    >>> b.fold(add_to_set, set.union, initial=set()).compute()  # doctest: +SKIP
    {1, 2, 3, 4, 5}

    See Also
    --------
    Bag.foldby
    """
    combine = combine or binop
    initial = quote(initial)
    if initial is not no_default:
        return self.reduction(curry(_reduce, binop, initial=initial),
                              curry(_reduce, combine),
                              split_every=split_every)
    else:
        from toolz.curried import reduce
        return self.reduction(reduce(binop), reduce(combine),
                              split_every=split_every)
def getgraphs(aid):
    if aid == 'bursi':
        return (list(gspan.gspan_to_eden("bursi.pos.gspan")),
                list(gspan.gspan_to_eden("bursi.neg.gspan")))
    download_active = curry(download)(active=True, stepsize=50)
    download_inactive = curry(download)(active=False, stepsize=50)
    active = pipe(aid, download_active, sdf_to_nx, list)
    inactive = pipe(aid, download_inactive, sdf_to_nx, list)
    return active, inactive
def test_urlzsource(self):
    lines4 = []
    with URLZSource('http://www.google.com/robots.txt').open() as f:
        take_and_rstrip = compose(curry(map, lambda l: rstrip(l, '\n')),
                                  curry(take, 4))
        lines4 = list(take_and_rstrip(f))

    print(str(lines4))
    self.assertGreaterEqual(len(lines4), 1,
                            "Failed to get more than 0 lines")
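# Illustrative sketch (not from the source): `toolz.compose` applies its
# arguments right to left, so the composition above takes the first four
# lines *before* stripping newlines.  A self-contained analogue on plain
# strings:
from toolz import compose, curry, take

def _demo_compose_order():
    lines = ['a\n', 'b\n', 'c\n', 'd\n', 'e\n']
    take_and_strip = compose(curry(map, lambda line: line.rstrip('\n')),
                             curry(take, 4))
    return list(take_and_strip(lines))  # -> ['a', 'b', 'c', 'd']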
def collect_ast():
    type_spec: dict = collections.defaultdict(set)

    def is_python(path: str):
        return path.endswith('.py') or Path(path).is_dir()

    all_python_files = linq.Flow(
        Path(Redy.__file__).parent().collect(is_python)).concat(
            Path(flask.__file__).parent().collect(is_python))._

    for each in all_python_files:
        with each.open('r', encoding='utf8') as file:
            try:
                ast_of_src_code = ast.parse(file.read())
                service = CollectASTTypeStub(type_spec)
                feature(service).just_apply_ast_transformation(ast_of_src_code)
            except SyntaxError:
                # other py version
                pass

    def snd(tp):
        return tp[1]

    stub_code = \
        (linq.Flow(type_spec)
         .map(lambda class_name, fields:
              linq.Flow(fields)
              .group_by(lambda fst, snd: fst)
              .map(lambda field_name, pairs:
                   '{}: {}'.format(
                       field_name,
                       compose(
                           str,
                           curry(reduce)(lambda a, b: a.union(b)),
                           curry(map)(snd))(pairs)))
              .then(
                  compose(
                      'class {}(AST):\n'.format(class_name).__add__,
                      lambda _: textwrap.indent(_, " " * 4),
                      lambda any_code: any_code if any_code else 'pass',
                      '\n'.join))
              ._)
         .then(
             compose(
                 'import typing, abc\nNoneType = None\n'.__add__,
                 'class AST(abc.ABC):\n def __init__(self, *args, lineno: int=None, colno: int=None, **kwargs): pass\n'.__add__,
                 '\n'.join)))._

    with Path('./').into('ast.pyi').open('w', encoding='utf8') as stub:
        stub.write(stub_code)

    with Path(ast.__file__).parent().into('ast.pyi').open(
            'w', encoding='utf8') as stub:
        stub.write(stub_code)
def list_snapshots_filtered(self, model_id: UUID, git_commit_id: str,
                            query: dict) -> Mapping[UUID, ModelSnapshot]:
    return excepting_pipe(
        self._snapshots.values(),
        curry(filter)(lambda v: v.model.id == ensure_uuid(model_id)),
        curry(filter)(lambda v: v.model_git_commit == git_commit_id),
        curry(sorted,
              key=lambda snap: getattr(snap, query['sortby']),
              reverse=query['order']),
        curry(partition_all)(query['limit'] if query['limit'] > 0
                             else len(self._snapshots.values())),
        list,
        curry(get, default=[])(query['offset']))
def list_results(self, query: dict) -> Iterator[Result]:
    """
    List all results
    :param query:
    :return:
    """
    return excepting_pipe(
        self._results.values(),
        curry(sorted,
              key=lambda x: getattr(x, query['sortby']),
              reverse=query['order']),
        curry(partition_all)(query['limit'] if query['limit'] > 0
                             else len(self._results.values())),
        list,
        curry(get, default=[])(query['offset']))
def test_normalize_function():
    def f1(a, b, c=1):
        pass

    def f2(a, b=1, c=2):
        pass

    def f3(a):
        pass

    assert normalize_function(f2)

    f = lambda a: a
    assert normalize_function(f)

    assert (normalize_function(partial(f2, b=2)) ==
            normalize_function(partial(f2, b=2)))
    assert (normalize_function(partial(f2, b=2)) !=
            normalize_function(partial(f2, b=3)))
    assert (normalize_function(partial(f1, b=2)) !=
            normalize_function(partial(f2, b=2)))

    assert (normalize_function(compose(f2, f3)) ==
            normalize_function(compose(f2, f3)))
    assert (normalize_function(compose(f2, f3)) !=
            normalize_function(compose(f2, f1)))

    assert normalize_function(curry(f2)) == normalize_function(curry(f2))
    assert normalize_function(curry(f2)) != normalize_function(curry(f1))
    assert (normalize_function(curry(f2, b=1)) ==
            normalize_function(curry(f2, b=1)))
    assert (normalize_function(curry(f2, b=1)) !=
            normalize_function(curry(f2, b=2)))
def graph_embed(data, target=None, confidence=1, n_iter=20,
                sample_fraction=.7, sample_p=None, feature_fraction=1,
                feature_p=None, alpha=0, gamma=1, beta=30):
    """Provide 2D embedding of high dimensional data."""
    # set parameters in all functions
    if sample_p is not None:
        sample_p = sample_p / np.sum(sample_p)
        sample_p = sample_p + 0.01
        sample_p = sample_p / np.sum(sample_p)
    if feature_p is not None:
        feature_p = feature_p / np.sum(feature_p)
        feature_p = feature_p + 0.01
        feature_p = feature_p / np.sum(feature_p)
    _sample = curry(sample)(sample_fraction=sample_fraction,
                            sample_p=sample_p,
                            feature_fraction=feature_fraction,
                            feature_p=feature_p)
    _step_func = curry(step_func)(alpha=alpha, gamma=gamma, beta=beta)
    _add_edge_length = curry(add_edge_length)(func=_step_func)
    _make_knn_graph = curry(make_knn_graph)(size=len(data),
                                            target=target,
                                            confidence=confidence)

    def make_graph(make_tree):
        count_dict = defaultdict(lambda: defaultdict(int))
        lengths_dict = defaultdict(lambda: defaultdict(list))
        for i in range(n_iter):
            sample_data, sample_ids, sample_feature_ids = _sample(data)
            sample_id_inv_map = make_id_map(sample_ids)
            distance_mtx = euclidean_distances(sample_data)
            rank_mtx = compute_ranks(distance_mtx)
            tree = make_tree(distance_mtx, sample_ids)
            _add_edge_length(tree, rank_mtx, sample_id_inv_map)
            update(count_dict, lengths_dict, tree)
        graph = _make_knn_graph(count_dict, lengths_dict)
        return graph

    mst_graph = make_graph(make_mst_tree)
    qks_graph = make_graph(make_qks_tree)
    graph = nx.compose(mst_graph, qks_graph)
    return graph
def list_snapshots(self, query: dict) -> Iterator[ModelSnapshot]:
    """
    List all model snapshots
    :param query:
    :return:
    """
    # TODO: Not completely pure pipeline
    return excepting_pipe(
        self._snapshots.values(),
        curry(sorted,
              key=lambda snap: getattr(snap, query['sortby']),
              reverse=query['order']),
        curry(partition_all)(query['limit'] if query['limit'] > 0
                             else len(self._snapshots.values())),
        list,
        curry(get, default=[])(query['offset']))
def test_EqualityHashKey_callable_key():
    # Common simple hash key functions.
    EqualityHashLen = curry(EqualityHashKey, len)
    EqualityHashType = curry(EqualityHashKey, type)
    EqualityHashId = curry(EqualityHashKey, id)
    EqualityHashFirst = curry(EqualityHashKey, first)
    data1 = [[], [1], (), (1,), {}, {1: 2}]
    data2 = [[1, 2], (1, 2), (1, 3), [1, 3], [2, 1], {1: 2}]
    assert list(unique(data1 * 3, key=EqualityHashLen)) == data1
    assert list(unique(data2 * 3, key=EqualityHashLen)) == data2
    assert list(unique(data1 * 3, key=EqualityHashType)) == data1
    assert list(unique(data2 * 3, key=EqualityHashType)) == data2
    assert list(unique(data1 * 3, key=EqualityHashId)) == data1
    assert list(unique(data2 * 3, key=EqualityHashId)) == data2
    assert list(unique(data2 * 3, key=EqualityHashFirst)) == data2
def test_check_curry_1():
    # Ensure the decorator works when curried
    _func_sig = check(REQUIRE, data=[str, str])(func_sig)
    fc = toolz.curry(_func_sig, **{REQUIRE: {'data': ['nir', 'red']},
                                   OUTPUT: {'data': ['ndvi']}})
    fc({})
def internal_tessssst_oneclass():
    # python -c "import score as s; s.internal_tessssst_oneclass()"
    # let's get some data
    from toolz import curry, pipe
    from eden_chem.io.pubchem import download
    from eden_chem.io.rdkitutils import sdf_to_nx
    download_active = curry(download)(active=True)
    download_inactive = curry(download)(active=False)

    def get_pos_graphs(assay_id):
        return pipe(assay_id, download_active, sdf_to_nx, list)

    assay_id = '624249'
    gr = get_pos_graphs(assay_id)
    est = OneClassEstimator().fit(gr)
    print(est.decision_function(gr))
def versions(self, dataset_id: UUID,
             query: dict) -> Iterator[DatasetVersion]:
    """
    Lists all versions of a given dataset
    :param dataset_id:
    :param query:
    :return:
    """
    # TODO: Not completely pure pipeline
    return excepting_pipe(
        self._versions.values(),
        curry(filter)(lambda v: v.parent_id == ensure_uuid(dataset_id)),
        curry(sorted,
              key=lambda ds: getattr(ds, query['sortby']),
              reverse=query['order']),
        partition_versions(query),
        list,
        curry(get, default=[])(query['offset']))
def reduction(x, chunk, aggregate, axis=None, keepdims=None, dtype=None):
    """ General version of reductions

    >>> reduction(my_array, np.sum, np.sum, axis=0, keepdims=False)  # doctest: +SKIP
    """
    if axis is None:
        axis = tuple(range(x.ndim))
    if isinstance(axis, int):
        axis = (axis,)

    chunk2 = partial(chunk, axis=axis, keepdims=True)
    aggregate2 = partial(aggregate, axis=axis, keepdims=keepdims)

    inds = tuple(range(x.ndim))
    tmp = atop(chunk2, next(names), inds, x, inds)

    inds2 = tuple(i for i in inds if i not in axis)
    result = atop(compose(aggregate2, curry(_concatenate2, axes=axis)),
                  next(names), inds2, tmp, inds, dtype=dtype)

    if keepdims:
        dsk = result.dask.copy()
        for k in flatten(result._keys()):
            k2 = (k[0],) + insert_many(k[1:], axis, 0)
            dsk[k2] = dsk.pop(k)
        blockdims = insert_many(result.blockdims, axis, [1])
        return Array(dsk, result.name, blockdims=blockdims, dtype=dtype)
    else:
        return result
def elemwise_array(expr, *data, **kwargs):
    leaves = expr._inputs
    expr_inds = tuple(range(ndim(expr)))[::-1]
    return atop(curry(compute_it, expr, leaves, **kwargs),
                expr_inds,
                *concat((dat, tuple(range(ndim(dat))[::-1]))
                        for dat in data),
                dtype=expr.dshape.measure.to_numpy_dtype())
def get(self):
    # type: () -> PluginType
    """Return the currently active plugin."""
    if self._options:
        return curry(self._active, **self._options)
    else:
        return self._active
def test_EqualityHashKey_default_key():
    EqualityHashDefault = curry(EqualityHashKey, None)
    L1 = [1]
    L2 = [2]
    data1 = [L1, L1, L2, [], [], [1], [2], {}, ()]
    set1 = set(map(EqualityHashDefault, data1))
    set2 = set(map(EqualityHashDefault, [[], [1], [2], {}, ()]))
    assert set1 == set2
    assert len(set1) == 5

    # Test that ``EqualityHashDefault(item)`` is distinct from ``item``
    T0 = ()
    T1 = (1,)
    data2 = list(map(EqualityHashDefault, [T0, T0, T1, T1, (), (1,)]))
    data2.extend([T0, T1, (), (1,)])
    set3 = set(data2)
    assert set3 == set([(), (1,), EqualityHashDefault(()),
                        EqualityHashDefault((1,))])
    assert len(set3) == 4
    assert EqualityHashDefault(()) in set3
    assert EqualityHashDefault((1,)) in set3

    # Miscellaneous
    E1 = EqualityHashDefault(L1)
    E2 = EqualityHashDefault(L2)
    assert str(E1) == '=[1]='
    assert repr(E1) == '=[1]='
    assert E1 != E2
    assert not (E1 == E2)
    assert E1 == EqualityHashDefault(L1)
    assert not (E1 != EqualityHashDefault(L1))
    assert E1 != L1
    assert not (E1 == L1)
def main(haunted_place_file, cache_file, output_file):
    load_dotenv(find_dotenv())
    google_api_key = os.environ.get("GOOGLE_MAPS_API_KEY")

    geo_cache = set()
    if cache_file:
        geo_cache = load_geo_cache(csv.reader(cache_file))

    haunted_places = pd.read_csv(haunted_place_file)
    null_state = haunted_places.state.isnull()
    null_city = haunted_places.city.isnull()
    haunted_place_locations = haunted_places.loc[
        ~null_state & ~null_city, ['state', 'city']
    ].drop_duplicates()

    create_geo_request = curry(_create_geo_request)(google_api_key)

    writer = csv.writer(output_file)
    for _, row in haunted_place_locations.iterrows():
        # Skip if it's already in the cache.
        if (row["state"], row["city"]) in geo_cache:
            continue
        # Otherwise write the row.
        writer.writerow([
            row["state"],
            row["city"],
            create_geo_request(row["state"], row["city"])
        ])
def wrap_func_size_as_kwarg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' in kwargs and 'size' not in kwargs:
        kwargs['size'] = kwargs.pop('shape')
    if 'size' not in kwargs:
        args, size = args[:-1], args[-1]
    else:
        size = kwargs.pop('size')

    if not isinstance(size, (tuple, list)):
        size = (size,)

    blockshape = kwargs.pop('blockshape', None)
    blockdims = kwargs.pop('blockdims', None)
    name = kwargs.pop('name', None)
    if not blockdims and blockshape:
        blockdims = blockdims_from_blockshape(size, blockshape)

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in blockdims])
    sizes = product(*blockdims)
    if not kwargs:
        vals = ((func,) + args + (size,) for size in sizes)
    else:
        vals = ((curry(func, *args, size=size, **kwargs),) for size in sizes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, shape=size, blockdims=blockdims)
def draw_target(self):
    '''Usage: aimmat.draw_target()(center)(img)'''
    def draw(center, img):
        center = (int(center[0]), int(center[1]))
        cv2.circle(img, center, 5, (50, 200, 200), -1)
        return img
    return curry(draw)
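# Illustrative sketch (not from the source): because ``draw`` is curried, the
# centre point and the frame can be supplied in separate steps.  The names
# ``tracker``, ``detected_center`` and ``frame`` below are hypothetical.
# draw_at_center = tracker.draw_target()(detected_center)
# annotated = draw_at_center(frame)   # same as draw(detected_center, frame)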
def load_all_users():
    '''Returns a pd.DataFrame with the information of all the users.'''
    # Bind a curried map locally; rebinding the name ``map`` itself would
    # raise UnboundLocalError, because the assignment makes it function-local
    # before the right-hand side is evaluated.
    cmap = tlz.curry(map)
    dataset = tlz.pipe(users,
                       cmap(parse_exp03_filename),
                       cmap(user_pipe),
                       accumulate_users)
    dataset.insert(0, 'user', sorted(users * 3))
    return dataset
def compute_down(expr, data, chunksize=2**20, map=map, **kwargs):
    leaf = expr._leaves()[0]

    # If the bottom expression is a projection or field then want to do
    # compute_up first
    children = set(e for e in expr._traverse()
                   if isinstance(e, Expr)
                   and any(i is expr._leaves()[0] for i in e._inputs))
    if len(children) == 1 and isinstance(first(children),
                                         (Field, Projection)):
        raise NotImplementedError()

    chunk = symbol('chunk', chunksize * leaf.schema)
    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr, chunk=chunk)

    data_parts = partitions(data, chunksize=(chunksize,))

    parts = list(map(curry(compute_chunk, data, chunk, chunk_expr),
                     data_parts))

    if isinstance(parts[0], np.ndarray):
        intermediate = np.concatenate(parts)
    elif isinstance(parts[0], pd.DataFrame):
        intermediate = pd.concat(parts)
    elif isinstance(parts[0], Iterable):
        intermediate = list(concat(parts))
    else:
        raise TypeError(
            "Don't know how to concatenate objects of type %s" %
            type(parts[0]))

    return compute(agg_expr, {agg: intermediate})
def map(self, func):
    name = next(names)
    if takes_multiple_arguments(func):
        func = curry(apply, func)
    dsk = dict(((name, i), (list, (map, func, (self.name, i))))
               for i in range(self.npartitions))
    return Bag(merge(self.dask, dsk), name, self.npartitions)
def wrap_func_shape_as_first_arg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' not in kwargs:
        shape, args = args[0], args[1:]
    else:
        shape = kwargs.pop('shape')

    if not isinstance(shape, (tuple, list)):
        shape = (shape,)

    chunks = kwargs.pop('chunks', None)
    chunks = normalize_chunks(chunks, shape)
    name = kwargs.pop('name', None)
    dtype = kwargs.pop('dtype', None)
    if dtype is None:
        dtype = func(shape, *args, **kwargs).dtype

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in chunks])
    shapes = product(*chunks)
    func = curry(func, dtype=dtype, **kwargs)
    vals = ((func,) + (s,) + args for s in shapes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, chunks, dtype=dtype)
def use_with(function, transformers):
    """Accepts a function fn and a list of transformer functions and returns
    a new curried function. When the new function is invoked, it calls the
    function fn with parameters consisting of the result of calling each
    supplied handler on successive arguments to the new function.

    If more arguments are passed to the returned function than transformer
    functions, those arguments are passed directly to fn as additional
    parameters. If you expect additional arguments that don't need to be
    transformed, although you can ignore them, it's best to pass an identity
    function so that the new function reports the correct arity"""
    try:
        args = inspect.getfullargspec(function).args
    except TypeError:
        args = ["argument" + str(i) for i, x in enumerate(transformers)]
    F = {function.__name__: function}
    run = []
    for i, t in enumerate(transformers):
        F[t.__name__] = t
        try:
            args[i]
        except IndexError:
            args.append("argument" + str(i))
        run.append(t.__name__ + "(" + args[i] + ")")
    f = ("lambda " + ", ".join(args[:len(transformers)]) + ": " +
         function.__name__ + "(" + ",".join(run) + ")")
    return curry(eval(f, F))
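# Illustrative sketch (not from the source): assuming the ``use_with`` above
# is importable, the generated curried function applies each transformer to
# the matching positional argument before calling the wrapped function.
def _demo_use_with():
    def add(a, b):
        return a + b

    add_parsed = use_with(add, [int, int])
    # Both call styles work because the result is curried.
    assert add_parsed("1", "2") == 3
    assert add_parsed("1")("2") == 3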
def compute_down(expr, data, chunksize=None, map=map, **kwargs):
    leaf = expr._leaves()[0]

    if chunksize is None:
        chunksize = get_chunksize(data)

    # If the bottom expression is a projection or field then want to do
    # compute_up first
    children = set(e for e in expr._traverse()
                   if isinstance(e, Expr)
                   and any(i is expr._leaves()[0] for i in e._inputs))

    if len(children) == 1 and isinstance(first(children),
                                         (Field, Projection)):
        raise MDNotImplementedError()

    chunk = symbol('chunk', chunksize * leaf.schema)
    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr, chunk=chunk)

    data_parts = partitions(data, chunksize=(chunksize,))

    parts = list(map(curry(compute_chunk, data, chunk, chunk_expr),
                     data_parts))

    if isinstance(parts[0], np.ndarray):
        intermediate = np.concatenate(parts)
    elif isinstance(parts[0], pd.DataFrame):
        intermediate = pd.concat(parts)
    elif isinstance(parts[0], Iterable):
        intermediate = list(concat(parts))
    else:
        raise TypeError("Don't know how to concatenate objects of type %r" %
                        type(parts[0]).__name__)

    return compute(agg_expr, {agg: intermediate})
def __init__(self, filename, *, label_only=False, **pdr_kwargs):
    # default values for attributes hopefully set by PDR
    self.LABEL = {}
    self.pointers = []
    if not label_only:
        super().__init__(filename, **pdr_kwargs)
    else:
        # this might be a messy option, but implementing it now for
        # convenience. the label_only flag allows this class to be used
        # as _just_ a label reader / converter
        setattr(self, "filename", filename)
        # Try PDS3 options
        setattr(self, "LABEL", pdr.read_label(filename))
        setattr(
            self,
            "pointers",
            [k for k in self.LABEL.keys() if k[0] == "^"],
        )
        _ = [
            setattr(
                self,
                pointer[1:] if pointer.startswith("^") else pointer,
                pdr.WORKING_POINTER_TO_FUNCTION_MAP[pointer](filename,
                                                             self.LABEL),
            )
            for pointer in self.pointers
        ]
    setattr(self, "convert_label", curry(_convert_label)(self))
def curry_pipeline_task(func, spec):
    return curry(func,
                 **{REQUIRE: spec[REQUIRE],
                    OUTPUT: spec[OUTPUT],
                    'config': spec.get('config', {})})
def create_geocoder(city_file):
    """
    Creates a geocoder function for cities that takes a city name and region
    and returns the latitude and longitude.
    """
    reader = DictReader(city_file)

    # Create a blank hash to load.
    # (state_iso, city_name) => (lat, lon, records)
    # State/city collisions are resolved by whichever one has the most
    # records. Not 100% sure that's the right call, but it's a start.
    geocoder_hash = {}
    for row in reader:
        row_key = (
            row["country_iso_code"].lower(),
            row["city_name"].lower(),
        )
        if (row_key not in geocoder_hash) or \
                (int(row["num_blocks"]) > geocoder_hash[row_key][2]):
            geocoder_hash[row_key] = (
                float(row["latitude"]),
                float(row["longitude"]),
                int(row["num_blocks"]),
            )
            print(row_key)
            print(geocoder_hash[row_key])

    # Bind the geocoder hash to the geocoder template.
    return curry(_geocoder_template)(geocoder_hash)
def wrap_func_size_as_kwarg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' in kwargs and 'size' not in kwargs:
        kwargs['size'] = kwargs.pop('shape')
    if 'size' not in kwargs:
        args, size = args[:-1], args[-1]
    else:
        size = kwargs.pop('size')

    if not isinstance(size, (tuple, list)):
        size = (size,)

    chunks = kwargs.pop('chunks', None)
    chunks = normalize_chunks(chunks, size)
    name = kwargs.pop('name', None)
    dtype = kwargs.pop('dtype', None)
    if dtype is None:
        kw = kwargs.copy()
        kw['size'] = (0,)
        dtype = func(*args, **kw).dtype

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in chunks])
    sizes = product(*chunks)
    if not kwargs:
        vals = ((func,) + args + (size,) for size in sizes)
    else:
        vals = ((curry(func, *args, size=size, **kwargs),) for size in sizes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, chunks, dtype=dtype)
def wrap_func_shape_as_first_arg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' not in kwargs:
        shape, args = args[0], args[1:]
    else:
        shape = kwargs.pop('shape')
    dtype = kwargs.pop('dtype', None)

    if not isinstance(shape, (tuple, list)):
        shape = (shape,)

    blockshape = kwargs.pop('blockshape', None)
    blockdims = kwargs.pop('blockdims', None)
    name = kwargs.pop('name', None)
    if not blockdims and blockshape:
        blockdims = blockdims_from_blockshape(shape, blockshape)
    if dtype is None:
        dtype = func(shape, *args, **kwargs).dtype

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in blockdims])
    shapes = product(*blockdims)
    func = curry(func, dtype=dtype, **kwargs)
    vals = ((func,) + (s,) + args for s in shapes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, shape=shape, blockdims=blockdims, dtype=dtype)
def nd2_reader(path):
    """Take a path or list of paths and return a list of LayerData tuples.

    Readers are expected to return data as a list of tuples, where each tuple
    is (data, [add_kwargs, [layer_type]]); "add_kwargs" and "layer_type" are
    both optional.

    Parameters
    ----------
    path : str or list of str
        Path to file, or list of paths.

    Returns
    -------
    layer_data : list of tuples
        A list of LayerData tuples where each tuple in the list contains
        (data, metadata, layer_type), where data is a numpy array, metadata
        is a dict of keyword arguments for the corresponding viewer.add_*
        method in napari, and layer_type is a lower-case string naming the
        type of layer. Both "meta" and "layer_type" are optional. napari
        will default to layer_type=="image" if not provided.
    """
    with ND2Reader(path) as nd2_data:
        channels = nd2_data.metadata['channels']
        n_timepoints = nd2_data.sizes['t']
        z_depth = nd2_data.sizes['z']
        frame_shape = (z_depth, *nd2_data.frame_shape)
        frame_dtype = nd2_data._dtype
        nd2vol = tz.curry(get_nd2reader_nd2_vol)
        layer_list = get_layer_list(channels, nd2vol, path, frame_shape,
                                    frame_dtype, n_timepoints)
    return layer_list
def elemwise_array(expr, *data, **kwargs):
    leaves = expr._inputs
    expr_inds = tuple(range(ndim(expr)))[::-1]
    return atop(
        curry(compute_it, expr, leaves, **kwargs),
        expr_inds,
        *concat((dat, tuple(range(ndim(dat))[::-1])) for dat in data)
    )
def rowterator(sel, chunksize=chunksize):
    with sel.bind.connect() as conn:
        result = conn.execute(sel)
        yield result.keys()

        for rows in iter_except(curry(result.fetchmany, size=chunksize),
                                sa.exc.ResourceClosedError):
            yield rows
def pickle_apply_async(apply_async, func, args=(), kwds={},
                       func_loads=None, func_dumps=None):
    dumps = func_dumps or _globals.get('func_dumps') or _dumps
    sfunc = dumps(func)
    sargs = dumps(args)
    skwds = dumps(kwds)
    return apply_async(curry(apply_func, loads=func_loads),
                       args=[sfunc, sargs, skwds])
def test_EqualityHashKey_index_key():
    d1 = {'firstname': 'Alice', 'age': 21, 'data': {}}
    d2 = {'firstname': 'Alice', 'age': 34, 'data': {}}
    d3a = {'firstname': 'Bob', 'age': 56, 'data': {}}
    d3b = {'firstname': 'Bob', 'age': 56, 'data': {}}
    EqualityHashFirstname = curry(EqualityHashKey, 'firstname')
    assert list(unique(3 * [d1, d2, d3a, d3b],
                       key=EqualityHashFirstname)) == [d1, d2, d3a]
    EqualityHashFirstnameAge = curry(EqualityHashKey, ['firstname', 'age'])
    assert list(unique(3 * [d1, d2, d3a, d3b],
                       key=EqualityHashFirstnameAge)) == [d1, d2, d3a]
    list1 = [0] * 10
    list2 = [0] * 100
    list3a = [1] * 10
    list3b = [1] * 10
    EqualityHash0 = curry(EqualityHashKey, 0)
    assert list(unique(3 * [list1, list2, list3a, list3b],
                       key=EqualityHash0)) == [list1, list2, list3a]
def rowiterator(sel, chunksize=chunksize):
    with getbind(sel, bind).connect() as conn:
        result = conn.execute(sel)
        for rows in iter_except(curry(result.fetchmany, size=chunksize),
                                sa.exc.ResourceClosedError):
            if rows:
                yield rows
            else:
                return
def append_text_to_s3(s3, data, multipart=False, part_size=5 << 20, **kwargs):
    if multipart:
        with start_multipart_upload_operation(s3) as multipart_upload:
            with open(data.path, 'rb') as f:
                for part_number, part in enumerate(
                        iter(curry(f.read, part_size), ''), start=1):
                    multipart_upload.upload_part_from_file(
                        BytesIO(part), part_num=part_number)
        return s3
    s3.object.set_contents_from_filename(data.path)
    return s3
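# Illustrative sketch (not from the source): ``iter(callable, sentinel)``
# keeps calling the zero-argument callable until it returns the sentinel, so
# currying ``read`` with a fixed part size yields the data in equal chunks.
# The snippet above compares against '' because it targets Python 2 file
# objects; on a Python 3 binary stream the exhausted-read sentinel is b''.
from io import BytesIO
from toolz import curry

def _demo_chunked_read():
    f = BytesIO(b'abcdefghij')
    return list(iter(curry(f.read, 4), b''))  # -> [b'abcd', b'efgh', b'ij']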
def __call__(cls, *args, **kwargs):
    """This is where the currying magic occurs.

    The __call__ in a metaclass is analogous to __new__ in a regular class.
    """
    @wraps(cls, updated=[])
    def currier(*a, **k):
        return super(Curried, cls).__call__(*a, **k)

    # there's odd behavior when composed with other
    # metaclasses if done as one function call...
    return curry(currier)(*args, **kwargs)
def topk(self, k, key=None):
    a = next(names)
    b = next(names)
    if key:
        topk = curry(heapq.nlargest, key=key)
    else:
        topk = heapq.nlargest
    dsk = dict(((a, i), (list, (topk, k, (self.name, i))))
               for i in range(self.npartitions))
    dsk2 = {(b, 0): (list, (topk, k, (concat, list(dsk.keys()))))}
    return Bag(merge(self.dask, dsk, dsk2), b, 1)
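# Illustrative sketch (not from the source): currying ``heapq.nlargest`` with
# only the ``key`` keyword leaves a callable that the task graph above can
# still invoke as ``topk(k, partition)``.
import heapq
from toolz import curry

def _demo_curried_nlargest():
    topk = curry(heapq.nlargest, key=len)
    return topk(2, ['aa', 'b', 'cccc'])  # -> ['cccc', 'aa']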
def distinct(self):
    """ Distinct elements of collection

    Unordered without repeats.

    >>> b = from_sequence(['Alice', 'Bob', 'Alice'])
    >>> sorted(b.distinct())
    ['Alice', 'Bob']
    """
    return self.reduction(set, curry(apply, set.union), out_type=Bag,
                          name='distinct')
def test_introspect_curry_valid_py3(check_valid=is_valid_args,
                                    incomplete=False):
    if not PY3:
        return
    orig_check_valid = check_valid
    check_valid = lambda _func, *args, **kwargs: orig_check_valid(_func, args, kwargs)

    f = toolz.curry(make_func('x, y, z=0'))
    assert check_valid(f)
    assert check_valid(f, 1)
    assert check_valid(f, 1, 2)
    assert check_valid(f, 1, 2, 3)
    assert check_valid(f, 1, 2, 3, 4) is False
    assert check_valid(f, invalid_keyword=True) is False
    assert check_valid(f(1))
    assert check_valid(f(1), 2)
    assert check_valid(f(1), 2, 3)
    assert check_valid(f(1), 2, 3, 4) is False
    assert check_valid(f(1), x=2) is False
    assert check_valid(f(1), y=2)
    assert check_valid(f(x=1), 2) is False
    assert check_valid(f(x=1), y=2)
    assert check_valid(f(y=2), 1)
    assert check_valid(f(y=2), 1, z=3)
    assert check_valid(f(y=2), 1, 3) is False

    f = toolz.curry(make_func('x, y, z=0'), 1, x=1)
    assert check_valid(f) is False
    assert check_valid(f, z=3) is False

    f = toolz.curry(make_func('x, y, *args, z'))
    assert check_valid(f)
    assert check_valid(f, 0)
    assert check_valid(f(1), 0)
    assert check_valid(f(1, 2), 0)
    assert check_valid(f(1, 2, 3), 0)
    assert check_valid(f(1, 2, 3, 4), 0)
    assert check_valid(f(1, 2, 3, 4), z=4)
    assert check_valid(f(x=1))
    assert check_valid(f(x=1), 1) is False
    assert check_valid(f(x=1), y=2)
def wrap(wrap_func, func, **kwargs):
    f = curry(wrap_func, func, **kwargs)
    f.__doc__ = """
    Blocked variant of %(name)s

    Follows the signature of %(name)s exactly except that it also requires a
    keyword argument chunks=(...)

    Original signature follows below.
    """ % {'name': func.__name__} + func.__doc__

    f.__name__ = 'blocked_' + func.__name__
    return f
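# Illustrative sketch (not from the source): assuming the ``wrap`` helper and
# the ``wrap_func_shape_as_first_arg`` wrapper shown earlier, a blocked array
# constructor could be built roughly like this (names and keywords follow the
# older dask-style API used in these snippets and may differ by version).
# ones = wrap(wrap_func_shape_as_first_arg, np.ones)
# x = ones((1000, 1000), chunks=(100, 100), dtype='f8')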
def map(self, func):
    """ Map a function across all elements in collection

    >>> import dask.bag as db
    >>> b = db.from_sequence(range(5))
    >>> list(b.map(lambda x: x * 10))  # doctest: +SKIP
    [0, 10, 20, 30, 40]
    """
    name = next(names)
    if takes_multiple_arguments(func):
        func = curry(apply, func)
    dsk = dict(((name, i), (list, (map, func, (self.name, i))))
               for i in range(self.npartitions))
    return Bag(merge(self.dask, dsk), name, self.npartitions)
def compute_down(expr, data, map=map, **kwargs):
    leaf = expr._leaves()[0]

    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr)

    parts = list(map(curry(compute_chunk, chunk, chunk_expr), data))

    if isinstance(parts[0], np.ndarray):
        intermediate = np.concatenate(parts)
    elif isinstance(parts[0], pd.DataFrame):
        intermediate = pd.concat(parts)
    elif isinstance(parts[0], (Iterable, Iterator)):
        intermediate = concat(parts)

    return compute(agg_expr, {agg: intermediate})
def pickle_apply_async(apply_async, func, args=(),
                       func_loads=None, func_dumps=None):
    # XXX: To deal with deserialization errors of tasks, this version of
    # apply_async doesn't actually match that of `pool.apply_async`. It's
    # customized to fit the signature of `dask.async.execute_task`, which is
    # the only function ever actually passed as `func`. This is a bit of a
    # hack, but it works pretty well. If the signature of `execute_task`
    # changes, then this will need to be changed as well.
    dumps = func_dumps or _globals.get('func_dumps') or _dumps
    key, task, data, queue, get_id, raise_on_exception = args
    sfunc = dumps(func)
    may_fail = dumps((task, data))
    wont_fail = dumps((key, queue, get_id, raise_on_exception))
    return apply_async(curry(apply_func, loads=func_loads),
                       args=[sfunc, may_fail, wont_fail])