Example #1
def compute_up(expr, data, **kwargs):
    leaf = expr._leaves()[0]
    chunk = symbol(
        'chunk',
        DataShape(*(tuple(map(first, data.chunks)) + (leaf.dshape.measure, ))))
    (chunk, chunk_expr), (agg, agg_expr) = split(expr._child,
                                                 expr,
                                                 chunk=chunk)

    inds = tuple(range(ndim(leaf)))
    dtype = expr.dshape.measure.to_numpy_dtype()
    tmp = atop(
        curry(compute_it, chunk_expr, [chunk], **kwargs),
        inds,
        data,
        inds,
        dtype=dtype,
    )

    return atop(
        compose(
            curry(compute_it, agg_expr, [agg], **kwargs),
            curry(_concatenate2, axes=expr.axis),
        ),
        tuple(i for i in inds if i not in expr.axis),
        tmp,
        inds,
        dtype=dtype,
    )
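A note on the pattern above: toolz's curry defers calling the wrapped function until all required positional arguments have arrived, and every partial application returns a new curried callable. A minimal sketch:

from toolz import curry

def add(x, y):
    return x + y

cadd = curry(add)
add1 = cadd(1)          # not enough arguments yet: returns another curry
assert add1(2) == 3     # the final argument triggers the real call
assert cadd(1, 2) == 3  # supplying everything at once calls immediately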
Example #2
def test_introspect_curry_py3():
    if not PY3:
        return
    f = toolz.curry(make_func(''))
    assert num_required_args(f) == 0
    assert is_arity(0, f)
    assert has_varargs(f) is False
    assert has_keywords(f) is False

    f = toolz.curry(make_func('x'))
    assert num_required_args(f) == 0
    assert is_arity(0, f) is False
    assert is_arity(1, f) is False
    assert has_varargs(f) is False
    assert has_keywords(f)  # A side-effect of being curried

    f = toolz.curry(make_func('x, y, z=0'))
    assert num_required_args(f) == 0
    assert is_arity(0, f) is False
    assert is_arity(1, f) is False
    assert is_arity(2, f) is False
    assert is_arity(3, f) is False
    assert has_varargs(f) is False
    assert has_keywords(f)

    f = toolz.curry(make_func('*args, **kwargs'))
    assert num_required_args(f) == 0
    assert has_varargs(f)
    assert has_keywords(f)
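The num_required_args(f) == 0 assertions hold because a curried function may always be called with no arguments: rather than raising TypeError, it returns a further curried function. A small sketch of that property:

import toolz

f = toolz.curry(lambda x, y: x + y)
g = f()              # zero arguments is fine; still waiting for x and y
assert g(1)(2) == 3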
Example #4
def load_objects_from_storage(metadata_filename: str,
                              class_factory: GenericObject,
                              root_directory: Path) -> dict:
    return excepting_pipe(list_files(root_directory, metadata_filename),
                          curry(map)(load_json),
                          curry(map)(class_factory.from_json),
                          curry(map)(lambda _: (_.id, _)), dict)
Example #5
def compute_up(expr, data, **kwargs):
    leaf = expr._leaves()[0]
    chunk = symbol('chunk', DataShape(*(tuple(map(first, data.chunks)) +
                                        (leaf.dshape.measure,))))
    (chunk, chunk_expr), (agg, agg_expr) = split(expr._child, expr,
                                                 chunk=chunk)

    inds = tuple(range(ndim(leaf)))
    dtype = expr.dshape.measure.to_numpy_dtype()
    tmp = atop(
        curry(compute_it, chunk_expr, [chunk], **kwargs),
        inds,
        data,
        inds,
        dtype=dtype,
    )

    return atop(
        compose(
            curry(compute_it, agg_expr, [agg], **kwargs),
            curry(_concatenate2, axes=expr.axis),
        ),
        tuple(i for i in inds if i not in expr.axis),
        tmp,
        inds,
        dtype=dtype,
    )
Example #6
    def fold(self, binop, combine=None, initial=no_default, split_every=None):
        """ Parallelizable reduction

        Fold is like the builtin function ``reduce`` except that it works in
        parallel.  Fold takes two binary operator functions, one to reduce each
        partition of our dataset and another to combine results between
        partitions

        1.  ``binop``: Binary operator to reduce within each partition
        2.  ``combine``:  Binary operator to combine results from binop

        Sequentially this would look like the following:

        >>> intermediates = [reduce(binop, part) for part in partitions]  # doctest: +SKIP
        >>> final = reduce(combine, intermediates)  # doctest: +SKIP

        If only one function is given then it is used for both functions
        ``binop`` and ``combine`` as in the following example to compute the
        sum:

        >>> def add(x, y):
        ...     return x + y

        >>> b = from_sequence(range(5))
        >>> b.fold(add).compute()  # doctest: +SKIP
        10

        In full form we provide both binary operators as well as their default
        arguments

        >>> b.fold(binop=add, combine=add, initial=0).compute()  # doctest: +SKIP
        10

        More complex binary operators are also doable

        >>> def add_to_set(acc, x):
        ...     ''' Add new element x to set acc '''
        ...     return acc | set([x])
        >>> b.fold(add_to_set, set.union, initial=set()).compute()  # doctest: +SKIP
        {1, 2, 3, 4, 5}

        See Also
        --------

        Bag.foldby
        """
        token = tokenize(self, binop, combine, initial)
        combine = combine or binop
        a = 'foldbinop-{0}-{1}'.format(funcname(binop), token)
        b = 'foldcombine-{0}-{1}'.format(funcname(combine), token)
        initial = quote(initial)
        if initial is not no_default:
            return self.reduction(curry(_reduce, binop, initial=initial),
                                  curry(_reduce, combine),
                                  split_every=split_every)
        else:
            from toolz.curried import reduce
            return self.reduction(reduce(binop), reduce(combine),
                                  split_every=split_every)
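The sequential description in the docstring can be reproduced directly with functools.reduce, independent of any dask machinery:

from functools import reduce

binop = combine = lambda x, y: x + y
partitions = [[0, 1], [2, 3, 4]]
intermediates = [reduce(binop, part) for part in partitions]
assert reduce(combine, intermediates) == 10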
Example #7
def make_fold_vectorize(complexity=3, nbits=15, fold=None, boundaries=None):
    """Curry parameters in vectorizer."""
    vec = Vectorizer(complexity=complexity, nbits=nbits)
    vectorize = curry(lambda vec, graphs: vec.transform(graphs))(vec)

    cwindow_reweight = curry(_window_reweight)(boundaries)
    fold_vectorize = compose(vectorize, map(cwindow_reweight), fold)
    return fold_vectorize
Example #8
    def fold(self, binop, combine=None, initial=no_default, split_every=None):
        """ Parallelizable reduction

        Fold is like the builtin function ``reduce`` except that it works in
        parallel.  Fold takes two binary operator functions, one to reduce each
        partition of our dataset and another to combine results between
        partitions

        1.  ``binop``: Binary operator to reduce within each partition
        2.  ``combine``:  Binary operator to combine results from binop

        Sequentially this would look like the following:

        >>> intermediates = [reduce(binop, part) for part in partitions]  # doctest: +SKIP
        >>> final = reduce(combine, intermediates)  # doctest: +SKIP

        If only one function is given then it is used for both functions
        ``binop`` and ``combine`` as in the following example to compute the
        sum:

        >>> def add(x, y):
        ...     return x + y

        >>> b = from_sequence(range(5))
        >>> b.fold(add).compute()  # doctest: +SKIP
        10

        In full form we provide both binary operators as well as their default
        arguments

        >>> b.fold(binop=add, combine=add, initial=0).compute()  # doctest: +SKIP
        10

        More complex binary operators are also doable

        >>> def add_to_set(acc, x):
        ...     ''' Add new element x to set acc '''
        ...     return acc | set([x])
        >>> b.fold(add_to_set, set.union, initial=set()).compute()  # doctest: +SKIP
        {1, 2, 3, 4, 5}

        See Also
        --------

        Bag.foldby
        """
        combine = combine or binop
        initial = quote(initial)
        if initial is not no_default:
            return self.reduction(curry(_reduce, binop, initial=initial),
                                  curry(_reduce, combine),
                                  split_every=split_every)
        else:
            from toolz.curried import reduce
            return self.reduction(reduce(binop),
                                  reduce(combine),
                                  split_every=split_every)
Example #9
def getgraphs(aid):
    if aid == 'bursi':
        return list(gspan.gspan_to_eden("bursi.pos.gspan")), list(
            gspan.gspan_to_eden("bursi.neg.gspan"))
    download_active = curry(download)(active=True, stepsize=50)
    download_inactive = curry(download)(active=False, stepsize=50)
    active = pipe(aid, download_active, sdf_to_nx, list)
    inactive = pipe(aid, download_inactive, sdf_to_nx, list)
    return active, inactive
Example #10
    def test_urlzsource(self):
        lines4 = []
        with URLZSource('http://www.google.com/robots.txt').open() as f:
            take_and_rstrip = compose(curry(map, lambda l: rstrip(l, '\n')),
                                      curry(take, 4))
            lines4 = list(take_and_rstrip(f))

        print(str(lines4))
        self.assertGreaterEqual(len(lines4), 1,
                                "Failed to get more than 0 lines")
Example #11
def collect_ast():
    type_spec: dict = collections.defaultdict(set)

    def is_python(path: str):
        return path.endswith('.py') or Path(path).is_dir()

    all_python_files = linq.Flow(
        Path(Redy.__file__).parent().collect(is_python)).concat(
            Path(flask.__file__).parent().collect(is_python))._

    for each in all_python_files:
        with each.open('r', encoding='utf8') as file:
            try:
                ast_of_src_code = ast.parse(file.read())
                service = CollectASTTypeStub(type_spec)
                feature(service).just_apply_ast_transformation(ast_of_src_code)
            except SyntaxError:
                # other py version
                pass

    def snd(tp):
        return tp[1]

    stub_code = \
    (linq.Flow(type_spec)
         .map(lambda class_name, fields:
                linq.Flow(fields)
                    .group_by(lambda fst, snd: fst)
                    .map(lambda field_name, pairs:
                            '{}: {}'.format(
                            field_name,
                            compose(
                                str,
                                curry(reduce)(lambda a, b: a.union(b)),
                                curry(map)(snd))(pairs)))
                    .then(
                        compose(
                            'class {}(AST):\n'.format(class_name).__add__,
                            lambda _: textwrap.indent(_, " " * 4),
                            lambda any_code: any_code if any_code else 'pass',
                            '\n'.join))
                    ._)
         .then(
            compose(
                'import typing, abc\nNoneType = None\n'.__add__,
                'class AST(abc.ABC):\n    def __init__(self, *args, lineno: int=None, colno: int=None, **kwargs): pass\n'.__add__,
                '\n'.join
                )))._

    with Path('./').into('ast.pyi').open('w', encoding='utf8') as stub:
        stub.write(stub_code)

    with Path(ast.__file__).parent().into('ast.pyi').open(
            'w', encoding='utf8') as stub:
        stub.write(stub_code)
Example #12
 def list_snapshots_filtered(self, model_id: UUID, git_commit_id: str,
                             query: dict) -> Mapping[UUID, ModelSnapshot]:
     return excepting_pipe(
         self._snapshots.values(),
         curry(filter)(lambda v: v.model.id == ensure_uuid(model_id)),
         curry(filter)(lambda v: v.model_git_commit == git_commit_id),
         curry(sorted,
               key=lambda snap: getattr(snap, query['sortby']),
               reverse=query['order']),
         curry(partition_all)(query['limit'] if query['limit'] > 0 else len(
             self._snapshots.values())), list,
         curry(get, default=[])(query['offset']))
Example #13
 def list_results(self, query: dict) -> Iterator[Result]:
     """
     List all results
     :param query:
     :return:
     """
     return excepting_pipe(
         self._results.values(),
         curry(sorted,
               key=lambda x: getattr(x, query['sortby']),
               reverse=query['order']),
         curry(partition_all)(query['limit'] if query['limit'] > 0 else len(
             self._results.values())), list,
         curry(get, default=[])(query['offset']))
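The sorted / partition_all / get sequence is a pagination idiom: sort, split into pages of query['limit'], then pick the page at query['offset']. A sketch with toolz.pipe standing in for the project-specific excepting_pipe:

from toolz import curry, get, partition_all, pipe

items = [5, 3, 8, 1]
page = pipe(items,
            curry(sorted, reverse=False),
            curry(partition_all)(2),    # page size 2
            list,
            curry(get, default=[])(1))  # page at offset 1
assert page == (5, 8)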
Example #14
def test_normalize_function():
    def f1(a, b, c=1):
        pass
    def f2(a, b=1, c=2):
        pass
    def f3(a):
        pass

    assert normalize_function(f2)

    f = lambda a: a
    assert normalize_function(f)

    assert (normalize_function(partial(f2, b=2)) ==
            normalize_function(partial(f2, b=2)))

    assert (normalize_function(partial(f2, b=2)) !=
            normalize_function(partial(f2, b=3)))

    assert (normalize_function(partial(f1, b=2)) !=
            normalize_function(partial(f2, b=2)))

    assert (normalize_function(compose(f2, f3)) ==
            normalize_function(compose(f2, f3)))

    assert (normalize_function(compose(f2, f3)) !=
            normalize_function(compose(f2, f1)))

    assert normalize_function(curry(f2)) == normalize_function(curry(f2))
    assert normalize_function(curry(f2)) != normalize_function(curry(f1))
    assert (normalize_function(curry(f2, b=1)) ==
            normalize_function(curry(f2, b=1)))
    assert (normalize_function(curry(f2, b=1)) !=
            normalize_function(curry(f2, b=2)))
Example #15
def graph_embed(data,
                target=None,
                confidence=1,
                n_iter=20,
                sample_fraction=.7,
                sample_p=None,
                feature_fraction=1,
                feature_p=None,
                alpha=0,
                gamma=1,
                beta=30):
    """Provide 2D embedding of high dimensional data."""
    # set parameters in all functions
    if sample_p is not None:
        sample_p = sample_p / np.sum(sample_p)
        sample_p = sample_p + 0.01
        sample_p = sample_p / np.sum(sample_p)
    if feature_p is not None:
        feature_p = feature_p / np.sum(feature_p)
        feature_p = feature_p + 0.01
        feature_p = feature_p / np.sum(feature_p)
    _sample = curry(sample)(sample_fraction=sample_fraction,
                            sample_p=sample_p,
                            feature_fraction=feature_fraction,
                            feature_p=feature_p)
    _step_func = curry(step_func)(alpha=alpha, gamma=gamma, beta=beta)
    _add_edge_length = curry(add_edge_length)(func=_step_func)
    _make_knn_graph = curry(make_knn_graph)(size=len(data),
                                            target=target,
                                            confidence=confidence)

    def make_graph(make_tree):
        count_dict = defaultdict(lambda: defaultdict(int))
        lengths_dict = defaultdict(lambda: defaultdict(list))
        for i in range(n_iter):
            sample_data, sample_ids, sample_feature_ids = _sample(data)
            sample_id_inv_map = make_id_map(sample_ids)
            distance_mtx = euclidean_distances(sample_data)
            rank_mtx = compute_ranks(distance_mtx)
            tree = make_tree(distance_mtx, sample_ids)
            _add_edge_length(tree, rank_mtx, sample_id_inv_map)
            update(count_dict, lengths_dict, tree)
        graph = _make_knn_graph(count_dict, lengths_dict)
        return graph

    mst_graph = make_graph(make_mst_tree)
    qks_graph = make_graph(make_qks_tree)
    graph = nx.compose(mst_graph, qks_graph)
    return graph
Example #16
 def list_snapshots(self, query: dict) -> Iterator[ModelSnapshot]:
     """
     List all snapshots
     :param query:
     :return:
     """
     # TODO: Not completely pure pipeline
     return excepting_pipe(
         self._snapshots.values(),
         curry(sorted,
               key=lambda snap: getattr(snap, query['sortby']),
               reverse=query['order']),
         curry(partition_all)(query['limit'] if query['limit'] > 0 else len(
             self._snapshots.values())), list,
         curry(get, default=[])(query['offset']))
Example #17
def test_EqualityHashKey_callable_key():
    # Common simple hash key functions.
    EqualityHashLen = curry(EqualityHashKey, len)
    EqualityHashType = curry(EqualityHashKey, type)
    EqualityHashId = curry(EqualityHashKey, id)
    EqualityHashFirst = curry(EqualityHashKey, first)
    data1 = [[], [1], (), (1, ), {}, {1: 2}]
    data2 = [[1, 2], (1, 2), (1, 3), [1, 3], [2, 1], {1: 2}]
    assert list(unique(data1 * 3, key=EqualityHashLen)) == data1
    assert list(unique(data2 * 3, key=EqualityHashLen)) == data2
    assert list(unique(data1 * 3, key=EqualityHashType)) == data1
    assert list(unique(data2 * 3, key=EqualityHashType)) == data2
    assert list(unique(data1 * 3, key=EqualityHashId)) == data1
    assert list(unique(data2 * 3, key=EqualityHashId)) == data2
    assert list(unique(data2 * 3, key=EqualityHashFirst)) == data2
Example #18
def test_EqualityHashKey_callable_key():
    # Common simple hash key functions.
    EqualityHashLen = curry(EqualityHashKey, len)
    EqualityHashType = curry(EqualityHashKey, type)
    EqualityHashId = curry(EqualityHashKey, id)
    EqualityHashFirst = curry(EqualityHashKey, first)
    data1 = [[], [1], (), (1,), {}, {1: 2}]
    data2 = [[1, 2], (1, 2), (1, 3), [1, 3], [2, 1], {1: 2}]
    assert list(unique(data1*3, key=EqualityHashLen)) == data1
    assert list(unique(data2*3, key=EqualityHashLen)) == data2
    assert list(unique(data1*3, key=EqualityHashType)) == data1
    assert list(unique(data2*3, key=EqualityHashType)) == data2
    assert list(unique(data1*3, key=EqualityHashId)) == data1
    assert list(unique(data2*3, key=EqualityHashId)) == data2
    assert list(unique(data2*3, key=EqualityHashFirst)) == data2
Example #19
def test_check_curry_1():
    # Ensure the decorator works when curried
    _func_sig = check(REQUIRE, data=[str, str])(func_sig)
    fc = toolz.curry(_func_sig,
                     **{REQUIRE: {'data': ['nir', 'red']},
                        OUTPUT: {'data': ['ndvi']}})
    fc({})
Example #20
def internal_tessssst_oneclass():
    # python -c "import score as s; s.internal_tessssst_oneclass()"
    # lets get sum data
    from toolz import curry, pipe
    from eden_chem.io.pubchem import download
    from eden_chem.io.rdkitutils import sdf_to_nx
    download_active = curry(download)(active=True)
    download_inactive = curry(download)(active=False)

    def get_pos_graphs(assay_id):
        return pipe(assay_id, download_active, sdf_to_nx, list)

    assay_id = '624249'
    gr = get_pos_graphs(assay_id)
    est = OneClassEstimator().fit(gr)
    print(est.decision_function(gr))
Example #21
 def versions(self, dataset_id: UUID,
              query: dict) -> Iterator[DatasetVersion]:
     """
     Lists all versions of a given dataset
     :param dataset_id:
     :param query:
     :return:
     """
     # TODO: Not completely pure pipeline
     return excepting_pipe(
         self._versions.values(),
         curry(filter)(lambda v: v.parent_id == ensure_uuid(dataset_id)),
         curry(sorted,
               key=lambda ds: getattr(ds, query['sortby']),
               reverse=query['order']), partition_versions(query), list,
         curry(get, default=[])(query['offset']))
Example #22
def reduction(x, chunk, aggregate, axis=None, keepdims=None, dtype=None):
    """ General version of reductions

    >>> reduction(my_array, np.sum, np.sum, axis=0, keepdims=False)  # doctest: +SKIP
    """
    if axis is None:
        axis = tuple(range(x.ndim))
    if isinstance(axis, int):
        axis = (axis,)

    chunk2 = partial(chunk, axis=axis, keepdims=True)
    aggregate2 = partial(aggregate, axis=axis, keepdims=keepdims)

    inds = tuple(range(x.ndim))
    tmp = atop(chunk2, next(names), inds, x, inds)

    inds2 = tuple(i for i in inds if i not in axis)

    result = atop(compose(aggregate2, curry(_concatenate2, axes=axis)),
                  next(names), inds2, tmp, inds, dtype=dtype)

    if keepdims:
        dsk = result.dask.copy()
        for k in flatten(result._keys()):
            k2 = (k[0],) + insert_many(k[1:], axis, 0)
            dsk[k2] = dsk.pop(k)
        blockdims = insert_many(result.blockdims, axis, [1])
        return Array(dsk, result.name, blockdims=blockdims, dtype=dtype)
    else:
        return result
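The shape of reduction is: run chunk on every block with keepdims=True, concatenate the per-block results along the reduced axes, then run aggregate once. The same decomposition with plain NumPy and no task graph:

import numpy as np

blocks = [np.arange(4), np.arange(4, 8)]              # two chunks of one array
chunked = [np.sum(b, keepdims=True) for b in blocks]  # per-chunk reduce
result = np.sum(np.concatenate(chunked))              # combine, then aggregate
assert result == np.arange(8).sum() == 28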
Example #23
def elemwise_array(expr, *data, **kwargs):
    leaves = expr._inputs
    expr_inds = tuple(range(ndim(expr)))[::-1]
    return atop(curry(compute_it, expr, leaves, **kwargs),
                expr_inds,
                *concat((dat, tuple(range(ndim(dat))[::-1])) for dat in data),
                dtype=expr.dshape.measure.to_numpy_dtype())
Example #24
 def get(self):
     # type: () -> PluginType
     """Return the currently active plugin."""
     if self._options:
         return curry(self._active, **self._options)
     else:
         return self._active
Example #25
def test_EqualityHashKey_default_key():
    EqualityHashDefault = curry(EqualityHashKey, None)
    L1 = [1]
    L2 = [2]
    data1 = [L1, L1, L2, [], [], [1], [2], {}, ()]
    set1 = set(map(EqualityHashDefault, data1))
    set2 = set(map(EqualityHashDefault, [[], [1], [2], {}, ()]))
    assert set1 == set2
    assert len(set1) == 5

    # Test that ``EqualityHashDefault(item)`` is distinct from ``item``
    T0 = ()
    T1 = (1,)
    data2 = list(map(EqualityHashDefault, [T0, T0, T1, T1, (), (1,)]))
    data2.extend([T0, T1, (), (1,)])
    set3 = set(data2)
    assert set3 == set([(), (1,), EqualityHashDefault(()),
                        EqualityHashDefault((1,))])
    assert len(set3) == 4
    assert EqualityHashDefault(()) in set3
    assert EqualityHashDefault((1,)) in set3

    # Miscellaneous
    E1 = EqualityHashDefault(L1)
    E2 = EqualityHashDefault(L2)
    assert str(E1) == '=[1]='
    assert repr(E1) == '=[1]='
    assert E1 != E2
    assert not (E1 == E2)
    assert E1 == EqualityHashDefault(L1)
    assert not (E1 != EqualityHashDefault(L1))
    assert E1 != L1
    assert not (E1 == L1)
Example #26
def main(haunted_place_file, cache_file, output_file):

    load_dotenv(find_dotenv())

    google_api_key = os.environ.get("GOOGLE_MAPS_API_KEY")

    geo_cache = set()
    if cache_file:
        geo_cache = load_geo_cache(csv.reader(cache_file))

    haunted_places = pd.read_csv(haunted_place_file)

    null_state = haunted_places.state.isnull()
    null_city = haunted_places.city.isnull()

    haunted_place_locations = haunted_places.loc[
        ~null_state & ~null_city,
        ['state','city']
    ].drop_duplicates()

    create_geo_request = curry(_create_geo_request)(google_api_key)

    writer = csv.writer(output_file)

    for _,row in haunted_place_locations.iterrows():
        # Skip if it's already in the cache.
        if (row["state"], row["city"]) in geo_cache: continue

        # Otherwise write the row.
        writer.writerow([
            row["state"],
            row["city"],
            create_geo_request(row["state"], row["city"])
        ])
Example #27
def reduction(x, chunk, aggregate, axis=None, keepdims=None, dtype=None):
    """ General version of reductions

    >>> reduction(my_array, np.sum, np.sum, axis=0, keepdims=False)  # doctest: +SKIP
    """
    if axis is None:
        axis = tuple(range(x.ndim))
    if isinstance(axis, int):
        axis = (axis, )

    chunk2 = partial(chunk, axis=axis, keepdims=True)
    aggregate2 = partial(aggregate, axis=axis, keepdims=keepdims)

    inds = tuple(range(x.ndim))
    tmp = atop(chunk2, next(names), inds, x, inds)

    inds2 = tuple(i for i in inds if i not in axis)

    result = atop(compose(aggregate2, curry(_concatenate2, axes=axis)),
                  next(names),
                  inds2,
                  tmp,
                  inds,
                  dtype=dtype)

    if keepdims:
        dsk = result.dask.copy()
        for k in flatten(result._keys()):
            k2 = (k[0], ) + insert_many(k[1:], axis, 0)
            dsk[k2] = dsk.pop(k)
        blockdims = insert_many(result.blockdims, axis, [1])
        return Array(dsk, result.name, blockdims=blockdims, dtype=dtype)
    else:
        return result
Example #28
def wrap_func_size_as_kwarg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' in kwargs and 'size' not in kwargs:
        kwargs['size'] = kwargs.pop('shape')
    if 'size' not in kwargs:
        args, size = args[:-1], args[-1]
    else:
        size = kwargs.pop('size')

    if not isinstance(size, (tuple, list)):
        size = (size,)

    blockshape = kwargs.pop('blockshape', None)
    blockdims = kwargs.pop('blockdims', None)
    name = kwargs.pop('name', None)
    if not blockdims and blockshape:
        blockdims = blockdims_from_blockshape(size, blockshape)

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in blockdims])
    sizes = product(*blockdims)
    if not kwargs:
        vals = ((func,) + args + (size,) for size in sizes)
    else:
        vals = ((curry(func, *args, size=size, **kwargs),) for size in sizes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, shape=size, blockdims=blockdims)
Example #29
 def draw_target(self):
     '''Usage: aimmat.draw_target()(center)(img)'''
     def draw(center, img):
         center = (int(center[0]), int(center[1]))
         cv2.circle(img, center, 5, (50, 200, 200), -1)
         return img
     return curry(draw)
Example #30
def load_all_users():
    ''' Returns a pd.DataFrame with the information of all the users'''
    map = tlz.curry(map)
    dataset = tlz.pipe(users, map(parse_exp03_filename), map(user_pipe),
                       accumulate_users)
    dataset.insert(0, 'user', sorted(users * 3))
    return dataset
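Shadowing the builtin map with tlz.curry(map) lets each mapping stage of tlz.pipe be written point-free. The pattern in isolation:

import toolz as tlz

cmap = tlz.curry(map)
out = tlz.pipe([1, 2, 3],
               cmap(lambda x: x + 1),
               cmap(lambda x: x * 10),
               list)
assert out == [20, 30, 40]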
Example #31
def compute_down(expr, data, chunksize=2**20, map=map, **kwargs):
    leaf = expr._leaves()[0]

    # If the bottom expression is a projection or field then want to do
    # compute_up first
    children = set(e for e in expr._traverse()
                   if isinstance(e, Expr)
                   and any(i is expr._leaves()[0] for i in e._inputs))
    if len(children) == 1 and isinstance(first(children), (Field, Projection)):
        raise NotImplementedError()

    chunk = symbol('chunk', chunksize * leaf.schema)
    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr, chunk=chunk)

    data_parts = partitions(data, chunksize=(chunksize,))

    parts = list(map(curry(compute_chunk, data, chunk, chunk_expr),
                     data_parts))

    if isinstance(parts[0], np.ndarray):
        intermediate = np.concatenate(parts)
    elif isinstance(parts[0], pd.DataFrame):
        intermediate = pd.concat(parts)
    elif isinstance(parts[0], Iterable):
        intermediate = list(concat(parts))
    else:
        raise TypeError(
            "Don't know how to concatenate objects of type %s" % type(parts[0]))

    return compute(agg_expr, {agg: intermediate})
Example #32
 def map(self, func):
     name = next(names)
     if takes_multiple_arguments(func):
         func = curry(apply, func)
     dsk = dict(((name, i), (list, (map, func, (self.name, i))))
                     for i in range(self.npartitions))
     return Bag(merge(self.dask, dsk), name, self.npartitions)
Example #33
def wrap_func_shape_as_first_arg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' not in kwargs:
        shape, args = args[0], args[1:]
    else:
        shape = kwargs.pop('shape')

    if not isinstance(shape, (tuple, list)):
        shape = (shape,)

    chunks = kwargs.pop('chunks', None)
    chunks = normalize_chunks(chunks, shape)
    name = kwargs.pop('name', None)

    dtype = kwargs.pop('dtype', None)
    if dtype is None:
        dtype = func(shape, *args, **kwargs).dtype

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in chunks])
    shapes = product(*chunks)
    func = curry(func, dtype=dtype, **kwargs)
    vals = ((func,) + (s,) + args for s in shapes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, chunks, dtype=dtype)
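The curry call near the end exists because every value of dsk is a task tuple (callable, shape): dtype and any remaining keywords must be baked into the callable so that the shape stays the only runtime argument. A sketch of one such task, with np.ones standing in for func:

import numpy as np
from toolz import curry

f = curry(np.ones, dtype=np.int32)
task = (f, (2, 3))               # the kind of value stored in the graph
out = task[0](task[1])
assert out.shape == (2, 3) and out.dtype == np.int32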
Example #34
def wrap_func_size_as_kwarg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' in kwargs and 'size' not in kwargs:
        kwargs['size'] = kwargs.pop('shape')
    if 'size' not in kwargs:
        args, size = args[:-1], args[-1]
    else:
        size = kwargs.pop('size')

    if not isinstance(size, (tuple, list)):
        size = (size, )

    blockshape = kwargs.pop('blockshape', None)
    blockdims = kwargs.pop('blockdims', None)
    name = kwargs.pop('name', None)
    if not blockdims and blockshape:
        blockdims = blockdims_from_blockshape(size, blockshape)

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in blockdims])
    sizes = product(*blockdims)
    if not kwargs:
        vals = ((func, ) + args + (size, ) for size in sizes)
    else:
        vals = ((curry(func, *args, size=size, **kwargs), ) for size in sizes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, shape=size, blockdims=blockdims)
Example #35
def use_with(function, transformers):
    """Accepts a function fn and a list of transformer functions and returns a
    new curried function. When the new function is invoked, it calls the
    function fn with parameters consisting of the result of calling each
    supplied handler on successive arguments to the new function.
    If more arguments are passed to the returned function than transformer
    functions, those arguments are passed directly to fn as additional
    parameters. If you expect additional arguments that don't need to be
    transformed, although you can ignore them, it's best to pass an identity
    function so that the new function reports the correct arity"""
    try:
        args = inspect.getfullargspec(function).args
    except TypeError:
        args = ["argument" + str(i) for i, x in enumerate(transformers)]

    F = {function.__name__: function}

    run = []
    for i, t in enumerate(transformers):
        F[t.__name__] = t
        try:
            args[i]
        except IndexError:
            args.append("argument" + str(i))

        run.append(t.__name__ + "(" + args[i] + ")")

    f = ("lambda " + ", ".join(args[:len(transformers)]) + ": " +
         function.__name__ + "(" + ",".join(run) + ")")
    return curry(eval(f, F))
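A usage sketch of use_with: each transformer is applied to the corresponding positional argument before fn sees it (the function names below are illustrative, not from the source):

def add(x, y):
    return x + y

def double(n):
    return n * 2

def square(n):
    return n * n

f = use_with(add, [double, square])
assert f(3, 4) == 22   # add(double(3), square(4)) == 6 + 16
assert f(3)(4) == 22   # the returned function is curried, so this works too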
Example #36
def compute_down(expr, data, chunksize=None, map=map, **kwargs):
    leaf = expr._leaves()[0]

    if chunksize is None:
        chunksize = get_chunksize(data)

    # If the bottom expression is a projection or field then want to do
    # compute_up first
    children = set(e for e in expr._traverse()
                   if isinstance(e, Expr) and any(i is expr._leaves()[0]
                                                  for i in e._inputs))
    if len(children) == 1 and isinstance(first(children), (Field, Projection)):
        raise MDNotImplementedError()

    chunk = symbol('chunk', chunksize * leaf.schema)
    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr, chunk=chunk)

    data_parts = partitions(data, chunksize=(chunksize, ))

    parts = list(map(curry(compute_chunk, data, chunk, chunk_expr),
                     data_parts))

    if isinstance(parts[0], np.ndarray):
        intermediate = np.concatenate(parts)
    elif isinstance(parts[0], pd.DataFrame):
        intermediate = pd.concat(parts)
    elif isinstance(parts[0], Iterable):
        intermediate = list(concat(parts))
    else:
        raise TypeError("Don't know how to concatenate objects of type %r" %
                        type(parts[0]).__name__)

    return compute(agg_expr, {agg: intermediate})
Example #37
 def __init__(self, filename, *, label_only=False, **pdr_kwargs):
     # default values for attributes hopefully set by PDR
     self.LABEL = {}
     self.pointers = []
     if not label_only:
         super().__init__(filename, **pdr_kwargs)
     else:
         # this might be a messy option, but implementing it now for
         # convenience. the label_only flag allows this class to be used
         # as _just_ a label reader / converter
         setattr(self, "filename", filename)
         # Try PDS3 options
         setattr(self, "LABEL", pdr.read_label(filename))
         setattr(
             self,
             "pointers",
             [k for k in self.LABEL.keys() if k[0] == "^"],
         )
         _ = [
             setattr(
                 self,
                 pointer[1:] if pointer.startswith("^") else pointer,
                 pdr.WORKING_POINTER_TO_FUNCTION_MAP[pointer](filename,
                                                              self.LABEL),
             ) for pointer in self.pointers
         ]
     setattr(self, "convert_label", curry(_convert_label)(self))
Example #38
def curry_pipeline_task(func, spec):
    return curry(
        func, **{
            REQUIRE: spec[REQUIRE],
            OUTPUT: spec[OUTPUT],
            'config': spec.get('config', {})
        })
Example #40
def create_geocoder(city_file):
    """ Creates a geocoder function for cities that takes a city name and region
        and returns the latitude and longitude.
    """
    reader = DictReader(city_file)

    # Create a blank hash to load.
    # (state_iso,city_name) => (lat, lon, records)
    # state/city collisions are resolved by whichever one has the most records.
    # Not 100% that's the right call but it's a start.
    geocoder_hash = {}

    for row in reader:
        row_key = (
            row["country_iso_code"].lower(),
            row["city_name"].lower(),
        )

        if (row_key not in geocoder_hash) or (int(row["num_blocks"]) >
                                              geocoder_hash[row_key][2]):
            geocoder_hash[row_key] = (
                float(row["latitude"]),
                float(row["longitude"]),
                int(row["num_blocks"]),
            )
            print(row_key)
            print(geocoder_hash[row_key])

    # Bind the geocoder hash to the geocoder template.
    return curry(_geocoder_template)(geocoder_hash)
Example #41
def wrap_func_size_as_kwarg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' in kwargs and 'size' not in kwargs:
        kwargs['size'] = kwargs.pop('shape')
    if 'size' not in kwargs:
        args, size = args[:-1], args[-1]
    else:
        size = kwargs.pop('size')

    if not isinstance(size, (tuple, list)):
        size = (size,)

    chunks = kwargs.pop('chunks', None)
    chunks = normalize_chunks(chunks, size)
    name = kwargs.pop('name', None)

    dtype = kwargs.pop('dtype', None)
    if dtype is None:
        kw = kwargs.copy(); kw['size'] = (0,)
        dtype = func(*args, **kw).dtype

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in chunks])
    sizes = product(*chunks)
    if not kwargs:
        vals = ((func,) + args + (size,) for size in sizes)
    else:
        vals = ((curry(func, *args, size=size, **kwargs),) for size in sizes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, chunks, dtype=dtype)
Example #42
def test_EqualityHashKey_default_key():
    EqualityHashDefault = curry(EqualityHashKey, None)
    L1 = [1]
    L2 = [2]
    data1 = [L1, L1, L2, [], [], [1], [2], {}, ()]
    set1 = set(map(EqualityHashDefault, data1))
    set2 = set(map(EqualityHashDefault, [[], [1], [2], {}, ()]))
    assert set1 == set2
    assert len(set1) == 5

    # Test that ``EqualityHashDefault(item)`` is distinct from ``item``
    T0 = ()
    T1 = (1, )
    data2 = list(map(EqualityHashDefault, [T0, T0, T1, T1, (), (1, )]))
    data2.extend([T0, T1, (), (1, )])
    set3 = set(data2)
    assert set3 == set([(), (1, ),
                        EqualityHashDefault(()),
                        EqualityHashDefault((1, ))])
    assert len(set3) == 4
    assert EqualityHashDefault(()) in set3
    assert EqualityHashDefault((1, )) in set3

    # Miscellaneous
    E1 = EqualityHashDefault(L1)
    E2 = EqualityHashDefault(L2)
    assert str(E1) == '=[1]='
    assert repr(E1) == '=[1]='
    assert E1 != E2
    assert not (E1 == E2)
    assert E1 == EqualityHashDefault(L1)
    assert not (E1 != EqualityHashDefault(L1))
    assert E1 != L1
    assert not (E1 == L1)
Example #44
def wrap_func_shape_as_first_arg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' not in kwargs:
        shape, args = args[0], args[1:]
    else:
        shape = kwargs.pop('shape')

    dtype = kwargs.pop('dtype', None)

    if not isinstance(shape, (tuple, list)):
        shape = (shape, )

    blockshape = kwargs.pop('blockshape', None)
    blockdims = kwargs.pop('blockdims', None)

    name = kwargs.pop('name', None)
    if not blockdims and blockshape:
        blockdims = blockdims_from_blockshape(shape, blockshape)

    if dtype is None:
        dtype = func(shape, *args, **kwargs).dtype

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in blockdims])
    shapes = product(*blockdims)
    func = curry(func, dtype=dtype, **kwargs)
    vals = ((func, ) + (s, ) + args for s in shapes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, shape=shape, blockdims=blockdims, dtype=dtype)
Example #45
def nd2_reader(path):
    """Take a path or list of paths and return a list of LayerData tuples.

    Readers are expected to return data as a list of tuples, where each tuple
    is (data, [add_kwargs, [layer_type]]), "add_kwargs" and "layer_type" are
    both optional.

    Parameters
    ----------
    path : str or list of str
        Path to file, or list of paths.

    Returns
    -------
    layer_data : list of tuples
        A list of LayerData tuples where each tuple in the list contains
        (data, metadata, layer_type), where data is a numpy array, metadata is
        a dict of keyword arguments for the corresponding viewer.add_* method
        in napari, and layer_type is a lower-case string naming the type of layer.
        Both "meta", and "layer_type" are optional. napari will default to
        layer_type=="image" if not provided
    """
    with ND2Reader(path) as nd2_data:
        channels = nd2_data.metadata['channels']
        n_timepoints = nd2_data.sizes['t']
        z_depth = nd2_data.sizes['z']
        frame_shape = (z_depth, *nd2_data.frame_shape)
        frame_dtype = nd2_data._dtype
        nd2vol = tz.curry(get_nd2reader_nd2_vol)
        layer_list = get_layer_list(channels, nd2vol, path, frame_shape,
                                    frame_dtype, n_timepoints)

    return layer_list
Example #46
def elemwise_array(expr, *data, **kwargs):
    leaves = expr._inputs
    expr_inds = tuple(range(ndim(expr)))[::-1]
    return atop(
        curry(compute_it, expr, leaves, **kwargs),
        expr_inds,
        *concat((dat, tuple(range(ndim(dat))[::-1])) for dat in data)
    )
Example #47
File: sql.py Project: Will-So/odo
    def rowterator(sel, chunksize=chunksize):
        with sel.bind.connect() as conn:
            result = conn.execute(sel)
            yield result.keys()

            for rows in iter_except(curry(result.fetchmany, size=chunksize),
                                    sa.exc.ResourceClosedError):
                yield rows
Example #48
def pickle_apply_async(apply_async, func, args=(), kwds={},
                       func_loads=None, func_dumps=None):
    dumps = func_dumps or _globals.get('func_dumps') or _dumps
    sfunc = dumps(func)
    sargs = dumps(args)
    skwds = dumps(kwds)
    return apply_async(curry(apply_func, loads=func_loads),
                       args=[sfunc, sargs, skwds])
Example #49
def test_EqualityHashKey_index_key():
    d1 = {'firstname': 'Alice', 'age': 21, 'data': {}}
    d2 = {'firstname': 'Alice', 'age': 34, 'data': {}}
    d3a = {'firstname': 'Bob', 'age': 56, 'data': {}}
    d3b = {'firstname': 'Bob', 'age': 56, 'data': {}}
    EqualityHashFirstname = curry(EqualityHashKey, 'firstname')
    assert list(unique(3*[d1, d2, d3a, d3b],
                       key=EqualityHashFirstname)) == [d1, d2, d3a]
    EqualityHashFirstnameAge = curry(EqualityHashKey, ['firstname', 'age'])
    assert list(unique(3*[d1, d2, d3a, d3b],
                       key=EqualityHashFirstnameAge)) == [d1, d2, d3a]
    list1 = [0] * 10
    list2 = [0] * 100
    list3a = [1] * 10
    list3b = [1] * 10
    EqualityHash0 = curry(EqualityHashKey, 0)
    assert list(unique(3*[list1, list2, list3a, list3b],
                       key=EqualityHash0)) == [list1, list2, list3a]
Example #50
 def rowiterator(sel, chunksize=chunksize):
     with getbind(sel, bind).connect() as conn:
         result = conn.execute(sel)
         for rows in iter_except(curry(result.fetchmany, size=chunksize),
                                 sa.exc.ResourceClosedError):
             if rows:
                 yield rows
             else:
                 return
Example #51
def elemwise_array(expr, *data, **kwargs):
    leaves = expr._inputs
    expr_inds = tuple(range(ndim(expr)))[::-1]
    return atop(
        curry(compute_it, expr, leaves, **kwargs),
        expr_inds,
        *concat((dat, tuple(range(ndim(dat))[::-1])) for dat in data),
        dtype=expr.dshape.measure.to_numpy_dtype()
    )
Example #52
def append_text_to_s3(s3, data, multipart=False, part_size=5 << 20, **kwargs):
    if multipart:
        with start_multipart_upload_operation(s3) as multipart_upload:
            with open(data.path, 'rb') as f:
                for part_number, part in enumerate(iter(curry(f.read, part_size), b''), start=1):
                    multipart_upload.upload_part_from_file(BytesIO(part), part_num=part_number)
        return s3

    s3.object.set_contents_from_filename(data.path)
    return s3
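iter(callable, sentinel) calls a zero-argument callable until the sentinel comes back, and curry(f.read, part_size) supplies that callable. A self-contained Python 3 sketch (note that a binary stream needs the bytes sentinel b''):

from io import BytesIO
from toolz import curry

f = BytesIO(b'abcdefgh')
chunks = list(iter(curry(f.read, 3), b''))
assert chunks == [b'abc', b'def', b'gh']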
Example #53
 def __call__(cls, *args, **kwargs):
     """This is where the currying magic occurs. The __call__ in a metaclass
     is analogous to __new__ in a regular class.
     """
     @wraps(cls, updated=[])
     def currier(*a, **k):
         return super(Curried, cls).__call__(*a, **k)
     # there's odd behavior when composed with other
     # metaclasses if done as one function call...
     return curry(currier)(*args, **kwargs)
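A sketch of what this metaclass enables: instances can be constructed one argument at a time. The class name Point is hypothetical, and the sketch assumes toolz's curry treats the variadic currier as curryable (which it does for *args signatures):

from functools import wraps
from toolz import curry

class Curried(type):
    def __call__(cls, *args, **kwargs):
        @wraps(cls, updated=[])
        def currier(*a, **k):
            return super(Curried, cls).__call__(*a, **k)
        return curry(currier)(*args, **kwargs)

class Point(metaclass=Curried):
    def __init__(self, x, y):
        self.x, self.y = x, y

p = Point(1)(2)               # partial construction, then completion
assert (p.x, p.y) == (1, 2)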
Example #54
 def topk(self, k, key=None):
     a = next(names)
     b = next(names)
     if key:
         topk = curry(heapq.nlargest, key=key)
     else:
         topk = heapq.nlargest
     dsk = dict(((a, i), (list, (topk, k, (self.name, i))))
                     for i in range(self.npartitions))
     dsk2 = {(b, 0): (list, (topk, k, (concat, list(dsk.keys()))))}
     return Bag(merge(self.dask, dsk, dsk2), b, 1)
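curry(heapq.nlargest, key=key) pins only the keyword argument, leaving n and the iterable to be supplied later, much like functools.partial would:

import heapq
from toolz import curry

top_by_abs = curry(heapq.nlargest, key=abs)
assert top_by_abs(2, [-10, 3, 7]) == [-10, 7]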
Example #55
    def distinct(self):
        """ Distinct elements of collection

        Unordered without repeats.

        >>> b = from_sequence(['Alice', 'Bob', 'Alice'])
        >>> sorted(b.distinct())
        ['Alice', 'Bob']
        """
        return self.reduction(set, curry(apply, set.union), out_type=Bag,
                              name='distinct')
Example #56
def test_introspect_curry_valid_py3(check_valid=is_valid_args, incomplete=False):
    if not PY3:
        return
    orig_check_valid = check_valid
    check_valid = lambda _func, *args, **kwargs: orig_check_valid(_func, args, kwargs)

    f = toolz.curry(make_func('x, y, z=0'))
    assert check_valid(f)
    assert check_valid(f, 1)
    assert check_valid(f, 1, 2)
    assert check_valid(f, 1, 2, 3)
    assert check_valid(f, 1, 2, 3, 4) is False
    assert check_valid(f, invalid_keyword=True) is False
    assert check_valid(f(1))
    assert check_valid(f(1), 2)
    assert check_valid(f(1), 2, 3)
    assert check_valid(f(1), 2, 3, 4) is False
    assert check_valid(f(1), x=2) is False
    assert check_valid(f(1), y=2)
    assert check_valid(f(x=1), 2) is False
    assert check_valid(f(x=1), y=2)
    assert check_valid(f(y=2), 1)
    assert check_valid(f(y=2), 1, z=3)
    assert check_valid(f(y=2), 1, 3) is False

    f = toolz.curry(make_func('x, y, z=0'), 1, x=1)
    assert check_valid(f) is False
    assert check_valid(f, z=3) is False

    f = toolz.curry(make_func('x, y, *args, z'))
    assert check_valid(f)
    assert check_valid(f, 0)
    assert check_valid(f(1), 0)
    assert check_valid(f(1, 2), 0)
    assert check_valid(f(1, 2, 3), 0)
    assert check_valid(f(1, 2, 3, 4), 0)
    assert check_valid(f(1, 2, 3, 4), z=4)
    assert check_valid(f(x=1))
    assert check_valid(f(x=1), 1) is False
    assert check_valid(f(x=1), y=2)
Example #57
def wrap(wrap_func, func, **kwargs):
    f = curry(wrap_func, func, **kwargs)
    f.__doc__ = """
    Blocked variant of %(name)s

    Follows the signature of %(name)s exactly except that it also requires a
    keyword argument chunks=(...)

    Original signature follows below.
    """ % {'name': func.__name__} + func.__doc__

    f.__name__ = 'blocked_' + func.__name__
    return f
Example #58
File: core.py Project: esc/dask
    def map(self, func):
        """ Map a function across all elements in collection

        >>> import dask.bag as db
        >>> b = db.from_sequence(range(5))
        >>> list(b.map(lambda x: x * 10))  # doctest: +SKIP
        [0, 10, 20, 30, 40]
        """
        name = next(names)
        if takes_multiple_arguments(func):
            func = curry(apply, func)
        dsk = dict(((name, i), (list, (map, func, (self.name, i))))
                        for i in range(self.npartitions))
        return Bag(merge(self.dask, dsk), name, self.npartitions)
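When func takes multiple arguments, curry(apply, func) yields a single-argument callable that unpacks each element (a tuple) before calling func; apply here is assumed to behave like the stand-in below:

from toolz import curry

def apply(f, args):        # stand-in for the apply helper used above
    return f(*args)

def add(x, y):
    return x + y

g = curry(apply, add)
assert g((1, 2)) == 3      # each element (1, 2) is unpacked into add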
Example #59
def compute_down(expr, data, map=map, **kwargs):
    leaf = expr._leaves()[0]

    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr)

    parts = list(map(curry(compute_chunk, chunk, chunk_expr), data))

    if isinstance(parts[0], np.ndarray):
        intermediate = np.concatenate(parts)
    elif isinstance(parts[0], pd.DataFrame):
        intermediate = pd.concat(parts)
    elif isinstance(parts[0], (Iterable, Iterator)):
        intermediate = concat(parts)

    return compute(agg_expr, {agg: intermediate})
Example #60
def pickle_apply_async(apply_async, func, args=(),
                       func_loads=None, func_dumps=None):
    # XXX: To deal with deserialization errors of tasks, this version of
    # apply_async doesn't actually match that of `pool.apply_async`. It's
    # customized to fit the signature of `dask.async.execute_task`, which is
    # the only function ever actually passed as `func`. This is a bit of a
    # hack, but it works pretty well. If the signature of `execute_task`
    # changes, then this will need to be changed as well.
    dumps = func_dumps or _globals.get('func_dumps') or _dumps
    key, task, data, queue, get_id, raise_on_exception = args
    sfunc = dumps(func)
    may_fail = dumps((task, data))
    wont_fail = dumps((key, queue, get_id, raise_on_exception))
    return apply_async(curry(apply_func, loads=func_loads),
                       args=[sfunc, may_fail, wont_fail])