Exemplo n.º 1
0
def test_EqualityHashKey_default_key():
    EqualityHashDefault = curry(EqualityHashKey, None)
    L1 = [1]
    L2 = [2]
    data1 = [L1, L1, L2, [], [], [1], [2], {}, ()]
    set1 = set(map(EqualityHashDefault, data1))
    set2 = set(map(EqualityHashDefault, [[], [1], [2], {}, ()]))
    assert set1 == set2
    assert len(set1) == 5

    # Test that ``EqualityHashDefault(item)`` is distinct from ``item``
    T0 = ()
    T1 = (1, )
    data2 = list(map(EqualityHashDefault, [T0, T0, T1, T1, (), (1, )]))
    data2.extend([T0, T1, (), (1, )])
    set3 = set(data2)
    assert set3 == {(), (1, ),
                    EqualityHashDefault(()),
                    EqualityHashDefault((1, ))}
    assert len(set3) == 4
    assert EqualityHashDefault(()) in set3
    assert EqualityHashDefault((1, )) in set3

    # Miscellaneous
    E1 = EqualityHashDefault(L1)
    E2 = EqualityHashDefault(L2)
    assert str(E1) == '=[1]='
    assert repr(E1) == '=[1]='
    assert E1 != E2
    assert not (E1 == E2)
    assert E1 == EqualityHashDefault(L1)
    assert not (E1 != EqualityHashDefault(L1))
    assert E1 != L1
    assert not (E1 == L1)
Exemplo n.º 2
0
def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None):
    """
    Reduce without guarantee of ordered reduction.

    inputs:

    ``binop``     - associative operator. The associative property allows us to
                    leverage a parallel map to perform reductions in parallel.
    ``seq``       - a sequence to be aggregated
    ``default``   - an identity element like 0 for ``add`` or 1 for mul

    ``map``       - an implementation of ``map``. This may be parallel and
                    determines how work is distributed.
    ``chunksize`` - Number of elements of ``seq`` that should be handled
                    within a single function call
    ``combine``   - Binary operator to combine two intermediate results.
                    If ``binop`` is of type (total, item) -> total
                    then ``combine`` is of type (total, total) -> total
                    Defaults to ``binop`` for common case of operators like add

    Fold chunks up the collection into blocks of size ``chunksize`` and then
    feeds each of these to calls to ``reduce``. This work is distributed
    with a call to ``map``, gathered back and then refolded to finish the
    computation. In this way ``fold`` specifies only how to chunk up data but
    leaves the distribution of this work to an externally provided ``map``
    function. This function can be sequential or rely on multithreading,
    multiprocessing, or even distributed solutions.

    If ``map`` intends to serialize functions it should be prepared to accept
    and serialize lambdas. Note that the standard ``pickle`` module fails
    here.

    Example
    -------

    >>> # Provide a parallel map to accomplish a parallel sum
    >>> from operator import add
    >>> fold(add, [1, 2, 3, 4], chunksize=2, map=map)
    10
    """
    if combine is None:
        combine = binop

    chunks = partition_all(chunksize, seq)

    # Evaluate sequence in chunks via map
    if default == no_default:
        results = map(lambda chunk: reduce(binop, chunk), chunks)
    else:
        results = map(lambda chunk: reduce(binop, chunk, default), chunks)

    results = list(results)  # TODO: Support complete laziness

    if len(results) == 1:  # Return completed result
        return results[0]
    else:  # Recurse to reaggregate intermediate results
        return fold(combine, results, map=map, chunksize=chunksize)
Exemplo n.º 3
0
def pluck(ind, seqs, default=no_default):
    """ plucks an element or several elements from each item in a sequence.

    ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more
    elements of each item in the sequence.

    This is equivalent to running `map(curried.get(ind), seqs)`

    ``ind`` can be either a single string/index or a list of strings/indices.
    ``seqs`` should be sequence containing sequences or dicts.

    e.g.

    >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}]
    >>> list(pluck('name', data))
    ['Cheese', 'Pies']
    >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]]))
    [(1, 2), (4, 5)]

    See Also:
        get
        map
    """
    if default == no_default:
        get = getter(ind)
        return map(get, seqs)
    elif isinstance(ind, list):
        return (tuple(_get(item, seq, default) for item in ind)
                for seq in seqs)
    return (_get(ind, seq, default) for seq in seqs)
Exemplo n.º 4
0
    def _to_lists(seq, n=10):
        """iter of iters -> finite list of finite lists
        """
        def initial(s):
            return list(take(n, s))

        return initial(map(initial, seq))
Exemplo n.º 5
0
def mapcat(func, seqs):
    """ Apply func to each sequence in seqs, concatenating results.

    >>> list(mapcat(lambda s: [c.upper() for c in s],
    ...             [["a", "b"], ["c", "d", "e"]]))
    ['A', 'B', 'C', 'D', 'E']
    """
    return concat(map(func, seqs))
Exemplo n.º 6
0
def interleave(seqs):
    """ Interleave a sequence of sequences

    >>> list(interleave([[1, 2], [3, 4]]))
    [1, 3, 2, 4]

    >>> ''.join(interleave(('ABC', 'XY')))
    'AXBYC'

    Both the individual sequences and the sequence of sequences may be infinite

    Returns a lazy iterator
    """
    iters = itertools.cycle(map(iter, seqs))
    while True:
        try:
            for itr in iters:
                yield next(itr)
            return
        except StopIteration:
            predicate = partial(operator.is_not, itr)
            iters = itertools.cycle(itertools.takewhile(predicate, iters))
Exemplo n.º 7
0
def diff(*seqs, **kwargs):
    """ Return those items that differ between sequences

    >>> list(diff([1, 2, 3], [1, 2, 10, 100]))
    [(3, 10)]

    Shorter sequences may be padded with a ``default`` value:

    >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None))
    [(3, 10), (None, 100)]

    A ``key`` function may also be applied to each item to use during
    comparisons:

    >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower))
    [('bananas', 'Oranges')]
    """
    N = len(seqs)
    if N == 1 and isinstance(seqs[0], list):
        seqs = seqs[0]
        N = len(seqs)
    if N < 2:
        raise TypeError('Too few sequences given (min 2 required)')
    default = kwargs.get('default', no_default)
    if default == no_default:
        iters = zip(*seqs)
    else:
        iters = zip_longest(*seqs, fillvalue=default)
    key = kwargs.get('key', None)
    if key is None:
        for items in iters:
            if items.count(items[0]) != N:
                yield items
    else:
        for items in iters:
            vals = tuple(map(key, items))
            if vals.count(vals[0]) != N:
                yield items
Exemplo n.º 8
0
def test_map_filter_are_lazy():
    def bad(x):
        raise Exception()

    map(bad, [1, 2, 3])
    filter(bad, [1, 2, 3])