示例#1
0
文件: parallel.py 项目: 183amir/toolz
def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None):
    """
    Reduce without guarantee of ordered reduction.

    inputs:

    ``binop``     - associative operator. The associative property allows us to
                    leverage a parallel map to perform reductions in parallel.
    ``seq``       - a sequence to be aggregated
    ``default``   - an identity element like 0 for ``add`` or 1 for mul

    ``map``       - an implementation of ``map``. This may be parallel and
                    determines how work is distributed.
    ``chunksize`` - Number of elements of ``seq`` that should be handled
                    within a single function call
    ``combine``   - Binary operator to combine two intermediate results.
                    If ``binop`` is of type (total, item) -> total
                    then ``combine`` is of type (total, total) -> total
                    Defaults to ``binop`` for common case of operators like add

    Fold chunks up the collection into blocks of size ``chunksize`` and then
    feeds each of these to calls to ``reduce``. This work is distributed
    with a call to ``map``, gathered back and then refolded to finish the
    computation. In this way ``fold`` specifies only how to chunk up data but
    leaves the distribution of this work to an externally provided ``map``
    function. This function can be sequential or rely on multithreading,
    multiprocessing, or even distributed solutions.

    If ``map`` intends to serialize functions it should be prepared to accept
    and serialize lambdas. Note that the standard ``pickle`` module fails
    here.

    Example
    -------

    >>> # Provide a parallel map to accomplish a parallel sum
    >>> from operator import add
    >>> fold(add, [1, 2, 3, 4], chunksize=2, map=map)
    10
    """
    if combine is None:
        combine = binop

    chunks = partition_all(chunksize, seq)

    # Evaluate sequence in chunks via map
    if default is no_default:
        results = map(lambda chunk: reduce(binop, chunk), chunks)
    else:
        results = map(lambda chunk: reduce(binop, chunk, default), chunks)

    results = list(results)  # TODO: Support complete laziness

    if len(results) == 1:    # Return completed result
        return results[0]
    else:                    # Recurse to reaggregate intermediate results
        return fold(combine, results, map=map, chunksize=chunksize)
示例#2
0
def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None):
    """
    Reduce without guarantee of ordered reduction.

    inputs:

    ``binop``     - associative operator. The associative property allows us to
                    leverage a parallel map to perform reductions in parallel.
    ``seq``       - a sequence to be aggregated
    ``default``   - an identity element like 0 for ``add`` or 1 for mul

    ``map``       - an implementation of ``map``. This may be parallel and
                    determines how work is distributed.
    ``chunksize`` - Number of elements of ``seq`` that should be handled
                    within a single function call
    ``combine``   - Binary operator to combine two intermediate results.
                    If ``binop`` is of type (total, item) -> total
                    then ``combine`` is of type (total, total) -> total
                    Defaults to ``binop`` for common case of operators like add

    Fold chunks up the collection into blocks of size ``chunksize`` and then
    feeds each of these to calls to ``reduce``. This work is distributed
    with a call to ``map``, gathered back and then refolded to finish the
    computation. In this way ``fold`` specifies only how to chunk up data but
    leaves the distribution of this work to an externally provided ``map``
    function. This function can be sequential or rely on multithreading,
    multiprocessing, or even distributed solutions.

    If ``map`` intends to serialize functions it should be prepared to accept
    and serialize lambdas. Note that the standard ``pickle`` module fails
    here.

    Example
    -------

    >>> # Provide a parallel map to accomplish a parallel sum
    >>> from operator import add
    >>> fold(add, [1, 2, 3, 4], chunksize=2, map=map)
    10
    """
    if combine is None:
        combine = binop

    chunks = partition_all(chunksize, seq)

    # Evaluate sequence in chunks via map
    if default is no_default:
        results = map(lambda chunk: reduce(binop, chunk), chunks)
    else:
        results = map(lambda chunk: reduce(binop, chunk, default), chunks)

    results = list(results)  # TODO: Support complete laziness

    if len(results) == 1:  # Return completed result
        return results[0]
    else:  # Recurse to reaggregate intermediate results
        return fold(combine, results, map=map, chunksize=chunksize)
示例#3
0
def get_in(keys, coll, default=None, no_default=False):
    """
    Returns coll[i0][i1]...[iX] where [i0, i1, ..., iX]==keys.

    If coll[i0][i1]...[iX] cannot be found, returns ``default``, unless
    ``no_default`` is specified, then it raises KeyError or IndexError.

    ``get_in`` is a generalization of ``operator.getitem`` for nested data
    structures such as dictionaries and lists.

    >>> transaction = {'name': 'Alice',
    ...                'purchase': {'items': ['Apple', 'Orange'],
    ...                             'costs': [0.50, 1.25]},
    ...                'credit card': '5555-1234-1234-1234'}
    >>> get_in(['purchase', 'items', 0], transaction)
    'Apple'
    >>> get_in(['name'], transaction)
    'Alice'
    >>> get_in(['purchase', 'total'], transaction)
    >>> get_in(['purchase', 'items', 'apple'], transaction)
    >>> get_in(['purchase', 'items', 10], transaction)
    >>> get_in(['purchase', 'total'], transaction, 0)
    0
    >>> get_in(['y'], {}, no_default=True)  # doctest: +ELLIPSIS
    Traceback (most recent call last):
        ...
    KeyError: 'y'

    >>> class Person():
    ...     def __init__(self, age):
    ...         self.age = age
    >>> class Age():
    ...     def __init__(self, years, days):
    ...         self.years = years
    ...         self.days = days
    >>> alice = Person(Age(30, 100))
    >>> get_in(['age', 'days'], alice)
    100
    >>> get_in(['age', 'hours'], alice)
    >>> get_in(['age', 'hours'], alice, 4)
    4
    >>> get_in(['occupation'], alice, no_default=True)
    Traceback (most recent call last):
        ...
    AttributeError: Person instance has no attribute 'occupation'

    See Also:
        itertoolz.get
        operator.getitem
    """
    def get(d, key):
        return d[key] if hasattr(d, '__getitem__') else getattr(d, key)

    try:
        return reduce(get, keys, coll)
    except (KeyError, IndexError, AttributeError, TypeError):
        if no_default:
            raise
        return default
示例#4
0
文件: core.py 项目: whilo/toolz
def select_keys(d, keys):
    """
    Returns a new dictionary containing only specified keys.

    The collection keys is converted to a set, so any compatible collection will work.

    >>> select_keys({'a':1,'b':2}, ['a'])
    {'a': 1}
    """
    def mutable_assoc(nd,k):
        nd[k] = d[k]
        return nd

    isec = set(d).intersection(set(keys))
    return reduce(mutable_assoc, isec, {})
示例#5
0
def get_in(keys, coll, default=None, no_default=False):
    """ Returns coll[i0][i1]...[iX] where [i0, i1, ..., iX]==keys.

    If coll[i0][i1]...[iX] cannot be found, returns ``default``, unless
    ``no_default`` is specified, then it raises KeyError or IndexError.

    ``get_in`` is a generalization of ``operator.getitem`` for nested data
    structures such as dictionaries and lists.

    >>> transaction = {'name': 'Alice',
    ...                'purchase': {'items': ['Apple', 'Orange'],
    ...                             'costs': [0.50, 1.25]},
    ...                'credit card': '5555-1234-1234-1234'}
    >>> get_in(['purchase', 'items', 0], transaction)
    'Apple'
    >>> get_in(['name'], transaction)
    'Alice'
    >>> get_in(['purchase', 'total'], transaction)
    >>> get_in(['purchase', 'items', 'apple'], transaction)
    >>> get_in(['purchase', 'items', 10], transaction)
    >>> get_in(['purchase', 'total'], transaction, 0)
    0
    >>> get_in(['y'], {}, no_default=True)
    Traceback (most recent call last):
        ...
    KeyError: 'y'

    >>> class C:
    ...     def __init__(self, x):
    ...         self.x = x
    >>> a = C(C(1))
    >>> get_in(['x', 'x'], a)
    1
    >>> get_in(['x', 'b'], a, 2)
    2

    See Also:
        itertoolz.get
        operator.getitem
    """
    reducer = flip(
        partial(get, default=(utils.no_default if no_default else default)))
    return reduce(reducer, keys, coll)
示例#6
0
def get_in(keys, coll, default=None, no_default=False):
    """
    Returns coll[i0][i1]...[iX] where [i0, i1, ..., iX]==keys.

    If coll[i0][i1]...[iX] cannot be found, returns ``default``, unless
    ``no_default`` is specified, then it raises KeyError or IndexError.

    ``get_in`` is a generalization of ``operator.getitem`` for nested data
    structures such as dictionaries and lists.

    >>> transaction = {'name': 'Alice',
    ...                'purchase': {'items': ['Apple', 'Orange'],
    ...                             'costs': [0.50, 1.25]},
    ...                'credit card': '5555-1234-1234-1234'}
    >>> get_in(['purchase', 'items', 0], transaction)
    'Apple'
    >>> get_in(['name'], transaction)
    'Alice'
    >>> get_in(['purchase', 'total'], transaction)
    >>> get_in(['purchase', 'items', 'apple'], transaction)
    >>> get_in(['purchase', 'items', 10], transaction)
    >>> get_in(['purchase', 'total'], transaction, 0)
    0
    >>> get_in(['y'], {}, no_default=True)
    Traceback (most recent call last):
        ...
    KeyError: 'y'

    See Also:
        itertoolz.get
        operator.getitem
    """
    try:
        return reduce(operator.getitem, keys, coll)
    except (KeyError, IndexError, TypeError):
        if no_default:
            raise
        return default
示例#7
0
文件: core.py 项目: joyrexus/toolz
def get_in(keys, coll, default=None, no_default=False):
    """
    Returns coll[i0][i1]...[iX] where [i0, i1, ..., iX]==keys.

    If coll[i0][i1]...[iX] cannot be found, returns ``default``, unless
    ``no_default`` is specified, then it raises KeyError or IndexError.

    ``get_in`` is a generalization of ``operator.getitem`` for nested data
    structures such as dictionaries and lists.

    >>> transaction = {'name': 'Alice',
    ...                'purchase': {'items': ['Apple', 'Orange'],
    ...                             'costs': [0.50, 1.25]},
    ...                'credit card': '5555-1234-1234-1234'}
    >>> get_in(['purchase', 'items', 0], transaction)
    'Apple'
    >>> get_in(['name'], transaction)
    'Alice'
    >>> get_in(['purchase', 'total'], transaction)
    >>> get_in(['purchase', 'items', 'apple'], transaction)
    >>> get_in(['purchase', 'items', 10], transaction)
    >>> get_in(['purchase', 'total'], transaction, 0)
    0
    >>> get_in(['y'], {}, no_default=True)
    Traceback (most recent call last):
        ...
    KeyError: 'y'

    See Also:
        itertoolz.get
        operator.getitem
    """
    try:
        return reduce(operator.getitem, keys, coll)
    except (KeyError, IndexError, TypeError):
        if no_default:
            raise
        return default