示例#1
0
 def first(self):
     """
     Return the first element of the array as a local bolt array.

     The underlying records are sorted by key before taking the
     first one, unless the array is already flagged as ordered.
     """
     from bolt.local.array import BoltArrayLocal
     if self._ordered:
         records = self._rdd
     else:
         records = self._rdd.sortByKey()
     head = records.values().first()
     return BoltArrayLocal(head)
示例#2
0
    def reduce(self, func, axis=(0, )):
        """
        Reduce an array along an axis.

        Applies a function of two arguments cumulatively
        to all arrays along an axis.

        Parameters
        ----------
        func : function
            Function of two arrays that returns a single array

        axis : tuple or int, optional, default=(0,)
            Axis or multiple axes to reduce along.

        Returns
        -------
        BoltArrayLocal, or a scalar when the reduction produces a
        non-array value or a one-dimensional array with one element
        """
        from bolt.local.array import BoltArrayLocal
        from numpy import ndarray

        aligned = self._align(tupleize(axis))
        result = aligned._rdd.values().reduce(func)

        # func may hand back a plain scalar rather than an ndarray
        if not isinstance(result, ndarray):
            return result

        # a one-element 1-d array is unwrapped to the value it holds
        if result.shape == (1, ):
            return result[0]

        return BoltArrayLocal(result)
示例#3
0
    def _stat(self, axis=None, func=None, name=None):
        """
        Compute a statistic over an axis.

        Exactly one of ``func`` or ``name`` must be set: a function is
        applied through a reduce, whereas a name is read off a stat
        counter that is built per partition and then merged.

        Parameters
        ----------
        axis : tuple or int, optional, default=None
            Axis to compute statistic over, if None
            will compute over all axes

        func : function, optional, default=None
            Function for reduce, see BoltArraySpark.reduce

        name : str
            A named statistic, see StatCounter

        Raises
        ------
        ValueError
            If both or neither of ``func`` and ``name`` are set.
        """
        # no axis given means every axis of the array
        if axis is None:
            axis = list(range(len(self.shape)))
        axis = tupleize(axis)

        if func and not name:
            return self.reduce(func, axis)

        # reject the remaining invalid combinations: neither or both given
        if not name or func:
            raise ValueError(
                'Must specify either a function or a statistic name.')

        from bolt.local.array import BoltArrayLocal

        aligned = self._align(axis)

        def summarize(partition):
            # one counter per partition, wrapped in a list for mapPartitions
            return [StatCounter(values=partition, stats=name)]

        partials = aligned._rdd.values().mapPartitions(summarize)
        merged = partials.reduce(lambda left, right: left.combine(right))
        return BoltArrayLocal(getattr(merged, name)).toscalar()
示例#4
0
文件: array.py 项目: andrewosh/bolt
    def _stat(self, axis=None, func=None, name=None):
        """
        Compute a statistic over an axis.

        Takes either a function (used in a reduce) or the name of a
        statistic (read from a merged stat counter), but not both.

        Parameters
        ----------
        axis : tuple or int, optional, default=None
            Axis to compute statistic over, if None
            will compute over all axes

        func : function, optional, default=None
            Function for reduce, see BoltArraySpark.reduce

        name : str
            A named statistic, see StatCounter

        Raises
        ------
        ValueError
            If both or neither of ``func`` and ``name`` are set.
        """
        # default to all axes when none is requested
        if axis is None:
            axis = list(range(len(self.shape)))
        axis = tupleize(axis)

        if func and not name:
            return self.reduce(func, axis)

        # anything other than "name only" at this point is invalid
        if not name or func:
            raise ValueError('Must specify either a function or a statistic name.')

        from bolt.local.array import BoltArrayLocal

        aligned = self._align(axis)

        def per_partition(records):
            # mapPartitions expects an iterable back, hence the list
            return [StatCounter(values=records, stats=name)]

        def merge(left, right):
            return left.combine(right)

        values = aligned._rdd.values()
        total = values.mapPartitions(per_partition).reduce(merge)
        wrapped = BoltArrayLocal(getattr(total, name))
        return wrapped.toscalar()
示例#5
0
    def concatenate(arrays, axis=0):
        """
        Join a sequence of arrays together.

        Parameters
        ----------
        arrays : tuple
            A tuple of array-like objects, e.g. (a1, a2, ...)

        axis : int, optional, default=0
            The axis along which the arrays will be joined.

        Returns
        -------
        BoltArrayLocal

        Raises
        ------
        ValueError
            If ``arrays`` is not a tuple.
        """
        if not isinstance(arrays, tuple):
            raise ValueError("data type not understood")

        converted = tuple(asarray(item) for item in arrays)

        # aliased so the numpy routine is not confused with this function
        from numpy import concatenate as join_arrays
        joined = join_arrays(converted, axis)
        return BoltArrayLocal(joined)
示例#6
0
    def reduce(self, func, axis=(0, ), keepdims=False):
        """
        Reduce an array along an axis.

        Applies a commutative/associative function of two
        arguments cumulatively to all arrays along an axis.
        Array will be aligned so that the desired set of axes
        are in the keys, which may incur a swap.

        Parameters
        ----------
        func : function
            Function of two arrays that returns a single array

        axis : tuple or int, optional, default=(0,)
            Axis or multiple axes to reduce along.

        keepdims : boolean, optional, default=False
            If True, a dimension of size one is inserted into the
            result at the position of each reduced axis.

        Returns
        -------
        BoltArrayLocal, or a scalar when the reduction produces a
        non-array value or a one-dimensional array with one element
        """
        from bolt.local.array import BoltArrayLocal
        from numpy import ndarray

        axis = tupleize(axis)
        swapped = self._align(axis)
        arr = swapped._rdd.values().treeReduce(func, depth=3)

        if keepdims:
            # insert in ascending order: each expand_dims shifts the later
            # positions, so a non-ascending axis tuple such as (1, 0) would
            # otherwise put the singleton dimensions in the wrong places
            for i in sorted(axis):
                arr = expand_dims(arr, axis=i)

        if not isinstance(arr, ndarray):
            # the result of a reduce can also be a scalar
            return arr
        elif arr.shape == (1, ):
            # ndarrays with single values in them should be converted into
            # scalars (this also applies after keepdims expanded a scalar)
            return arr[0]

        return BoltArrayLocal(arr)
示例#7
0
    def array(a, dtype=None, order='C'):
        """
        Build a local bolt array from array-like input.

        Parameters
        ----------
        a : array-like
            An array, any object exposing the array interface, an
            object whose __array__ method returns an array, or any
            (nested) sequence.

        dtype : data-type, optional, default=None
            The desired data-type for the array. If None, will
            be determined from the data. (see numpy)

        order : {'C', 'F', 'A'}, optional, default='C'
            The order of the array. (see numpy)

        Returns
        -------
        BoltArrayLocal
        """
        # convert first, then wrap the converted data
        converted = asarray(a, dtype, order)
        return BoltArrayLocal(converted)
示例#8
0
 def tolocal(self):
     """
     Collect the data via ``toarray`` and wrap it as a local bolt array.
     """
     from bolt.local.array import BoltArrayLocal
     collected = self.toarray()
     return BoltArrayLocal(collected)
示例#9
0
文件: array.py 项目: kr-hansen/bolt
 def first(self):
     """
     Return the first element of the array, wrapped as a local bolt array.
     """
     from bolt.local.array import BoltArrayLocal
     head = self._rdd.values().first()
     return BoltArrayLocal(head)
示例#10
0
 def _wrap(func, shape, dtype, order):
     """
     Call ``func(shape, dtype, order)`` and wrap the result as a BoltArrayLocal.
     """
     created = func(shape, dtype, order)
     return BoltArrayLocal(created)
示例#11
0
 def tolocal(self):
     """
     Return a local bolt array built from the result of ``toarray``.
     """
     from bolt.local.array import BoltArrayLocal
     data = self.toarray()
     return BoltArrayLocal(data)