Exemplo n.º 1
0
        def array(self, data, row_labels=None, column_labels=None, names=None):
            r"""
            Build an Array from a data range and, optionally, label ranges.

            Parameters
            ----------
            data : str
                range for data
            row_labels : str, optional
                range for row labels
            column_labels : str, optional
                range for column labels
            names : list of str, optional
                names paired, in order, with the row labels and then the column labels

            Returns
            -------
            Array
            """
            # a label range is only looked up on self when it was actually given
            row_labels = np.asarray(self[row_labels]) if row_labels is not None else None
            column_labels = np.asarray(self[column_labels]) if column_labels is not None else None

            axes = (row_labels, column_labels)
            if names is not None:
                # zip stops at the shortest of (labels, names)
                axes = [Axis(axis_labels, axis_name) for axis_labels, axis_name in zip(axes, names)]

            # _converted_value is used implicitly via Range.__array__
            values = np.asarray(self[data])
            return Array(values, axes)
Exemplo n.º 2
0
    def wrapper(*args, **kwargs):
        # Only raw (already broadcast) arguments are handed to the wrapped function, even though numpy is normally
        # meant to handle labelled arrays itself via __array_wrap__.
        #
        # np.clip (and possibly other ufuncs) is the reason: it is roughly equivalent to
        #     np.maximum(np.minimum(np.asarray(la), high), low)
        # np.asarray(la) loses the original labels, and numpy then tries to get them back from high, where they are
        # possibly incomplete if broadcasting happened.
        #
        # "np.minimum(ndarray, Array)" fails because it calls __array_wrap__(high, result), which cannot work when
        # broadcasting was involved (high has potentially fewer labels than result). numpy does this because it calls
        # __array_wrap__ on the argument with the highest __array_priority__.
        raw_bcast_args, raw_bcast_kwargs, res_axes = make_args_broadcastable(args, kwargs)
        res_data = func(*raw_bcast_args, **raw_bcast_kwargs)

        # without result axes, the raw result is returned untouched
        if not res_axes:
            return res_data

        # a tuple result gets each of its members wrapped with the same axes
        if isinstance(res_data, tuple):
            return tuple(Array(res_arr, res_axes) for res_arr in res_data)
        return Array(res_data, res_axes)
Exemplo n.º 3
0
        def load(self, header=True, convert_float=True, nb_axes=None, index_col=None, fill_value=nan,
                 sort_rows=False, sort_columns=False, wide=True):
            r"""
            Load the content of this object as an Array.

            Parameters
            ----------
            header : bool, optional
                If True (the default), the converted data is handed to ``from_lists`` together with the
                nb_axes, index_col, fill_value, sort_rows, sort_columns and wide arguments.
                If False, the converted data is wrapped in an Array as is and those arguments are unused.
            convert_float : bool, optional
                Forwarded to ``self._converted_value``. Defaults to True.
            nb_axes, index_col, fill_value, sort_rows, sort_columns, wide : optional
                Forwarded unchanged to ``from_lists`` when `header` is True.

            Returns
            -------
            Array
                An Array built from an empty list when ``self.ndim`` is falsy.
            """
            # nothing to convert
            if not self.ndim:
                return Array([])

            list_data = self._converted_value(convert_float=convert_float)
            if not header:
                return Array(list_data)

            from_lists_options = dict(nb_axes=nb_axes,
                                      index_col=index_col,
                                      fill_value=fill_value,
                                      sort_rows=sort_rows,
                                      sort_columns=sort_columns,
                                      wide=wide)
            return from_lists(list_data, **from_lists_options)
Exemplo n.º 4
0
def permutation(x, axis=0):
    r"""
    Randomly permute a sequence along an axis, or return a permuted range.

    Parameters
    ----------
    x : int or array_like
        If `x` is an integer, randomly permute ``sequence(x)``.
        If `x` is an array, returns a randomly shuffled copy.
    axis : int, str or Axis, optional
        Axis along which to permute. Defaults to the first axis.
        Unused when `x` is an integer.

    Returns
    -------
    Array
        Permuted sequence or array range.

    Examples
    --------
    >>> la.random.permutation(10)                               # doctest: +SKIP
    {0}*  0  1  2  3  4  5  6  7  8  9
          6  8  0  9  4  7  1  5  3  2
    >>> la.random.permutation([1, 4, 9, 12, 15])                # doctest: +SKIP
    {0}*  0   1   2  3  4
          1  15  12  9  4
    >>> la.random.permutation(la.ndtest(5))                     # doctest: +SKIP
    a  a3  a1  a2  a4  a0
        3   1   2   4   0
    >>> arr = la.ndtest((3, 3))                                 # doctest: +SKIP
    >>> la.random.permutation(arr)                              # doctest: +SKIP
    a\b  b0  b1  b2
     a1   3   4   5
     a2   6   7   8
     a0   0   1   2
    >>> la.random.permutation(arr, axis='b')                    # doctest: +SKIP
    a\b  b1  b2  b0
     a0   1   2   0
     a1   4   5   3
     a2   7   8   6
    """
    if not isinstance(x, (int, np.integer)):
        x = asarray(x)
        target_axis = x.axes[axis]
        # shuffle the positions along the target axis, then select them by position
        shuffled_positions = np.random.permutation(len(target_axis))
        return x[target_axis.i[shuffled_positions]]

    # integer input: numpy permutes range(x) directly
    return Array(np.random.permutation(x))
Exemplo n.º 5
0
def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs):
    r"""
    Converts Pandas Series into Array.

    Parameters
    ----------
    s : Pandas Series
        Input Pandas Series.
    sort_rows : bool, optional
        Whether or not to sort the rows alphabetically. Defaults to False.
    fill_value : scalar, optional
        Value used to fill cells corresponding to label combinations which are not present in the input Series.
        Defaults to NaN.
    meta : list of pairs or dict or OrderedDict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.
    **kwargs
        Extra keyword arguments forwarded to ``from_frame`` when the Series has a MultiIndex.
        They are not used for a Series with a flat index.

    Returns
    -------
    Array

    See Also
    --------
    Array.to_series

    Examples
    --------
    >>> from larray import ndtest
    >>> s = ndtest((2, 2, 2), dtype=float).to_series()
    >>> s                                                                             # doctest: +NORMALIZE_WHITESPACE
    a   b   c
    a0  b0  c0    0.0
            c1    1.0
        b1  c0    2.0
            c1    3.0
    a1  b0  c0    4.0
            c1    5.0
        b1  c0    6.0
            c1    7.0
    dtype: float64
    >>> from_series(s)
     a  b\c   c0   c1
    a0   b0  0.0  1.0
    a0   b1  2.0  3.0
    a1   b0  4.0  5.0
    a1   b1  6.0  7.0
    """
    # flat index: the result is a one-dimensional array
    if not isinstance(s.index, pd.MultiIndex):
        # the Series name takes precedence over the index name for naming the axis
        if s.name is not None:
            name = decode(s.name, 'utf8')
        else:
            name = decode(s.index.name, 'utf8')
        if sort_rows:
            s = s.sort_index()
        axis = Axis(s.index.values, name)
        return Array(s.values, axis, meta=meta)

    # MultiIndex: the last index level becomes the columns of an intermediate DataFrame
    # TODO: use argument sort=False when it will be available
    # (see https://github.com/pandas-dev/pandas/issues/15105)
    df = s.unstack(level=-1, fill_value=fill_value)
    # pandas (un)stack and pivot(_table) methods return a Dataframe/Series with sorted index and columns
    if not sort_rows:
        # reindex using the labels as returned by index_to_labels(..., sort=False)
        labels = index_to_labels(s.index, sort=False)
        if isinstance(df.index, pd.MultiIndex):
            row_combinations = list(product(*labels[:-1]))
            index = pd.MultiIndex.from_tuples(row_combinations, names=s.index.names[:-1])
        else:
            index = labels[0]
        df = df.reindex(index=index, columns=labels[-1], fill_value=fill_value)

    # note that sort_rows drives both the row and the column sorting of from_frame
    return from_frame(df, sort_rows=sort_rows, sort_columns=sort_rows, fill_value=fill_value, meta=meta, **kwargs)
Exemplo n.º 6
0
def from_frame(df,
               sort_rows=False,
               sort_columns=False,
               parse_header=False,
               unfold_last_axis_name=False,
               fill_value=nan,
               meta=None,
               cartesian_prod=True,
               **kwargs):
    r"""
    Converts Pandas DataFrame into Array.

    Parameters
    ----------
    df : pandas.DataFrame
        Input dataframe. By default, name and labels of the last axis are defined by the name and labels of the
        columns Index of the dataframe unless argument unfold_last_axis_name is set to True.
    sort_rows : bool, optional
        Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting).
        Must be False if `cartesian_prod` is set to False.
        Defaults to False.
    sort_columns : bool, optional
        Whether or not to sort the columns alphabetically (sorting is more efficient than not sorting).
        Must be False if `cartesian_prod` is set to False.
        Defaults to False.
    parse_header : bool, optional
        Whether or not to parse columns labels. Pandas treats column labels as strings.
        If True, column labels are converted into int, float or boolean when possible. Defaults to False.
    unfold_last_axis_name : bool, optional
        Whether or not to extract the names of the last two axes by splitting the name of the last index column of the
        dataframe using ``\``. Defaults to False.
    fill_value : scalar, optional
        Value used to fill cells corresponding to label combinations which are not present in the input DataFrame.
        Defaults to NaN.
    meta : list of pairs or dict or OrderedDict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.
    cartesian_prod : bool, optional
        Whether or not to expand the dataframe to a cartesian product dataframe as needed by Array.
        This is an expensive operation but is absolutely required if you cannot guarantee your dataframe is already
        well formed. If False, arguments `sort_rows` and `sort_columns` must be set to False.
        Defaults to True.
    **kwargs
        Extra keyword arguments forwarded to ``cartesian_product_df``. Unused when `cartesian_prod` is False.

    Returns
    -------
    Array

    Raises
    ------
    ValueError
        If `sort_rows` or `sort_columns` is True while `cartesian_prod` is False.

    See Also
    --------
    Array.to_frame

    Examples
    --------
    >>> from larray import ndtest
    >>> df = ndtest((2, 2, 2)).to_frame()
    >>> df                                                                             # doctest: +NORMALIZE_WHITESPACE
    c      c0  c1
    a  b
    a0 b0   0   1
       b1   2   3
    a1 b0   4   5
       b1   6   7
    >>> from_frame(df)
     a  b\c  c0  c1
    a0   b0   0   1
    a0   b1   2   3
    a1   b0   4   5
    a1   b1   6   7

    Names of the last two axes written as ``before_last_axis_name\\last_axis_name``

    >>> df = ndtest((2, 2, 2)).to_frame(fold_last_axis_name=True)
    >>> df                                                                             # doctest: +NORMALIZE_WHITESPACE
            c0  c1
    a  b\c
    a0 b0    0   1
       b1    2   3
    a1 b0    4   5
       b1    6   7
    >>> from_frame(df, unfold_last_axis_name=True)
     a  b\c  c0  c1
    a0   b0   0   1
    a0   b1   2   3
    a1   b0   4   5
    a1   b1   6   7
    """
    # fail fast on an unsupported combination of arguments, before touching the dataframe
    if not cartesian_prod and (sort_rows or sort_columns):
        raise ValueError(
            'sort_rows and sort_columns cannot be used when cartesian_prod is set to False. '
            'Please call the method sort_axes on the returned array to sort rows or columns'
        )

    # index level names may be bytes, in which case they are decoded
    axes_names = [
        decode(name, 'utf8') if isinstance(name, bytes) else name
        for name in df.index.names
    ]

    # handle 2 or more dimensions with the last axis name given using \
    if unfold_last_axis_name:
        if isinstance(axes_names[-1], str) and '\\' in axes_names[-1]:
            last_axes = [name.strip() for name in axes_names[-1].split('\\')]
            axes_names = axes_names[:-1] + last_axes
        else:
            # no "\" to split on: the last axis stays anonymous
            axes_names += [None]
    else:
        axes_names += [df.columns.name]

    if cartesian_prod:
        df, axes_labels = cartesian_product_df(df,
                                               sort_rows=sort_rows,
                                               sort_columns=sort_columns,
                                               fill_value=fill_value,
                                               **kwargs)
    else:
        axes_labels = index_to_labels(df.index, sort=False)

    # Pandas treats column labels as column names (strings) so we need to convert them to values
    if parse_header:
        last_axis_labels = [parse(cell) for cell in df.columns.values]
    else:
        last_axis_labels = list(df.columns.values)
    axes_labels.append(last_axis_labels)

    axes = AxisCollection(
        [Axis(labels, name) for labels, name in zip(axes_labels, axes_names)])
    # the 2D frame values are reshaped to one dimension per axis
    data = df.values.reshape(axes.shape)
    return Array(data, axes, meta=meta)
Exemplo n.º 7
0
 def __larray__(self):
     r"""Return the converted value of this object wrapped in an Array."""
     converted = self._converted_value()
     return Array(converted)
Exemplo n.º 8
0
def choice(choices=None, axes=None, replace=True, p=None, meta=None):
    r"""
    Generates a random sample from given choices

    Parameters
    ----------
    choices : 1-D array-like or int, optional
        Values to choose from.
        If an array, a random sample is generated from its elements.
        If an int n, the random sample is generated as if choices was la.sequence(n)
        If p is a 1-D Array, choices are taken from its axis.
    axes : int, tuple of int, str, Axis or tuple/list/AxisCollection of Axis, optional
        Axes (or shape) of the resulting array. If ``axes`` is None (the default), a single value is returned.
        Otherwise, if the resulting axes have a shape of, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn.
    replace : boolean, optional
        Whether the sample is with or without replacement.
    p : array-like, optional
        The probabilities associated with each entry in choices.
        If p is a 1-D Array, choices are taken from its axis labels. If p is an N-D Array, each cell represents the
        probability that the combination of labels will occur.
        If not given the sample assumes a uniform distribution over all entries in choices.
    meta : list of pairs or dict or OrderedDict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array or scalar
        The generated random samples with given ``axes`` (or shape).

    Raises
    ------
    ValueError
        If choices is given while p is an Array, if neither choices nor an Array p is given,
        if choices is an int and less than zero, if choices or p are not 1-dimensional,
        if choices is an array-like of size 0, if p is not a vector of probabilities,
        if choices and p have different lengths, or
        if replace=False and the sample size is greater than the population size.

    See Also
    --------
    randint, permutation

    Examples
    --------
    Generate one random value out of given choices (each choice has the same probability of occurring):

    >>> la.random.choice(['hello', 'world', '!'])                                       # doctest: +SKIP
    hello

    With given probabilities:

    >>> la.random.choice(['hello', 'world', '!'], p=[0.1, 0.8, 0.1])                    # doctest: +SKIP
    world

    Generate a 2 x 3 array with given axes and values drawn from the given choices using given probabilities:

    >>> la.random.choice([5, 10, 15], p=[0.3, 0.5, 0.2], axes='a=a0,a1;b=b0..b2')       # doctest: +SKIP
    a\b  b0  b1  b2
     a0  15  10  10
     a1  10   5  10

    Same as above with labels and probabilities given as a one dimensional Array

    >>> proba = Array([0.3, 0.5, 0.2], Axis([5, 10, 15], 'outcome'))                   # doctest: +SKIP
    >>> proba                                                                           # doctest: +SKIP
    outcome    5   10   15
             0.3  0.5  0.2
    >>> choice(p=proba, axes='a=a0,a1;b=b0..b2')                                        # doctest: +SKIP
    a\b  b0  b1  b2
     a0  10  15   5
     a1  10   5  10

    Generate a uniform random sample of size 3 from la.sequence(5):

    >>> la.random.choice(5, 3)                                                          # doctest: +SKIP
    {0}*  0  1  2
          3  2  0
    >>> # This is equivalent to la.random.randint(0, 5, 3)

    Generate a non-uniform random sample of size 3 from the given choices without replacement:

    >>> la.random.choice(['hello', 'world', '!'], 3, replace=False, p=[0.1, 0.6, 0.3])  # doctest: +SKIP
    {0}*      0  1      2
          world  !  hello

    Using an N-dimensional array as probabilities:

    >>> proba = Array([[0.15, 0.25, 0.10],
    ...                 [0.20, 0.10, 0.20]], 'a=a0,a1;b=b0..b2')                        # doctest: +SKIP
    >>> proba                                                                           # doctest: +SKIP
    a\b    b0    b1   b2
     a0  0.15  0.25  0.1
     a1   0.2   0.1  0.2
    >>> choice(p=proba, axes='draw=d0..d5')                                             # doctest: +SKIP
    draw\axis   a   b
           d0  a1  b2
           d1  a1  b1
           d2  a0  b1
           d3  a0  b0
           d4  a1  b2
           d5  a0  b1
    """
    axes = AxisCollection(axes)

    if isinstance(p, Array):
        if choices is not None:
            raise ValueError("choices argument cannot be used when p argument is an Array")

        if p.ndim > 1:
            # N-D probabilities: draw flat cell positions, then translate them back through the axes of p
            flat_probabilities = p.data.reshape(-1)
            flat_indices = choice(p.size, axes=axes, replace=replace, p=flat_probabilities)
            return p.axes._flat_lookup(flat_indices)

        # probabilities with at most one dimension: the labels of the first axis of p are the values to draw from
        choices = p.axes[0].labels
        p = p.data

    if choices is None:
        raise ValueError("choices argument must be provided unless p is an Array")

    samples = np.random.choice(choices, size=axes.shape, replace=replace, p=p)
    return Array(samples, axes, meta=meta)
Exemplo n.º 9
0
def generic_random(np_func, args, min_axes, meta):
    r"""
    Call a random generation function on broadcast arguments and wrap its result in an Array.

    Parameters
    ----------
    np_func : callable
        Function called with the raw arguments (positionally) and a ``size`` keyword argument.
    args
        Arguments passed to ``raw_broadcastable`` before being given to `np_func`.
    min_axes
        Forwarded as ``min_axes`` to ``raw_broadcastable``.
    meta
        Forwarded as ``meta`` to the resulting Array.

    Returns
    -------
    Array
    """
    raw_args, result_axes = raw_broadcastable(args, min_axes=min_axes)
    # the requested sample size is the shape of the broadcast axes
    samples = np_func(*raw_args, size=result_axes.shape)
    return Array(samples, result_axes, meta=meta)