def array(self, data, row_labels=None, column_labels=None, names=None):
    r"""
    Build an Array from ranges of this object.

    Parameters
    ----------
    data : str
        range for data
    row_labels : str, optional
        range for row labels
    column_labels : str, optional
        range for column labels
    names : list of str, optional
        Names paired, in order, with the row labels and the column labels to build the axes.
        When omitted, the raw labels are passed as axes.

    Returns
    -------
    Array
    """
    def read_labels(key):
        # an omitted range stays None; otherwise fetch it and convert it to a numpy array
        if key is None:
            return None
        return np.asarray(self[key])

    labels = (read_labels(row_labels), read_labels(column_labels))
    if names is None:
        axes = labels
    else:
        axes = [Axis(axis_labels, name) for axis_labels, name in zip(labels, names)]
    # _converted_value is used implicitly via Range.__array__
    values = np.asarray(self[data])
    return Array(values, axes)
def wrapper(*args, **kwargs):
    # Call the wrapped ``func`` on raw (broadcast) arguments, then re-attach the resulting axes to its output.
    raw_args, raw_kwargs, res_axes = make_args_broadcastable(args, kwargs)

    # We pass only raw numpy arrays to the ufuncs even though numpy is normally meant to handle those cases
    # itself via __array_wrap__.
    # There is a problem with np.clip though (and possibly other ufuncs): np.clip is roughly equivalent to
    #     np.maximum(np.minimum(np.asarray(la), high), low)
    # The np.asarray(la) is problematic because it loses the original labels and then tries to get them back
    # from high, where they are possibly incomplete if broadcasting happened.
    # It fails on "np.minimum(ndarray, Array)" because it calls __array_wrap__(high, result), which cannot work
    # if there was broadcasting involved (high has potentially fewer labels than result).
    # It does this because numpy calls __array_wrap__ on the argument with the highest __array_priority__.
    raw_result = func(*raw_args, **raw_kwargs)

    if not res_axes:
        # no axes to attach: hand back whatever func produced
        return raw_result
    if isinstance(raw_result, tuple):
        # functions with several outputs: wrap each output with the same axes
        return tuple(Array(part, res_axes) for part in raw_result)
    return Array(raw_result, res_axes)
def load(self, header=True, convert_float=True, nb_axes=None, index_col=None, fill_value=nan,
         sort_rows=False, sort_columns=False, wide=True):
    r"""
    Load the content of this object as an Array.

    Parameters
    ----------
    header : bool, optional
        If True (default), the converted values are handed to ``from_lists`` together with `nb_axes`,
        `index_col`, `fill_value`, `sort_rows`, `sort_columns` and `wide`. Otherwise the converted values
        are wrapped in an Array as is.
    convert_float : bool, optional
        Forwarded to ``_converted_value``. Defaults to True.
    nb_axes, index_col, fill_value, sort_rows, sort_columns, wide : optional
        Forwarded unchanged to ``from_lists`` (only used when `header` is True).

    Returns
    -------
    Array
        An empty Array when this object has no dimension.
    """
    if not self.ndim:
        return Array([])

    list_data = self._converted_value(convert_float=convert_float)
    if not header:
        return Array(list_data)

    options = {
        'nb_axes': nb_axes,
        'index_col': index_col,
        'fill_value': fill_value,
        'sort_rows': sort_rows,
        'sort_columns': sort_columns,
        'wide': wide,
    }
    return from_lists(list_data, **options)
def permutation(x, axis=0):
    r"""
    Randomly permute a sequence along an axis, or return a permuted range.

    Parameters
    ----------
    x : int or array_like
        If `x` is an integer, randomly permute ``sequence(x)``. If `x` is an array, returns a randomly
        shuffled copy.
    axis : int, str or Axis, optional
        Axis along which to permute. Defaults to the first axis. Not used when `x` is an integer.

    Returns
    -------
    Array
        Permuted sequence or array range.

    Examples
    --------
    >>> la.random.permutation(10)                                        # doctest: +SKIP
    {0}*  0  1  2  3  4  5  6  7  8  9
          6  8  0  9  4  7  1  5  3  2
    >>> la.random.permutation([1, 4, 9, 12, 15])                         # doctest: +SKIP
    {0}*  0   1   2  3  4
          1  15  12  9  4
    >>> la.random.permutation(la.ndtest(5))                              # doctest: +SKIP
    a  a3  a1  a2  a4  a0
        3   1   2   4   0
    >>> arr = la.ndtest((3, 3))                                          # doctest: +SKIP
    >>> la.random.permutation(arr)                                       # doctest: +SKIP
    a\b  b0  b1  b2
     a1   3   4   5
     a2   6   7   8
     a0   0   1   2
    >>> la.random.permutation(arr, axis='b')                             # doctest: +SKIP
    a\b  b1  b2  b0
     a0   1   2   0
     a1   4   5   3
     a2   7   8   6
    """
    if not isinstance(x, (int, np.integer)):
        arr = asarray(x)
        target_axis = arr.axes[axis]
        # draw a random ordering of the positions along the target axis, then take them positionally
        shuffled_positions = np.random.permutation(len(target_axis))
        return arr[target_axis.i[shuffled_positions]]

    # integer input: numpy directly returns a shuffled range
    return Array(np.random.permutation(x))
def from_series(s, sort_rows=False, fill_value=nan, meta=None, **kwargs):
    r"""
    Converts Pandas Series into Array.

    Parameters
    ----------
    s : Pandas Series
        Input Pandas Series.
    sort_rows : bool, optional
        Whether or not to sort the rows alphabetically. Defaults to False.
    fill_value : scalar, optional
        Value used to fill cells corresponding to label combinations which are not present in the input Series.
        Defaults to NaN.
    meta : list of pairs or dict or OrderedDict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.
    **kwargs
        Forwarded to ``from_frame`` when the Series has a MultiIndex; unused otherwise.

    Returns
    -------
    Array

    See Also
    --------
    Array.to_series

    Examples
    --------
    >>> from larray import ndtest
    >>> s = ndtest((2, 2, 2), dtype=float).to_series()
    >>> s                                                                             # doctest: +NORMALIZE_WHITESPACE
    a   b   c
    a0  b0  c0    0.0
            c1    1.0
        b1  c0    2.0
            c1    3.0
    a1  b0  c0    4.0
            c1    5.0
        b1  c0    6.0
            c1    7.0
    dtype: float64
    >>> from_series(s)
     a  b\c   c0   c1
    a0   b0  0.0  1.0
    a0   b1  2.0  3.0
    a1   b0  4.0  5.0
    a1   b1  6.0  7.0
    """
    if not isinstance(s.index, pd.MultiIndex):
        # flat index: a one-dimensional array, named after the Series (or after its index as a fallback)
        raw_name = s.name if s.name is not None else s.index.name
        name = decode(raw_name, 'utf8')
        if sort_rows:
            s = s.sort_index()
        return Array(s.values, Axis(s.index.values, name), meta=meta)

    # TODO: use argument sort=False when it will be available
    #       (see https://github.com/pandas-dev/pandas/issues/15105)
    df = s.unstack(level=-1, fill_value=fill_value)
    # pandas (un)stack and pivot(_table) methods return a Dataframe/Series with sorted index and columns
    if not sort_rows:
        # restore the original (unsorted) label order on both rows and columns
        labels = index_to_labels(s.index, sort=False)
        if isinstance(df.index, pd.MultiIndex):
            row_combinations = list(product(*labels[:-1]))
            index = pd.MultiIndex.from_tuples(row_combinations, names=s.index.names[:-1])
        else:
            index = labels[0]
        df = df.reindex(index=index, columns=labels[-1], fill_value=fill_value)
    return from_frame(df, sort_rows=sort_rows, sort_columns=sort_rows, fill_value=fill_value, meta=meta, **kwargs)
def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfold_last_axis_name=False,
               fill_value=nan, meta=None, cartesian_prod=True, **kwargs):
    r"""
    Converts Pandas DataFrame into Array.

    Parameters
    ----------
    df : pandas.DataFrame
        Input dataframe. By default, name and labels of the last axis are defined by the name and labels of the
        columns Index of the dataframe unless argument unfold_last_axis_name is set to True.
    sort_rows : bool, optional
        Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting).
        Must be False if `cartesian_prod` is set to False.
        Defaults to False.
    sort_columns : bool, optional
        Whether or not to sort the columns alphabetically (sorting is more efficient than not sorting).
        Must be False if `cartesian_prod` is set to False.
        Defaults to False.
    parse_header : bool, optional
        Whether or not to parse columns labels. Pandas treats column labels as strings.
        If True, column labels are converted into int, float or boolean when possible. Defaults to False.
    unfold_last_axis_name : bool, optional
        Whether or not to extract the names of the last two axes by splitting the name of the last index column of the
        dataframe using ``\``. Defaults to False.
    fill_value : scalar, optional
        Value used to fill cells corresponding to label combinations which are not present in the input DataFrame.
        Defaults to NaN.
    meta : list of pairs or dict or OrderedDict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.
    cartesian_prod : bool, optional
        Whether or not to expand the dataframe to a cartesian product dataframe as needed by Array.
        This is an expensive operation but is absolutely required if you cannot guarantee your dataframe is already
        well formed. If False, arguments `sort_rows` and `sort_columns` must be set to False.
        Defaults to True.
    **kwargs
        Forwarded to ``cartesian_product_df``; unused when `cartesian_prod` is False.

    Returns
    -------
    Array

    Raises
    ------
    ValueError
        If `cartesian_prod` is False and `sort_rows` or `sort_columns` is True.

    See Also
    --------
    Array.to_frame

    Examples
    --------
    >>> from larray import ndtest
    >>> df = ndtest((2, 2, 2)).to_frame()
    >>> df                                                                             # doctest: +NORMALIZE_WHITESPACE
    c      c0  c1
    a  b
    a0 b0   0   1
       b1   2   3
    a1 b0   4   5
       b1   6   7
    >>> from_frame(df)
     a  b\c  c0  c1
    a0   b0   0   1
    a0   b1   2   3
    a1   b0   4   5
    a1   b1   6   7

    Names of the last two axes written as ``before_last_axis_name\\last_axis_name``

    >>> df = ndtest((2, 2, 2)).to_frame(fold_last_axis_name=True)
    >>> df                                                                             # doctest: +NORMALIZE_WHITESPACE
            c0  c1
    a  b\c
    a0 b0    0   1
       b1    2   3
    a1 b0    4   5
       b1    6   7
    >>> from_frame(df, unfold_last_axis_name=True)
     a  b\c  c0  c1
    a0   b0   0   1
    a0   b1   2   3
    a1   b0   4   5
    a1   b1   6   7
    """
    axes_names = [decode(name, 'utf8') if isinstance(name, bytes) else name
                  for name in df.index.names]

    # handle 2 or more dimensions with the last axis name given using \
    if unfold_last_axis_name:
        if isinstance(axes_names[-1], str) and '\\' in axes_names[-1]:
            last_axes = [name.strip() for name in axes_names[-1].split('\\')]
            axes_names = axes_names[:-1] + last_axes
        else:
            # nothing to unfold: the last axis is anonymous
            axes_names += [None]
    else:
        axes_names += [df.columns.name]

    if cartesian_prod:
        df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns,
                                               fill_value=fill_value, **kwargs)
    else:
        # without the cartesian product step, nothing here can sort the frame: refuse rather than silently ignore
        if sort_rows or sort_columns:
            raise ValueError('sort_rows and sort_columns cannot be used when cartesian_prod is set to False. '
                             'Please call the method sort_axes on the returned array to sort rows or columns')
        axes_labels = index_to_labels(df.index, sort=False)

    # Pandas treats column labels as column names (strings) so we need to convert them to values
    if parse_header:
        last_axis_labels = [parse(cell) for cell in df.columns.values]
    else:
        last_axis_labels = list(df.columns.values)
    axes_labels.append(last_axis_labels)

    axes = AxisCollection([Axis(labels, name) for labels, name in zip(axes_labels, axes_names)])
    data = df.values.reshape(axes.shape)
    return Array(data, axes, meta=meta)
def __larray__(self):
    # Conversion hook: wrap the converted values of this object in an Array.
    converted = self._converted_value()
    return Array(converted)
def choice(choices=None, axes=None, replace=True, p=None, meta=None):
    r"""
    Generates a random sample from given choices

    Parameters
    ----------
    choices : 1-D array-like or int, optional
        Values to choose from.
        If an array, a random sample is generated from its elements.
        If an int n, the random sample is generated as if choices was la.sequence(n)
        If p is a 1-D Array, choices are taken from its axis.
    axes : int, tuple of int, str, Axis or tuple/list/AxisCollection of Axis, optional
        Axes (or shape) of the resulting array. If ``axes`` is None (the default), a single value is returned.
        Otherwise, if the resulting axes have a shape of, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn.
    replace : boolean, optional
        Whether the sample is with or without replacement.
    p : array-like, optional
        The probabilities associated with each entry in choices.
        If p is a 1-D Array, choices are taken from its axis labels. If p is an N-D Array, each cell represents the
        probability that the combination of labels will occur.
        If not given the sample assumes a uniform distribution over all entries in choices.
    meta : list of pairs or dict or OrderedDict or Metadata, optional
        Metadata (title, description, author, creation_date, ...) associated with the array.
        Keys must be strings. Values must be of type string, int, float, date, time or datetime.

    Returns
    -------
    Array or scalar
        The generated random samples with given ``axes`` (or shape).

    Raises
    ------
    ValueError
        If choices is an int and less than zero, if choices or p are not 1-dimensional,
        if choices is an array-like of size 0, if p is not a vector of probabilities,
        if choices and p have different lengths, or
        if replace=False and the sample size is greater than the population size.

    See Also
    --------
    randint, permutation

    Examples
    --------
    Generate one random value out of given choices (each choice has the same probability of occurring):

    >>> la.random.choice(['hello', 'world', '!'])                                     # doctest: +SKIP
    hello

    With given probabilities:

    >>> la.random.choice(['hello', 'world', '!'], p=[0.1, 0.8, 0.1])                  # doctest: +SKIP
    world

    Generate a 2 x 3 array with given axes and values drawn from the given choices using given probabilities:

    >>> la.random.choice([5, 10, 15], p=[0.3, 0.5, 0.2], axes='a=a0,a1;b=b0..b2')     # doctest: +SKIP
    a\b  b0  b1  b2
     a0  15  10  10
     a1  10   5  10

    Same as above with labels and probabilities given as a one dimensional Array

    >>> proba = Array([0.3, 0.5, 0.2], Axis([5, 10, 15], 'outcome'))                  # doctest: +SKIP
    >>> proba                                                                         # doctest: +SKIP
    outcome    5   10   15
             0.3  0.5  0.2
    >>> choice(p=proba, axes='a=a0,a1;b=b0..b2')                                      # doctest: +SKIP
    a\b  b0  b1  b2
     a0  10  15   5
     a1  10   5  10

    Generate a uniform random sample of size 3 from la.sequence(5):

    >>> la.random.choice(5, 3)                                                        # doctest: +SKIP
    {0}*  0  1  2
          3  2  0
    >>> # This is equivalent to la.random.randint(0, 5, 3)

    Generate a non-uniform random sample of size 3 from the given choices without replacement:

    >>> la.random.choice(['hello', 'world', '!'], 3, replace=False, p=[0.1, 0.6, 0.3])  # doctest: +SKIP
    {0}*      0  1      2
          world  !  hello

    Using an N-dimensional array as probabilities:

    >>> proba = Array([[0.15, 0.25, 0.10],
    ...                [0.20, 0.10, 0.20]], 'a=a0,a1;b=b0..b2')                       # doctest: +SKIP
    >>> proba                                                                         # doctest: +SKIP
    a\b    b0    b1   b2
     a0  0.15  0.25  0.1
     a1   0.2   0.1  0.2
    >>> choice(p=proba, axes='draw=d0..d5')                                           # doctest: +SKIP
    draw\axis   a   b
           d0  a1  b2
           d1  a1  b1
           d2  a0  b1
           d3  a0  b0
           d4  a1  b2
           d5  a0  b1
    """
    axes = AxisCollection(axes)

    if isinstance(p, Array):
        if choices is not None:
            raise ValueError("choices argument cannot be used when p argument is an Array")

        if p.ndim <= 1:
            # labelled probability vector: draw among its labels using its raw data as weights
            choices = p.axes[0].labels
            p = p.data
        else:
            # N-D probabilities: draw flat cell positions, then translate them back to label combinations
            # NOTE(review): `meta` is not forwarded on this path, so the returned array does not carry it.
            flattened_probas = p.data.reshape(-1)
            flat_indices = choice(p.size, axes=axes, replace=replace, p=flattened_probas)
            return p.axes._flat_lookup(flat_indices)

    if choices is None:
        raise ValueError("choices argument must be provided unless p is an Array")

    samples = np.random.choice(choices, size=axes.shape, replace=replace, p=p)
    return Array(samples, axes, meta=meta)
def generic_random(np_func, args, min_axes, meta):
    r"""
    Call a random function on broadcast raw arguments and wrap its result in an Array.

    Parameters
    ----------
    np_func : callable
        Function called as ``np_func(*raw_args, size=shape)``.
    args : sequence
        Arguments to broadcast (via ``raw_broadcastable``) before being passed to `np_func`.
    min_axes
        Forwarded to ``raw_broadcastable`` as ``min_axes``.
    meta
        Metadata attached to the resulting Array.

    Returns
    -------
    Array
    """
    raw_args, result_axes = raw_broadcastable(args, min_axes=min_axes)
    # the requested sample size is the shape of the broadcast axes
    return Array(np_func(*raw_args, size=result_axes.shape), result_axes, meta=meta)