def broadcast_to(arg1, arg2, to_pd=None, index_from=None, columns_from=None, **kwargs):
    """Broadcast `arg1` to the shape of `arg2`.

    Anything that `checks.is_array` does not recognize is converted with `np.asarray`.
    If `to_pd` is None, the output is converted to pandas only when `arg2` is a pandas
    object. When converting to pandas, an index and columns that were not passed
    explicitly are taken from `arg2`.

    Keyword arguments `**kwargs` are passed to `broadcast`.

    ## Example

    ```python-repl
    >>> import numpy as np
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import broadcast_to

    >>> a = np.array([1, 2, 3])
    >>> sr = pd.Series([4, 5, 6], index=pd.Index(['x', 'y', 'z']), name='a')

    >>> broadcast_to(a, sr)
    x    1
    y    2
    z    3
    Name: a, dtype: int64

    >>> broadcast_to(sr, a)
    array([4, 5, 6])
    ```
    """
    source = arg1 if checks.is_array(arg1) else np.asarray(arg1)
    target = arg2 if checks.is_array(arg2) else np.asarray(arg2)
    as_pandas = checks.is_pandas(target) if to_pd is None else to_pd
    if as_pandas:
        # Fall back to the labels of the target for whatever was not given
        if index_from is None:
            index_from = index_fns.get_index(target, 0)
        if columns_from is None:
            columns_from = index_fns.get_index(target, 1)
    return broadcast(
        source,
        to_shape=target.shape,
        to_pd=as_pandas,
        index_from=index_from,
        columns_from=columns_from,
        **kwargs
    )
def tile(arg, n, axis=1):
    """Repeat the whole `arg` `n` times along the specified axis.

    With `axis=0` the rows are tiled and, for pandas objects, the index is tiled with
    `index_fns.tile_index`. With `axis=1` the argument is first brought to two
    dimensions and the columns are tiled. Any other axis raises `ValueError`."""
    if not checks.is_array(arg):
        arg = np.asarray(arg)

    if axis == 0:
        # 2-dim input is stacked row-wise, everything else is tiled flat
        reps = (n, 1) if arg.ndim == 2 else n
        if not checks.is_pandas(arg):
            return np.tile(arg, reps)
        wrapper = array_wrapper.ArrayWrapper.from_obj(arg)
        return wrapper.wrap(
            np.tile(arg.values, reps),
            index=index_fns.tile_index(arg.index, n)
        )

    if axis == 1:
        arg = to_2d(arg)
        if not checks.is_pandas(arg):
            return np.tile(arg, (1, n))
        wrapper = array_wrapper.ArrayWrapper.from_obj(arg)
        return wrapper.wrap(
            np.tile(arg.values, (1, n)),
            columns=index_fns.tile_index(arg.columns, n)
        )

    raise ValueError("Only axis 0 and 1 are supported")
def update_data(self, data):
    """Update the data of the plot efficiently.

    Each trace receives its own column of `data` as `y`; traces whose marker has a
    colorscale set also receive that column as marker color.

    Args:
        data (array_like): Data in any format that can be converted to NumPy.

            Must be of shape (`x_labels`, `trace_names`).

    Examples:
        ```py
        fig = pd.Series([1, 2], index=['x', 'y'], name='a').vbt.Bar()
        fig.update_data([2, 1])
        fig.show()
        ```
        ![](img/Bar_updated.png)
    """
    if not checks.is_array(data):
        data = np.asarray(data)
    data = reshape_fns.to_2d(data)
    # Rows must line up with the x labels, columns with the traces
    checks.assert_same_shape(data, self._x_labels, along_axis=(0, 0))
    checks.assert_same_shape(data, self._trace_names, along_axis=(1, 0))

    # Update traces
    with self.batch_update():
        for col, trace in enumerate(self.data):
            values = data[:, col]
            trace.y = values
            if trace.marker.colorscale is not None:
                trace.marker.color = values
def wrap_array(arg, index=None, columns=None, dtype=None, default_index=None, default_columns=None, to_ndim=None):
    """Wrap array into a series/dataframe.

    `default_index` and `default_columns` are used when `index` and `columns` are None.
    If `to_ndim` is given, `arg` is first passed through `soft_broadcast_to_ndim`.
    A 1-dim array becomes a Series (named after the only column, if exactly one column
    is given); anything else is passed to `pd.DataFrame`."""
    if not checks.is_array(arg):
        arg = np.asarray(arg)
    if to_ndim is not None:
        arg = soft_broadcast_to_ndim(arg, to_ndim)

    index = default_index if index is None else index
    columns = default_columns if columns is None else columns
    has_single_column = columns is not None and len(columns) == 1
    name = columns[0] if has_single_column else None

    # Perform checks
    if index is not None:
        checks.assert_same_shape(arg, index, along_axis=(0, 0))
    if arg.ndim == 2 and columns is not None:
        checks.assert_same_shape(arg, columns, along_axis=(1, 0))

    if arg.ndim != 1:
        return pd.DataFrame(arg, index=index, columns=columns, dtype=dtype)
    return pd.Series(arg, index=index, name=name, dtype=dtype)
def wrap_metric(self, a):
    """Wrap metric `a` according to the type of `self.ts`.

    If `self.ts` is a frame, returns a Series indexed by its columns. Otherwise returns
    a single value: the first element of `a` if it is an array, or `a` itself."""
    if checks.is_frame(self.ts):
        return pd.Series(a, index=self.ts.columns)
    # Single value
    return a[0] if checks.is_array(a) else a
def broadcast_to_axis_of(arg1, arg2, axis, writeable=False, copy_kwargs=None):
    """Broadcast `arg1` to an axis of `arg2`.

    If `arg2` has less dimensions than requested, will broadcast `arg1` to a single number.

    Args:
        arg1 (array_like): Object to broadcast.
        arg2 (array_like): Object whose axis length defines the target shape.
        axis (int): Axis of `arg2` to broadcast to.
        writeable (bool): Passed as `copy` to `np.array`, i.e., whether to copy
            the broadcasted array.
        copy_kwargs (dict): Keyword arguments passed to `np.array`.
            Defaults to no extra arguments.

    For other keyword arguments, see `broadcast`."""
    # Use None as the default so that no dict object is shared between calls
    if copy_kwargs is None:
        copy_kwargs = {}
    if not checks.is_array(arg2):
        arg2 = np.asarray(arg2)
    if arg2.ndim < axis + 1:
        return np.broadcast_to(arg1, (1, ))[0]  # to a single number
    arg1 = np.broadcast_to(arg1, (arg2.shape[axis], ))
    return np.array(arg1, copy=writeable, **copy_kwargs)  # to shape of axis
def from_values(values, name=None, value_names=None):
    """Create index using array of values.

    If `value_names` is given, it is checked against `values` along axis 0 and used
    as the index directly. Otherwise every element of `values` yields one label:
    its first item if all of its items are equal to that item, or `mix_<position>`
    otherwise."""
    if value_names is not None:
        checks.assert_same_shape(values, value_names, along_axis=0)
        return pd.Index(value_names, name=name)  # just return the names

    labels = []
    for pos, value in enumerate(values):
        arr = value if checks.is_array(value) else np.asarray(value)
        first = arr.item(0)
        labels.append(first if np.all(arr == first) else 'mix_%d' % pos)
    return pd.Index(labels, name=name)
def broadcast_to_axis_of(arg1, arg2, axis, require_kwargs=None):
    """Broadcast `arg1` to an axis of `arg2`.

    If `arg2` has less dimensions than requested, will broadcast `arg1` to a single number.
    Otherwise the broadcasted array is passed through `np.require` with `require_kwargs`.

    For other keyword arguments, see `broadcast`."""
    if require_kwargs is None:
        require_kwargs = {}
    reference = arg2 if checks.is_array(arg2) else np.asarray(arg2)

    if reference.ndim < axis + 1:
        # The requested axis does not exist: collapse to a single number
        return np.broadcast_to(arg1, (1, ))[0]

    axis_len = reference.shape[axis]
    broadcasted = np.broadcast_to(arg1, (axis_len, ))
    return np.require(broadcasted, **require_kwargs)
def soft_to_ndim(arg, ndim):
    """Try to softly bring `arg` to the specified number of dimensions `ndim` (max 2).

    A 2-dim object with a single column is downgraded to 1-dim, and a 1-dim object is
    upgraded to 2-dim by adding a column axis. In all other cases `arg` is returned
    unchanged (after conversion to NumPy, if it was not an array)."""
    if not checks.is_array(arg):
        arg = np.asarray(arg)

    if ndim == 1 and arg.ndim == 2 and arg.shape[1] == 1:
        # downgrade
        return arg.iloc[:, 0] if checks.is_frame(arg) else arg[:, 0]
    if ndim == 2 and arg.ndim == 1:
        # upgrade
        return arg.to_frame() if checks.is_series(arg) else arg[:, None]
    return arg  # do nothing
def repeat(arg, n, axis=1):
    """Repeat each element in `arg` `n` times along the specified axis.

    With `axis=0` rows are repeated and, for pandas objects, the index is repeated with
    `index_fns.repeat_index`. With `axis=1` the argument is first brought to two
    dimensions and the columns are repeated. Any other axis raises `ValueError`."""
    if not checks.is_array(arg):
        arg = np.asarray(arg)

    if axis == 0:
        if not checks.is_pandas(arg):
            return np.repeat(arg, n, axis=0)
        wrapper = array_wrapper.ArrayWrapper.from_obj(arg)
        return wrapper.wrap(
            np.repeat(arg.values, n, axis=0),
            index=index_fns.repeat_index(arg.index, n)
        )

    if axis == 1:
        arg = to_2d(arg)
        if not checks.is_pandas(arg):
            return np.repeat(arg, n, axis=1)
        wrapper = array_wrapper.ArrayWrapper.from_obj(arg)
        return wrapper.wrap(
            np.repeat(arg.values, n, axis=1),
            columns=index_fns.repeat_index(arg.columns, n)
        )

    raise ValueError("Only axis 0 and 1 are supported")
def to_1d(arg, raw=False):
    """Reshape argument to one dimension.

    If `raw` is True, returns NumPy array.
    If 2-dim, will collapse along axis 1 (i.e., DataFrame with one column to Series).

    Raises `ValueError` if the argument cannot be brought to one dimension, which
    includes 2-dim objects with more than one column."""
    if raw or not checks.is_array(arg):
        arg = np.asarray(arg)

    n_dims = arg.ndim
    if n_dims == 1:
        return arg
    if n_dims == 0:
        return arg.reshape((1,))
    if n_dims == 2 and arg.shape[1] == 1:
        # Single column: take it as the 1-dim result
        return arg.iloc[:, 0] if checks.is_frame(arg) else arg[:, 0]
    raise ValueError(f"Cannot reshape a {arg.ndim}-dimensional array to 1 dimension")
def update_data(self, data):
    """Update the data of the plot efficiently.

    Each trace receives its own column of `data` as `y`.

    Args:
        data (array_like): Data in any format that can be converted to NumPy.

            Must be of shape (`x_labels`, `trace_names`).
    """
    if not checks.is_array(data):
        data = np.asarray(data)
    data = reshape_fns.to_2d(data)
    # Rows must line up with the x labels, columns with the traces
    checks.assert_same_shape(data, self._x_labels, along_axis=(0, 0))
    checks.assert_same_shape(data, self._trace_names, along_axis=(1, 0))

    # Update traces
    with self.batch_update():
        for col, trace in enumerate(self.data):
            trace.y = data[:, col]
def to_2d(arg, raw=False, expand_axis=1):
    """Reshape argument to two dimensions.

    If `raw` is True, returns NumPy array.
    If 1-dim, will expand along axis `expand_axis` (by default 1, i.e., Series to
    DataFrame with one column).

    Raises `ValueError` for arguments with more than two dimensions."""
    if raw or not checks.is_array(arg):
        arg = np.asarray(arg)

    n_dims = arg.ndim
    if n_dims == 2:
        return arg
    if n_dims == 0:
        return arg.reshape((1, 1))
    if n_dims != 1:
        raise ValueError(f"Cannot reshape a {arg.ndim}-dimensional array to 2 dimensions")

    if checks.is_series(arg):
        if expand_axis == 0:
            # Series becomes a single row, its index becomes the columns
            return pd.DataFrame(arg.values[None, :], columns=arg.index)
        if expand_axis == 1:
            return arg.to_frame()
    # NumPy arrays (and Series with any other `expand_axis`) are expanded by NumPy
    return np.expand_dims(arg, expand_axis)
def update_data(self, data):
    """Update the data of the plot efficiently.

    The first trace receives `data` as `z`, transposed if the plot is horizontal.

    Args:
        data (array_like): Data in any format that can be converted to NumPy.

            Must be of shape (`y_labels`, `x_labels`).
    """
    if not checks.is_array(data):
        data = np.asarray(data)
    data = reshape_fns.to_2d(data)
    # Columns must line up with the x labels, rows with the y labels
    checks.assert_same_shape(data, self._x_labels, along_axis=(1, 0))
    checks.assert_same_shape(data, self._y_labels, along_axis=(0, 0))

    # Update traces
    with self.batch_update():
        heatmap = self.data[0]
        heatmap.z = data.transpose() if self._horizontal else data
def update_data(self, data):
    """Update the data of the plot efficiently.

    Each trace receives its own column of `data`: as `x` (with `y` reset to None) for
    a vertical plot, or as `y` (with `x` reset to None) for a horizontal one.

    Args:
        data (array_like): Data in any format that can be converted to NumPy.

            Must be of shape (any, `trace_names`).
    """
    if not checks.is_array(data):
        data = np.asarray(data)
    data = reshape_fns.to_2d(data)
    checks.assert_same_shape(data, self._trace_names, along_axis=(1, 0))

    # Update traces
    with self.batch_update():
        for col, trace in enumerate(self.data):
            if not self._horizontal:
                trace.x = data[:, col]
                trace.y = None
                continue
            trace.x = None
            trace.y = data[:, col]
def broadcast(*args, to_shape=None, to_pd=None, to_frame=None, align_index=None, align_columns=None,
              index_from='default', columns_from='default', require_kwargs=None, keep_raw=False,
              return_meta=False, **kwargs):
    """Bring any array-like object in `args` to the same shape by using NumPy broadcasting.

    See [Broadcasting](https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html).

    Can broadcast pandas objects by broadcasting their index/columns with `broadcast_index`.

    Args:
        *args (array_like): Array-like objects.
        to_shape (tuple): Target shape. If set, will broadcast every element in `args` to `to_shape`.

            An integer is converted to a one-element tuple.
        to_pd (bool, tuple or list): Whether to convert all output arrays to pandas, otherwise returns
            raw NumPy arrays. If None, converts only if there is at least one pandas object among them.

            If a tuple or list, it is read per argument by position.
        to_frame (bool): Whether to convert all Series to DataFrames.
        align_index (bool): Whether to align index of pandas objects using multi-index.
        align_columns (bool): Whether to align columns of pandas objects using multi-index.
        index_from (any): Broadcasting rule for index.
        columns_from (any): Broadcasting rule for columns.
        require_kwargs (dict or list of dict): Keyword arguments passed to `np.require`.

            If a tuple or list, it is read per argument by position.
        keep_raw (bool, tuple or list): Whether to keep the unbroadcasted version of the array.

            Only makes sure that the array can be broadcast to the target shape.
        return_meta (bool): If True, will also return new shape, index and columns.
        **kwargs: Keyword arguments passed to `broadcast_index`.

    Returns:
        The broadcasted object if there is a single argument, otherwise a tuple of them.
        If `return_meta` is True, a tuple of that result, the target shape, the new index
        and the new columns (index and columns are None if the output is not pandas).

    For defaults, see `vectorbt.settings.broadcasting`.

    ## Example

    Without broadcasting index and columns:

    ```python-repl
    >>> import numpy as np
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import broadcast

    >>> v = 0
    >>> a = np.array([1, 2, 3])
    >>> sr = pd.Series([1, 2, 3], index=pd.Index(['x', 'y', 'z']), name='a')
    >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    ...     index=pd.Index(['x2', 'y2', 'z2']),
    ...     columns=pd.Index(['a2', 'b2', 'c2']))

    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=None,
    ...     columns_from=None,
    ... ): print(i)
       0  1  2
    0  0  0  0
    1  0  0  0
    2  0  0  0
       0  1  2
    0  1  2  3
    1  1  2  3
    2  1  2  3
       a  a  a
    x  1  1  1
    y  2  2  2
    z  3  3  3
        a2  b2  c2
    x2   1   2   3
    y2   4   5   6
    z2   7   8   9
    ```

    Taking new index and columns from position:

    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=2,
    ...     columns_from=3
    ... ): print(i)
       a2  b2  c2
    x   0   0   0
    y   0   0   0
    z   0   0   0
       a2  b2  c2
    x   1   2   3
    y   1   2   3
    z   1   2   3
       a2  b2  c2
    x   1   1   1
    y   2   2   2
    z   3   3   3
       a2  b2  c2
    x   1   2   3
    y   4   5   6
    z   7   8   9
    ```

    Broadcasting index and columns through stacking:

    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from='stack',
    ...     columns_from='stack'
    ... ): print(i)
          a2  b2  c2
    x x2   0   0   0
    y y2   0   0   0
    z z2   0   0   0
          a2  b2  c2
    x x2   1   2   3
    y y2   1   2   3
    z z2   1   2   3
          a2  b2  c2
    x x2   1   1   1
    y y2   2   2   2
    z z2   3   3   3
          a2  b2  c2
    x x2   1   2   3
    y y2   4   5   6
    z z2   7   8   9
    ```

    Setting index and columns manually:

    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=['a', 'b', 'c'],
    ...     columns_from=['d', 'e', 'f']
    ... ): print(i)
       d  e  f
    a  0  0  0
    b  0  0  0
    c  0  0  0
       d  e  f
    a  1  2  3
    b  1  2  3
    c  1  2  3
       d  e  f
    a  1  1  1
    b  2  2  2
    c  3  3  3
       d  e  f
    a  1  2  3
    b  4  5  6
    c  7  8  9
    ```
    """
    # Imported inside the function (presumably to avoid a circular import; confirm)
    from vectorbt import settings

    is_pd = False
    is_2d = False
    args = list(args)
    # Resolve arguments that were left unset from the global settings
    if require_kwargs is None:
        require_kwargs = {}
    if align_index is None:
        align_index = settings.broadcasting['align_index']
    if align_columns is None:
        align_columns = settings.broadcasting['align_columns']
    if isinstance(index_from, str) and index_from == 'default':
        index_from = settings.broadcasting['index_from']
    if isinstance(columns_from, str) and columns_from == 'default':
        columns_from = settings.broadcasting['columns_from']

    # Convert to np.ndarray object if not numpy or pandas
    # Also check whether we broadcast to pandas and whether we work on 2-dim data
    for i in range(len(args)):
        if not checks.is_array(args[i]):
            args[i] = np.asarray(args[i])
        if args[i].ndim > 1:
            is_2d = True
        if checks.is_pandas(args[i]):
            is_pd = True

    # If target shape specified, check again if we work on 2-dim data
    if to_shape is not None:
        if isinstance(to_shape, int):
            # Allow a bare integer as a shorthand for a 1-dim shape
            to_shape = (to_shape, )
        checks.assert_type(to_shape, tuple)
        if len(to_shape) > 1:
            is_2d = True

    if to_frame is not None:
        # Force either keeping Series or converting them to DataFrames
        is_2d = to_frame

    if to_pd is not None:
        # Force either raw or pandas
        if isinstance(to_pd, (tuple, list)):
            # Index and columns are needed as soon as any output is pandas
            is_pd = any(to_pd)
        else:
            is_pd = to_pd

    # Align pandas objects
    if align_index:
        # Only pandas objects with more than one row take part in the alignment
        index_to_align = []
        for i in range(len(args)):
            if checks.is_pandas(args[i]) and len(args[i].index) > 1:
                index_to_align.append(i)
        if len(index_to_align) > 1:
            indexes = [args[i].index for i in index_to_align]
            # Alignment is attempted only if the indexes differ in length
            if len(set(map(len, indexes))) > 1:
                index_indices = index_fns.align_indexes(*indexes)
                for i in range(len(args)):
                    if i in index_to_align:
                        # Select rows by the positions computed for this object
                        args[i] = args[i].iloc[index_indices[index_to_align.index(i)]]
    if align_columns:
        # Only DataFrames with more than one column take part in the alignment
        cols_to_align = []
        for i in range(len(args)):
            if checks.is_frame(args[i]) and len(args[i].columns) > 1:
                cols_to_align.append(i)
        if len(cols_to_align) > 1:
            indexes = [args[i].columns for i in cols_to_align]
            # Alignment is attempted only if the columns differ in length
            if len(set(map(len, indexes))) > 1:
                col_indices = index_fns.align_indexes(*indexes)
                for i in range(len(args)):
                    if i in cols_to_align:
                        # Select columns by the positions computed for this object
                        args[i] = args[i].iloc[:, col_indices[cols_to_align.index(i)]]

    # Convert all pd.Series objects to pd.DataFrame if we work on 2-dim data
    args_2d = [
        arg.to_frame() if is_2d and checks.is_series(arg) else arg
        for arg in args
    ]

    # Get final shape
    if to_shape is None:
        # NOTE(review): `_broadcast_shape` is a private NumPy helper; confirm that it is
        # available in every NumPy version this project supports
        to_shape = np.lib.stride_tricks._broadcast_shape(*args_2d)

    # Perform broadcasting
    new_args = []
    for i, arg in enumerate(args_2d):
        if isinstance(keep_raw, (tuple, list)):
            _keep_raw = keep_raw[i]
        else:
            _keep_raw = keep_raw
        # Broadcast even if the raw array is kept, so that incompatible shapes still fail
        bc_arg = np.broadcast_to(arg, to_shape)
        if _keep_raw:
            new_args.append(arg)
            continue
        new_args.append(bc_arg)

    # Force to match requirements
    for i in range(len(new_args)):
        if isinstance(require_kwargs, (tuple, list)):
            _require_kwargs = require_kwargs[i]
        else:
            _require_kwargs = require_kwargs
        new_args[i] = np.require(new_args[i], **_require_kwargs)

    if is_pd:
        # Decide on index and columns
        # NOTE: Important to pass args, not args_2d, to preserve original shape info
        new_index = broadcast_index(args, to_shape, index_from=index_from, axis=0, **kwargs)
        new_columns = broadcast_index(args, to_shape, index_from=columns_from, axis=1, **kwargs)
    else:
        new_index, new_columns = None, None

    # Bring arrays to their old types (e.g. array -> pandas)
    for i in range(len(new_args)):
        if isinstance(keep_raw, (tuple, list)):
            _keep_raw = keep_raw[i]
        else:
            _keep_raw = keep_raw
        if _keep_raw:
            # Raw arrays are returned as they are, without wrapping
            continue
        if isinstance(to_pd, (tuple, list)):
            _is_pd = to_pd[i]
        else:
            _is_pd = is_pd
        new_args[i] = wrap_broadcasted(args[i], new_args[i], is_pd=_is_pd,
                                       new_index=new_index, new_columns=new_columns)

    # A single argument is returned as is, multiple arguments as a tuple
    if len(new_args) > 1:
        if return_meta:
            return tuple(new_args), to_shape, new_index, new_columns
        return tuple(new_args)
    if return_meta:
        return new_args[0], to_shape, new_index, new_columns
    return new_args[0]
def broadcast(*args, to_shape=None, to_pd=None, to_2d=None, index_from='default', columns_from='default',
              writeable=False, copy_kwargs=None, keep_raw=False, **kwargs):
    """Bring any array-like object in `args` to the same shape by using NumPy broadcasting.

    See [Broadcasting](https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html).

    Can broadcast pandas objects by broadcasting their index/columns with `broadcast_index`.

    Args:
        *args (array_like): Array-like objects.
        to_shape (tuple): Target shape. If set, will broadcast every element in `args` to `to_shape`.
        to_pd (bool, tuple or list): If `True`, converts all output arrays to pandas, otherwise returns
            raw NumPy arrays. If `None`, converts only if there is at least one pandas object among them.

            If a tuple or list, it is read per argument by position.
        to_2d (bool): If `True`, converts all Series to DataFrames.
        index_from (None, int, str or array_like): Broadcasting rule for index.
        columns_from (None, int, str or array_like): Broadcasting rule for columns.
        writeable (bool): If `True`, makes broadcasted arrays writable, otherwise readonly.

            !!! note
                Has effect only if broadcasting was needed for that particular array.

                Making arrays writable is possible only through copying them, which is pretty expensive.

                Numba requires arrays to be writable.
        copy_kwargs (dict): Keyword arguments passed to `np.array`. For example, to specify `order`.
            Defaults to no extra arguments.

            !!! note
                Has effect on every array, independent from whether broadcasting was needed or not.
        keep_raw (bool, tuple or list): If `True`, will keep the unbroadcasted version of the array.

            If a tuple or list, it is read per argument by position.
        **kwargs: Keyword arguments passed to `broadcast_index`.

    Returns:
        The broadcasted object if there is a single argument, otherwise a tuple of them.

    For defaults, see `vectorbt.defaults.broadcasting`.

    Example:
        Without broadcasting index and columns:

        ```python-repl
        >>> import numpy as np
        >>> import pandas as pd
        >>> from vectorbt.base.reshape_fns import broadcast

        >>> v = 0
        >>> a = np.array([1, 2, 3])
        >>> sr = pd.Series([1, 2, 3], index=pd.Index(['x', 'y', 'z']), name='a')
        >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        ...     index=pd.Index(['x2', 'y2', 'z2']),
        ...     columns=pd.Index(['a2', 'b2', 'c2']))

        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from=None,
        ...     columns_from=None,
        ... ): print(i)
           0  1  2
        0  0  0  0
        1  0  0  0
        2  0  0  0
           0  1  2
        0  1  2  3
        1  1  2  3
        2  1  2  3
           a  a  a
        x  1  1  1
        y  2  2  2
        z  3  3  3
            a2  b2  c2
        x2   1   2   3
        y2   4   5   6
        z2   7   8   9
        ```

        Taking new index and columns from position:

        ```python-repl
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from=2,
        ...     columns_from=3
        ... ): print(i)
           a2  b2  c2
        x   0   0   0
        y   0   0   0
        z   0   0   0
           a2  b2  c2
        x   1   2   3
        y   1   2   3
        z   1   2   3
           a2  b2  c2
        x   1   1   1
        y   2   2   2
        z   3   3   3
           a2  b2  c2
        x   1   2   3
        y   4   5   6
        z   7   8   9
        ```

        Broadcasting index and columns through stacking:

        ```python-repl
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from='stack',
        ...     columns_from='stack'
        ... ): print(i)
              a2  b2  c2
        x x2   0   0   0
        y y2   0   0   0
        z z2   0   0   0
              a2  b2  c2
        x x2   1   2   3
        y y2   1   2   3
        z z2   1   2   3
              a2  b2  c2
        x x2   1   1   1
        y y2   2   2   2
        z z2   3   3   3
              a2  b2  c2
        x x2   1   2   3
        y y2   4   5   6
        z z2   7   8   9
        ```

        Setting index and columns manually:

        ```python-repl
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from=['a', 'b', 'c'],
        ...     columns_from=['d', 'e', 'f']
        ... ): print(i)
           d  e  f
        a  0  0  0
        b  0  0  0
        c  0  0  0
           d  e  f
        a  1  2  3
        b  1  2  3
        c  1  2  3
           d  e  f
        a  1  1  1
        b  2  2  2
        c  3  3  3
           d  e  f
        a  1  2  3
        b  4  5  6
        c  7  8  9
        ```
    """
    is_pd = False
    is_2d = False
    args = list(args)
    # Use None as the default so that no dict object is shared between calls
    if copy_kwargs is None:
        copy_kwargs = {}
    if isinstance(index_from, str) and index_from == 'default':
        index_from = defaults.broadcasting['index_from']
    if isinstance(columns_from, str) and columns_from == 'default':
        columns_from = defaults.broadcasting['columns_from']

    # Convert to np.ndarray object if not numpy or pandas
    # Also check whether we broadcast to pandas and whether we work on 2-dim data
    for i in range(len(args)):
        if not checks.is_array(args[i]):
            args[i] = np.asarray(args[i])
        if args[i].ndim > 1:
            is_2d = True
        if checks.is_pandas(args[i]):
            is_pd = True

    # If target shape specified, check again if we work on 2-dim data
    if to_shape is not None:
        checks.assert_type(to_shape, tuple)
        if len(to_shape) > 1:
            is_2d = True

    if to_2d is not None:
        # Force either keeping Series or converting them to DataFrames
        is_2d = to_2d

    if to_pd is not None:
        # Force either raw or pandas
        if isinstance(to_pd, (tuple, list)):
            # Index and columns are needed as soon as any output is pandas
            is_pd = np.array(to_pd).any()
        else:
            is_pd = to_pd

    # Convert all pd.Series objects to pd.DataFrame if we work on 2-dim data
    args_2d = [
        arg.to_frame() if is_2d and checks.is_series(arg) else arg
        for arg in args
    ]

    # Get final shape
    if to_shape is None:
        to_shape = np.lib.stride_tricks._broadcast_shape(*args_2d)

    # Perform broadcasting
    new_args = []
    for i, arg in enumerate(args_2d):
        if isinstance(keep_raw, (tuple, list)):
            _keep_raw = keep_raw[i]
        else:
            _keep_raw = keep_raw
        if _keep_raw:
            new_args.append(arg)
            continue
        new_args.append(np.broadcast_to(arg, to_shape, subok=True))

    # The problem is that broadcasting creates readonly objects and Numba requires writable ones.
    # To make them writable we must copy, which is ok for small-sized arrays and not ok for large ones.
    # Thus check if broadcasting was needed in the first place, and if so, copy
    for i in range(len(new_args)):
        if new_args[i].shape == args_2d[i].shape:
            # Broadcasting was not needed, take old array
            new_args[i] = np.array(args_2d[i], copy=False, **copy_kwargs)
        else:
            # Broadcasting was needed, take new array
            new_args[i] = np.array(new_args[i], copy=writeable, **copy_kwargs)

    if is_pd:
        # Decide on index and columns
        # NOTE: Important to pass args, not args_2d, to preserve original shape info
        new_index = broadcast_index(args, to_shape, index_from=index_from, axis=0, **kwargs)
        new_columns = broadcast_index(args, to_shape, index_from=columns_from, axis=1, **kwargs)
    else:
        new_index, new_columns = None, None

    # Bring arrays to their old types (e.g. array -> pandas)
    for i in range(len(new_args)):
        if isinstance(keep_raw, (tuple, list)):
            _keep_raw = keep_raw[i]
        else:
            _keep_raw = keep_raw
        if _keep_raw:
            # Raw arrays are returned as they are, without wrapping
            continue
        if isinstance(to_pd, (tuple, list)):
            _is_pd = to_pd[i]
        else:
            _is_pd = is_pd
        new_args[i] = wrap_broadcasted(args[i], new_args[i], is_pd=_is_pd,
                                       new_index=new_index, new_columns=new_columns)

    # A single argument is returned as is, multiple arguments as a tuple
    if len(new_args) > 1:
        return tuple(new_args)
    return new_args[0]
def test_is_array(self):
    """Check that `checks.is_array` rejects a scalar and accepts NumPy and pandas objects."""
    assert not checks.is_array(0)
    array_likes = (
        np.array([0]),
        pd.Series([1, 2, 3]),
        pd.DataFrame([1, 2, 3]),
    )
    for obj in array_likes:
        assert checks.is_array(obj)
def to_time_units(obj, freq):
    """Multiply each element with `freq_delta` to get result in time units.

    `obj` is converted to a NumPy array first unless it is already an array."""
    values = obj if checks.is_array(obj) else np.asarray(obj)
    return values * freq_delta(freq)