def _make_panel_X(
    n_instances=20,
    n_columns=1,
    n_timepoints=20,
    y=None,
    return_numpy=False,
    random_state=None,
):
    """Generate random panel data, optionally tied to a target variable.

    Parameters
    ----------
    n_instances : int, optional (default=20)
        Number of instances to generate. Ignored when ``y`` is given.
    n_columns : int, optional (default=1)
        Size of the second axis of the generated array.
    n_timepoints : int, optional (default=20)
        Size of the third axis of the generated array.
    y : array-like, optional (default=None)
        Target variable. When given, one instance is generated per entry
        and each instance is offset by 100 times its target value.
    return_numpy : bool, optional (default=False)
        If True, return the 3d numpy array; otherwise return the result of
        ``from_3d_numpy_to_nested`` applied to it.
    random_state : optional (default=None)
        Forwarded to ``check_random_state`` to obtain the random generator.

    Returns
    -------
    X : np.ndarray or the output of ``from_3d_numpy_to_nested``
        Generated data of shape (n_instances, n_columns, n_timepoints)
        before any conversion.
    """
    has_target = y is not None

    if has_target:
        # The target dictates how many instances exist; n_instances is
        # overridden by its length.
        y = np.asarray(y)
        n_instances = len(y)

    rng = check_random_state(random_state)

    # Draw the raw panel as a 3d array of normal noise.
    panel = rng.normal(scale=0.5, size=(n_instances, n_columns, n_timepoints))

    if has_target:
        # Reshape to (n_instances, 1, 1) so the per-instance offset
        # broadcasts over all columns and time points.
        panel = panel + (y * 100).reshape(-1, 1, 1)

    return panel if return_numpy else from_3d_numpy_to_nested(panel)
def test_from_3d_numpy_to_nested(n_instances, n_columns, n_timepoints):
    """Check that from_3d_numpy_to_nested yields the expected nested frame."""
    source_shape = (n_instances, n_columns, n_timepoints)
    array = np.random.normal(size=source_shape)

    nested = from_3d_numpy_to_nested(array)

    # Container type and outer (instances x columns) dimensions.
    assert is_nested_dataframe(nested)
    assert nested.shape == source_shape[:2]

    # Each cell should hold one series with n_timepoints entries.
    first_cell = nested.iloc[0, 0]
    assert first_cell.shape[0] == n_timepoints

    # Spot-check the values of one series (second instance, first column),
    # which means the test needs at least two instances.
    expected_series = array[1, 0, :]
    np.testing.assert_array_equal(nested.iloc[1, 0], expected_series)
def measure_inference_time(model, shape=(160, 6), repetitions=300):
    """Time repeated ``model.predict`` calls on a single all-zero sample.

    Parameters
    ----------
    model : object
        Any object exposing a ``predict`` method that accepts the nested
        sample built here.
    shape : sequence of two ints, optional (default=(160, 6))
        The mock sample is created as zeros of shape
        ``(1, shape[0], shape[1])``; its last two axes are swapped before
        it is passed to ``from_3d_numpy_to_nested``.
    repetitions : int, optional (default=300)
        Number of timed ``predict`` calls.

    Returns
    -------
    mean : float
        Mean duration of one ``predict`` call, in seconds.
    std : float
        Standard deviation of the durations, in seconds.
    """
    mock_x = np.zeros(shape=[1, shape[0], shape[1]])
    mock_x = from_3d_numpy_to_nested(mock_x.transpose((0, 2, 1)))

    durations = []
    for _ in range(repetitions):
        # perf_counter is a high-resolution clock intended for measuring
        # short intervals; time.time() is wall-clock time, can jump when
        # the system clock is adjusted and may be coarse on some platforms.
        tic = time.perf_counter()
        model.predict(mock_x)
        toc = time.perf_counter()
        durations.append(toc - tic)

    return np.mean(durations), np.std(durations)
def _extract(_deployid: str, _idx: pd.DatetimeIndex):
    """Cut fixed-size sensor windows around the given timestamps.

    Looks up ``sensors[_deployid]`` from the enclosing scope, locates each
    entry of ``_idx`` in its index, and for every hit copies a window of
    ``win_size`` consecutive rows starting ``win_size_2`` rows before the
    hit. The stacked windows are converted with
    ``convert.from_3d_numpy_to_nested`` and relabelled with the sensor
    columns and the matched index entries.

    NOTE(review): no bounds check is done here - windows that would start
    before the first row or run past the last row are not handled; confirm
    that callers guarantee valid positions.
    """
    frame = sensors[_deployid]
    positions = frame.index.get_indexer(_idx)

    # Work on a (columns, time) view so each window is one column slice.
    values_by_channel = frame.to_numpy().transpose()

    n_windows = len(positions)
    n_channels = len(frame.columns)
    windows = np.empty([n_windows, n_channels, win_size], float)

    first_rows = positions - win_size_2
    for slot, first in enumerate(first_rows):
        windows[slot] = values_by_channel[:, first:(first + win_size)]

    nested = convert.from_3d_numpy_to_nested(windows)
    nested.columns = frame.columns
    nested.index = frame.index[positions]
    return nested
def check_X(
    X,
    enforce_univariate=False,
    enforce_min_instances=1,
    enforce_min_columns=1,
    coerce_to_numpy=False,
    coerce_to_pandas=False,
):
    """Validate panel input data and optionally convert its container type.

    Parameters
    ----------
    X : pd.DataFrame or np.array
        Data to validate. A numpy array must be 3-dimensional; a DataFrame
        must be nested according to ``is_nested_dataframe``.
    enforce_univariate : bool, optional (default=False)
        If True, reject X with more than one column.
    enforce_min_instances : int, optional (default=1)
        Minimum number of instances; only checked when greater than 0.
    enforce_min_columns : int, optional (default=1)
        Minimum number of columns (or time-series variables).
    coerce_to_numpy : bool, optional (default=False)
        If True, a DataFrame X is converted to a 3-dimensional numpy array.
    coerce_to_pandas : bool, optional (default=False)
        If True, a numpy X is converted to a nested pandas DataFrame.

    Returns
    -------
    X : pd.DataFrame or np.array
        The validated, possibly converted, input.

    Raises
    ------
    ValueError
        If both coercion flags are set, or if X fails any of the checks.
    """
    # The two coercion targets are mutually exclusive.
    if coerce_to_numpy and coerce_to_pandas:
        raise ValueError(
            "`coerce_to_pandas` and `coerce_to_numpy` cannot both be set to True"
        )

    # Only the supported container types are accepted.
    if not isinstance(X, VALID_X_TYPES):
        type_msg = (
            f"X must be a pd.DataFrame or a np.array, "
            f"but found: {type(X)}"
        )
        raise ValueError(type_msg)

    # Numpy input: the dimensionality has to be verified first, otherwise
    # the second axis read below might not exist.
    if isinstance(X, np.ndarray):
        if X.ndim != 3:
            dim_msg = (
                f"If passed as a np.array, X must be a 3-dimensional "
                f"array, but found shape: {X.shape}"
            )
            raise ValueError(dim_msg)
        if coerce_to_pandas:
            X = from_3d_numpy_to_nested(X)

    # Column-count constraints.
    n_columns = X.shape[1]
    if enforce_min_columns > n_columns:
        min_cols_msg = (
            f"X must contain at least: {enforce_min_columns} columns, "
            f"but found only: {n_columns}."
        )
        raise ValueError(min_cols_msg)

    if enforce_univariate and n_columns > 1:
        univariate_msg = (
            f"X must be univariate with X.shape[1] == 1, but found: "
            f"X.shape[1] == {n_columns}."
        )
        raise ValueError(univariate_msg)

    # Instance-count constraint, delegated to the shared helper.
    if enforce_min_instances > 0:
        _enforce_min_instances(X, min_instances=enforce_min_instances)

    # DataFrame input (including one produced by the coercion above).
    if isinstance(X, pd.DataFrame):
        if not is_nested_dataframe(X):
            raise ValueError(
                "If passed as a pd.DataFrame, X must be a nested "
                "pd.DataFrame, with pd.Series or np.arrays inside cells."
            )
        if coerce_to_numpy:
            X = from_nested_to_3d_numpy(X)

    return X
def _put_to_nested(ds):
    """Stack the ``'signal'`` entries of ``ds.signals`` and convert to nested.

    The stacked array has its last two axes swapped before being handed to
    ``from_3d_numpy_to_nested``.
    """
    # presumably each entry is a (time, channels) array - verify against caller
    stacked = np.asarray([entry['signal'] for entry in ds.signals])
    swapped = stacked.transpose((0, 2, 1))
    return from_3d_numpy_to_nested(swapped)
def _qcat_to_nested(ds):
    """Convert ``ds.signals['ft']`` to nested format.

    The last two axes of the array are swapped before it is passed to
    ``from_3d_numpy_to_nested``.
    """
    features = ds.signals['ft']
    swapped = features.transpose((0, 2, 1))
    return from_3d_numpy_to_nested(swapped)