def _pre_fit(self, X, y):
    """Validate, center and time-sort the training data.

    Parameters
    ----------
    X : array-like
        Feature matrix; validated by ``check_arrays_survival`` (``self.copy_X``
        is forwarded as its ``copy`` argument).
    y : structured survival outcome
        Passed to ``check_arrays_survival``, which splits it into the event
        indicator and the observed time.

    Returns
    -------
    X : ndarray, Fortran-ordered
        Column-centered (and, if ``self.normalize`` is set, column-rescaled
        by ``f_normalize``) features, rows ordered by decreasing time.
    event_num : ndarray of uint8
        Event indicator in the same row order.
    time : ndarray of float64
        Observed times in decreasing order.
    """
    X, event, time = check_arrays_survival(X, y, copy=self.copy_X)

    # Subtract the per-column mean in place on the validated array.
    column_means = numpy.average(X, axis=0)
    X -= column_means
    if self.normalize:
        X = f_normalize(X, copy=False, axis=0)

    # Mergesort is stable, so samples with tied times keep their input order.
    order = numpy.argsort(-time, kind="mergesort")
    sorted_X = numpy.asfortranarray(X[order, :])
    sorted_event = event[order].astype(numpy.uint8)
    sorted_time = time[order].astype(numpy.float64)
    return sorted_X, sorted_event, sorted_time
def _pre_fit(self, X, y):
    """Validate, center and time-sort the training data, keeping the statistics.

    Parameters
    ----------
    X : array-like
        Feature matrix; validated by ``check_arrays_survival`` (``self.copy_X``
        is forwarded as its ``copy`` argument) and then by
        ``self._validate_data``.
    y : structured survival outcome
        Passed to ``check_arrays_survival``, which splits it into the event
        indicator and the observed time.

    Returns
    -------
    X : ndarray, Fortran-ordered
        Column-centered (and, if ``self.normalize`` is set, column-rescaled
        by ``f_normalize``) features, rows ordered by decreasing time.
    event_num : ndarray of uint8
        Event indicator in the same row order.
    time : ndarray of float64
        Observed times in decreasing order.
    X_offset : ndarray
        Per-column mean that was subtracted from ``X``.
    X_scale : ndarray
        Per-column norm reported by ``f_normalize``; all ones when
        ``self.normalize`` is false.
    """
    X, event, time = check_arrays_survival(X, y, copy=self.copy_X)
    X = self._validate_data(X)

    # Subtract the per-column mean in place on the validated array.
    X_offset = numpy.average(X, axis=0)
    X -= X_offset

    if not self.normalize:
        # No rescaling requested: report a neutral scale of one per column.
        X_scale = numpy.ones(X.shape[1], dtype=X.dtype)
    else:
        X, X_scale = f_normalize(X, copy=False, axis=0, return_norm=True)

    # Mergesort is stable, so samples with tied times keep their input order.
    order = numpy.argsort(-time, kind="mergesort")
    sorted_X = numpy.asfortranarray(X[order, :])
    sorted_event = event[order].astype(numpy.uint8)
    sorted_time = time[order].astype(numpy.float64)
    return sorted_X, sorted_event, sorted_time, X_offset, X_scale
def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
                     sample_weight=None, return_mean=False, check_input=True):
    """Center and scale data.

    Centers data to have mean zero along axis 0. If ``fit_intercept=False``
    or if ``X`` is a sparse matrix, no centering is done, but normalization
    can still be applied. The function returns the statistics necessary to
    reconstruct the input data, which are ``X_offset``, ``y_offset`` and
    ``X_scale``, such that the output is::

        X = (X - X_offset) / X_scale

    ``X_scale`` is the L2 norm of ``X - X_offset``. If ``sample_weight`` is
    not None, then the weighted mean of ``X`` and ``y`` is zero, and not the
    mean itself. If ``return_mean=True``, the mean, eventually weighted, is
    returned, independently of whether ``X`` was centered (option used for
    optimization with sparse data in coordinate descent).

    This is here because nearly all linear models will want their data to be
    centered. This function also systematically makes ``y`` consistent with
    ``X.dtype``.

    Parameters
    ----------
    X : array-like or sparse matrix
        Feature matrix. With ``check_input=True`` it is passed through
        ``check_array`` (CSR/CSC accepted, float dtypes).
    y : array-like
        Target values; converted to an array of ``X.dtype``.
    fit_intercept : bool
        Whether to center ``y`` and (dense) ``X``.
    normalize : bool, default False
        Whether to rescale the columns of ``X``.
    copy : bool, default True
        Whether to work on a copy of ``X``. With ``copy=False`` a dense ``X``
        is centered in place and a sparse ``X`` is scaled in place.
    sample_weight : None, number or array-like, default None
        Weights used for the averages. A plain number is treated as None.
    return_mean : bool, default False
        For sparse ``X`` with ``fit_intercept=True``: if False, the reported
        ``X_offset`` is reset to zero.
    check_input : bool, default True
        Whether to validate ``X`` with ``check_array``.

    Returns
    -------
    X : array or sparse matrix
        The preprocessed features.
    y : ndarray
        The (possibly centered) targets, in ``X.dtype``.
    X_offset : ndarray of shape (n_features,)
    y_offset : scalar or ndarray
    X_scale : ndarray of shape (n_features,)
    """
    # A scalar weight is uniform over the samples, so the weighted mean is
    # the plain mean and the weight can be dropped.
    if isinstance(sample_weight, numbers.Number):
        sample_weight = None
    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight)

    if check_input:
        X = check_array(X, copy=copy, accept_sparse=['csr', 'csc'],
                        dtype=FLOAT_DTYPES)
    elif copy:
        # order='K' keeps the memory layout of a dense input.
        X = X.copy() if issparse(X) else X.copy(order='K')

    y = np.asarray(y, dtype=X.dtype)
    sparse_input = issparse(X)

    if fit_intercept:
        if sparse_input:
            # Sparse X is never centered; its mean is only reported on
            # request (return_mean=True).
            # NOTE(review): sample_weight is not passed to the sparse mean
            # computation, unlike the dense branch below -- confirm intended.
            X_offset, X_var = mean_variance_axis(X, axis=0)
            if not return_mean:
                X_offset[:] = X.dtype.type(0)

            if normalize:
                X_scale = _scale_sparse_columns(X, X_var)
            else:
                X_scale = np.ones(X.shape[1], dtype=X.dtype)
        else:
            X_offset = np.average(X, axis=0, weights=sample_weight)
            X -= X_offset
            if normalize:
                X, X_scale = f_normalize(X, axis=0, copy=False,
                                         return_norm=True)
            else:
                X_scale = np.ones(X.shape[1], dtype=X.dtype)

        y_offset = np.average(y, axis=0, weights=sample_weight)
        y = y - y_offset
    else:
        if normalize:
            if sparse_input:
                _, X_var = mean_variance_axis(X, axis=0)
                X_scale = _scale_sparse_columns(X, X_var)
            else:
                X, X_scale = f_normalize(X, axis=0, copy=False,
                                         return_norm=True)
        else:
            X_scale = np.ones(X.shape[1], dtype=X.dtype)

        # Nothing was subtracted, so both offsets are zero.
        X_offset = np.zeros(X.shape[1], dtype=X.dtype)
        if y.ndim == 1:
            y_offset = X.dtype.type(0)
        else:
            y_offset = np.zeros(y.shape[1], dtype=X.dtype)

    return X, y, X_offset, y_offset, X_scale


def _scale_sparse_columns(X, X_var):
    """Scale the columns of sparse ``X`` in place; return the scale applied.

    ``X_var`` is the per-column variance as returned by
    ``mean_variance_axis``. Its buffer is reused for the result, so the
    caller must not rely on ``X_var`` afterwards.
    """
    # TODO: f_normalize could be used here as well but the function
    # inplace_csr_row_normalize_l2 must be changed such that it
    # can return also the norms computed internally

    # transform variance to norm in-place
    X_var *= X.shape[0]
    X_scale = np.sqrt(X_var, X_var)
    # Leave constant (zero-norm) columns untouched instead of dividing by 0.
    X_scale[X_scale == 0] = 1
    inplace_column_scale(X, 1. / X_scale)
    return X_scale