def transform(self, X: np.ndarray) -> np.ndarray:
    """Project every 2D sample of ``X`` onto the fitted components.

    Parameters
    ----------
    X : np.ndarray, shape (n_samples, height, width)

    Returns
    -------
    : np.ndarray, shape (n_samples, height, n_components)

    Raises
    ------
    NotFittedError
        If ``components_`` is still ``None``.
    ValueError
        If ``X`` is not a 3D array.
    """
    if self.components_ is None:
        raise NotFittedError(
            "This PCA instance is not fitted yet. "
            "Call 'fit' with appropriate arguments before using this estimator."
        )
    if X.ndim != 3:
        raise ValueError(f"Expected 3D array, got {X.ndim}D array instead")
    # matmul broadcasts the component matrix over the leading sample axis.
    # Unlike a per-sample Python loop, this keeps the documented 3D output
    # shape when X contains zero samples.
    # NOTE(review): assumes components_ is a 2D (width, n_components) matrix,
    # as the documented return shape implies - confirm against fit().
    return np.matmul(X, self.components_)
def transform(self, X):
    """Reduce ``X`` to the features chosen during fitting.

    Parameters
    ----------
    X : pandas dataframe or array-like of shape (n_samples, n_features)
        Data to reduce, where n_samples is the number of samples and
        n_features is the number of features.

    Returns
    -------
    array of shape (n_samples, n_selected_features)
        The selected features. The same value is also stored on
        ``self.selected_features``.

    Raises
    ------
    NotFittedError
        If ``is_fitted`` is falsy.
    """
    # Input validation runs before the fitted check, so it is applied even
    # on an instance that has not been fitted.
    X, _ = self._check_X_Y(X, None)
    if not self.is_fitted:
        raise NotFittedError(
            'Fit method must be used before calling transform')
    self.selected_features = super(FeatureSelection, self).transform(X)
    return self.selected_features
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with the fitted columns label encoded.

    Every column listed in ``columns_to_encode`` is cast to ``object``
    dtype. Then each column that has an entry in ``encoding_dict`` has its
    values replaced through that column's mapping; values absent from the
    mapping become ``0``. The input frame itself is not modified.

    Raises
    ------
    NotFittedError
        If ``encoding_dict`` is not set on the instance.
    """
    try:
        self.encoding_dict
    except AttributeError:
        raise NotFittedError(
            "This LabelEncoder instance is not fitted yet. "
            "Call 'fit' with appropriate arguments before using this LabelEncoder."
        )

    encoded = df.copy()
    # Cast first so that every categorical column is compared against the
    # mapping keys in a uniform (object) representation.
    for name in self.columns_to_encode:  # type: ignore
        encoded[name] = encoded[name].astype("O")
    for name, mapping in self.encoding_dict.items():
        encoded[name] = encoded[name].apply(
            lambda value: mapping.get(value, 0))
    return encoded
def predict_proba(self, X):
    """
    Get class (= author) probabilities for a code sample.

    The sample is passed through the tokenizer, the tf-idf transformer and
    the feature selector, in that order, and the result is handed to the
    estimator.

    Parameters:
        X: code sample

    Raises:
        NotFittedError: if ``user_to_id`` is ``None``.
    """
    if self.user_to_id is None:
        raise NotFittedError('Model not fitted. Fit with CodoxerModel.fit(X, y).')

    tokens = self.tokenizer.transform(X)
    weighted = self.tfidf.transform(tokens)
    selected = self.selector.transform(weighted)
    # NOTE(review): this calls the estimator's ``predict`` rather than
    # ``predict_proba`` - confirm the estimator's ``predict`` really returns
    # probabilities, otherwise this method returns hard predictions.
    return self.estimator.predict(selected)
def const_marginal_effect(self, X):
    """Calculate the constant marginal CATE θ(·) conditional on features X.

    Parameters
    ----------
    X : array-like, shape (n, d_x)
        Feature vector that captures heterogeneity.

    Returns
    -------
    Theta : matrix , shape (n, d_t)
        Constant marginal CATE of each treatment for each sample.

    Raises
    ------
    NotFittedError
        If ``model_is_fitted`` is falsy.
    """
    if not self.model_is_fitted:
        raise NotFittedError('This {0} instance is not fitted yet.'.format(self.__class__.__name__))
    X = check_array(X)
    # One task per row of X, dispatched to a thread-backed worker pool.
    # TODO: Check performance
    run_parallel = Parallel(n_jobs=self.n_jobs, verbose=3, backend='threading')
    pointwise = delayed(self._pointwise_effect)
    results = run_parallel(pointwise(row) for row in X)
    return np.asarray(results)
def predict_probabilites(self, X):
    """Evaluate the probability tensor for one set of inputs.

    ``X`` is indexed with the keys ``'h_np'``, ``'b_np'``, ``'h_sizes'``,
    ``'b_sizes'``, ``'h_sent_sizes'`` and ``'b_sent_sizes'``. Each value is
    fed to the matching input handle stored on the instance, and the result
    of ``self._probabilites.eval`` for that feed is returned.

    Raises
    ------
    NotFittedError
        If ``self._session`` is falsy.
    """
    if not self._session:
        raise NotFittedError("This %s instance is not fitted yet" %
                             self.__class__.__name__)

    head, body = X['h_np'], X['b_np']
    head_sizes, body_sizes = X['h_sizes'], X['b_sizes']
    head_sent_sizes, body_sent_sizes = X['h_sent_sizes'], X['b_sent_sizes']

    # The evaluation happens with the stored session installed as default.
    with self._session.as_default():
        feed = {
            self._X_head: head,
            self._X_body: body,
            self._X_h_sizes: head_sizes,
            self._X_b_sizes: body_sizes,
            self._X_h_sent_sizes: head_sent_sizes,
            self._X_b_sent_sizes: body_sent_sizes,
        }
        return self._probabilites.eval(feed_dict=feed)
def inverse_transform(self, X):
    """Map ``X`` back to the original space.

    The kernel between ``X`` and the stored ``X_transformed_fit_`` is
    computed and multiplied by ``dual_coef_``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_components)

    Returns
    -------
    X_new : array-like, shape (n_samples, n_features)

    Raises
    ------
    NotFittedError
        If ``fit_inverse_transform`` is falsy.

    References
    ----------
    "Learning to Find Pre-Images", G BakIr et al, 2004.
    """
    if not self.fit_inverse_transform:
        raise NotFittedError("The fit_inverse_transform parameter was not"
                             " set to True when instantiating and hence "
                             "the inverse transform is not available.")

    kernel = self._get_kernel(X, self.X_transformed_fit_)
    return np.dot(kernel, self.dual_coef_)
def transform(self, X):
    """Validate the columns of ``X``, one-hot encode and drop columns.

    Works on a copy of ``X``. The column set of ``X`` must equal the
    original column headers, ignoring the unused columns on both sides.
    After the check, ``_cols_to_binarize`` are dummy-encoded (first level
    dropped), then ``_cols_to_drop`` and any present ``_unused_cols`` are
    removed.

    Raises
    ------
    NotFittedError
        If ``is_fit`` is falsy.
    ValueError
        If ``X`` has missing or extra columns.
    """
    if not self.is_fit:
        raise NotFittedError("This LoadEpimlTransformer is not fitted yet")

    X = X.copy()

    # Fold the unused columns into both sides of the comparison so that
    # their presence or absence never trips the column check.
    X_cols = set(X.columns.values).union(self._unused_cols)
    data_cols = set(self._orig_col_headers).union(self._unused_cols)
    if X_cols != data_cols:
        missing_cols = data_cols - X_cols
        extra_cols = X_cols - data_cols
        template = "X missing {} cols [{}], and has {} extra cols [{}]"
        raise ValueError(template.format(
            len(missing_cols), missing_cols, len(extra_cols), extra_cols))

    # One-hot encode the configured columns, then remove what is not needed.
    encoded = pd.get_dummies(X, columns=self._cols_to_binarize, drop_first=True)
    trimmed = encoded.drop(self._cols_to_drop, axis=1)
    return trimmed.drop(self._unused_cols, axis=1, errors='ignore')
def predict_proba(self, X):
    """Predict the scaled positive-class probability for X.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    p : array
        Column 1 of the wrapped estimator's ``predict_proba`` output,
        divided by ``self.c``.

    Raises
    ------
    NotFittedError
        If ``estimator_fitted`` is falsy.
    """
    if not self.estimator_fitted:
        raise NotFittedError(
            'The estimator must be fitted before calling predict_proba().')

    # Keep only the second probability column before rescaling by c.
    positive = self.estimator.predict_proba(X)[:, 1]
    return positive / self.c
def get_interval_mapping(self, col_name: str):
    """Get the mapping from encoded value to its corresponding group.

    For a column that has a discrete encoding and whose bins are a list,
    the encoded labels are grouped by their position in the bins and each
    group is rendered as a comma-separated string; key ``0`` is always set
    to ``'UNSEEN'``. Any other column is delegated to the parent class.

    Raises
    ------
    NotFittedError
        If ``bins`` is ``None``.
    """
    if self.bins is None:
        raise NotFittedError(
            'This {} is not fitted. Call the fit method first.'.format(
                self.__class__.__name__))

    is_categorical = (col_name in self.discrete_encoding
                      and isinstance(self.bins[col_name], list))
    if not is_categorical:
        return super().get_interval_mapping(col_name)

    encoding = self.discrete_encoding[col_name]
    positions = searchsorted(self.bins[col_name], encoding)
    members = defaultdict(list)
    for position, label in zip(positions, encoding.index):
        members[position].append(label)

    mapping = {
        position: ', '.join(map(str, labels))
        for position, labels in members.items()
    }
    # Position 0 is overwritten unconditionally, whatever was grouped there.
    mapping[0] = 'UNSEEN'
    return mapping
def predict(self, X):
    """Predict the response variable given a design matrix.

    The output is ``floor(exp(intercept_ + X . coef_))``, i.e. the mode of
    the Poisson distribution with that mean.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        Design matrix to predict on.

    Returns
    -------
    mode : array_like, shape (n_samples)
        The predicted response values, i.e. the modes.

    Raises
    ------
    NotFittedError
        If ``coef_`` or ``intercept_`` is missing.
    """
    if not (hasattr(self, 'coef_') and hasattr(self, 'intercept_')):
        raise NotFittedError('Poisson model is not fit.')

    linear_term = self.intercept_ + np.dot(X, self.coef_)
    return np.floor(np.exp(linear_term))
def frozen(self, experiment: ExperimentBackend) -> 'MetaBlock':
    """Save the fitted models to the experiment.

    Each fitted model is stored under the key ``'model'`` inside its own
    nested environment, and the list of environment names is recorded on
    ``experiment`` under the mark ``'cv_dirs'``.

    Args:
        experiment:
            the environment the models are saved to

    Returns:
        myself

    Raises:
        NotFittedError: if ``_check_has_fitted_models()`` is falsy.
    """
    if not self._check_has_fitted_models():
        raise NotFittedError()

    models = self._fitted_models
    dir_names = [self._get_fold_dir(fold) for fold, _ in enumerate(models)]

    for fold_dir, fitted in zip(dir_names, models):
        with experiment.as_environment(fold_dir, style='nested') as fold_env:
            fold_env.save_as_python_object('model', fitted)

    experiment.mark('cv_dirs', dir_names)
    return self
def predict_proba(self, X):
    """Estimate the class probabilities.

    Returns the probability that each datapoint belongs to the positive
    class.

    Parameters
    ----------
    X : np.ndarray
        The data matrix.

    Returns
    -------
    p : np.ndarray
        A vector of probabilities. The i-th entry is the probability for
        the i-th data point belonging to the positive class.

    Raises
    ------
    NotFittedError
        If ``coef_`` is not set.
    """
    fitted = hasattr(self, "coef_")
    if not fitted:
        raise NotFittedError("Call fit before prediction")
    # NOTE(review): assuming this is a method, the bare name below resolves
    # in module scope, i.e. to a separate ``predict_proba`` function defined
    # elsewhere - not to this method.
    weights = self.coef_
    return predict_proba(weights, X)
def transform(self, df: pd.DataFrame) -> np.ndarray:
    """Tokenize, numericalize and pad the text column of ``df``.

    The texts in ``df[self.text_col]`` are tokenized with ``get_texts``
    (stored on ``self.tokens``), numericalized with the fitted vocabulary
    and padded with ``pad_sequences`` using ``self.maxlen``. When
    ``word_vectors_path`` is set, ``self.embedding_matrix`` is (re)built as
    a side effect.

    Returns
    -------
    np.ndarray
        Array built from the padded sequences, one entry per text.

    Raises
    ------
    NotFittedError
        If ``vocab`` is not set on the instance.
    """
    try:
        self.vocab
    except AttributeError:
        # Only a missing attribute means "not fitted"; a bare ``except``
        # would also swallow KeyboardInterrupt and unrelated errors.
        raise NotFittedError(
            "This TextPreprocessor instance is not fitted yet. "
            "Call 'fit' with appropriate arguments before using this estimator."
        )
    texts = df[self.text_col].tolist()
    self.tokens = get_texts(texts)
    sequences = [self.vocab.numericalize(t) for t in self.tokens]
    padded_seq = np.array(
        [pad_sequences(s, maxlen=self.maxlen) for s in sequences])
    if self.verbose:
        print("The vocabulary contains {} tokens".format(
            len(self.vocab.stoi)))
    if self.word_vectors_path is not None:
        self.embedding_matrix = build_embeddings_matrix(
            self.vocab, self.word_vectors_path, self.min_freq)
    return padded_seq
def transform(self, X):
    """
    Transforms the input matrix X.

    The node inputs are computed from ``X``, the input weights and the bias
    weights (with their scalings), stored on ``self._hidden_layer_state``
    and passed to the configured activation function.

    Parameters
    ----------
    X : Union(ndarray, sparse matrix) of size (n_samples, n_features)

    Returns
    -------
    Y: ndarray of size (n_samples, hidden_layer_size)

    Raises
    ------
    NotFittedError
        If the input weights or the bias weights are ``None``.
    """
    if self._input_weights is None or self._bias_weights is None:
        raise NotFittedError(self)

    state = InputToNode._node_inputs(
        X,
        self._input_weights,
        self.input_scaling,
        self._bias_weights,
        self.bias_scaling,
    )
    self._hidden_layer_state = state
    # The activation's return value is discarded, so it presumably updates
    # the state array in place - verify against ACTIVATIONS.
    ACTIVATIONS[self.input_activation](state)
    return self._hidden_layer_state
def predict_probability(self, points_in):
    """Run the softmax op on a single point set.

    A leading axis of size one is added to ``points_in`` before it is fed
    to the network, and the training flag is fed as ``False``.

    Returns the value of ``self._softmax_op`` for that feed.

    Raises
    ------
    NotFittedError
        If ``self._session`` is falsy.
    """
    if not self._session:
        raise NotFittedError("This %s instance is not fitted yet" %
                             self.__class__.__name__)

    with self._session.as_default() as sess:
        batch = np.expand_dims(points_in, 0)
        fetches = (self._softmax_op, self._ln_feat, self._ln_feat_in,
                   self._ln_xyz)
        feed = {self._points_in: batch, self._is_training: False}
        # The feature and xyz outputs are fetched alongside the softmax but
        # are not used here; only a disabled debug dump ever consumed them.
        softmax, _feat, _feat_in, _xyz = sess.run(fetches, feed_dict=feed)
        return softmax
def predict(self, X):
    """Predict regression target for X.

    The input is validated (sparse input accepted), its feature count is
    checked, and the prediction is delegated to the first entry of
    ``self._estimators``.

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    y : array of shape = [n_samples]
        The predicted values.

    Raises
    ------
    NotFittedError
        If ``_fitted`` is missing or falsy.
    """
    if not getattr(self, '_fitted', False):
        raise NotFittedError(NOT_FITTED_ERROR_DESC)

    X = check_array(X, accept_sparse=True)
    n_features = X.shape[1]
    self._check_n_features(n_features)

    first_estimator = self._estimators[0]
    return first_estimator.predict(X)
def predict(self, X):
    """
    Predicts using the base regressor, then applies the inverse transform.

    :param X: {array-like, sparse matrix}, shape = (n_samples, n_features)
        Samples.
    :return: y_hat : array, shape = (n_samples,)
        Predicted values.
    :raises NotFittedError: if ``regressor_`` is not set.
    """
    if not hasattr(self, 'regressor_'):
        raise NotFittedError(
            "This instance {} is not fitted yet. Call 'fit' with "
            "appropriate arguments before using this method.".format(
                type(self)))

    # Forward pass: transform the features only (no target is supplied).
    transformed, _ = self.transformer_.transform(X, None)
    raw_prediction = self.regressor_.predict(transformed)

    # Backward pass: map the predictions through the inverse transformer.
    inverse = self.transformer_.get_fct_inv()
    _, restored = inverse.transform(transformed, raw_prediction)
    return restored
def _predict(self, X, axis=-1, batch_size=-1):
    """Run the model over ``X`` batch by batch and concatenate the results.

    ``False`` is added to the graph collection ``"IS_TRAINING"``, every
    entry of the ``DROPOUTS`` collection is fed ``1.0``, and each batch from
    the data feeder is run through ``self._model_predictions``. When
    ``n_classes > 1`` and ``axis`` is not ``-1`` the per-batch output is
    reduced with ``argmax`` along ``axis``.

    Raises
    ------
    NotFittedError
        If ``_initialized`` is falsy.
    """
    if not self._initialized:
        raise NotFittedError()

    self._graph.add_to_collection("IS_TRAINING", False)
    batches = setup_predict_data_feeder(X, batch_size=batch_size)

    # Presumably these are keep-probabilities, so feeding 1.0 turns dropout
    # off for prediction - confirm where DROPOUTS is populated.
    feed_dict = dict.fromkeys(self._graph.get_collection(DROPOUTS), 1.0)

    outputs = []
    for batch in batches:
        feed_dict[self._inp] = batch
        batch_preds = self._session.run(self._model_predictions, feed_dict)
        if self.n_classes > 1 and axis != -1:
            batch_preds = batch_preds.argmax(axis=axis)
        outputs.append(batch_preds)

    return np.concatenate(outputs, axis=0)
def transform(self, X):
    """Select the columns of ``X`` that were retained during fitting.

    Parameters
    ----------
    X : `pandas.DataFrame`
        Data to transform. e.g. each column is a timeseries.
        Columns are expected to be numeric.

    Returns
    -------
    X_subset : `pandas.DataFrame`
        Selected columns of X. Keeps columns that were not
        degenerate on the training data.

    Raises
    ------
    NotFittedError
        If ``keep_cols`` is ``None``.
    """
    kept = self.keep_cols
    if kept is None:
        raise NotFittedError(
            "This instance is not fitted yet. Call 'fit' with appropriate arguments "
            "before calling 'transform'.")
    return X[kept]
def predict(self, X):
    """
    Predicts the targets using the trained ELM regressor.

    ``X`` is transformed by ``_input_to_node``, then by ``_node_to_node``,
    and the result is passed to the regressor.

    Parameters
    ----------
    X : {ndarray, sparse matrix} of shape (n_samples, n_features)

    Returns
    -------
    y : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)
        The predicted targets

    Raises
    ------
    NotFittedError
        If ``_input_to_node`` or ``_regressor`` is ``None``.
    """
    if self._input_to_node is None or self._regressor is None:
        raise NotFittedError(self)

    first_stage = self._input_to_node.transform(X)
    second_stage = self._node_to_node.transform(first_stage)
    return self._regressor.predict(second_stage)
def predict(self, X):
    """Predict using the average of the base learners.

    Parameters
    ----------
    X : pandas DataFrame
        Features

    Returns
    -------
    y_pred : pandas Series
        Predicted target variable. When ``X`` is not a DataFrame, the plain
        values array is returned instead of a Series.

    Raises
    ------
    NotFittedError
        If ``fit_learners`` is ``None``.
    """
    if self.fit_learners is None:
        raise NotFittedError('Model has not been fit')

    # Optional preprocessing step.
    Xp = X if self.preprocessing is None else self.preprocessing.transform(X)

    # One prediction column per base learner, indexed like the input rows.
    input_is_frame = isinstance(X, pd.DataFrame)
    row_index = X.index if input_is_frame else np.arange(X.shape[0])
    preds = pd.DataFrame(index=row_index)

    features_are_frame = isinstance(Xp, pd.DataFrame)
    for i, learner in enumerate(self.fit_learners):
        columns = self.features_ix[i]
        Xs = Xp.iloc[:, columns] if features_are_frame else Xp[:, columns]
        preds[str(i)] = learner.predict(Xs)

    averaged = preds.mean(axis=1)
    return averaged if input_is_frame else averaged.values
def predict_proba(self, X):
    """Returns class probability estimates for the given test data.

    Arguments:
        X: array-like, shape `(n_samples, n_features)`
            Test samples where `n_samples` is the number of samples
            and `n_features` is the number of features.

    Returns:
        proba: array-like, shape `(n_samples, n_outputs)`
            Class probability estimates.
            In the case of binary classification,
            to match the scikit-learn API,
            will return an array of shape `(n_samples, 2)`
            (instead of `(n_sample, 1)` as in Keras).

    Raises:
        NotFittedError: if ``_initialized()`` is falsy.
    """
    if not self._initialized():
        raise NotFittedError("Estimator needs to be fit before `predict` "
                             "can be called")

    # Validate, then encode the features for the model.
    validated = self._validate_data(X=X, y=None)
    encoded = self.feature_encoder_.transform(validated)

    # Keyword arguments routed to the model's ``predict`` call.
    predict_args = route_params(
        self.get_params(),
        destination="predict",
        pass_filter=self._predict_kwargs,
    )
    outputs = self.model_.predict(encoded, **predict_args)

    # Decode the raw outputs back into probabilities.
    return self.target_encoder_.inverse_transform(outputs, return_proba=True)
def predict_proba(self, X):
    """Predict class probabilities for X.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.

    Returns
    ----------
    avg : array-like, shape = [n_samples, n_classes]
        Weighted average probability for each class per sample.

    Raises
    ------
    NotFittedError
        If ``clfs_`` is not set.
    """
    if not hasattr(self, 'clfs_'):
        raise NotFittedError("Estimator not fitted, "
                             "call `fit` before exploiting the model.")

    # Average along the first axis of the stacked probabilities.
    stacked = self._predict_probas(X)
    return np.average(stacked, axis=0, weights=self.weights)
def transform(self, X):
    """Calculate time series features of the input time series.

    Parameters
    ----------
    X : pd.DataFrame

    Returns
    -------
    A data frame with the original time points (column ``self.time_col``)
    followed by the calculated features.

    Raises
    ------
    NotFittedError
        If ``origin_for_time_vars`` is ``None``.
    """
    origin = self.origin_for_time_vars
    if origin is None:
        raise NotFittedError(
            "This instance is not fitted yet. Call 'fit' with appropriate arguments "
            "before calling 'transform'.")
    assert isinstance(X, pd.DataFrame)

    timestamps = X[self.time_col]
    time_features = build_time_features_df(
        timestamps, conti_year_origin=origin)
    return pd.concat([timestamps, time_features], axis=1)
def _validate_X_predict(self, X, check_input): """Validate X whenever one tries to predict, apply, predict_proba""" if self.tree_ is None: raise NotFittedError("Estimator not fitted, " "call `fit` before exploiting the model.") if check_input: X = check_array(X, dtype=DTYPE, accept_sparse="csr") if issparse(X) and (X.indices.dtype != np.intc or X.indptr.dtype != np.intc): raise ValueError("No support for np.int64 index based " "sparse matrices") n_features = X.shape[1] if self.n_features_ != n_features: raise ValueError("Number of features of the model must " "match the input. Model n_features is %s and " "input n_features is %s " % (self.n_features_, n_features)) return X
def predict_meta_features(self, X):
    """Get meta-features of test-data.

    Parameters
    ----------
    X : numpy array, shape = [n_samples, n_features]
        Test vectors, where n_samples is the number of samples and
        n_features is the number of features.

    Returns
    -------
    meta-features : numpy array, shape = [n_samples, len(self.regressors)]
        meta-features for test data, where n_samples is the number of
        samples in test data and len(self.regressors) is the number
        of regressors.

    Raises
    ------
    NotFittedError
        If ``regr_`` is not set.
    """
    if not hasattr(self, 'regr_'):
        raise NotFittedError("Estimator not fitted, "
                             "call `fit` before exploiting the model.")

    # One prediction vector per fitted regressor, stacked as columns.
    per_regressor = [regressor.predict(X) for regressor in self.regr_]
    return np.column_stack(per_regressor)
def transform(self, X, y=None):
    """Extract non-correlated features from X.

    The columns of ``X`` are selected with the stored mask; positional
    ``iloc`` indexing is used when ``_is_df`` is truthy, plain array
    indexing otherwise. ``y`` is accepted but not used.

    Parameters
    ----------
    X: {array-like, sparse matrix}, shape (n_samples, n_features)
        New data. Must have the same number of columns as the data used
        to fit the transformer.

    Returns
    -------
    X_new : {same data type as X}, shape (n_samples, n_components)

    Raises
    ------
    NotFittedError
        If the stored mask is not a numpy array.
    """
    mask = self._mask
    if not isinstance(mask, np.ndarray):
        raise NotFittedError()

    indexer = X.iloc if self._is_df else X
    return indexer[:, mask]
def transform(self, data):
    """Apply the fitted transformation, optionally shuffling the result.

    Without the shadow flag the output of ``_transform_special_process``
    is returned unchanged. With it, the rows of that output are randomly
    permuted (via ``np.random``) while the original index is kept: a
    DataFrame keeps its columns, anything else is returned as a Series
    named ``"shadow_" + self._name``.

    Raises
    ------
    NotFittedError
        If ``fitted`` is not exactly ``True``.
    """
    if self.fitted is not True:
        raise NotFittedError("Transformation is not fitted yet.")

    if not self._shadow:
        return self._transform_special_process(data)

    result = self._transform_special_process(data)
    values = np.array(result)

    # Frame detection is done on the type name rather than isinstance.
    if "dataframe" in str(type(result)).lower():
        # Shuffle a row-position vector so whole rows stay together.
        order = np.arange(len(values))
        np.random.shuffle(order)
        return pd.DataFrame(values[order],
                            columns=result.columns,
                            index=result.index)

    np.random.shuffle(values)
    return pd.Series(values,
                     name="shadow_" + self._name,
                     index=result.index)
def predict(self, target_times, readings, turbines=None):
    """Make predictions using this pipeline.

    Only the ``turbine_id`` and ``cutoff_time`` columns of ``target_times``
    are passed on to the underlying pipeline.

    Args:
        target_times (pandas.DataFrame):
            ``target_times`` table, containing the ``turbine_id``,
            ``cutoff_time`` and ``target`` columns.
        readings (pandas.DataFrame):
            ``readings`` table.
        turbines (pandas.DataFrame):
            ``turbines`` table.

    Returns:
        numpy.ndarray:
            Vector of predictions.

    Raises:
        NotFittedError: if ``fitted`` is falsy.
    """
    if not self.fitted:
        raise NotFittedError()

    features = target_times[['turbine_id', 'cutoff_time']]
    return self._pipeline.predict(
        features, readings=readings, turbines=turbines)