Example #1
    def transform(self, X: np.ndarray) -> np.ndarray:
        """
        Parameters
        ----------
        X : np.ndarray, shape (n_samples, height, width)

        Returns
        -------
        X_new : np.ndarray, shape (n_samples, height, n_components)
        """
        if self.components_ is None:
            raise NotFittedError(
                "This PCA instance is not fitted yet. "
                "Call 'fit' with appropriate arguments before using this estimator."
            )

        if X.ndim != 3:
            raise ValueError(f"Expected 3D array, got {X.ndim}D array instead")

        return np.array([x @ self.components_ for x in X])
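
A minimal usage sketch of the estimator above. The PCA3D toy class, its SVD-based fit, and the random data are illustrative assumptions, not the original implementation:

    import numpy as np
    from sklearn.exceptions import NotFittedError

    class PCA3D:
        """Toy stand-in: applies fitted components to each 2-D slice."""
        def __init__(self, n_components):
            self.n_components = n_components
            self.components_ = None  # unfitted until fit() is called

        def fit(self, X):
            # fit on the stacked slices; keep the top right-singular vectors
            flat = X.reshape(-1, X.shape[-1])
            _, _, vt = np.linalg.svd(flat, full_matrices=False)
            self.components_ = vt[:self.n_components].T  # (width, n_components)
            return self

        def transform(self, X):
            if self.components_ is None:
                raise NotFittedError("This PCA instance is not fitted yet.")
            if X.ndim != 3:
                raise ValueError(f"Expected 3D array, got {X.ndim}D array instead")
            return np.array([x @ self.components_ for x in X])

    X = np.random.rand(10, 8, 6)               # (n_samples, height, width)
    print(PCA3D(2).fit(X).transform(X).shape)  # (10, 8, 2)
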
Example #2
    def transform(self, X):
        """Select the n_selected_features best features to create a new dataset.

        Parameters
        ----------
        X : pandas dataframe or array-like of shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        selected_features : array of shape (n_samples, n_selected_features)
            The dataset restricted to the selected features.
        """

        X, _ = self._check_X_Y(X, None)
        if self.is_fitted:
            self.selected_features = super(FeatureSelection, self).transform(X)
            return self.selected_features
        else:
            raise NotFittedError(
                'Fit method must be used before calling transform')
Example #3
    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Label-encode the categories in ``columns_to_encode``."""
        try:
            self.encoding_dict
        except AttributeError:
            raise NotFittedError(
                "This LabelEncoder instance is not fitted yet. "
                "Call 'fit' with appropriate arguments before using this LabelEncoder."
            )

        df_inp = df.copy()
        # sanity check to make sure all categorical columns are in an adequate
        # format
        for col in self.columns_to_encode:  # type: ignore
            df_inp[col] = df_inp[col].astype("O")

        for k, v in self.encoding_dict.items():
            df_inp[k] = df_inp[k].apply(lambda x: v.get(x, 0))

        return df_inp
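
A short sketch of the encoding step above. The encoding_dict contents and the column name are hypothetical; unseen categories fall back to 0 exactly as in the transform:

    import pandas as pd

    # hypothetical fitted state: one {category: code} mapping per column
    encoding_dict = {"color": {"red": 1, "green": 2, "blue": 3}}

    df = pd.DataFrame({"color": ["red", "blue", "purple"]})
    df["color"] = df["color"].apply(lambda x: encoding_dict["color"].get(x, 0))
    print(df["color"].tolist())  # [1, 3, 0] -- 'purple' was never seen during fit
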
Example #4
    def predict_proba(self, X):
        """
			Get class(= author) probabilities for code sample

			Paramters:

				X: code sample
        """


        if self.user_to_id is None:
            raise NotFittedError('Model not fitted. Fit with CodoxerModel.fit(X, y).')

        X = self.tokenizer.transform(X)

        X = self.tfidf.transform(X)

        X = self.selector.transform(X)

        # use predict_proba so class probabilities (not labels) are returned,
        # matching the docstring
        return self.estimator.predict_proba(X)
Example #5
    def const_marginal_effect(self, X):
        """Calculate the constant marginal CATE θ(·) conditional on a vector of features X.

        Parameters
        ----------
        X : array-like, shape (n, d_x)
            Feature vector that captures heterogeneity.

        Returns
        -------
        Theta : matrix, shape (n, d_t)
            Constant marginal CATE of each treatment for each sample.
        """
        if not self.model_is_fitted:
            raise NotFittedError('This {0} instance is not fitted yet.'.format(self.__class__.__name__))
        X = check_array(X)
        results = Parallel(n_jobs=self.n_jobs, verbose=3, backend='threading')(
            delayed(self._pointwise_effect)(X_single) for X_single in X)
        # TODO: Check performance
        return np.asarray(results)
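
A minimal sketch of the joblib pattern used above, with a hypothetical per-sample function standing in for _pointwise_effect:

    import numpy as np
    from joblib import Parallel, delayed

    def _pointwise_effect(x_single):
        # hypothetical per-sample computation (the real one evaluates the CATE model)
        return x_single.sum()

    X = np.random.rand(5, 3)
    results = Parallel(n_jobs=2, backend='threading')(
        delayed(_pointwise_effect)(x) for x in X)
    print(np.asarray(results).shape)  # (5,)
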
Example #6
    def predict_probabilites(self, X):

        if not self._session:
            raise NotFittedError("This %s instance is not fitted yet" %
                                 self.__class__.__name__)

        h_np = X['h_np']
        b_np = X['b_np']
        h_sizes = X['h_sizes']
        b_sizes = X['b_sizes']
        h_sent_sizes = X['h_sent_sizes']
        b_sent_sizes = X['b_sent_sizes']

        with self._session.as_default() as sess:
            return self._probabilites.eval(
                feed_dict={
                    self._X_head: h_np,
                    self._X_body: b_np,
                    self._X_h_sizes: h_sizes,
                    self._X_b_sizes: b_sizes,
                    self._X_h_sent_sizes: h_sent_sizes,
                    self._X_b_sent_sizes: b_sent_sizes
                })
Example #7
    def inverse_transform(self, X):
        """Transform X back to original space.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_components)
        Returns
        -------
        X_new : array-like, shape (n_samples, n_features)
        References
        ----------
        "Learning to Find Pre-Images", G BakIr et al, 2004.
        """
        if not self.fit_inverse_transform:
            raise NotFittedError("The fit_inverse_transform parameter was not"
                                 " set to True when instantiating and hence "
                                 "the inverse transform is not available.")

        K = self._get_kernel(X, self.X_transformed_fit_)

        return np.dot(K, self.dual_coef_)
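
A shape-level sketch of the pre-image step above, assuming a linear kernel so that _get_kernel reduces to a plain dot product (all arrays here are random stand-ins):

    import numpy as np

    X_fit = np.random.rand(20, 4)      # X_transformed_fit_: (n_fit, n_components)
    dual_coef = np.random.rand(20, 7)  # dual_coef_: (n_fit, n_features)
    X_new = np.random.rand(3, 4)       # (n_samples, n_components)

    K = X_new @ X_fit.T                # kernel between new and fitted data
    print((K @ dual_coef).shape)       # (3, 7) -> back in the original feature space
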
Example #8
    def transform(self, X):
        if not self.is_fit:
            raise NotFittedError("This LoadEpimlTransformer is not fitted yet")
        X = X.copy()
        # add self._unused_cols to the headers so the error checks below
        # don't look for those columns
        X_cols = set(X.columns.values).union(self._unused_cols)
        data_cols = set(self._orig_col_headers).union(self._unused_cols)
        if X_cols != data_cols:
            missing_cols = data_cols - X_cols
            extra_cols = X_cols - data_cols
            raise ValueError(
                "X missing {} cols [{}], and has {} extra cols [{}]".format(
                    len(missing_cols), missing_cols, len(extra_cols),
                    extra_cols))
        # binarize the categorical data into dummy/indicator columns
        X = pd.get_dummies(X, columns=self._cols_to_binarize, drop_first=True)
        # drop columns that were removed during fit
        X = X.drop(self._cols_to_drop, axis=1)
        X = X.drop(self._unused_cols, axis=1, errors='ignore')
        return X
Example #9
    def predict_proba(self, X):
        """Predict class probabilities for X.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples, n_classes]
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute classes_.
        """
        if not self.estimator_fitted:
            raise NotFittedError(
                'The estimator must be fitted before calling predict_proba().')
        probabilistic_predictions = self.estimator.predict_proba(X)
        probabilistic_predictions = probabilistic_predictions[:, 1]
        return probabilistic_predictions / self.c
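
The division by self.c matches the Elkan & Noto (2008) PU-learning correction, p(y=1|x) = p(s=1|x) / c with c = p(s=1|y=1). A tiny numeric sketch (the scores and c are made up):

    import numpy as np

    p_labeled = np.array([0.12, 0.40, 0.08])  # positive-class scores from the estimator
    c = 0.5                                   # label frequency estimated during fit
    print(p_labeled / c)                      # calibrated probabilities: [0.24 0.8 0.16]
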
Example #10
    def get_interval_mapping(self, col_name: str):
        """ Get the mapping from encoded value to its corresponding group. """
        if self.bins is None:
            raise NotFittedError(
                'This {} is not fitted. Call the fit method first.'.format(
                    self.__class__.__name__))

        if col_name in self.discrete_encoding and isinstance(
                self.bins[col_name], list):
            # categorical columns
            encoding = self.discrete_encoding[col_name]
            group = defaultdict(list)
            for i, v in zip(searchsorted(self.bins[col_name], encoding),
                            encoding.index):
                group[i].append(v)
            group = {k: ', '.join(map(str, v)) for k, v in group.items()}
            group[0] = 'UNSEEN'
            return group
        else:
            return super().get_interval_mapping(col_name)
Example #11
    def predict(self, X):
        """Predicts the response variable given a design matrix. The output is
        the mode of the Poisson distribution.

        Parameters
        ----------
        X : array_like, shape (n_samples, n_features)
            Design matrix to predict on.

        Returns
        -------
        mode : array_like, shape (n_samples)
            The predicted response values, i.e. the modes.
        """
        if hasattr(self, 'coef_') and hasattr(self, 'intercept_'):
            mu = np.exp(self.intercept_ + np.dot(X, self.coef_))
            mode = np.floor(mu)
            return mode
        else:
            raise NotFittedError('Poisson model is not fitted yet.')
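
A worked sketch of the prediction rule above: with a log link, mu = exp(intercept + X @ coef), and the Poisson mode is floor(mu). The coefficients and data are made up:

    import numpy as np

    coef = np.array([0.2, -0.1])
    intercept = 0.5
    X = np.array([[1.0, 2.0], [3.0, 0.5]])

    mu = np.exp(intercept + X @ coef)  # [exp(0.5), exp(1.05)] ~ [1.65, 2.86]
    print(np.floor(mu))                # predicted modes: [1., 2.]
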
Example #12
    def frozen(self, experiment: ExperimentBackend) -> 'MetaBlock':
        """
        save fitted models to the experiment

        Args:
            experiment:
                保存する対象となる environment
        Returns:
            myself
        """
        if not self._check_has_fitted_models():
            raise NotFittedError()
        dir_names = [
            self._get_fold_dir(i) for i in range(len(self._fitted_models))
        ]
        for name, model in zip(dir_names, self._fitted_models):
            with experiment.as_environment(name, style='nested') as fold_env:
                fold_env.save_as_python_object('model', model)
        experiment.mark('cv_dirs', dir_names)
        return self
Example #13
    def predict_proba(self, X):
        """Estimate the class probabilities.

        This function returns the probability that each datapoint belongs to
        the positive class.

        Parameters
        ----------
        X : np.ndarray
            The data matrix.

        Returns
        -------
        p : np.ndarray
            A vector of probabilities. The i-th entry is the probability for
            the i-th data point belonging to the positive class.
        """
        if not hasattr(self, "coef_"):
            raise NotFittedError("Call fit before prediction")
        return predict_proba(self.coef_, X)
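
The module-level predict_proba the method delegates to is not shown; a minimal sketch, assuming a plain logistic sigmoid over the linear scores:

    import numpy as np

    def predict_proba(coef, X):
        # assumed form: probability of the positive class via the logistic sigmoid
        return 1.0 / (1.0 + np.exp(-X @ coef))

    coef = np.array([0.5, -0.25])
    X = np.random.rand(4, 2)
    print(predict_proba(coef, X))  # one probability per row, in (0, 1)
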
Example #14
    def transform(self, df: pd.DataFrame) -> np.ndarray:
        try:
            self.vocab
        except AttributeError:
            # a bare 'except' would mask unrelated errors; only a missing
            # 'vocab' attribute means the preprocessor is unfitted
            raise NotFittedError(
                "This TextPreprocessor instance is not fitted yet. "
                "Call 'fit' with appropriate arguments before using this estimator."
            )
        texts = df[self.text_col].tolist()
        self.tokens = get_texts(texts)
        sequences = [self.vocab.numericalize(t) for t in self.tokens]
        padded_seq = np.array(
            [pad_sequences(s, maxlen=self.maxlen) for s in sequences])
        if self.verbose:
            print("The vocabulary contains {} tokens".format(
                len(self.vocab.stoi)))
        if self.word_vectors_path is not None:
            self.embedding_matrix = build_embeddings_matrix(
                self.vocab, self.word_vectors_path, self.min_freq)
        return padded_seq
Example #15
    def transform(self, X):
        """
        Transforms the input matrix X.

        Parameters
        ----------
        X : Union(ndarray, sparse matrix) of size (n_samples, n_features)

        Returns
        -------
        Y: ndarray of size (n_samples, hidden_layer_size)
        """
        if self._input_weights is None or self._bias_weights is None:
            raise NotFittedError(self)

        self._hidden_layer_state = InputToNode._node_inputs(
            X, self._input_weights, self.input_scaling, self._bias_weights,
            self.bias_scaling)
        ACTIVATIONS[self.input_activation](self._hidden_layer_state)
        return self._hidden_layer_state
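
A minimal sketch of the input-to-node computation above: a fixed random projection plus bias, followed by an activation (tanh and unit scalings are assumptions; the real class looks the activation up in ACTIVATIONS):

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.random((5, 3))              # (n_samples, n_features)
    W_in = rng.standard_normal((3, 8))  # fixed, untrained input weights
    b = rng.standard_normal(8)          # fixed bias weights

    hidden_state = np.tanh(X @ W_in * 1.0 + b * 1.0)  # input/bias scaling = 1.0
    print(hidden_state.shape)           # (5, 8) = (n_samples, hidden_layer_size)
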
Example #16
    def predict_probability(self, points_in):
        if not self._session:
            raise NotFittedError("This %s instance is not fitted yet" %
                                 self.__class__.__name__)
        with self._session.as_default() as sess:
            points_in = np.expand_dims(points_in, 0)
            softmax, feat, feat_in, xyz = sess.run(
                (self._softmax_op, self._ln_feat, self._ln_feat_in,
                 self._ln_xyz),
                feed_dict={
                    self._points_in: points_in,
                    self._is_training: False
                })
            # optional debug dumps, disabled by default:
            # if self.savefiles:
            #     for level, x in enumerate(xyz):
            #         np.savetxt(os.path.join(self.output_dir, 'xyz%i.xyz' % level),
            #                    np.hstack((xyz[level][0], feat[level][0])))
            #         np.savetxt(os.path.join(self.output_dir, 'xyz%i_in.xyz' % level),
            #                    np.hstack((xyz[level][0], feat_in[level][0])))
            return softmax
Example #17
    def predict(self, X):
        """
        Predict regression target for X.
        The predicted regression target of an input sample is computed.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        y : array of shape = [n_samples]
            The predicted values.
        """
        if not hasattr(self, '_fitted') or not self._fitted:
            raise NotFittedError(NOT_FITTED_ERROR_DESC)
        X = check_array(X, accept_sparse=True)
        self._check_n_features(X.shape[1])
        return self._estimators[0].predict(X)
Example #18
    def predict(self, X):
        """
        Predicts using the base regressor, then applies the inverse transform
        to map the predictions back to the original space.

        :param X: {array-like, sparse matrix}, shape = (n_samples, n_features)
            Samples.
        :return: y_hat : array, shape = (n_samples,)
            Predicted values.
        """
        if not hasattr(self, 'regressor_'):
            raise NotFittedError(
                "This instance {} is not fitted yet. Call 'fit' with "
                "appropriate arguments before using this method.".format(
                    type(self)))
        X_trans, _ = self.transformer_.transform(X, None)
        pred = self.regressor_.predict(X_trans)

        inv = self.transformer_.get_fct_inv()
        _, pred_inv = inv.transform(X_trans, pred)
        return pred_inv
Example #19
    def _predict(self, X, axis=-1, batch_size=-1):
        if not self._initialized:
            raise NotFittedError()
        self._graph.add_to_collection("IS_TRAINING", False)
        predict_data_feeder = setup_predict_data_feeder(
            X, batch_size=batch_size)
        preds = []
        dropouts = self._graph.get_collection(DROPOUTS)
        feed_dict = {prob: 1.0 for prob in dropouts}
        for data in predict_data_feeder:
            feed_dict[self._inp] = data
            predictions_for_batch = self._session.run(
                self._model_predictions,
                feed_dict)
            if self.n_classes > 1 and axis != -1:
                preds.append(predictions_for_batch.argmax(axis=axis))
            else:
                preds.append(predictions_for_batch)

        return np.concatenate(preds, axis=0)
Example #20
    def transform(self, X):
        """Selects the columns of X that were kept during fitting.

        Parameters
        ----------
        X : `pandas.DataFrame`
            Data to transform. e.g. each column is a timeseries.
            Columns are expected to be numeric.

        Returns
        -------
        X_subset : `pandas.DataFrame`
            Selected columns of X. Keeps columns that were not
            degenerate on the training data.
        """
        if self.keep_cols is None:
            raise NotFittedError(
                "This instance is not fitted yet. Call 'fit' with appropriate arguments "
                "before calling 'transform'.")
        return X[self.keep_cols]
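
A sketch of the fit/transform contract above, assuming fit records the non-degenerate (non-constant) columns in keep_cols:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3], "b": [7, 7, 7], "c": [0.1, 0.5, 0.9]})
    keep_cols = [c for c in df.columns if df[c].nunique() > 1]  # drops constant 'b'
    print(df[keep_cols])  # only columns 'a' and 'c' remain
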
Example #21
    def predict(self, X):
        """
        Predicts the targets using the trained ELM regressor.

        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)

        Returns
        -------
        y : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)
            The predicted targets
        """
        if self._input_to_node is None or self._regressor is None:
            raise NotFittedError(self)

        hidden_layer_state = self._input_to_node.transform(X)
        hidden_layer_state = self._node_to_node.transform(hidden_layer_state)

        return self._regressor.predict(hidden_layer_state)
Example #22
    def predict(self, X):
        """Predict using the average of the base learners

        Parameters
        ----------
        X : pandas DataFrame
            Features

        Returns
        -------
        y_pred : pandas Series
            Predicted target variable
        """

        # Ensure model has been fit
        if self.fit_learners is None:
            raise NotFittedError('Model has not been fit')

        # Preprocess the data
        if self.preprocessing is None:
            Xp = X
        else:
            Xp = self.preprocessing.transform(X)

        # Compute predictions for each base learner
        if isinstance(X, pd.DataFrame):
            preds = pd.DataFrame(index=X.index)
        else:
            preds = pd.DataFrame(index=np.arange(X.shape[0]))
        for i, learner in enumerate(self.fit_learners):
            if isinstance(Xp, pd.DataFrame):
                Xs = Xp.iloc[:, self.features_ix[i]]
            else:
                Xs = Xp[:, self.features_ix[i]]
            preds[str(i)] = learner.predict(Xs)

        # Return the average predictions
        if isinstance(X, pd.DataFrame):
            return preds.mean(axis=1)
        else:
            return preds.mean(axis=1).values
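
A numeric sketch of the final averaging step, with made-up predictions from two base learners:

    import numpy as np
    import pandas as pd

    preds = pd.DataFrame({
        "0": np.array([1.0, 2.0, 3.0]),  # predictions from learner 0
        "1": np.array([2.0, 2.0, 4.0]),  # predictions from learner 1
    })
    print(preds.mean(axis=1).values)     # row-wise average: [1.5 2.  3.5]
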
Example #23
    def predict_proba(self, X):
        """Returns class probability estimates for the given test data.

        Arguments:
            X: array-like, shape `(n_samples, n_features)`
                Test samples where `n_samples` is the number of samples
                and `n_features` is the number of features.

        Returns:
            proba: array-like, shape `(n_samples, n_outputs)`
                Class probability estimates.
                In the case of binary classification,
                to match the scikit-learn API,
                will return an array of shape `(n_samples, 2)`
                (instead of `(n_samples, 1)` as in Keras).
        """
        # check if fitted
        if not self._initialized():
            raise NotFittedError("Estimator needs to be fit before `predict` "
                                 "can be called")

        # basic input checks
        X = self._validate_data(X=X, y=None)

        # pre process X
        X = self.feature_encoder_.transform(X)

        # collect arguments
        predict_args = route_params(
            self.get_params(),
            destination="predict",
            pass_filter=self._predict_kwargs,
        )

        # call the Keras model's predict
        outputs = self.model_.predict(X, **predict_args)

        # post process y
        y = self.target_encoder_.inverse_transform(outputs, return_proba=True)

        return y
Example #24
    def predict_proba(self, X):
        """ Predict class probabilities for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        avg : array-like, shape = [n_samples, n_classes]
            Weighted average probability for each class per sample.

        """
        if not hasattr(self, 'clfs_'):
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")

        avg = np.average(self._predict_probas(X), axis=0, weights=self.weights)
        return avg
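
A numeric sketch of the weighted soft vote above: _predict_probas stacks one (n_samples, n_classes) array per classifier along axis 0 (the probabilities and weights are made up):

    import numpy as np

    probas = np.array([
        [[0.8, 0.2], [0.3, 0.7]],  # classifier 1
        [[0.6, 0.4], [0.4, 0.6]],  # classifier 2
    ])
    weights = [2, 1]
    print(np.average(probas, axis=0, weights=weights))
    # [[0.733 0.267]
    #  [0.333 0.667]]
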
Example #25
    def transform(self, X):
        """ Calculates time series features of the input time series

        Parameters
        ----------
        X : pd.DataFrame

        Returns
        -------
        A copy of the data frame with original time points and calculated features
        """
        if self.origin_for_time_vars is None:
            raise NotFittedError(
                "This instance is not fitted yet. Call 'fit' with appropriate arguments "
                "before calling 'transform'.")
        assert isinstance(X, pd.DataFrame)
        dt = X[self.time_col]
        features_ts = build_time_features_df(
            dt, conti_year_origin=self.origin_for_time_vars)
        output = pd.concat([dt, features_ts], axis=1)
        return output
Example #26
    def _validate_X_predict(self, X, check_input):
        """Validate X whenever one tries to predict, apply, predict_proba"""
        if self.tree_ is None:
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")

        if check_input:
            X = check_array(X, dtype=DTYPE, accept_sparse="csr")
            if issparse(X) and (X.indices.dtype != np.intc or
                                X.indptr.dtype != np.intc):
                raise ValueError("No support for np.int64 index based "
                                 "sparse matrices")

        n_features = X.shape[1]
        if self.n_features_ != n_features:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features is %s and "
                             "input n_features is %s "
                             % (self.n_features_, n_features))

        return X
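
A quick sketch of why the sparse-index check above passes for typical CSR matrices: scipy builds them with np.intc (int32) indices on most platforms, while 64-bit indices would be rejected:

    import numpy as np
    from scipy.sparse import csr_matrix

    X = csr_matrix(np.eye(3))
    print(X.indices.dtype, X.indptr.dtype)  # int32 int32 on most platforms
    print(X.indices.dtype == np.intc)       # True -> would pass the check
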
Example #27
    def predict_meta_features(self, X):
        """ Get meta-features of test-data.

        Parameters
        ----------
        X : numpy array, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        meta-features : numpy array, shape = [n_samples, len(self.regressors)]
            meta-features for test data, where n_samples is the number of
            samples in test data and len(self.regressors) is the number
            of regressors.

        """
        if not hasattr(self, 'regr_'):
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")
        return np.column_stack([r.predict(X) for r in self.regr_])
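
A minimal sketch of the meta-feature stacking above, using plain callables as stand-ins for the fitted regressors in self.regr_:

    import numpy as np

    X = np.random.rand(4, 3)
    fake_regressors = [lambda X: X.sum(axis=1), lambda X: X.mean(axis=1)]
    meta = np.column_stack([r(X) for r in fake_regressors])
    print(meta.shape)  # (4, 2) = (n_samples, number of regressors)
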
Example #28
    def transform(self, X, y=None):
        """
        Extract non-correlated features from X.

        Parameters
        ----------
        X: {array-like, sparse matrix}, shape (n_samples, n_features)
            New data. Must have the same number of columns as the data used to fit the transformer.

        Returns
        -------
        X_new : {same data type as X}, shape (n_samples, n_components)
        """
        if not isinstance(self._mask, np.ndarray):
            raise NotFittedError()

        if self._is_df:
            return X.iloc[:, self._mask]

        else:
            return X[:, self._mask]
Example #29
    def transform(self, data):
        if self.fitted is not True:
            raise NotFittedError("Transformation is not fitted yet.")
        # check whether the shadow shuffling process has to be used
        if self._shadow:
            temp = self._transform_special_process(data)
            if "dataframe" in str(type(temp)).lower():
                z = np.array(temp)
                idx = np.arange(len(z))
                np.random.shuffle(idx)
                return pd.DataFrame(z[idx],
                                    columns=temp.columns,
                                    index=temp.index)
            else:
                z = np.array(temp)
                np.random.shuffle(z)
                return pd.Series(z,
                                 name="shadow_" + self._name,
                                 index=temp.index)
        else:
            return self._transform_special_process(data)
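
A short sketch of the shadow branch above: a shadow feature is the original column with its values shuffled in place, which preserves the marginal distribution but breaks any relation to the target:

    import numpy as np
    import pandas as pd

    s = pd.Series([1, 2, 3, 4, 5], name="x")
    z = np.array(s)
    np.random.shuffle(z)  # in-place shuffle, as in the Series branch above
    print(pd.Series(z, name="shadow_x", index=s.index))
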
Example #30
    def predict(self, target_times, readings, turbines=None):
        """Make predictions using this pipeline.

        Args:
            target_times (pandas.DataFrame):
                ``target_times`` table, containing the ``turbine_id``, ``cutoff_time``
                and ``target`` columns.
            readings (pandas.DataFrame):
                ``readings`` table.
            turbines (pandas.DataFrame):
                ``turbines`` table.

        Returns:
            numpy.ndarray:
                Vector of predictions.
        """
        if not self.fitted:
            raise NotFittedError()

        X = target_times[['turbine_id', 'cutoff_time']]
        return self._pipeline.predict(X, readings=readings, turbines=turbines)