Exemplo n.º 1
0
    def predict(self, X, **kwargs):
        """Rank the objects of every instance contained in ``X``.

        The scores returned by ``self.predict_scores`` are converted into
        rankings with ``scores_to_rankings``.

        Parameters
        ----------
        X : dict or numpy array
            Either a dictionary mapping ranking size to numpy arrays, or a
            single numpy array of size (n_instances, n_objects, n_features).
        **kwargs
            Passed through unchanged to ``self.predict_scores``.

        Returns
        -------
        Y : dict or numpy array
            Predicted rankings: either a dictionary mapping ranking size to
            numpy arrays, or a single numpy array of size
            (n_instances, n_objects).
        """
        log = self.logger
        log.debug('Predicting started')

        scores = self.predict_scores(X, **kwargs)
        log.debug('Predicting scores complete')

        rankings = scores_to_rankings(scores)
        log.debug('Predicting ranks complete')
        return rankings
Exemplo n.º 2
0
    def make_similarity_based_dataset(self, n_instances, seed=42):
        """Build an object ranking dataset from pairwise movie similarities.

        For every instance, ``self.n_objects`` distinct movies are drawn and
        one of them is picked at random as the query. Each drawn movie
        (the query included) is scored with its similarity to the query, as
        looked up in ``self.similarity_matrix_lin_list``.

        Parameters
        ----------
        n_instances : int
            Number of ranking instances to generate.
        seed : int
            Seed of the random number generator used for sampling.

        Returns
        -------
        X : numpy array, shape (n_instances, n_objects, n_features)
            Movie features, standardized separately for every instance.
        rankings : numpy array
            Result of ``scores_to_rankings`` applied to the similarity scores.
        """
        random_state = np.random.RandomState(seed=seed)
        X = np.empty((n_instances, self.n_objects, self.n_features),
                     dtype=float)
        similarity_scores = np.empty((n_instances, self.n_objects),
                                     dtype=float)
        # DataFrame.as_matrix() was removed in pandas 1.0; ``.values`` yields
        # the same array on old and new pandas versions alike.
        movie_features = self.movies_df.values[:, 4:]
        for instance in range(n_instances):
            subset = random_state.choice(movie_features.shape[0],
                                         size=self.n_objects,
                                         replace=False)
            X[instance] = movie_features[subset]
            query = random_state.choice(self.n_objects, size=1)
            query_movie = subset[query[0]]
            # Use dedicated names here: reusing the outer loop index inside
            # the comprehension would clobber it on Python 2.
            similarity_scores[instance] = np.array([
                self.similarity_matrix_lin_list[
                    get_key_for_indices(query_movie, other_movie)]
                for other_movie in subset
            ])

        # The higher the similarity, the lower the rank of the object.
        rankings = scores_to_rankings(similarity_scores)

        for instance, features in enumerate(X):
            X[instance] = StandardScaler().fit_transform(features)
        return X, rankings
    def make_gp_transitive(self, n_instances=1000, n_objects=5, noise=0.0,
                           n_features=100, kernel_params=None, seed=42,
                           **kwd):
        """Sample a nonlinear object ranking problem from a Gaussian process.

        The latent utility of every object is drawn from a Gaussian process
        with a Matern kernel, optionally perturbed by Gaussian noise, and the
        utilities of each instance are turned into a ranking.

        Beware: the kernel matrix has (n_instances * n_objects) ** 2 entries,
        so large problems can take up a big chunk of memory.

        Parameters
        ----------
        n_instances : int
            Number of ranking instances.
        n_objects : int
            Number of objects per instance.
        noise : float
            Scale of the Gaussian noise added to the latent utilities.
        n_features : int
            Number of features per object.
        kernel_params : dict or None
            Keyword arguments for the ``Matern`` kernel; ``None`` means no
            extra arguments.
        seed : int
            Seed of the random number generator.
        **kwd
            Accepted but not used by this method.

        Returns
        -------
        X : numpy array, shape (n_instances, n_objects, n_features)
        rankings : result of ``scores_to_rankings`` on the utilities
        """
        rng = np.random.RandomState(seed=seed)
        kernel_kwargs = dict() if kernel_params is None else kernel_params
        n_points = n_instances * n_objects

        flat_X = rng.rand(n_points, n_features)
        kernel = Matern(**kernel_kwargs)
        cholesky_factor = np.linalg.cholesky(kernel(flat_X))

        # Draw order matters for reproducibility: GP sample first, noise second.
        utilities = cholesky_factor.dot(rng.randn(n_points))
        utilities = utilities + rng.normal(scale=noise, size=n_points)

        rankings = scores_to_rankings(utilities.reshape(n_instances, n_objects))
        return flat_X.reshape(n_instances, n_objects, n_features), rankings
Exemplo n.º 4
0
    def make_similarity_based_dataset(self, datatype='train', seed=42):
        """Build an object ranking dataset from pairwise image similarities.

        For every instance, ``self.n_objects`` distinct images are drawn and
        one of them is picked at random as the query. Each drawn image is
        scored with its similarity to the query, as read from the similarity
        matrix file of the selected split.

        The query itself is also included in the ordering.

        Parameters
        ----------
        datatype : {'train', 'test'}
            Which split to generate; selects the image features, the number
            of instances and the similarity matrix file.
        seed : int
            Seed of the random number generator used for sampling.

        Returns
        -------
        X : numpy array, shape (n_instances, n_objects, n_features)
            Image features, standardized separately for every instance.
        rankings : numpy array
            Result of ``scores_to_rankings`` applied to the similarity scores.

        Raises
        ------
        ValueError
            If ``datatype`` is neither 'train' nor 'test'.
        """
        if datatype == 'train':
            image_features = self.image_features_train
            n_instances = self.n_train_instances
            similarity_matrix_file = self.similarity_matrix_train_file
        elif datatype == 'test':
            image_features = self.image_features_test
            n_instances = self.n_test_instances
            similarity_matrix_file = self.similarity_matrix_test_file
        else:
            # Fail early with a clear message instead of an UnboundLocalError
            # further down.
            raise ValueError(
                "datatype must be 'train' or 'test', got {!r}".format(datatype))

        random_state = np.random.RandomState(seed=seed)
        X = np.empty((n_instances, self.n_objects, self.n_features), dtype=float)
        similarity_scores = np.empty((n_instances, self.n_objects), dtype=float)
        similarity_matrix_lin_list = initialize_similarity_matrix(similarity_matrix_file)

        for instance in range(n_instances):
            subset = random_state.choice(image_features.shape[0], size=self.n_objects, replace=False)
            X[instance] = image_features[subset]
            query = random_state.choice(self.n_objects, size=1)
            query_image = subset[query[0]]
            # Use dedicated names here: reusing the outer loop index inside
            # the comprehension would clobber it on Python 2.
            similarity_scores[instance] = np.array([
                similarity_matrix_lin_list[get_key_for_indices(query_image, other_image)]
                for other_image in subset
            ])

        rankings = scores_to_rankings(similarity_scores)
        for instance, features in enumerate(X):
            X[instance] = StandardScaler().fit_transform(features)
        return X, rankings
Exemplo n.º 5
0
 def predict(self, X, **kwargs):
     """Predict rankings for ``X``.

     When ``X`` is a dictionary, the scores returned by
     ``self.predict_scores`` are converted to rankings key by key and a
     dictionary with the same keys is returned. Any other input is
     delegated to ``ObjectRanker.predict``.
     """
     self.logger.info("Predicting ranks")
     if not isinstance(X, dict):
         return ObjectRanker.predict(self, X, **kwargs)

     scores_by_key = self.predict_scores(X, **kwargs)
     return {
         key: scores_to_rankings(scores)
         for key, scores in scores_by_key.items()
     }
 def make_linear_transitive(self, n_instances=1000, n_objects=5, noise=0.0,
                            n_features=100, n_informative=10, seed=42,
                            **kwd):
     """Create an object ranking problem from a random regression task.

     ``make_regression`` generates ``n_instances * n_objects`` samples; the
     samples are grouped into instances of ``n_objects`` objects each and
     the regression targets of every instance are turned into a ranking
     with ``scores_to_rankings``.

     Returns the feature array of shape
     (n_instances, n_objects, n_features) together with the rankings.
     ``**kwd`` is accepted but not used.
     """
     rng = np.random.RandomState(seed=seed)
     n_samples = n_instances * n_objects
     # The coefficients are requested as in the original call but not used.
     flat_X, flat_y, _ = make_regression(
         n_samples=n_samples,
         n_features=n_features,
         n_informative=n_informative,
         coef=True,
         noise=noise,
         random_state=rng,
     )
     rankings = scores_to_rankings(flat_y.reshape(n_instances, n_objects))
     return flat_X.reshape(n_instances, n_objects, n_features), rankings
Exemplo n.º 7
0
    def predict(self, X, **kwargs):
        """Predict label orderings for the given context vectors.

        Scores come from ``self.predict_scores`` and are converted into
        orderings with ``scores_to_rankings``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
        **kwargs
            Passed through unchanged to ``self.predict_scores``.

        Returns
        -------
        Y : array-like, shape (n_samples, n_labels)
            Predicted orderings
        """
        self.logger.debug('Predicting started')
        scores = self.predict_scores(X, **kwargs)
        self.logger.debug('Predicting scores complete')

        orderings = scores_to_rankings(scores)
        self.logger.debug('Predicting ranks complete')
        return orderings
Exemplo n.º 8
0
 def predict(self, Xo, Xc, **kwargs):
     """Return the rankings derived from the scores predicted for ``Xo`` and ``Xc``.

     The scores are computed by ``self.predict_scores`` (extra keyword
     arguments are forwarded) and converted with ``scores_to_rankings``.
     """
     return scores_to_rankings(self.predict_scores(Xo, Xc, **kwargs))