示例#1
0
    def predict_for_scores(self, scores, **kwargs):
        """Turn object scores into rankings.

        Parameters
        ----------
        scores : dict or numpy array
            Either a dictionary mapping a ranking size to an array of scores,
            or a single array of scores, documented as having shape
            (n_instances, n_objects).
        **kwargs
            Accepted for interface compatibility; not used here.

        Returns
        -------
        Y : dict or numpy array
            For a dictionary input, a new dictionary with the same keys whose
            values are ``scores_to_rankings`` applied to each entry. Otherwise
            the result of ``scores_to_rankings`` applied to ``scores`` itself.
        """
        # Plain (non-dict) input is converted in a single call.
        if not isinstance(scores, dict):
            return scores_to_rankings(scores)

        # Dictionary input: convert every entry independently, keeping its key.
        return {
            size: scores_to_rankings(batch) for size, batch in scores.items()
        }
示例#2
0
    def predict_for_scores(self, scores, **kwargs):
        """
            Predict rankings from the scores of a collection of query sets.

            A ranking over the objects of a query set :math:`Q` is expressed as a permutation vector
            :math:`\\pi`, where :math:`\\pi(k)` is the position of the :math:`k`-th object :math:`x_k` and
            :math:`\\pi^{-1}(k)` is the index of the object placed on position :math:`k`.

            Parameters
            ----------
            scores : dict or numpy array
                Either a dictionary mapping a query set size to an array of scores, or a single array of
                scores documented as having shape (n_instances, n_objects)
            **kwargs
                Accepted for interface compatibility; not used here.

            Returns
            -------
            Y : dict or numpy array
                A dictionary with the same keys as the input holding the converted rankings, or a single
                array of rankings when the input was not a dictionary
        """
        if isinstance(scores, dict):
            # Each query-set size is converted on its own and stored under the same key.
            return {
                set_size: scores_to_rankings(set_scores)
                for set_size, set_scores in scores.items()
            }
        return scores_to_rankings(scores)
 def make_nearest_neighbour_dataset(self, n_instances, n_objects, seed,
                                    **kwargs):
     """Build a nearest-neighbour dataset whose targets are rankings.

     Delegates to the parent class implementation of the same name, which
     returns features and scores, and converts the scores into rankings
     with ``scores_to_rankings``.

     Parameters
     ----------
     n_instances, n_objects, seed
         Forwarded unchanged (as keyword arguments) to the parent method.
     **kwargs
         Accepted but not forwarded.

     Returns
     -------
     tuple
         ``(X, Y)``: the features from the parent method and the rankings
         derived from its scores.
     """
     features, similarity = super().make_nearest_neighbour_dataset(
         n_instances=n_instances, n_objects=n_objects, seed=seed)
     # The higher the similarity, the lower the rank of the object.
     return features, scores_to_rankings(similarity)
示例#4
0
def spearman_correlation_for_scores_scipy(y_true, s_pred):
    """Mean Spearman correlation between true and predicted rankings.

    The predicted scores are first converted to rankings with
    ``scores_to_rankings``. ``spearmanr`` is then evaluated for each pair of
    true and predicted rows, and the first element of each result (the
    correlation) is kept.

    Parameters
    ----------
    y_true : iterable of rankings
        One true ranking per instance.
    s_pred : array-like
        Predicted scores, passed as-is to ``scores_to_rankings``.

    Returns
    -------
    float
        ``np.nanmean`` of the per-instance correlations, so NaN entries are
        ignored in the average.
    """
    predicted = scores_to_rankings(s_pred)
    per_instance = np.array([
        spearmanr(true_row, pred_row)[0]
        for true_row, pred_row in zip(y_true, predicted)
    ])
    return np.nanmean(per_instance)
    def make_gp_transitive(self,
                           n_instances=1000,
                           n_objects=5,
                           noise=0.0,
                           n_features=100,
                           kernel_params=None,
                           seed=42,
                           **kwd):
        """Create a nonlinear object ranking problem from a Gaussian process.

        Latent utilities are sampled by multiplying the Cholesky factor of a
        Matern kernel matrix with standard normal draws, adding normally
        distributed noise, and converting the utilities to rankings.

        Note that the kernel matrix has (n_instances * n_objects) ** 2
        entries, which could allocate a large chunk of memory.

        Parameters
        ----------
        n_instances : int
            Number of ranking instances to generate.
        n_objects : int
            Number of objects per instance.
        noise : float
            Scale of the normal noise added to the latent utilities.
        n_features : int
            Number of features per object.
        kernel_params : dict or None
            Keyword arguments for ``Matern``; ``None`` means no arguments.
        seed : int
            Seed handed to ``check_random_state``.
        **kwd
            Accepted but not used.

        Returns
        -------
        tuple
            ``(X, Y)`` where ``X`` has shape (n_instances, n_objects,
            n_features) and ``Y`` is the output of ``scores_to_rankings`` on
            the (n_instances, n_objects) utilities.
        """
        rng = check_random_state(seed=seed)
        kernel_options = dict() if kernel_params is None else kernel_params

        n_points = n_instances * n_objects
        # All draws below share one generator; their order is significant.
        points = rng.rand(n_points, n_features)
        chol = np.linalg.cholesky(Matern(**kernel_options)(points))
        latent = chol.dot(rng.randn(n_points))
        perturbation = rng.normal(scale=noise, size=n_points)

        # Regroup the flat samples into one row of objects per instance.
        X = points.reshape(n_instances, n_objects, n_features)
        utilities = (latent + perturbation).reshape(n_instances, n_objects)

        return X, scores_to_rankings(utilities)
 def dataset_generator(n_instances, n_objects, seed, **kwargs):
     """Generate a critique-fit dataset and convert its scores to rankings.

     ``self`` and ``direction`` are not parameters; they are taken from the
     enclosing scope, which is not visible in this snippet.

     Parameters
     ----------
     n_instances, n_objects, seed
         Forwarded as keyword arguments to the parent class's
         ``make_critique_fit_dataset``.
     **kwargs
         Accepted but not forwarded.

     Returns
     -------
     tuple
         ``(X, Y)``: the features from the parent method and the rankings
         produced by ``scores_to_rankings`` from its scores.
     """
     parent = super(TagGenomeObjectRankingDatasetReader, self)
     features, utilities = parent.make_critique_fit_dataset(
         n_instances=n_instances,
         n_objects=n_objects,
         seed=seed,
         direction=direction,
     )
     return features, scores_to_rankings(utilities)
示例#7
0
def spearman_correlation_for_scores_np(y_true, s_pred):
    """Mean Spearman correlation computed with the closed-form formula.

    Predicted scores are converted to rankings with ``scores_to_rankings``.
    For each instance the correlation is
    ``1 - 6 * sum((r1 - r2) ** 2) / (n * (n ** 2 - 1))`` with
    ``n = y_true.shape[1]``. Instances whose predicted ranking contains
    repeated values contribute NaN instead.

    Parameters
    ----------
    y_true : numpy array
        True rankings; the second dimension gives the number of objects.
    s_pred : array-like
        Predicted scores, passed as-is to ``scores_to_rankings``.

    Returns
    -------
    float
        ``np.nanmean`` over the per-instance values, so NaN entries are
        left out of the average.
    """
    predicted = scores_to_rankings(s_pred)
    n_objects = y_true.shape[1]
    denominator = n_objects * (n_objects**2 - 1)

    def _rho(true_row, pred_row):
        # The formula is only applied when all predicted ranks are distinct.
        if len(np.unique(pred_row)) != len(pred_row):
            return np.nan
        return 1 - (6 * np.sum((true_row - pred_row)**2) / denominator)

    per_instance = [_rho(t, p) for t, p in zip(y_true, predicted)]
    return np.nanmean(np.array(per_instance))
 def make_linear_transitive(self,
                            n_instances=1000,
                            n_objects=5,
                            noise=0.0,
                            n_features=100,
                            n_informative=10,
                            seed=42,
                            **kwd):
     """Create an object ranking problem from a regression dataset.

     ``make_regression`` is asked for ``n_instances * n_objects`` samples;
     the samples are regrouped into instances of ``n_objects`` objects and
     the regression targets of each instance are converted to a ranking.

     Parameters
     ----------
     n_instances : int
         Number of ranking instances to generate.
     n_objects : int
         Number of objects per instance.
     noise : float
         Forwarded to ``make_regression`` as ``noise``.
     n_features : int
         Number of features per object.
     n_informative : int
         Forwarded to ``make_regression`` as ``n_informative``.
     seed : int
         Seed handed to ``check_random_state``.
     **kwd
         Accepted but not used.

     Returns
     -------
     tuple
         ``(X, Y)`` where ``X`` has shape (n_instances, n_objects,
         n_features) and ``Y`` is the output of ``scores_to_rankings`` on
         the (n_instances, n_objects) targets.
     """
     rng = check_random_state(seed=seed)
     # The coefficients are requested (coef=True) but not needed afterwards.
     flat_X, flat_y, _ = make_regression(n_samples=n_instances * n_objects,
                                         n_features=n_features,
                                         n_informative=n_informative,
                                         coef=True,
                                         noise=noise,
                                         random_state=rng)
     X = flat_X.reshape(n_instances, n_objects, n_features)
     utilities = flat_y.reshape(n_instances, n_objects)
     return X, scores_to_rankings(utilities)
示例#9
0
    def make_similarity_based_dataset(self, datatype="train", seed=42):
        """Build a ranking dataset from similarities to a random query object.

        For every instance a random subset of ``self.n_objects`` images is
        drawn without replacement, one member of that subset is picked at
        random as the query, and all objects of the subset (the query
        included) are scored by their similarity to the query. The scores are
        then converted to rankings with ``scores_to_rankings``.

        Parameters
        ----------
        datatype : {"train", "test"}
            Selects which image features, instance count and similarity
            matrix file stored on ``self`` are used.
        seed : int
            Seed for the ``np.random.RandomState`` driving all random choices.

        Returns
        -------
        tuple
            ``(X, Y)`` where ``X`` has shape (n_instances, n_objects,
            n_features), with each instance passed through its own
            ``StandardScaler().fit_transform``, and ``Y`` holds the rankings.

        Raises
        ------
        ValueError
            If ``datatype`` is neither "train" nor "test".
        """
        random_state = np.random.RandomState(seed=seed)
        if datatype == "train":
            image_features = self.image_features_train
            n_instances = self.n_train_instances
            similarity_matrix_file = self.similarity_matrix_train_file
        elif datatype == "test":
            image_features = self.image_features_test
            n_instances = self.n_test_instances
            similarity_matrix_file = self.similarity_matrix_test_file
        else:
            # Without this branch an unknown value fell through and surfaced
            # later as an UnboundLocalError on the unassigned locals.
            raise ValueError(
                "datatype must be 'train' or 'test', got {!r}".format(
                    datatype))

        X = np.empty((n_instances, self.n_objects, self.n_features),
                     dtype=float)
        similarity_scores = np.empty((n_instances, self.n_objects),
                                     dtype=float)
        similarity_matrix_lin_list = get_similarity_matrix(
            similarity_matrix_file)

        for i in range(n_instances):
            subset = random_state.choice(image_features.shape[0],
                                         size=self.n_objects,
                                         replace=False)
            X[i] = image_features[subset]
            # Position (within the subset) of the object used as the query.
            query = random_state.choice(self.n_objects, size=1)
            # Distinct names keep the pair indices from shadowing the
            # instance index ``i`` of the enclosing loop.
            one_row = [
                similarity_matrix_lin_list[get_key_for_indices(anchor, other)]
                for anchor, other in product(subset[query], subset)
            ]
            similarity_scores[i] = np.array(one_row)

        Y = scores_to_rankings(similarity_scores)
        # Scale every instance on its own with a freshly fitted scaler.
        for i, x in enumerate(X):
            X[i] = StandardScaler().fit_transform(x)
        return X, Y
示例#10
0
 def predict(self, Xo, Xc, **kwargs):
     """Predict rankings for the given inputs.

     Scores are obtained from ``self.predict_scores(Xo, Xc, **kwargs)`` and
     converted to rankings with ``scores_to_rankings``.

     Parameters
     ----------
     Xo, Xc
         Forwarded unchanged to ``predict_scores``.
     **kwargs
         Forwarded unchanged to ``predict_scores``.

     Returns
     -------
     The output of ``scores_to_rankings`` for the predicted scores.
     """
     return scores_to_rankings(self.predict_scores(Xo, Xc, **kwargs))
 def predict_for_scores(self, scores, **kwargs):
     """Log a progress message and convert scores to rankings.

     Parameters
     ----------
     scores
         Scores passed as-is to ``scores_to_rankings``.
     **kwargs
         Accepted but not used.

     Returns
     -------
     The output of ``scores_to_rankings(scores)``.
     """
     # NOTE(review): ``self.logger`` is invoked directly as a callable here;
     # confirm it is not a ``logging.Logger`` (which would need ``.info``).
     self.logger('Predicting rankings')
     rankings = scores_to_rankings(scores)
     return rankings
示例#12
0
def zero_one_accuracy_for_scores_np(y_true, s_pred):
    """Fraction of instances whose predicted ranking matches exactly.

    Predicted scores are converted to rankings with ``scores_to_rankings``.
    An instance counts as correct only if every position of its predicted
    ranking equals the true ranking.

    Parameters
    ----------
    y_true : array-like
        True rankings, one row per instance.
    s_pred : array-like
        Predicted scores, passed as-is to ``scores_to_rankings``.

    Returns
    -------
    The number of exactly matching rows divided by the number of predicted
    rows.
    """
    predicted = scores_to_rankings(s_pred)
    # One boolean per instance: True when the whole row agrees.
    row_matches = np.all(np.equal(y_true, predicted), axis=1)
    n_rows = predicted.shape[0]
    return np.sum(row_matches) / n_rows
 def predict_for_scores(self, scores, **kwargs):
     """Convert scores to rankings.

     Parameters
     ----------
     scores
         Scores passed as-is to ``scores_to_rankings``.
     **kwargs
         Accepted but not used.

     Returns
     -------
     The output of ``scores_to_rankings(scores)``.
     """
     rankings = scores_to_rankings(scores)
     return rankings