Exemplo n.º 1
0
def cluster(train_features, targets, test_features, predictions, k_means=3):
    """Scale each prediction by how often its cluster label occurs in training.

    The data is passed through ``cluster_features``; every prediction is then
    divided by the number of times the matching ``test_order`` entry appears
    in ``train_order``.

    Parameters
    ----------
    train_features : array
        Feature matrix for the training data.
    targets : list
        Training targets.
    test_features : array
        Feature matrix for the test data.
    predictions : list
        Predicted means.
    k_means : int
        Number of clusters.

    Returns
    -------
    list
        One scaled prediction per entry of the ``test_order`` result.

    Notes
    -----
    A ``test_order`` label that never occurs in ``train_order`` has a count
    of 0, so the division is by zero for that entry.
    """
    clustered = cluster_features(
        train_matrix=train_features,
        train_target=targets,
        k_means=k_means,
        test_matrix=test_features,
        test_target=predictions,
    )

    # Occurrences of each label among the training entries.
    occupancy = Counter(clustered['train_order'])

    return [
        predictions[idx] / occupancy[label]
        for idx, label in enumerate(clustered['test_order'])
    ]
Exemplo n.º 2
0
 def test_cluster(self):
     """Check that cluster_features returns two groups for every output.

     With ``k=2`` each of the train/test feature and target collections in
     the result is expected to have length 2.
     """
     train_features, train_targets, test_features, \
         test_targets = self.get_data()
     cf = cluster_features(train_matrix=train_features,
                           train_target=train_targets,
                           test_matrix=test_features,
                           test_target=test_targets,
                           k=2)
     # assertEqual reports the actual length on failure, and msg names the
     # offending key, unlike assertTrue(len(...) == 2).
     for key in ('train_features', 'train_target',
                 'test_features', 'test_target'):
         self.assertEqual(len(cf[key]), 2, msg=key)
Exemplo n.º 3
0
    def _cluster_fit(self):
        """Scale each stored prediction by its cluster's training count.

        Runs ``cluster_features`` on the instance's train/test data, then
        divides every entry of ``self.predictions`` by the number of times
        the matching ``test_order`` label appears in ``train_order``.

        Returns
        -------
        list
            One scaled prediction per entry of the ``test_order`` result.
        """
        grouped = cluster_features(
            train_matrix=self.train_features,
            train_target=self.targets,
            k_means=self.k_means,
            test_matrix=self.test_features,
            test_target=self.predictions,
        )

        # How many training entries carry each label.
        label_counts = Counter(grouped['train_order'])

        return [
            self.predictions[pos] / label_counts[label]
            for pos, label in enumerate(grouped['test_order'])
        ]