Exemplo n.º 1
0
 def get_scores_for_category(self):
     '''
     Compare ScaledFScore.get_scores_for_category against pinned reference
     values, using the count pair returned by self._get_counts().

     NOTE(review): the method name has no ``test_`` prefix, so name-based
     test discovery would presumably skip it -- confirm whether intentional.
     '''
     in_category, out_of_category = self._get_counts()
     actual = ScaledFScore.get_scores_for_category(in_category, out_of_category)
     # Reference values, one per entry of the count arrays.
     expected = [
         0.23991183969723384,
         0.24969810634506373,
         0.23991183969723384,
         0.27646711056272855,
         0.92885244834997516,
         0.42010144843632563,
         0.49166017105966719,
         0.0,
         0.0,
         0.50262304057984664,
     ]
     np.testing.assert_almost_equal(actual, expected)
Exemplo n.º 2
0
 def test_get_scores(self):
     '''
     ScaledFScore.get_scores with beta=1. must reproduce the pinned
     reference scores for the counts returned by self._get_counts().
     '''
     in_category, out_of_category = self._get_counts()
     actual = ScaledFScore.get_scores(in_category, out_of_category, beta=1.)
     expected = np.array([
         0.2689108, 0., 0.2689108, 0.1266617, 1.,
         0.5, 0.5590517, 0.5, 0.5, 0.5720015,
     ])
     np.testing.assert_almost_equal(actual, expected)
Exemplo n.º 3
0
 def get_scores_for_category(self):
     '''
     Verify the output of ScaledFScore.get_scores_for_category on the
     counts from self._get_counts() against hard-coded reference scores.

     NOTE(review): without a ``test_`` prefix this method is presumably
     not collected by name-based test discovery -- confirm whether that is
     deliberate.
     '''
     counts_in, counts_out = self._get_counts()
     computed = ScaledFScore.get_scores_for_category(counts_in, counts_out)
     reference = [
         0.23991183969723384, 0.24969810634506373,
         0.23991183969723384, 0.27646711056272855,
         0.92885244834997516, 0.42010144843632563,
         0.49166017105966719, 0.0,
         0.0, 0.50262304057984664,
     ]
     np.testing.assert_almost_equal(computed, reference)
Exemplo n.º 4
0
 def _get_scaled_f_score_from_counts(self, cat_word_counts, not_cat_word_counts, scaler_algo,
                                     beta=DEFAULT_BETA):
     '''
     Compute scaled F-scores by delegating to ScaledFScore.get_scores.

     This method does no computation of its own and does not read any
     instance state; it forwards its arguments unchanged.

     Parameters
     ----------
     cat_word_counts
         Per-term counts inside the category (presumably array-like;
         confirm against ScaledFScore.get_scores).
     not_cat_word_counts
         Per-term counts outside the category (presumably aligned with
         cat_word_counts; confirm against ScaledFScore.get_scores).
     scaler_algo
         Passed positionally as the third argument of
         ScaledFScore.get_scores.
     beta
         Forwarded as the ``beta`` keyword; defaults to DEFAULT_BETA.

     Returns
     -------
     Whatever ScaledFScore.get_scores returns for these arguments.
     '''
     return ScaledFScore.get_scores(cat_word_counts, not_cat_word_counts, scaler_algo, beta=beta)
Exemplo n.º 5
0
    def get_scaled_f_scores_vs_background(self, scaler_algo='none', beta=1.):
        '''
        Score terms against the background corpus and rank them.

        The frame returned by self.get_term_and_background_counts() gains a
        'Scaled f-score' column, computed from its 'corpus' and 'background'
        columns, and is returned sorted by that column in descending order.

        Parameters
        ----------
        scaler_algo : str
            see get_scaled_f_scores, default 'none'
        beta : float
            default 1.

        Returns
        -------
        pd.DataFrame of scaled_f_score scores compared to background corpus
        '''
        score_column = 'Scaled f-score'
        counts = self.get_term_and_background_counts()
        counts[score_column] = ScaledFScore.get_scores_for_category(
            counts['corpus'],
            counts['background'],
            scaler_algo,
            beta,
        )
        # Highest-scoring terms first.
        ranked = counts.sort_values(by=score_column, ascending=False)
        return ranked
Exemplo n.º 6
0
 def test_get_scores_zero_median(self):
     '''
     Smoke test: call ScaledFScore.get_scores on in-category counts that
     are mostly zero. Nothing is asserted about the result, so the test
     only fails if the call raises.
     '''
     in_category = np.array([0, 0, 0, 0, 0, 0, 1, 2])
     out_of_category = np.array([1, 1, 2, 1, 1, 1, 1, 3])
     ScaledFScore.get_scores(in_category, out_of_category)
Exemplo n.º 7
0
 def test_get_scores_zero_all_same(self):
     '''
     ScaledFScore.get_scores with default arguments must reproduce the
     pinned scores for in-category counts that are mostly zero.
     '''
     in_category = np.array([0, 0, 0, 0, 0, 0, 1, 2])
     out_of_category = np.array([1, 1, 2, 1, 1, 1, 1, 2])
     actual = ScaledFScore.get_scores(in_category, out_of_category)
     expected = [0.5, 0.5, 0, 0.5, 0.5, 0.5, 0.5, 1.]
     np.testing.assert_almost_equal(actual, expected)
Exemplo n.º 8
0
 def test_get_scores(self):
     '''
     Check ScaledFScore.get_scores (beta=1.) against hard-coded reference
     scores, using the count pair supplied by self._get_counts().
     '''
     counts_in, counts_out = self._get_counts()
     computed = ScaledFScore.get_scores(counts_in, counts_out, beta=1.)
     reference = np.array([
         0.2689108,
         0.,
         0.2689108,
         0.1266617,
         1.,
         0.5,
         0.5590517,
         0.5,
         0.5,
         0.5720015,
     ])
     np.testing.assert_almost_equal(computed, reference)
Exemplo n.º 9
0
 def test_get_scores_zero_median(self):
     '''
     Exercise ScaledFScore.get_scores on mostly-zero in-category counts.

     There is no assertion on the returned scores; the test passes as long
     as the call completes without raising.
     '''
     counts_in = np.array([0, 0, 0, 0, 0, 0, 1, 2])
     counts_out = np.array([1, 1, 2, 1, 1, 1, 1, 3])
     ScaledFScore.get_scores(counts_in, counts_out)
Exemplo n.º 10
0
 def test_get_scores_zero_all_same(self):
     '''
     Pin the scores ScaledFScore.get_scores (default arguments) returns for
     a fixed pair of count arrays whose in-category counts are mostly zero.
     '''
     counts_in = np.array([0, 0, 0, 0, 0, 0, 1, 2])
     counts_out = np.array([1, 1, 2, 1, 1, 1, 1, 2])
     computed = ScaledFScore.get_scores(counts_in, counts_out)
     reference = [
         0.5, 0.5, 0, 0.5,
         0.5, 0.5, 0.5, 1.,
     ]
     np.testing.assert_almost_equal(computed, reference)