def confusion(self, y_hat, y):
    """
    Build a confusion matrix for two tag series via ``Score``.

    Both series are flattened to one dimension and passed, unfiltered, to a
    ``Score`` constructed from ``self.tag_corpus[2:12]``.

    :param y_hat: tag series passed to ``Score.fit`` as its second argument
        (presumably the predicted tags -- confirm against ``Score``)
    :type y_hat: pd.Series
    :param y: tag series passed to ``Score.fit`` as its first argument
        (presumably the gold tags -- confirm against ``Score``)
    :type y: pd.Series
    :return: whatever ``Score.matrix_confusion()`` returns
    :rtype: defined by ``Score`` (not visible from this method)
    """
    assert isinstance(y_hat, pd.Series)
    assert isinstance(y, pd.Series)

    roll_y = pd.Series(y.values.reshape(-1))
    roll_y_hat = pd.Series(y_hat.values.reshape(-1))

    # NOTE(review): the slice starts at 2, unlike the [:10] used by the
    # sibling metrics; presumably it skips two special markers at the head
    # of tag_corpus -- confirm.
    most_frequent_tags = self.tag_corpus[2:12]
    sc = Score(most_frequent_tags)
    sc.fit(roll_y, roll_y_hat)
    return sc.matrix_confusion()
def test_over_all_acc(self):
    """Overall accuracy is 1.0 for identical series and differs from 1.0 otherwise."""
    perfect_scorer = Score(tags)
    perfect_scorer.fit(y, y)
    perfect_acc = perfect_scorer.over_all_acc()
    self.assertEqual(perfect_acc, 1.)
    print(perfect_acc)

    flawed_scorer = Score(tags)
    flawed_scorer.fit(y, y_bad)
    flawed_acc = flawed_scorer.over_all_acc()
    self.assertNotEqual(flawed_acc, 1.)
    print(flawed_acc)
def test_matrix_confusion(self):
    """
    Compare the confusion matrix of a perfect fit with that of a flawed fit.

    The two matrices must differ in at least one cell, while their sums
    (as returned by ``.sum()``) must all be equal.
    """
    perfect_scorer = Score(tags)
    perfect_scorer.fit(y, y)
    perfect_cm = perfect_scorer.matrix_confusion()
    print(perfect_cm)

    flawed_scorer = Score(tags)
    flawed_scorer.fit(y, y_bad)
    flawed_cm = flawed_scorer.matrix_confusion()
    print(flawed_cm)

    # The minimum over all element-wise comparisons is falsy as soon as a
    # single cell differs.
    cells_all_equal = np.min(np.min(perfect_cm == flawed_cm))
    self.assertEqual(cells_all_equal, 0)

    sums_all_equal = np.min(perfect_cm.sum() == flawed_cm.sum())
    self.assertEqual(sums_all_equal, 1)
def acc_per_tag(self, y_hat, y):
    """
    Compute per-tag accuracy via ``Score``, ignoring padding/marker tags.

    Both series are flattened to one dimension. Positions whose tag in ``y``
    is one of ``'<PAD>'``, ``'*'``, ``'<STOP>'`` or ``','`` are removed from
    both series with a single shared mask, so the remaining entries stay
    aligned pairwise.

    The previous implementation called ``Series.drop`` with these tags, which
    removes *index labels*, not values; on a freshly built Series (integer
    index) that raised ``KeyError`` on every call.

    :param y_hat: tag series compared against ``y`` (presumably predictions)
    :type y_hat: pd.Series
    :param y: reference tag series; it alone decides which positions are
        ignored
    :type y: pd.Series
    :return: whatever ``Score.acc_per_tag`` returns
    :rtype: defined by ``Score`` (not visible from this method)
    :raises ValueError: if the flattened series differ in length
    """
    assert isinstance(y_hat, pd.Series)
    assert isinstance(y, pd.Series)

    ignored_tags = ['<PAD>', '*', '<STOP>', ',']
    flat_y = pd.Series(y.values.reshape(-1))
    flat_y_hat = pd.Series(y_hat.values.reshape(-1))
    if len(flat_y) != len(flat_y_hat):
        raise ValueError(
            'y and y_hat must have the same number of tags, got '
            '{} and {}'.format(len(flat_y), len(flat_y_hat)))

    # Filter by value using one mask for both series; reset the index so
    # positional and label-based access agree downstream.
    keep = ~flat_y.isin(ignored_tags)
    roll_y = flat_y[keep].reset_index(drop=True)
    roll_y_hat = flat_y_hat[keep].reset_index(drop=True)

    most_frequent_tags = self.tag_corpus[:10]
    sc = Score(most_frequent_tags)
    sc.fit(roll_y, roll_y_hat)
    return sc.acc_per_tag(roll_y, roll_y_hat)
def test_all_data(self):
    """
    Smoke-run ``Score`` on the full test corpus, scoring the data against itself.

    The ten most frequent tags (after removing ``'<STOP>'`` and ``'*'``) are
    used as the tag set. The confusion matrix and the per-tag accuracies are
    printed; nothing is asserted.
    """
    # NOTE(review): Windows-style path relative to the working directory;
    # it will not resolve on POSIX systems -- confirm the intended runner.
    data = PreprocessTags().load_data(r'..\data\test.wtag')
    y_hat = data.y
    y = data.y
    roll_y = pd.Series(y.values.reshape(-1))
    roll_y_hat = pd.Series(y_hat.values.reshape(-1))

    # Series.value_counts() replaces the top-level pd.value_counts, which is
    # deprecated since pandas 2.1; the index is ordered by descending count.
    tags_by_frequency = roll_y.value_counts().index
    most_frequent_tags = pd.Series(
        tags_by_frequency, index=tags_by_frequency).drop(['<STOP>', '*'])[:10]

    sc = Score(most_frequent_tags)
    sc.fit(roll_y, roll_y_hat)
    cm = sc.matrix_confusion()
    acc_dict = sc.acc_per_tag(y, y_hat)
    print(acc_dict)
    print(cm)
def accuracy(self, y_hat, y):
    """
    Compute overall accuracy via ``Score``, ignoring padding/marker tags.

    Both series are flattened to one dimension. Positions whose tag in ``y``
    is one of ``'<PAD>'``, ``'*'``, ``'<STOP>'`` or ``','`` are removed from
    both series with a single shared mask, so the remaining entries stay
    aligned pairwise.

    The previous implementation called ``Series.drop`` with these tags, which
    removes *index labels*, not values; on a freshly built Series (integer
    index) that raised ``KeyError`` on every call.

    :param y_hat: tag series compared against ``y`` (presumably predictions)
    :type y_hat: pd.Series
    :param y: reference tag series; it alone decides which positions are
        ignored
    :type y: pd.Series
    :return: whatever ``Score.over_all_acc()`` returns
    :rtype: defined by ``Score`` (not visible from this method)
    :raises ValueError: if the flattened series differ in length
    """
    assert isinstance(y_hat, pd.Series)
    assert isinstance(y, pd.Series)

    ignored_tags = ['<PAD>', '*', '<STOP>', ',']
    flat_y = pd.Series(y.values.reshape(-1))
    flat_y_hat = pd.Series(y_hat.values.reshape(-1))
    if len(flat_y) != len(flat_y_hat):
        raise ValueError(
            'y and y_hat must have the same number of tags, got '
            '{} and {}'.format(len(flat_y), len(flat_y_hat)))

    # Filter by value using one mask for both series; reset the index so
    # positional and label-based access agree downstream.
    keep = ~flat_y.isin(ignored_tags)
    roll_y = flat_y[keep].reset_index(drop=True)
    roll_y_hat = flat_y_hat[keep].reset_index(drop=True)

    most_frequent_tags = self.tag_corpus[:10]
    sc = Score(most_frequent_tags)
    sc.fit(roll_y, roll_y_hat)
    return sc.over_all_acc()
def test_fit(self):
    """After ``fit(y, y)``, ``Score.y_hat`` must equal ``y`` without its first two and last entries."""
    scorer = Score(tags)
    scorer.fit(y, y)
    matches = scorer.y_hat == y[2:-1]
    every_entry_matches = np.min(matches)
    self.assertTrue(every_entry_matches, 'The parsing is not working')