class TestAssemblePredicted(unittest.TestCase):
    """Tests for an ESLNN model built on the "test" data set.

    The train/validate/test splits are embedded with ``TempDoc2vec`` before
    the model is constructed.
    """

    def setUp(self):
        """Build the three embedded splits and the model under test."""
        self.dataset = Dataset(
            data_name="test", mode="train", fold_number=1, sequence=True)
        self.dataset_validate = Dataset(
            data_name="test", mode="validate", fold_number=1, sequence=True)
        self.dataset_test = Dataset(
            data_name="test", mode="test", fold_number=1, sequence=True)

        # One embedding object is shared by all three splits.
        embedder = TempDoc2vec()
        for split in (self.dataset, self.dataset_validate, self.dataset_test):
            split.change_to_Doc2Vec(embedder)

        # One hidden size and one batch size per level; one target-hidden
        # size per level except the last.
        hidden = [5] * self.dataset.number_of_level()
        batch_size = [3] * self.dataset.number_of_level()
        target_hidden = [3] * (self.dataset.number_of_level() - 1)

        self.model = ESLNN(
            "test",
            self.dataset,
            self.dataset_validate,
            self.dataset_test,
            30,
            hidden,
            target_hidden,
            stopping_time=3,
            batch_size=batch_size,
        )

        # Overwrite the parameters of the first classifier with constants
        # (presumably so the expected scores below are reproducible).
        first_classifier = self.model.classifier[0]
        first_classifier.dense.weight.data.fill_(1)
        first_classifier.dense.bias.data.zero_()
        first_classifier.logit.weight.data.fill_(0.2)
        first_classifier.logit.bias.data.zero_()

    def _assert_train_f1(self, level, macro, micro=None):
        """Evaluate ``level`` on the train split and compare the F1 scores.

        ``micro`` is only compared when it is given.
        """
        f1_macro, f1_micro = self.model.evaluate_each_level(level, "train")
        self.assertAlmostEqual(macro, f1_macro, places=6)
        if micro is not None:
            self.assertAlmostEqual(micro, f1_micro, places=6)

    def test_initial_model(self):
        """Every per-level classifier is created with the expected sizes."""
        for level in range(self.dataset.number_of_level()):
            classifier = self.model.classifier[level]
            expected_classes = self.dataset.check_each_number_of_class(level)
            self.assertEqual(classifier.input_size, 7)
            self.assertEqual(classifier.hidden_size, 5)
            self.assertEqual(classifier.number_of_class, expected_classes)

    def test_score_each_level(self):
        """Level 0 scores a perfect macro and micro F1 on the train split."""
        self._assert_train_f1(0, 1, 1)

    def test_evaluate(self):
        """Overall macro F1 and the per-level scores match the references."""
        f1_macro, _f1_micro, f1_each = self.model.evaluate("train")
        expected_per_level = [1, 4 / 5, 4 / 5, 4 / 5, 1 / 2, 0]
        self.assertAlmostEqual(0.7125, f1_macro, places=6)
        for level_scores, expected in zip(f1_each, expected_per_level):
            # Only element 0 of each per-level entry is compared; the
            # comparison of element 1 is intentionally left disabled.
            self.assertAlmostEqual(expected, level_scores[0], places=6)

    def test_train(self):
        """Training runs to completion and yields the expected scores."""
        # just train successfully
        self.model.train()
        self._assert_train_f1(0, 1, 1)
        self._assert_train_f1(1, 0.0)

    def test_threshold_tuning(self):
        """Threshold tuning after training gives the expected scores."""
        self.model.train()
        self.model.tuning_threshold()
        self._assert_train_f1(0, 1, 1)
        self._assert_train_f1(1, 0.8, 0.8)

    def test_correction(self):
        """child_based_correction turns each input row into the expected row."""
        raw_rows = [
            [0, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 1],
            [0, 0, 0, 0, 0, 1, 0, 0],
        ]
        expected_rows = [
            [1, 0, 0, 1, 0, 0, 0, 0],
            [1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 0, 0],
        ]
        corrected = self.model.child_based_correction(ByteTensor(raw_rows))
        corrected_rows = corrected.cpu().numpy().tolist()
        for actual, expected in zip(corrected_rows, expected_rows):
            self.assertListEqual(expected, actual)
class DatasetDoc2vecUnitTest(unittest.TestCase):
    """Tests for a train-split ``Dataset`` after ``change_to_Doc2Vec``."""

    def setUp(self):
        """Load the train split and embed it with ``TempDoc2vec``."""
        self.dataset = Dataset(
            data_name="test", mode="train", fold_number=1, sequence=True)
        self.dataset.change_to_Doc2Vec(TempDoc2vec())

    def test_change_to_Doc2Vec(self):
        """Embedded data and densified labels match the reference rows."""
        actual_labels = self.dataset.labels.toarray().astype(int).tolist()
        actual_data = self.dataset.datas.tolist()
        expected_data = [
            [0, 0, 1, 0, 0, 1, 0],
            [0, 1, 0, 0, 0, 0, 1],
            [0, 1, 0, 0, 1, 0, 0],
        ]
        expected_labels = [
            [1, 1, 1, 1, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 0],
        ]
        self.assertListEqual(actual_data, expected_data)
        self.assertListEqual(actual_labels, expected_labels)

    def test_generate_batch(self):
        """Batches of size 1 come back with the expected data and labels."""
        # Expected labels, indexed first by position in ``requested_levels``
        # and then by the order in which batches are yielded.
        expected_labels = [
            [[1, 1], [1, 1], [1, 1]],
            [[1, 1], [1, 1], [0, 0]],
            [
                [1, 1, 1, 1, 1, 1, 1, 0],
                [1, 1, 1, 1, 1, 1, 0, 0],
                [1, 1, 0, 0, 0, 0, 0, 0],
            ],
        ]
        expected_data = [
            [0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0],
            [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0],
            [0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
        ]
        # Levels 0 and 1 are requested directly; the third case passes -1,
        # for which the full label rows are expected.
        requested_levels = (0, 1, -1)

        # Repeat the whole check 20 times (presumably to confirm the batch
        # order is stable across passes).
        for _ in range(20):
            for case, level in enumerate(requested_levels):
                batches = self.dataset.generate_batch(level, 1)
                for position, (data, label) in enumerate(batches):
                    flat_label = label.numpy().reshape(-1).tolist()
                    self.assertSequenceEqual(
                        flat_label, expected_labels[case][position])
                    flat_data = data.numpy().reshape(-1).tolist()
                    self.assertSequenceEqual(
                        flat_data, expected_data[position])

    def test_number_of_data_in_class(self):
        """Per-class data counts match the reference list."""
        expected_counts = [3, 3, 2, 2, 2, 2, 1, 0]
        self.assertListEqual(
            expected_counts, self.dataset.number_of_data_in_each_class())

    def test_size_of_feature(self):
        """The embedded feature vectors have size 7."""
        self.assertEqual(7, self.dataset.size_of_feature())

    def test_number_of_each_class(self):
        """check_each_number_of_class returns an int with the expected value."""
        self.assertIsInstance(
            self.dataset.check_each_number_of_class(0), int)
        for level, expected in ((0, 2), (1, 2), (5, 1)):
            self.assertEqual(
                expected, self.dataset.check_each_number_of_class(level))
class DatasetUnitTest(unittest.TestCase):
    """Tests for ``Dataset`` on the "test" data set without any embedding.

    ``setUp`` never calls ``change_to_Doc2Vec``, so these tests exercise the
    data set exactly as it is loaded.
    """

    def setUp(self):
        """Load the train, validate and test splits of fold 1."""
        self.dataset_train = Dataset(
            data_name="test", mode="train", fold_number=1, sequence=True)
        self.dataset_validate = Dataset(
            data_name="test", mode="validate", fold_number=1, sequence=True)
        self.dataset_test = Dataset(
            data_name="test", mode="test", fold_number=1, sequence=True)

    def test_hierarchy(self):
        """Hierarchy, parent map, names, name index and levels are loaded."""
        real_all_name = ['1', '2', '3', '4', '5', '6', '7', '8']
        real_hierarchy = {
            0: {2, 3},
            1: {4, 6},
            2: {5},
            3: {4},
            4: {5},
            5: {6},
            6: {7},
        }
        real_parent_of = {
            2: {0},
            3: {0},
            4: {1, 3},
            5: {2, 4},
            6: {1, 5},
            7: {6},
        }
        real_name_to_index = {
            '1': 0, '2': 1, '3': 2, '4': 3,
            '5': 4, '6': 5, '7': 6, '8': 7,
        }
        real_level = [0, 2, 4, 5, 6, 7, 8]

        # Mappings are compared with assertEqual rather than
        # assertSequenceEqual: on a mismatch the latter indexes both operands
        # by integer position, which raises KeyError for a dict without that
        # key and turns a test failure into a test error with no diff.
        self.assertEqual(real_hierarchy, self.dataset_train.hierarchy)
        self.assertEqual(real_parent_of, self.dataset_train.parent_of)
        self.assertSequenceEqual(real_all_name, self.dataset_train.all_name)
        self.assertEqual(
            real_name_to_index, self.dataset_train.name_to_index)
        self.assertSequenceEqual(
            real_level, self.dataset_train.level.tolist())

    def test_load_data(self):
        """The three splits together hold exactly the raw file's content."""
        file_name = "test/data.txt"
        datas, labels = prep.import_data(file_name)
        hierarchy_file_name = "test/hierarchy.pickle"
        labels = prep.map_index_of_label(hierarchy_file_name, labels)

        fold_datas = np.concatenate([
            self.dataset_train.datas,
            self.dataset_validate.datas,
            self.dataset_test.datas,
        ])
        fold_labels = np.concatenate([
            self.dataset_train.labels,
            self.dataset_validate.labels,
            self.dataset_test.labels,
        ])

        # Both sides are sorted before comparing, so the order in which
        # samples were distributed over the splits does not matter.
        self.assertListEqual(sorted(fold_datas.tolist()), sorted(datas))
        sorted_fold_labels = sorted(map(list, fold_labels.tolist()))
        sorted_raw_labels = sorted(map(list, labels))
        self.assertListEqual(sorted_fold_labels, sorted_raw_labels)

    def test_cant_use_generate_batch(self):
        """Iterating generate_batch without embedding raises NotEmbeddingState."""
        with self.assertRaises(NotEmbeddingState):
            for _ in self.dataset_train.generate_batch(0, 1):
                pass

    def test_number_of_each_class(self):
        """check_each_number_of_class returns an int with the expected value."""
        self.assertIsInstance(
            self.dataset_train.check_each_number_of_class(0), int)
        self.assertEqual(2, self.dataset_train.check_each_number_of_class(0))
        self.assertEqual(2, self.dataset_train.check_each_number_of_class(1))
        self.assertEqual(1, self.dataset_train.check_each_number_of_class(5))

    def test_number_of_level(self):
        """The train split reports six levels."""
        self.assertEqual(6, self.dataset_train.number_of_level())