def test_metric_tracker_best():
    """
    Test that the metric tracker returns the best metric.
    :return:
    """
    metric_tracker = MetricTracker(patient=None)

    for metric in METRICS:
        metric_tracker.add_metric(**metric)

    expect = {"epoch": 3,
              "train_metric": {"acc": 0.85},
              "train_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.85),
              "validation_metric": {"acc": 0.60},
              "validation_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.60)}

    best = metric_tracker.best()

    ASSERT.assertEqual(expect["epoch"], best.epoch)
    ASSERT.assertDictEqual(expect["train_metric"], best.train_metric)
    ASSERT.assertDictEqual(expect["validation_metric"], best.validation_metric)
    ASSERT.assertEqual(expect["train_model_target_metric"].name, best.train_model_target_metric.name)
    ASSERT.assertEqual(expect["train_model_target_metric"].value, best.train_model_target_metric.value)
    ASSERT.assertEqual(expect["validation_model_target_metric"].name, best.validation_model_target_metric.name)
    ASSERT.assertEqual(expect["validation_model_target_metric"].value, best.validation_model_target_metric.value)

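# NOTE: the metric-tracker tests in this file iterate over a module-level
# METRICS list defined elsewhere. A minimal sketch consistent with the
# assertions (best train/validation "acc" of 0.85/0.60 at epoch 3, early
# stopping after epoch 4 when patient=1) could look like the following;
# every value not pinned down by an assertion is an assumption for
# illustration only.
#
# METRICS = [
#     {"epoch": epoch,
#      "train_metric": {"acc": train_acc},
#      "train_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=train_acc),
#      "validation_metric": {"acc": validation_acc},
#      "validation_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=validation_acc)}
#     for epoch, train_acc, validation_acc in [(1, 0.70, 0.40),
#                                              (2, 0.80, 0.50),
#                                              (3, 0.85, 0.60),
#                                              (4, 0.88, 0.55),
#                                              (5, 0.90, 0.50)]
# ]
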
def test_dynamic_rnn(sequence_embedding):
    sequence, mask = sequence_embedding

    hidden_size = 4
    batch_size = 3
    sequence_len = 3

    rnn = RNN(input_size=2, hidden_size=hidden_size, num_layers=2, batch_first=True, bidirectional=True)
    dynamic_rnn = DynamicRnn(rnn=rnn)

    rnn_output: DynamicRnnOutput = dynamic_rnn(sequence=sequence, mask=mask)

    logging.info(json2str(rnn_output))

    last_layer_h_n: torch.Tensor = rnn_output.last_layer_h_n
    last_layer_h_n_expect_size = (batch_size, hidden_size * 2)

    ASSERT.assertEqual(last_layer_h_n_expect_size, last_layer_h_n.size())

    # A plain RNN (unlike an LSTM) has no cell state, so c_n is None.
    ASSERT.assertTrue(rnn_output.last_layer_c_n is None)

    sequence_encoding_expect_size = (batch_size, sequence_len, hidden_size * 2)
    sequence_encoding = rnn_output.output
    ASSERT.assertEqual(sequence_encoding_expect_size, sequence_encoding.size())

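# test_dynamic_rnn above consumes a `sequence_embedding` pytest fixture that
# is defined elsewhere (e.g. in a conftest). A minimal sketch matching the
# shapes the test asserts — (batch_size, seq_len, embedding_dim) = (3, 3, 2) —
# could look like this; the concrete values and the padding layout are
# assumptions for illustration only.
#
# @pytest.fixture
# def sequence_embedding():
#     sequence = torch.rand(3, 3, 2)
#     # The last position of the final sample is padding, so the dynamic rnn
#     # has a genuinely variable-length sequence to pack.
#     mask = torch.tensor([[1, 1, 1],
#                          [1, 1, 1],
#                          [1, 1, 0]], dtype=torch.uint8)
#     return sequence, mask
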
def test_metric_tracker_save_and_load():
    metric_tracker = MetricTracker(patient=1)

    for metric in METRICS:
        metric_tracker.add_metric(**metric)

        if metric["epoch"] > 4:
            ASSERT.assertTrue(metric_tracker.early_stopping(metric["epoch"]))
        else:
            ASSERT.assertFalse(metric_tracker.early_stopping(metric["epoch"]))

        if metric_tracker.early_stopping(metric["epoch"]):
            break

    saved_file_path = os.path.join(ROOT_PATH, "data/easytext/tests/trainer/metric_tracker.json")
    metric_tracker.save(saved_file_path)

    loaded_metric_tracker = MetricTracker.from_file(saved_file_path)

    best = metric_tracker.best()
    loaded_best = loaded_metric_tracker.best()

    ASSERT.assertEqual(best.epoch, loaded_best.epoch)
    ASSERT.assertDictEqual(best.train_metric, loaded_best.train_metric)
    ASSERT.assertDictEqual(best.validation_metric, loaded_best.validation_metric)
    ASSERT.assertEqual(best.train_model_target_metric.name, loaded_best.train_model_target_metric.name)
    ASSERT.assertEqual(best.train_model_target_metric.value, loaded_best.train_model_target_metric.value)
    ASSERT.assertEqual(best.validation_model_target_metric.name, loaded_best.validation_model_target_metric.name)
    ASSERT.assertEqual(best.validation_model_target_metric.value, loaded_best.validation_model_target_metric.value)

def test_pretrained_vocabulary(pretrained_vocabulary):
    """
    Test the pretrained vocabulary.
    """
    ASSERT.assertEqual(4, pretrained_vocabulary.size)
    ASSERT.assertEqual(4, len(pretrained_vocabulary))
    ASSERT.assertEqual(2, pretrained_vocabulary.index("我"))
    ASSERT.assertEqual(3, pretrained_vocabulary.index("美丽"))

    ASSERT.assertEqual((pretrained_vocabulary.size, 3), pretrained_vocabulary.embedding_matrix.size())

    expect_embedding_dict = {"a": [1.0, 2.0, 3.0],
                             "b": [4.0, 5.0, 6.0],
                             "美丽": [7.0, 8.0, 9.0]}

    # "美丽" has a pretrained vector, so its row is taken from the embeddings.
    ASSERT.assertListEqual(expect_embedding_dict["美丽"],
                           pretrained_vocabulary.embedding_matrix[pretrained_vocabulary.index("美丽")].tolist())

    # Tokens without pretrained vectors ("我"), padding, and unk are zero vectors.
    zero_vec = [0.] * 3
    for index in [pretrained_vocabulary.index("我"),
                  pretrained_vocabulary.padding_index,
                  pretrained_vocabulary.index(pretrained_vocabulary.unk)]:
        ASSERT.assertListEqual(zero_vec, pretrained_vocabulary.embedding_matrix[index].tolist())

def test_metric_tracker_patient():
    metric_tracker = MetricTracker(patient=1)

    for metric in METRICS:
        metric_tracker.add_metric(**metric)

        if metric["epoch"] > 4:
            ASSERT.assertTrue(metric_tracker.early_stopping(metric["epoch"]))
        else:
            ASSERT.assertFalse(metric_tracker.early_stopping(metric["epoch"]))

        if metric_tracker.early_stopping(metric["epoch"]):
            break

    expect = {"epoch": 3,
              "train_metric": {"acc": 0.85},
              "train_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.85),
              "validation_metric": {"acc": 0.60},
              "validation_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.60)}

    best = metric_tracker.best()

    ASSERT.assertEqual(expect["epoch"], best.epoch)
    ASSERT.assertDictEqual(expect["train_metric"], best.train_metric)
    ASSERT.assertDictEqual(expect["validation_metric"], best.validation_metric)
    ASSERT.assertEqual(expect["train_model_target_metric"].name, best.train_model_target_metric.name)
    ASSERT.assertEqual(expect["train_model_target_metric"].value, best.train_model_target_metric.value)
    ASSERT.assertEqual(expect["validation_model_target_metric"].name, best.validation_model_target_metric.name)
    ASSERT.assertEqual(expect["validation_model_target_metric"].value, best.validation_model_target_metric.value)

def test_masked_softmax():
    """
    Test masked softmax.
    :return:
    """
    vector = torch.FloatTensor([[1., 2., 3.],
                                [4., 5., 6.]])
    mask = torch.ByteTensor([[1, 1, 0],
                             [1, 1, 1]])

    result = masked_softmax(vector=vector, mask=mask)

    # Row 0: the masked position gets probability 0; the rest is a softmax
    # over the unmasked logits [1., 2.].
    expect1 = np.exp(np.array([1., 2.]))
    expect1 = expect1 / np.sum(expect1)
    expect1 = np.concatenate([expect1, np.array([0.])], axis=-1).tolist()

    result1 = result[0].tolist()

    ASSERT.assertEqual(len(expect1), len(result1))
    for expect_data, result_data in zip(expect1, result1):
        ASSERT.assertAlmostEqual(expect_data, result_data)

    # Row 1: nothing is masked, so this is an ordinary softmax.
    expect2 = np.exp(np.array([4., 5., 6.]))
    expect2 = expect2 / np.sum(expect2)
    expect2 = expect2.tolist()

    result2 = result[1].tolist()

    ASSERT.assertEqual(len(expect2), len(result2))
    for expect_data, result_data in zip(expect2, result2):
        ASSERT.assertAlmostEqual(expect_data, result_data)

def test_multi_input_lstm_cell():
    """
    Test MultiInputLSTMCell.
    """
    input_size = 2
    hidden_size = 3

    cell = MultiInputLSTMCell(input_size=input_size, hidden_size=hidden_size, bias=True)

    with torch.no_grad():
        # Fill the weights with deterministic values so the expected outputs
        # below are reproducible.
        weight_ih_value = list()
        for i in range(input_size):
            weight_ih_value.append([j * 0.37 for j in range(i * hidden_size * 3, (i + 1) * hidden_size * 3)])
        cell.weight_ih.copy_(torch.tensor(weight_ih_value, dtype=torch.float))

        alpha_weight_ih_value = list()
        for i in range(input_size):
            alpha_weight_ih_value.append([j * 0.23 for j in range(i * hidden_size, (i + 1) * hidden_size)])
        cell.alpha_weight_ih.copy_(torch.tensor(alpha_weight_ih_value, dtype=torch.float))

        torch.nn.init.constant_(cell.bias, val=1.0)
        torch.nn.init.constant_(cell.alpha_bias, val=0.5)

    char_input = torch.tensor([[0.2, 0.4]], dtype=torch.float)
    h = torch.tensor([[0.2, 0.11, 0.15]], dtype=torch.float)
    c = torch.tensor([[0.5, 0.6, 0.7]], dtype=torch.float)

    word_c_input = [torch.tensor([[0.7, 0.5, 0.2]], dtype=torch.float),
                    torch.tensor([[0.3, 0.4, 1.5]], dtype=torch.float)]

    output_hc = cell(input_=char_input, c_input=word_c_input, hx=(h, c))

    expect_size = (1, hidden_size)
    ASSERT.assertEqual(expect_size, output_hc[0].size())
    ASSERT.assertEqual(expect_size, output_hc[1].size())

    expects = [[0.5728, 0.5523, 0.7130],
               [0.6873, 0.6506, 0.9345]]

    for expect, hc in zip(expects, output_hc):
        for e_i, hc_i in zip(expect, hc[0].tolist()):
            ASSERT.assertAlmostEqual(e_i, hc_i, places=4)

def test_label_f1_metric():
    """
    Test label F1 metric.
    """
    predictions = torch.tensor([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
    gold_labels = torch.tensor([0, 1, 1, 2, 2, 3, 3, 4, 4, 1])

    labels = [0, 1, 2, 3, 4]
    f1_metric = LabelF1Metric(labels=labels, label_vocabulary=None)

    metrics = f1_metric(prediction_labels=predictions, gold_labels=gold_labels, mask=None)

    logging.debug(json2str(metrics))

    # precision, recall and f1 for every label, plus the overall triple.
    ASSERT.assertEqual((len(labels) + 1) * 3, len(metrics))

    precision_0 = metrics[f"{LabelF1Metric.PRECISION}-0"]
    recall_0 = metrics[f"{LabelF1Metric.RECALL}-0"]
    f1_0 = metrics[f"{LabelF1Metric.F1}-0"]

    # Label 0: two predictions, one correct; one gold instance, which is hit.
    expect_precision_0 = 1. / 2.
    ASSERT.assertAlmostEqual(expect_precision_0, precision_0)

    expect_recall_0 = 1. / 1.
    ASSERT.assertAlmostEqual(expect_recall_0, recall_0)

    expect_f1_0 = 2. * expect_precision_0 * expect_recall_0 / (expect_precision_0 + expect_recall_0)
    ASSERT.assertAlmostEqual(expect_f1_0, f1_0)

    expect_precision_overall = 5. / 10.
    expect_recall_overall = 5. / 10.

    precision_overall = metrics[LabelF1Metric.PRECISION_OVERALL]
    recall_overall = metrics[LabelF1Metric.RECALL_OVERALL]

    ASSERT.assertAlmostEqual(expect_precision_overall, precision_overall)
    ASSERT.assertAlmostEqual(expect_recall_overall, recall_overall)

    # A second call accumulates counts on top of the first batch.
    predictions = torch.tensor([0, 2])
    gold_labels = torch.tensor([0, 1])

    f1_metric(prediction_labels=predictions, gold_labels=gold_labels, mask=None)

    precision_0 = f1_metric.metric[f"{LabelF1Metric.PRECISION}-0"]
    recall_0 = f1_metric.metric[f"{LabelF1Metric.RECALL}-0"]
    f1_0 = f1_metric.metric[f"{LabelF1Metric.F1}-0"]

    expect_precision_0 = (1. + 1.) / (2. + 1.)
    ASSERT.assertAlmostEqual(expect_precision_0, precision_0)

    expect_recall_0 = (1. + 1.) / (1. + 1.)
    ASSERT.assertAlmostEqual(expect_recall_0, recall_0)

    expect_f1_0 = 2. * expect_precision_0 * expect_recall_0 / (expect_precision_0 + expect_recall_0)
    ASSERT.assertAlmostEqual(expect_f1_0, f1_0)

def test_sequence_max_label_index_decoder():
    # The token list repeats "B-T" and "I-T" so that, ordered by frequency,
    # the indices are fixed: B-T -> 0, I-T -> 1, O -> 2.
    label_vocabulary = LabelVocabulary([["B-T", "B-T", "B-T", "I-T", "I-T", "O"]],
                                       padding=LabelVocabulary.PADDING)

    b_index = label_vocabulary.index("B-T")
    ASSERT.assertEqual(0, b_index)
    i_index = label_vocabulary.index("I-T")
    ASSERT.assertEqual(1, i_index)
    o_index = label_vocabulary.index("O")
    ASSERT.assertEqual(2, o_index)

    # [[O, B, I], [B, B, I], [B, I, I], [B, I, O]]
    batch_sequence_logits = torch.tensor([[[0.2, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
                                          [[0.8, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
                                          [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.1]],
                                          [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.5]]],
                                         dtype=torch.float)

    expect_sequence_labels = [["O", "B-T", "I-T"],
                              ["B-T", "B-T", "I-T"],
                              ["B-T", "I-T", "I-T"],
                              ["B-T", "I-T", "O"]]

    expect = list()
    for expect_sequence_label in expect_sequence_labels:
        expect.append([label_vocabulary.index(label) for label in expect_sequence_label])

    decoder = SequenceMaxLabelIndexDecoder(label_vocabulary=label_vocabulary)

    label_indices = decoder(logits=batch_sequence_logits, mask=None)

    ASSERT.assertEqual(expect, label_indices.tolist())

def test_cnn_seq2vec_output_dim():
    """
    Test the cnn output dimension.
    :return:
    """
    kernel_size = (1, 2, 3, 4, 5)
    encoder = CnnSeq2Vec(embedding_dim=7, num_filters=13, kernel_sizes=kernel_size)

    tokens = torch.rand(4, 8, 7)
    vector = encoder(sequence=tokens, mask=None)

    # One max-pooled value per filter per kernel size.
    expect = (4, 13 * len(kernel_size))
    ASSERT.assertEqual(expect, vector.size())

def event_type_vocabulary():
    event_types = [["A", "B", "C"], ["A", "B"], ["A"]]

    vocabulary = Vocabulary(tokens=event_types, padding="", unk="Negative", special_first=True)

    ASSERT.assertEqual(4, vocabulary.size)
    ASSERT.assertEqual(0, vocabulary.index(vocabulary.unk))
    ASSERT.assertEqual(1, vocabulary.index("A"))
    ASSERT.assertEqual(2, vocabulary.index("B"))
    ASSERT.assertEqual(3, vocabulary.index("C"))

    return vocabulary

def test_component_evaluate_factory():
    Registry().clear_objects()

    config_json_file_path = "data/easytext/tests/component/training.json"
    config_json_file_path = os.path.join(ROOT_PATH, config_json_file_path)

    with open(config_json_file_path, encoding="utf-8") as f:
        param_dict = json.load(f, object_pairs_hook=OrderedDict)

    factory = ComponentFactory(is_training=False)

    parsed_dict = factory.create(config=param_dict)

    my_component = parsed_dict["my_component"]

    ASSERT.assertEqual("evaluate_3", my_component.value)

def test_glove_loader():
    pretrained_file_path = "data/easytext/tests/pretrained/word_embedding_sample.3d.txt"
    pretrained_file_path = os.path.join(ROOT_PATH, pretrained_file_path)

    glove_loader = GloveLoader(embedding_dim=3, pretrained_file_path=pretrained_file_path)

    embedding_dict = glove_loader.load()

    expect_embedding_dict = {"a": [1.0, 2.0, 3.0],
                             "b": [4.0, 5.0, 6.0],
                             "美丽": [7.0, 8.0, 9.0]}

    ASSERT.assertDictEqual(expect_embedding_dict, embedding_dict)
    ASSERT.assertEqual(glove_loader.embedding_dim, 3)

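# For reference: GloveLoader presumably reads the standard GloVe text format,
# one token per line followed by its whitespace-separated embedding values.
# Given the assertions above, word_embedding_sample.3d.txt would contain:
#
#   a 1.0 2.0 3.0
#   b 4.0 5.0 6.0
#   美丽 7.0 8.0 9.0
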
def test_label_vocabulary():
    """
    Test label vocabulary.
    :return:
    """
    vocabulary = LabelVocabulary([["A", "B", "C"], ["D", "E"]], padding="")
    ASSERT.assertEqual(vocabulary.size, 5)

    vocabulary = LabelVocabulary([["A", "B", "C"], ["D", "E"]], padding=LabelVocabulary.PADDING)
    ASSERT.assertEqual(vocabulary.size, 6)
    ASSERT.assertEqual(vocabulary.label_size, 5)

    # Padding is appended after all real labels.
    ASSERT.assertEqual(vocabulary.index(vocabulary.padding), 5)

    for index, w in enumerate(["A", "B", "C", "D", "E"]):
        ASSERT.assertEqual(vocabulary.index(w), index)

def test_component_factory():
    Registry().clear_objects()

    model_json_file_path = "data/easytext/tests/component/model.json"
    model_json_file_path = os.path.join(ROOT_PATH, model_json_file_path)

    with open(model_json_file_path, encoding="utf-8") as f:
        config = json.load(f, object_pairs_hook=OrderedDict)

    factory = ComponentFactory(is_training=True)

    parsed_dict = factory.create(config=config)

    model = parsed_dict["model"]

    ASSERT.assertTrue(model.linear is not None)
    ASSERT.assertEqual((2, 4), (model.linear.in_features, model.linear.out_features))

def test_default_typename():
    """
    Test component construction when one of the parameters is a plain object.
    :return:
    """
    Registry().clear_objects()

    config_json_file_path = "data/easytext/tests/component/default_typename.json"
    config_json_file_path = os.path.join(ROOT_PATH, config_json_file_path)

    with open(config_json_file_path, encoding="utf-8") as f:
        param_dict = json.load(f, object_pairs_hook=OrderedDict)

    factory = ComponentFactory(is_training=False)

    parsed_dict = factory.create(config=param_dict)

    default_typename = parsed_dict["default_typename"]

    ASSERT.assertEqual(10, default_typename.value)

def test_vocabulary_special_first():
    """
    Test vocabulary with special_first=True.
    :return:
    """
    batch_tokens = [["我", "和", "你"], ["在", "我"]]
    vocabulary = Vocabulary(batch_tokens,
                            padding=Vocabulary.PADDING,
                            unk=Vocabulary.UNK,
                            special_first=True,
                            min_frequency=1,
                            max_size=None)

    ASSERT.assertEqual(vocabulary.size, 6)
    ASSERT.assertEqual(vocabulary.padding, vocabulary.PADDING)
    ASSERT.assertEqual(vocabulary.unk, vocabulary.UNK)

    # special_first=True puts padding and unk at the front of the vocabulary.
    ASSERT.assertEqual(vocabulary.index(vocabulary.padding), 0)
    ASSERT.assertEqual(vocabulary.index(vocabulary.unk), 1)

def test_fill():
    """
    Test BIO.fill.
    :return:
    """
    pairs = [(1, 2), (2, 4)]

    for begin, end in pairs:
        sl = ["O"] * 10
        tag = "Test"
        BIO.fill(sequence_label=sl, begin_index=begin, end_index=end, tag=tag)

        for i in range(begin, end):
            if i == begin:
                ASSERT.assertEqual(sl[i], f"B-{tag}")
            else:
                ASSERT.assertEqual(sl[i], f"I-{tag}")

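# Worked illustration for the first pair above, (begin, end) = (1, 2):
# assuming fill only writes inside [begin_index, end_index), the sequence
# label afterwards is
#
#   ["O", "B-Test", "O", "O", "O", "O", "O", "O", "O", "O"]
#
# i.e. end_index is exclusive, so a span of length 1 yields a single "B-Test"
# with no "I-Test" continuation.
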
def test_gat_without_hidden():
    """
    Test GAT without a hidden layer.
    :return:
    """
    torch.manual_seed(7)
    torch.cuda.manual_seed_all(7)

    in_features = 2
    out_features = 4

    gat = GAT(in_features=in_features,
              out_features=out_features,
              dropout=0.,
              alpha=0.1,
              num_heads=3,
              hidden_size=None)

    nodes = torch.tensor([[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
                          [[0.7, 0.8], [0.9, 0.10], [0.11, 0.12]]],
                         dtype=torch.float)

    adj = torch.tensor([[[0, 1, 0], [1, 0, 0], [0, 0, 0]],
                        [[0, 1, 1], [1, 0, 1], [1, 1, 0]]],
                       dtype=torch.long)

    output_nodes: torch.Tensor = gat(nodes=nodes, adj=adj)

    expect_size = (nodes.size(0), nodes.size(1), out_features)
    ASSERT.assertEqual(expect_size, output_nodes.size())

    expect = torch.tensor([[[-1.6478, -0.3935, -2.6613, -2.7653],
                            [-1.3204, -0.8394, -1.8519, -1.9375],
                            [-1.6478, -0.3935, -2.6613, -2.7653]],
                           [[-1.9897, -0.4203, -2.4447, -2.1232],
                            [-2.1944, -0.1897, -3.4053, -3.5697],
                            [-2.9364, -0.0878, -4.1695, -4.1617]]],
                          dtype=torch.float)

    ASSERT.assertTrue(tensor_util.is_tensor_equal(expect, output_nodes, epsilon=1e-4))

def test_gat_with_hidden():
    """
    Test GAT with a hidden layer.
    :return:
    """
    torch.manual_seed(7)
    torch.cuda.manual_seed_all(7)

    in_features = 2
    out_features = 4

    gat = GAT(in_features=in_features,
              out_features=out_features,
              dropout=0.,
              alpha=0.1,
              num_heads=3,
              hidden_size=3)

    nodes = torch.tensor([[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
                          [[0.7, 0.8], [0.9, 0.10], [0.11, 0.12]]],
                         dtype=torch.float)

    adj = torch.tensor([[[0, 1, 0], [1, 0, 0], [0, 0, 0]],
                        [[0, 1, 1], [1, 0, 1], [1, 1, 0]]],
                       dtype=torch.long)

    output_nodes: torch.Tensor = gat(nodes=nodes, adj=adj)

    expect_size = (nodes.size(0), nodes.size(1), out_features)
    ASSERT.assertEqual(expect_size, output_nodes.size())

    expect = torch.tensor([[[-1.3835, -1.4764, -1.2033, -1.5113],
                            [-1.3316, -1.5785, -1.1564, -1.5368],
                            [-1.3475, -1.5467, -1.1706, -1.5279]],
                           [[-1.3388, -1.6693, -1.4427, -1.1610],
                            [-1.4288, -1.6525, -1.6607, -0.9707],
                            [-1.4320, -1.4422, -1.6465, -1.1025]]])

    ASSERT.assertTrue(tensor_util.is_tensor_equal(expect, output_nodes, epsilon=1e-4))

def test_cnn_seq2vec():
    """
    Test cnn seq2vec.
    :return:
    """
    encoder = CnnSeq2Vec(embedding_dim=2, num_filters=1, kernel_sizes=(1, 2))

    # Fill every parameter with 1 so the outputs are easy to compute by hand.
    for name, parameter in encoder.named_parameters():
        parameter.data.fill_(1.)

    tokens = torch.FloatTensor([[[0.7, 0.8], [0.1, 1.5]]])
    vector = encoder(sequence=tokens, mask=None)

    vector = vector.view(-1).tolist()

    # Kernel size 1: max over {0.7+0.8+1, 0.1+1.5+1}; kernel size 2: the whole
    # window 0.7+0.8+0.1+1.5+1 (the trailing +1 is the bias).
    expect = torch.tensor([[0.1 + 1.5 + 1., 0.7 + 0.8 + 0.1 + 1.5 + 1.]]).view(-1).tolist()

    ASSERT.assertEqual(len(expect), len(vector))
    for i in range(len(vector)):
        ASSERT.assertAlmostEqual(expect[i], vector[i])

def __call__(self, instances: Iterable[Instance]) -> ModelInputs:
    x = list()
    labels = list()

    for instance in instances:
        x_data = instance["x"]
        x.append(torch.tensor([x_data], dtype=torch.float))

        # The label is 1 exactly when x > 50.
        if x_data - 50 > 0:
            labels.append(1)
        else:
            labels.append(0)

    x = torch.stack(x)

    batch_size = x.size(0)

    ASSERT.assertEqual(x.dim(), 2)
    ASSERT.assertListEqual([batch_size, 1], [x.size(0), x.size(1)])

    labels = torch.tensor(labels)

    ASSERT.assertEqual(labels.dim(), 1)
    ASSERT.assertEqual(batch_size, labels.size(0))

    model_inputs = ModelInputs(batch_size=batch_size,
                               model_inputs={"x": x},
                               labels=labels)
    return model_inputs

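# A hedged usage sketch for the collate call above (the owner class name and
# the dict-like instances are illustrative assumptions, not the actual API):
#
#   collate_fn = DemoCollate()  # hypothetical class owning __call__
#   model_inputs = collate_fn([{"x": 10}, {"x": 60}])
#   assert model_inputs.batch_size == 2
#   assert model_inputs.model_inputs["x"].size() == (2, 1)
#   assert model_inputs.labels.tolist() == [0, 1]  # 1 iff x - 50 > 0
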
def test_decode():
    """
    Test decoding the batch logits output by the model.
    :return:
    """
    # [[O, B, I], [B, B, I], [B, I, I], [B, I, O]]
    batch_sequence_logits = torch.tensor([[[0.2, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
                                          [[0.8, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
                                          [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.1]],
                                          [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.5]]],
                                         dtype=torch.float)

    # Spans are half-open: "end" is the index one past the last token.
    expect = [[{"label": "T", "begin": 1, "end": 3}],
              [{"label": "T", "begin": 0, "end": 1}, {"label": "T", "begin": 1, "end": 3}],
              [{"label": "T", "begin": 0, "end": 3}],
              [{"label": "T", "begin": 0, "end": 2}]]

    vocabulary = LabelVocabulary([["B-T", "B-T", "B-T", "I-T", "I-T", "O"]],
                                 padding=LabelVocabulary.PADDING)

    b_index = vocabulary.index("B-T")
    ASSERT.assertEqual(0, b_index)
    i_index = vocabulary.index("I-T")
    ASSERT.assertEqual(1, i_index)
    o_index = vocabulary.index("O")
    ASSERT.assertEqual(2, o_index)

    spans = BIO.decode(batch_sequence_logits=batch_sequence_logits,
                       mask=None,
                       vocabulary=vocabulary)

    ASSERT.assertListEqual(expect, spans)

def test_decode_label_index_to_span():
    """
    Test decoding golden label indices into spans.
    :return:
    """
    vocabulary = LabelVocabulary([["B-T", "B-T", "B-T", "I-T", "I-T", "O"]],
                                 padding=LabelVocabulary.PADDING)

    b_index = vocabulary.index("B-T")
    ASSERT.assertEqual(0, b_index)
    i_index = vocabulary.index("I-T")
    ASSERT.assertEqual(1, i_index)
    o_index = vocabulary.index("O")
    ASSERT.assertEqual(2, o_index)

    # [[B, I, O, B], [O, B, I, I]]
    golden_labels = torch.tensor([[0, 1, 2, 0],
                                  [2, 0, 1, 1]])

    expect = [[{"label": "T", "begin": 0, "end": 2}, {"label": "T", "begin": 3, "end": 4}],
              [{"label": "T", "begin": 1, "end": 4}]]

    spans = BIO.decode_label_index_to_span(batch_sequence_label_index=golden_labels,
                                           mask=None,
                                           vocabulary=vocabulary)

    ASSERT.assertListEqual(expect, spans)

def test_label_f1_metric_with_mask():
    """
    Test label F1 metric with a mask.
    """
    predictions = torch.tensor([0, 1, 2, 3])
    gold_labels = torch.tensor([0, 0, 0, 2])
    mask = torch.tensor([1, 1, 1, 0], dtype=torch.long)

    labels = [0, 1, 2, 3]
    f1_metric = LabelF1Metric(labels=labels, label_vocabulary=None)

    metrics = f1_metric(prediction_labels=predictions, gold_labels=gold_labels, mask=mask)

    logging.debug(json2str(metrics))

    ASSERT.assertEqual((len(labels) + 1) * 3, len(metrics))

    precision_0 = metrics[f"{LabelF1Metric.PRECISION}-0"]
    recall_0 = metrics[f"{LabelF1Metric.RECALL}-0"]
    f1_0 = metrics[f"{LabelF1Metric.F1}-0"]

    # The last position is masked out, so only the first three count.
    expect_precision_0 = 1. / 1.
    ASSERT.assertAlmostEqual(expect_precision_0, precision_0)

    expect_recall_0 = 1. / 3.
    ASSERT.assertAlmostEqual(expect_recall_0, recall_0)

    expect_f1_0 = 2. * expect_precision_0 * expect_recall_0 / (expect_precision_0 + expect_recall_0)
    ASSERT.assertAlmostEqual(expect_f1_0, f1_0)

    expect_precision_overall = 1. / 3.
    expect_recall_overall = 1. / 3.

    precision_overall = metrics[LabelF1Metric.PRECISION_OVERALL]
    recall_overall = metrics[LabelF1Metric.RECALL_OVERALL]

    ASSERT.assertAlmostEqual(expect_precision_overall, precision_overall)
    ASSERT.assertAlmostEqual(expect_recall_overall, recall_overall)

def test_attention_seq2vec_no_mask(inputs):
    """
    Test attention seq2vec without a mask.
    :return:
    """
    sequence, mask = inputs

    encoder = AttentionSeq2Vec(input_size=2, query_hidden_size=3, value_hidden_size=None)

    # Set deterministic weights so the expected output below is reproducible.
    encoder.wk.weight = Parameter(FloatTensor([[0.1, 0.2],
                                               [0.3, 0.4],
                                               [0.5, 0.6]]))
    encoder.wk.bias = Parameter(FloatTensor([0.2, 0.4, 0.6]))
    encoder.attention.weight = Parameter(FloatTensor([[0.6, 0.2, 7]]))

    vec = encoder(sequence=sequence, mask=None)

    logging.debug(vec)

    ASSERT.assertEqual((2, 2), vec.size())

    expect = torch.tensor([[4.8455, 5.2867],
                           [5.7232, 3.6037]])

    vec1d = vec.view(-1).tolist()
    expect1d = expect.view(-1).tolist()

    for expect_data, vec_data in zip(expect1d, vec1d):
        ASSERT.assertAlmostEqual(expect_data, vec_data, delta=1e-4)

def test_graph_attention_layer():
    torch.manual_seed(7)
    torch.cuda.manual_seed_all(7)

    in_features = 2
    out_features = 4

    gat_layer = GraphAttentionLayer(in_features=in_features,
                                    out_features=out_features,
                                    dropout=0.0,
                                    alpha=0.1)

    nodes = torch.tensor([[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
                          [[0.7, 0.8], [0.9, 0.10], [0.11, 0.12]]],
                         dtype=torch.float)

    adj = torch.tensor([[[0, 1, 0], [1, 0, 0], [0, 0, 0]],
                        [[0, 1, 1], [1, 0, 1], [1, 1, 0]]],
                       dtype=torch.long)

    outputs: torch.Tensor = gat_layer(input=nodes, adj=adj)

    expect_size = (nodes.size(0), nodes.size(1), out_features)
    ASSERT.assertEqual(expect_size, outputs.size())

    # The expected values below were obtained by running the original paper's
    # implementation and are used here directly.
    expect = torch.tensor([[[0.2831, 0.3588, -0.5131, -0.2058],
                            [0.1606, 0.1292, -0.2264, -0.0951],
                            [0.2831, 0.3588, -0.5131, -0.2058]],
                           [[-0.0748, 0.5025, -0.3840, -0.1192],
                            [0.2959, 0.4624, -0.6123, -0.2405],
                            [0.1505, 0.8668, -0.8609, -0.3059]]],
                          dtype=torch.float)

    ASSERT.assertTrue(tensor_util.is_tensor_equal(expect, outputs, epsilon=1e-4))

def test_word_lstm_cell_with_bias():
    """
    Test WordLSTMCell with bias.
    :return:
    """
    input_size = 2
    hidden_size = 3

    word_lstm_cell = WordLSTMCell(input_size=input_size, hidden_size=hidden_size, bias=True)

    # Deterministic weights so the expected cell state below is reproducible.
    value = list()
    for i in range(input_size):
        value.append([j * 0.37 for j in range(i * hidden_size * 3, (i + 1) * hidden_size * 3)])

    with torch.no_grad():
        word_lstm_cell.weight_ih.copy_(torch.tensor(value, dtype=torch.float))
        torch.nn.init.constant_(word_lstm_cell.bias, val=1.0)

    word_input = torch.tensor([[0.2, 0.4]], dtype=torch.float)
    h = torch.tensor([[0.2, 0.11, 0.15]], dtype=torch.float)
    c = torch.tensor([[0.5, 0.6, 0.7]], dtype=torch.float)

    output_c = word_lstm_cell(input_=word_input, hx=(h, c))

    expect_size = (1, hidden_size)
    ASSERT.assertEqual(expect_size, output_c.size())

    expect_output_c = [1.4231, 1.5257, 1.6372]
    for e_i, i in zip(expect_output_c, output_c[0].tolist()):
        ASSERT.assertAlmostEqual(e_i, i, places=3)

def test_decode_one_sequence_logits_to_label():
    """
    Test decoding a single sequence of logits into a sequence label.
    :return:
    """
    sequence_logits_list = list()
    expect_list = list()

    # O B I — the normal case.
    sequence_logits = torch.tensor([[0.2, 0.3, 0.4],
                                    [0.7, 0.2, 0.3],
                                    [0.2, 0.3, 0.1]],
                                   dtype=torch.float)
    expect = ["O", "B-T", "I-T"]
    sequence_logits_list.append(sequence_logits)
    expect_list.append(expect)

    sequence_logits = torch.tensor([[0.9, 0.3, 0.4],
                                    [0.2, 0.8, 0.3],
                                    [0.2, 0.3, 0.1]],
                                   dtype=torch.float)
    expect = ["B-T", "I-T", "I-T"]
    sequence_logits_list.append(sequence_logits)
    expect_list.append(expect)

    sequence_logits = torch.tensor([[0.9, 0.3, 0.4],
                                    [0.2, 0.8, 0.3],
                                    [0.2, 0.3, 0.9]],
                                   dtype=torch.float)
    expect = ["B-T", "I-T", "O"]
    sequence_logits_list.append(sequence_logits)
    expect_list.append(expect)

    vocabulary = LabelVocabulary([["B-T", "B-T", "B-T", "I-T", "I-T", "O"]],
                                 padding=LabelVocabulary.PADDING)

    b_index = vocabulary.index("B-T")
    ASSERT.assertEqual(0, b_index)
    i_index = vocabulary.index("I-T")
    ASSERT.assertEqual(1, i_index)
    o_index = vocabulary.index("O")
    ASSERT.assertEqual(2, o_index)

    for sequence_logits, expect in zip(sequence_logits_list, expect_list):
        sequence_label, sequence_label_indices = BIO.decode_one_sequence_logits_to_label(
            sequence_logits=sequence_logits, vocabulary=vocabulary)

        ASSERT.assertListEqual(sequence_label, expect)

        expect_indices = [vocabulary.index(label) for label in expect]
        ASSERT.assertListEqual(sequence_label_indices, expect_indices)

def test_vocabulary():
    """
    Test vocabulary without special tokens.
    :return:
    """
    batch_tokens = [["我", "和", "你"], ["在", "我"]]
    vocabulary = Vocabulary(batch_tokens,
                            padding="",
                            unk="",
                            special_first=True,
                            min_frequency=1,
                            max_size=None)

    ASSERT.assertEqual(vocabulary.size, 4)
    ASSERT.assertTrue(not vocabulary.padding)
    ASSERT.assertTrue(not vocabulary.unk)

    # With empty padding/unk there are no special tokens, so real tokens
    # start at index 0.
    ASSERT.assertEqual(vocabulary.index("我"), 0)
    ASSERT.assertEqual(vocabulary.index("和"), 1)