Example #1
def test_metric_tracker_best():
    """
    Test the metric tracker's best-metric selection.
    """
    metric_tracker = MetricTracker(patient=None)

    for metric in METRICS:
        metric_tracker.add_metric(**metric)

    expect = {"epoch": 3,
              "train_metric": {"acc": 0.85},
              "train_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.85),
              "validation_metric": {"acc": 0.60},
              "validation_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.60)}

    best = metric_tracker.best()
    ASSERT.assertEqual(expect["epoch"], best.epoch)

    ASSERT.assertDictEqual(expect["train_metric"], best.train_metric)
    ASSERT.assertDictEqual(expect["validation_metric"], best.validation_metric)
    ASSERT.assertEqual(expect["train_model_target_metric"].name, best.train_model_target_metric.name)
    ASSERT.assertEqual(expect["train_model_target_metric"].value, best.train_model_target_metric.value)
    ASSERT.assertEqual(expect["validation_model_target_metric"].name, best.validation_model_target_metric.name)
    ASSERT.assertEqual(expect["validation_model_target_metric"].value, best.validation_model_target_metric.value)
Example #2
def test_dynamic_rnn(sequence_embedding):
    sequence, mask = sequence_embedding

    hidden_size = 4
    batch_size = 3
    sequence_len = 3

    rnn = RNN(input_size=2,
              hidden_size=hidden_size,
              num_layers=2,
              batch_first=True,
              bidirectional=True)

    dynamic_rnn = DynamicRnn(rnn=rnn)

    rnn_output: DynamicRnnOutput = dynamic_rnn(sequence=sequence, mask=mask)

    logging.info(json2str(rnn_output))

    last_layer_h_n: torch.Tensor = rnn_output.last_layer_h_n

    last_layer_h_n_expect_size = (batch_size, hidden_size * 2)
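    # hidden_size * 2: a bidirectional RNN concatenates the forward and
    # backward final hidden states.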

    ASSERT.assertEqual(last_layer_h_n_expect_size, last_layer_h_n.size())

    ASSERT.assertTrue(rnn_output.last_layer_c_n is None)
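    # A vanilla RNN carries no cell state, hence last_layer_c_n is None.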

    sequence_encoding_expect_size = (batch_size, sequence_len, hidden_size * 2)
    sequence_encoding = rnn_output.output
    ASSERT.assertEqual(sequence_encoding_expect_size,
                       sequence_encoding.size())
Example #3
def test_metric_tracker_save_and_load():
    metric_tracker = MetricTracker(patient=1)
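
    # With patient=1, early_stopping flips to True once the tracked metric has
    # gone more than one epoch without improving; for METRICS that happens
    # after epoch 4.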

    for metric in METRICS:
        metric_tracker.add_metric(**metric)

        if metric["epoch"] > 4:
            ASSERT.assertTrue(metric_tracker.early_stopping(metric["epoch"]))
        else:
            ASSERT.assertFalse(metric_tracker.early_stopping(metric["epoch"]))

        if metric_tracker.early_stopping(metric["epoch"]):
            break

    saved_file_path = os.path.join(ROOT_PATH, "data/easytext/tests/trainer/metric_tracker.json")

    metric_tracker.save(saved_file_path)

    loaded_metric_tracker = MetricTracker.from_file(saved_file_path)

    best = metric_tracker.best()
    loaded_best = loaded_metric_tracker.best()
    ASSERT.assertEqual(best.epoch, loaded_best.epoch)

    ASSERT.assertDictEqual(best.train_metric, loaded_best.train_metric)
    ASSERT.assertDictEqual(best.validation_metric, loaded_best.validation_metric)
    ASSERT.assertEqual(best.train_model_target_metric.name, loaded_best.train_model_target_metric.name)
    ASSERT.assertEqual(best.train_model_target_metric.value, loaded_best.train_model_target_metric.value)
    ASSERT.assertEqual(best.validation_model_target_metric.name, loaded_best.validation_model_target_metric.name)
    ASSERT.assertEqual(best.validation_model_target_metric.value, loaded_best.validation_model_target_metric.value)
Example #4
def test_pretrained_vocabulary(pretrained_vocabulary):
    """
    Test the pretrained vocabulary.
    """

    ASSERT.assertEqual(4, pretrained_vocabulary.size)
    ASSERT.assertEqual(4, len(pretrained_vocabulary))
    ASSERT.assertEqual(2, pretrained_vocabulary.index("我"))
    ASSERT.assertEqual(3, pretrained_vocabulary.index("美丽"))

    ASSERT.assertEqual((pretrained_vocabulary.size, 3),
                       pretrained_vocabulary.embedding_matrix.size())

    expect_embedding_dict = {
        "a": [1.0, 2.0, 3.0],
        "b": [4.0, 5.0, 6.0],
        "美丽": [7.0, 8.0, 9.0]
    }

    ASSERT.assertListEqual(
        expect_embedding_dict["美丽"], pretrained_vocabulary.embedding_matrix[
            pretrained_vocabulary.index("美丽")].tolist())

    zero_vec = [0.] * 3

    for index in [
            pretrained_vocabulary.index("我"),
            pretrained_vocabulary.padding_index,
            pretrained_vocabulary.index(pretrained_vocabulary.unk)
    ]:
        ASSERT.assertListEqual(
            zero_vec, pretrained_vocabulary.embedding_matrix[index].tolist())
Example #5
def test_metric_tracker_patient():
    metric_tracker = MetricTracker(patient=1)

    for metric in METRICS:
        metric_tracker.add_metric(**metric)

        if metric["epoch"] > 4:
            ASSERT.assertTrue(metric_tracker.early_stopping(metric["epoch"]))
        else:
            ASSERT.assertFalse(metric_tracker.early_stopping(metric["epoch"]))

        if metric_tracker.early_stopping(metric["epoch"]):
            break

    expect = {"epoch": 3,
              "train_metric": {"acc": 0.85},
              "train_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.85),
              "validation_metric": {"acc": 0.60},
              "validation_model_target_metric": ModelTargetMetric(metric_name="acc", metric_value=0.60)}

    best = metric_tracker.best()
    ASSERT.assertEqual(expect["epoch"], best.epoch)

    ASSERT.assertDictEqual(expect["train_metric"], best.train_metric)
    ASSERT.assertDictEqual(expect["validation_metric"], best.validation_metric)
    ASSERT.assertEqual(expect["train_model_target_metric"].name, best.train_model_target_metric.name)
    ASSERT.assertEqual(expect["train_model_target_metric"].value, best.train_model_target_metric.value)
    ASSERT.assertEqual(expect["validation_model_target_metric"].name, best.validation_model_target_metric.name)
    ASSERT.assertEqual(expect["validation_model_target_metric"].value, best.validation_model_target_metric.value)
Example #6
def test_masked_softmax():
    """
    Test masked_softmax.
    """

    vector = torch.FloatTensor([[1., 2., 3.], [4., 5., 6.]])

    mask = torch.ByteTensor([[1, 1, 0], [1, 1, 1]])

    result = masked_softmax(vector=vector, mask=mask)
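
    # Positions where mask == 0 should receive probability 0; the remaining
    # scores are softmax-normalized among themselves, as the expected values
    # below verify.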

    expect1 = np.exp(np.array([1., 2.]))

    expect1 = expect1 / np.sum(expect1)
    expect1 = np.concatenate([expect1, np.array([0.])], axis=-1).tolist()

    result1 = result[0].tolist()

    ASSERT.assertEqual(len(expect1), len(result1))

    for expect_data, result_data in zip(expect1, result1):
        ASSERT.assertAlmostEqual(expect_data, result_data)

    expect2 = np.exp(np.array([4., 5., 6.]))
    expect2 = expect2 / np.sum(expect2)
    expect2 = expect2.tolist()

    result2 = result[1].tolist()

    ASSERT.assertEqual(len(expect2), len(result2))

    for expect_data, result_data in zip(expect2, result2):
        ASSERT.assertAlmostEqual(expect_data, result_data)
Example #7
def test_multi_input_lstm_cell():
    """
    Test MultiInputLSTMCell.
    """

    input_size = 2
    hidden_size = 3

    cell = MultiInputLSTMCell(input_size=input_size,
                              hidden_size=hidden_size,
                              bias=True)

    with torch.no_grad():
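        # Pin the cell's parameters to a deterministic pattern so the expected
        # outputs asserted below are reproducible.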
        weight_ih_value = list()

        for i in range(input_size):
            weight_ih_value.append([
                j * 0.37
                for j in range(i * hidden_size * 3, (i + 1) * hidden_size * 3)
            ])

        cell.weight_ih.copy_(torch.tensor(weight_ih_value, dtype=torch.float))

        alpha_weight_ih_value = list()

        for i in range(input_size):
            alpha_weight_ih_value.append([
                j * 0.23 for j in range(i * hidden_size, (i + 1) * hidden_size)
            ])

        cell.alpha_weight_ih.copy_(
            torch.tensor(alpha_weight_ih_value, dtype=torch.float))

        torch.nn.init.constant_(cell.bias, val=1.0)
        torch.nn.init.constant_(cell.alpha_bias, val=0.5)

    char_input = torch.tensor([[0.2, 0.4]], dtype=torch.float)

    h = torch.tensor([[0.2, 0.11, 0.15]], dtype=torch.float)
    c = torch.tensor([[0.5, 0.6, 0.7]], dtype=torch.float)

    word_c_input = [
        torch.tensor([[0.7, 0.5, 0.2]], dtype=torch.float),
        torch.tensor([[0.3, 0.4, 1.5]], dtype=torch.float)
    ]

    output_hc = cell(input_=char_input, c_input=word_c_input, hx=(h, c))

    expect_size = (1, hidden_size)

    ASSERT.assertEqual(expect_size, output_hc[0].size())
    ASSERT.assertEqual(expect_size, output_hc[1].size())

    expects = [[0.5728, 0.5523, 0.7130], [0.6873, 0.6506, 0.9345]]

    for expect, hc in zip(expects, output_hc):

        for e_i, hc_i in zip(expect, hc[0].tolist()):
            ASSERT.assertAlmostEqual(e_i, hc_i, places=4)
Example #8
def test_label_f1_metric():
    """
    Test LabelF1Metric.
    """

    predictions = torch.tensor([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
    gold_labels = torch.tensor([0, 1, 1, 2, 2, 3, 3, 4, 4, 1])

    labels = [0, 1, 2, 3, 4]
    f1_metric = LabelF1Metric(labels=labels, label_vocabulary=None)

    metrics = f1_metric(prediction_labels=predictions,
                        gold_labels=gold_labels,
                        mask=None)

    logging.debug(json2str(metrics))
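    # The returned dict holds precision, recall, and F1 per label plus an
    # overall triple: (len(labels) + 1) * 3 entries.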

    ASSERT.assertEqual((len(labels) + 1) * 3, len(metrics))

    precision_0 = metrics[f"{LabelF1Metric.PRECISION}-0"]
    recall_0 = metrics[f"{LabelF1Metric.RECALL}-0"]
    f1_0 = metrics[f"{LabelF1Metric.F1}-0"]

    expect_precision_0 = 1. / 2.
    ASSERT.assertAlmostEqual(expect_precision_0, precision_0)
    expect_recall_0 = 1. / 1.
    ASSERT.assertAlmostEqual(expect_recall_0, recall_0)

    expect_f1_0 = 2. * expect_precision_0 * expect_recall_0 / (
        expect_precision_0 + expect_recall_0)
    ASSERT.assertAlmostEqual(expect_f1_0, f1_0)

    expect_precision_overall = 5. / 10.
    expect_recall_overall = 5. / 10.
    precision_overall = metrics[LabelF1Metric.PRECISION_OVERALL]
    recall_overall = metrics[LabelF1Metric.RECALL_OVERALL]

    ASSERT.assertAlmostEqual(expect_precision_overall, precision_overall)
    ASSERT.assertAlmostEqual(expect_recall_overall, recall_overall)

    predictions = torch.tensor([0, 2])
    gold_labels = torch.tensor([0, 1])

    f1_metric(prediction_labels=predictions,
              gold_labels=gold_labels,
              mask=None)

    precision_0 = f1_metric.metric[f"{LabelF1Metric.PRECISION}-0"]
    recall_0 = f1_metric.metric[f"{LabelF1Metric.RECALL}-0"]
    f1_0 = f1_metric.metric[f"{LabelF1Metric.F1}-0"]

    expect_precision_0 = (1. + 1.) / (2. + 1.)
    ASSERT.assertAlmostEqual(expect_precision_0, precision_0)
    expect_recall_0 = (1. + 1.) / (1. + 1.)
    ASSERT.assertAlmostEqual(expect_recall_0, recall_0)
    expect_f1_0 = 2. * expect_precision_0 * expect_recall_0 / (
        expect_precision_0 + expect_recall_0)
    ASSERT.assertAlmostEqual(expect_f1_0, f1_0)
Example #9
def test_sequence_max_label_index_decoder():
    label_vocabulary = LabelVocabulary(
        [["B-T", "B-T", "B-T", "I-T", "I-T", "O"]],
        padding=LabelVocabulary.PADDING)

    b_index = label_vocabulary.index("B-T")
    ASSERT.assertEqual(0, b_index)
    i_index = label_vocabulary.index("I-T")
    ASSERT.assertEqual(1, i_index)
    o_index = label_vocabulary.index("O")
    ASSERT.assertEqual(2, o_index)

    # [[O, B, I], [B, B, I], [B, I, I], [B, I, O]]
    batch_sequence_logits = torch.tensor(
        [[[0.2, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
         [[0.8, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
         [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.1]],
         [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.5]]],
        dtype=torch.float)

    expect_sequence_labels = [["O", "B-T", "I-T"], ["B-T", "B-T", "I-T"],
                              ["B-T", "I-T", "I-T"], ["B-T", "I-T", "O"]]

    expect = list()

    for expect_sequence_label in expect_sequence_labels:
        expect.append(
            [label_vocabulary.index(label) for label in expect_sequence_label])

    decoder = SequenceMaxLabelIndexDecoder(label_vocabulary=label_vocabulary)

    label_indices = decoder(logits=batch_sequence_logits, mask=None)

    ASSERT.assertEqual(expect, label_indices.tolist())
Example #10
def test_cnn_seq2vec_output_dim():
    """
    Test the CNN encoder's output dimensions.
    """
    kernel_size = (1, 2, 3, 4, 5)
    encoder = CnnSeq2Vec(embedding_dim=7,
                         num_filters=13,
                         kernel_sizes=kernel_size)

    tokens = torch.rand(4, 8, 7)
    vector = encoder(sequence=tokens, mask=None)
    expect = (4, 13 * len(kernel_size))

    ASSERT.assertEqual(expect, vector.size())
Example #11
def event_type_vocabulary():
    event_types = [["A", "B", "C"], ["A", "B"], ["A"]]

    vocabulary = Vocabulary(tokens=event_types,
                            padding="",
                            unk="Negative",
                            special_first=True)

    ASSERT.assertEqual(4, vocabulary.size)
    ASSERT.assertEqual(0, vocabulary.index(vocabulary.unk))
    ASSERT.assertEqual(1, vocabulary.index("A"))
    ASSERT.assertEqual(2, vocabulary.index("B"))
    ASSERT.assertEqual(3, vocabulary.index("C"))

    return vocabulary
Example #12
def test_component_evaluate_factory():
    Registry().clear_objects()

    config_json_file_path = "data/easytext/tests/component/training.json"
    config_json_file_path = os.path.join(ROOT_PATH, config_json_file_path)
    with open(config_json_file_path, encoding="utf-8") as f:
        param_dict = json.load(f, object_pairs_hook=OrderedDict)

    factory = ComponentFactory(is_training=False)

    parsed_dict = factory.create(config=param_dict)

    my_component = parsed_dict["my_component"]

    ASSERT.assertEqual("evaluate_3", my_component.value)
Example #13
def test_glove_loader():
    pretrained_file_path = "data/easytext/tests/pretrained/word_embedding_sample.3d.txt"
    pretrained_file_path = os.path.join(ROOT_PATH, pretrained_file_path)

    glove_loader = GloveLoader(embedding_dim=3,
                               pretrained_file_path=pretrained_file_path)

    embedding_dict = glove_loader.load()
    expect_embedding_dict = {
        "a": [1.0, 2.0, 3.0],
        "b": [4.0, 5.0, 6.0],
        "美丽": [7.0, 8.0, 9.0]
    }

    ASSERT.assertDictEqual(expect_embedding_dict, embedding_dict)
    ASSERT.assertEqual(glove_loader.embedding_dim, 3)
Example #14
def test_label_vocabulary():
    """
    Test LabelVocabulary.
    """
    vocabulary = LabelVocabulary([["A", "B", "C"], ["D", "E"]], padding="")
    ASSERT.assertEqual(vocabulary.size, 5)

    vocabulary = LabelVocabulary([["A", "B", "C"], ["D", "E"]],
                                 padding=LabelVocabulary.PADDING)
    ASSERT.assertEqual(vocabulary.size, 6)
    ASSERT.assertEqual(vocabulary.label_size, 5)

    ASSERT.assertEqual(vocabulary.index(vocabulary.padding), 5)

    for index, w in enumerate(["A", "B", "C", "D", "E"]):
        ASSERT.assertEqual(vocabulary.index(w), index)
Example #15
def test_component_factory():
    Registry().clear_objects()

    model_json_file_path = "data/easytext/tests/component/model.json"
    model_json_file_path = os.path.join(ROOT_PATH, model_json_file_path)
    with open(model_json_file_path, encoding="utf-8") as f:
        config = json.load(f, object_pairs_hook=OrderedDict)

    factory = ComponentFactory(is_training=True)

    parsed_dict = factory.create(config=config)

    model = parsed_dict["model"]

    ASSERT.assertTrue(model.linear is not None)
    ASSERT.assertEqual((2, 4),
                       (model.linear.in_features, model.linear.out_features))
Example #16
def test_default_typename():
    """
    Test component construction when one of the parameters is itself an object.
    """
    Registry().clear_objects()
    config_json_file_path = "data/easytext/tests/component/default_typename.json"
    config_json_file_path = os.path.join(ROOT_PATH, config_json_file_path)
    with open(config_json_file_path, encoding="utf-8") as f:
        param_dict = json.load(f, object_pairs_hook=OrderedDict)

    factory = ComponentFactory(is_training=False)

    parsed_dict = factory.create(config=param_dict)

    default_typename = parsed_dict["default_typename"]

    ASSERT.assertEqual(10, default_typename.value)
Example #17
def test_vocabulary_special_first():
    """
    Test Vocabulary with special_first=True.
    """
    batch_tokens = [["我", "和", "你"], ["在", "我"]]
    vocabulary = Vocabulary(batch_tokens,
                            padding=Vocabulary.PADDING,
                            unk=Vocabulary.UNK,
                            special_first=True,
                            min_frequency=1,
                            max_size=None)

    ASSERT.assertEqual(vocabulary.size, 6)

    ASSERT.assertEqual(vocabulary.padding, vocabulary.PADDING)
    ASSERT.assertEqual(vocabulary.unk, vocabulary.UNK)
    ASSERT.assertEqual(vocabulary.index(vocabulary.padding), 0)
    ASSERT.assertEqual(vocabulary.index(vocabulary.unk), 1)
Example #18
def test_fill():
    """
    Test BIO.fill.
    """

    pairs = [(1, 2), (2, 4)]
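
    # BIO.fill tags the half-open span [begin, end): "B-{tag}" at begin and
    # "I-{tag}" for the rest, which the loop below verifies.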

    for begin, end in pairs:
        sl = ["O"] * 10
        tag = "Test"
        BIO.fill(sequence_label=sl, begin_index=begin, end_index=end, tag=tag)

        for i in range(begin, end):

            if i == begin:
                ASSERT.assertEqual(sl[i], f"B-{tag}")
            else:
                ASSERT.assertEqual(sl[i], f"I-{tag}")
Example #19
def test_gat_without_hidden():
    """
    Test GAT without a hidden layer.
    """

    torch.manual_seed(7)
    torch.cuda.manual_seed_all(7)

    in_features = 2
    out_features = 4

    gat = GAT(in_features=in_features,
              out_features=out_features,
              dropout=0.,
              alpha=0.1,
              num_heads=3,
              hidden_size=None)

    nodes = torch.tensor([[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
                          [[0.7, 0.8], [0.9, 0.10], [0.11, 0.12]]],
                         dtype=torch.float)

    adj = torch.tensor(
        [[[0, 1, 0], [1, 0, 0], [0, 0, 0]], [[0, 1, 1], [1, 0, 1], [1, 1, 0]]],
        dtype=torch.long)
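
    # One adjacency matrix per batch entry; a 1 at [i][j] marks an edge
    # between node i and node j.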

    output_nodes: torch.Tensor = gat(nodes=nodes, adj=adj)

    expect_size = (nodes.size(0), nodes.size(1), out_features)
    ASSERT.assertEqual(expect_size, output_nodes.size())

    expect = torch.tensor([[[-1.6478, -0.3935, -2.6613, -2.7653],
                            [-1.3204, -0.8394, -1.8519, -1.9375],
                            [-1.6478, -0.3935, -2.6613, -2.7653]],
                           [[-1.9897, -0.4203, -2.4447, -2.1232],
                            [-2.1944, -0.1897, -3.4053, -3.5697],
                            [-2.9364, -0.0878, -4.1695, -4.1617]]],
                          dtype=torch.float)

    ASSERT.assertTrue(
        tensor_util.is_tensor_equal(expect, output_nodes, epsilon=1e-4))
Example #20
def test_gat_with_hidden():
    """
    Test GAT with a hidden layer.
    """

    torch.manual_seed(7)
    torch.cuda.manual_seed_all(7)

    in_features = 2
    out_features = 4

    gat = GAT(in_features=in_features,
              out_features=out_features,
              dropout=0.,
              alpha=0.1,
              num_heads=3,
              hidden_size=3)

    nodes = torch.tensor([[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
                          [[0.7, 0.8], [0.9, 0.10], [0.11, 0.12]]],
                         dtype=torch.float)

    adj = torch.tensor(
        [[[0, 1, 0], [1, 0, 0], [0, 0, 0]], [[0, 1, 1], [1, 0, 1], [1, 1, 0]]],
        dtype=torch.long)

    output_nodes: torch.Tensor = gat(nodes=nodes, adj=adj)

    expect_size = (nodes.size(0), nodes.size(1), out_features)
    ASSERT.assertEqual(expect_size, output_nodes.size())

    expect = torch.tensor([[[-1.3835, -1.4764, -1.2033, -1.5113],
                            [-1.3316, -1.5785, -1.1564, -1.5368],
                            [-1.3475, -1.5467, -1.1706, -1.5279]],
                           [[-1.3388, -1.6693, -1.4427, -1.1610],
                            [-1.4288, -1.6525, -1.6607, -0.9707],
                            [-1.4320, -1.4422, -1.6465, -1.1025]]])

    ASSERT.assertTrue(
        tensor_util.is_tensor_equal(expect, output_nodes, epsilon=1e-4))
Example #21
def test_cnn_seq2vec():
    """
    Test CnnSeq2Vec.
    """

    encoder = CnnSeq2Vec(embedding_dim=2, num_filters=1, kernel_sizes=(1, 2))

    for name, parameter in encoder.named_parameters():
        parameter.data.fill_(1.)
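
    # With every weight and bias set to 1, each convolution output equals the
    # sum of its input window plus 1, and max pooling keeps the largest sum.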

    tokens = torch.FloatTensor([[[0.7, 0.8], [0.1, 1.5]]])
    vector = encoder(sequence=tokens, mask=None)
    vector = vector.view(-1).tolist()

    expect = torch.tensor([[0.1 + 1.5 + 1.,
                            0.7 + 0.8 + 0.1 + 1.5 + 1.]]).view(-1).tolist()

    ASSERT.assertEqual(len(expect), len(vector))
    for i in range(len(vector)):
        ASSERT.assertAlmostEqual(expect[i], vector[i])
Example #22
    def __call__(self, instances: Iterable[Instance]) -> ModelInputs:

        x = list()
        labels = list()
        for instance in instances:

            x_data = instance["x"]
            x.append(torch.tensor([x_data], dtype=torch.float))

            if x_data > 50:
                labels.append(1)
            else:
                labels.append(0)

        x = torch.stack(x)

        batch_size = x.size(0)
        ASSERT.assertEqual(x.dim(), 2)
        ASSERT.assertListEqual([batch_size, 1], [x.size(0), x.size(1)])

        labels = torch.tensor(labels)
        ASSERT.assertEqual(labels.dim(), 1)
        ASSERT.assertEqual(batch_size, labels.size(0))

        model_inputs = ModelInputs(batch_size=batch_size,
                                   model_inputs={"x": x},
                                   labels=labels)

        return model_inputs
Example #23
def test_decode():
    """
    Test decoding the model's batch logits into spans.
    """

    # [[O, B, I], [B, B, I], [B, I, I], [B, I, O]]
    batch_sequence_logits = torch.tensor([[[0.2, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
                                          [[0.8, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
                                          [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.1]],
                                          [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.5]]],
                                         dtype=torch.float)

    expect = [[{"label": "T", "begin": 1, "end": 3}],
              [{"label": "T", "begin": 0, "end": 1}, {"label": "T", "begin": 1, "end": 3}],
              [{"label": "T", "begin": 0, "end": 3}],
              [{"label": "T", "begin": 0, "end": 2}]]

    vocabulary = LabelVocabulary([["B-T", "B-T", "B-T", "I-T", "I-T", "O"]],
                                 padding=LabelVocabulary.PADDING)

    b_index = vocabulary.index("B-T")
    ASSERT.assertEqual(0, b_index)
    i_index = vocabulary.index("I-T")
    ASSERT.assertEqual(1, i_index)
    o_index = vocabulary.index("O")
    ASSERT.assertEqual(2, o_index)

    spans = BIO.decode(batch_sequence_logits=batch_sequence_logits,
                       mask=None,
                       vocabulary=vocabulary)

    ASSERT.assertListEqual(expect, spans)
Example #24
def test_decode_decode_label_index_to_span():
    """
    Test decoding golden label indices into spans.
    """

    vocabulary = LabelVocabulary([["B-T", "B-T", "B-T", "I-T", "I-T", "O"]],
                                 padding=LabelVocabulary.PADDING)

    b_index = vocabulary.index("B-T")
    ASSERT.assertEqual(0, b_index)
    i_index = vocabulary.index("I-T")
    ASSERT.assertEqual(1, i_index)
    o_index = vocabulary.index("O")
    ASSERT.assertEqual(2, o_index)

    golden_labels = torch.tensor([[0, 1, 2, 0],
                                  [2, 0, 1, 1]])
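
    # Spans use half-open [begin, end) indexing: the first row decodes
    # "B-T I-T O B-T" into spans (0, 2) and (3, 4).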

    expect = [[{"label": "T", "begin": 0, "end": 2}, {"label": "T", "begin": 3, "end": 4}],
              [{"label": "T", "begin": 1, "end": 4}]]

    spans = BIO.decode_label_index_to_span(batch_sequence_label_index=golden_labels,
                                           mask=None,
                                           vocabulary=vocabulary)

    ASSERT.assertListEqual(expect, spans)
Example #25
def test_label_f1_metric_with_mask():
    """
    Test LabelF1Metric with a mask.
    """

    predictions = torch.tensor([0, 1, 2, 3])
    gold_labels = torch.tensor([0, 0, 0, 2])
    mask = torch.tensor([1, 1, 1, 0], dtype=torch.long)

    labels = [0, 1, 2, 3]
    f1_metric = LabelF1Metric(labels=labels, label_vocabulary=None)

    metrics = f1_metric(prediction_labels=predictions,
                        gold_labels=gold_labels,
                        mask=mask)
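
    # The last position is masked out, so prediction 3 / gold 2 is excluded
    # from every count below.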

    logging.debug(json2str(metrics))

    ASSERT.assertEqual((len(labels) + 1) * 3, len(metrics))

    precision_0 = metrics[f"{LabelF1Metric.PRECISION}-0"]
    recall_0 = metrics[f"{LabelF1Metric.RECALL}-0"]
    f1_0 = metrics[f"{LabelF1Metric.F1}-0"]

    expect_precision_0 = 1. / 1.
    ASSERT.assertAlmostEqual(expect_precision_0, precision_0)
    expect_recall_0 = 1. / 3.
    ASSERT.assertAlmostEqual(expect_recall_0, recall_0)

    expect_f1_0 = 2. * expect_precision_0 * expect_recall_0 / (
        expect_precision_0 + expect_recall_0)
    ASSERT.assertAlmostEqual(expect_f1_0, f1_0)

    expect_precision_overall = 1. / 3.
    expect_recall_overall = 1. / 3.
    precision_overall = metrics[LabelF1Metric.PRECISION_OVERALL]
    recall_overall = metrics[LabelF1Metric.RECALL_OVERALL]

    ASSERT.assertAlmostEqual(expect_precision_overall, precision_overall)
    ASSERT.assertAlmostEqual(expect_recall_overall, recall_overall)
Example #26
def test_attention_seq2vec_no_mask(inputs):
    """
    Test AttentionSeq2Vec without a mask.
    """

    sequence, mask = inputs

    encoder = AttentionSeq2Vec(input_size=2,
                               query_hidden_size=3,
                               value_hidden_size=None)

    encoder.wk.weight = Parameter(FloatTensor([
        [0.1, 0.2],
        [0.3, 0.4],
        [0.5, 0.6]
    ]))
    encoder.wk.bias = Parameter(FloatTensor([0.2, 0.4, 0.6]))

    encoder.attention.weight = Parameter(FloatTensor(
        [
            [0.6, 0.2, 7]
        ]
    ))
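
    # wk and the attention weights are pinned to fixed values so the expected
    # output below is deterministic.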

    vec = encoder(sequence=sequence, mask=None)

    print(vec)

    ASSERT.assertEqual((2, 2), vec.size())

    expect = torch.tensor([[4.8455, 5.2867],
                           [5.7232, 3.6037]])

    vec1d = vec.view(-1).tolist()
    expect1d = expect.view(-1).tolist()

    for expect_data, vec_data in zip(expect1d, vec1d):
        ASSERT.assertAlmostEqual(expect_data, vec_data, delta=1e-4)
Example #27
def test_graph_attention_layer():
    torch.manual_seed(7)
    torch.cuda.manual_seed_all(7)

    in_features = 2
    out_features = 4

    gat_layer = GraphAttentionLayer(in_features=in_features,
                                    out_features=out_features,
                                    dropout=0.0,
                                    alpha=0.1)

    nodes = torch.tensor([[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
                          [[0.7, 0.8], [0.9, 0.10], [0.11, 0.12]]],
                         dtype=torch.float)

    adj = torch.tensor(
        [[[0, 1, 0], [1, 0, 0], [0, 0, 0]], [[0, 1, 1], [1, 0, 1], [1, 1, 0]]],
        dtype=torch.long)

    outputs: torch.Tensor = gat_layer(input=nodes, adj=adj)

    expect_size = (nodes.size(0), nodes.size(1), out_features)

    ASSERT.assertEqual(expect_size, outputs.size())

    # The expected values below were obtained from the original paper's implementation and are used directly.
    expect = torch.tensor([[[0.2831, 0.3588, -0.5131, -0.2058],
                            [0.1606, 0.1292, -0.2264, -0.0951],
                            [0.2831, 0.3588, -0.5131, -0.2058]],
                           [[-0.0748, 0.5025, -0.3840, -0.1192],
                            [0.2959, 0.4624, -0.6123, -0.2405],
                            [0.1505, 0.8668, -0.8609, -0.3059]]],
                          dtype=torch.float)

    ASSERT.assertTrue(
        tensor_util.is_tensor_equal(expect, outputs, epsilon=1e-4))
Example #28
def test_word_lstm_cell_with_bias():
    """
    Test WordLSTMCell with bias.
    """

    input_size = 2
    hidden_size = 3
    word_lstm_cell = WordLSTMCell(input_size=input_size,
                                  hidden_size=hidden_size,
                                  bias=True)

    value = list()

    for i in range(input_size):
        value.append([
            j * 0.37
            for j in range(i * hidden_size * 3, (i + 1) * hidden_size * 3)
        ])
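
    # Deterministic weight pattern (shape: input_size x 3 * hidden_size) so
    # the expected cell output below is reproducible.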

    with torch.no_grad():
        word_lstm_cell.weight_ih.copy_(torch.tensor(value, dtype=torch.float))
        torch.nn.init.constant_(word_lstm_cell.bias, val=1.0)

    word_input = torch.tensor([[0.2, 0.4]], dtype=torch.float)
    h = torch.tensor([[0.2, 0.11, 0.15]], dtype=torch.float)
    c = torch.tensor([[0.5, 0.6, 0.7]], dtype=torch.float)

    output_c = word_lstm_cell(input_=word_input, hx=(h, c))

    expect_size = (1, hidden_size)
    ASSERT.assertEqual(expect_size, output_c.size())

    expect_output_c = [1.4231, 1.5257, 1.6372]

    for e_i, i in zip(expect_output_c, output_c[0].tolist()):
        ASSERT.assertAlmostEqual(e_i, i, places=3)
Example #29
def test_decode_one_sequence_logits_to_label():
    """
    Test decoding sequence logits into sequence labels.
    """

    sequence_logits_list = list()
    expect_list = list()

    sequence_logits = torch.tensor([[0.2, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
                                   dtype=torch.float)  # O B I, the normal case
    expect = ["O", "B-T", "I-T"]
    sequence_logits_list.append(sequence_logits)
    expect_list.append(expect)

    sequence_logits = torch.tensor([[0.9, 0.3, 0.4], [0.2, 0.8, 0.3], [0.2, 0.3, 0.1]],
                                   dtype=torch.float)
    expect = ["B-T", "I-T", "I-T"]

    sequence_logits_list.append(sequence_logits)
    expect_list.append(expect)

    sequence_logits = torch.tensor([[0.9, 0.3, 0.4], [0.2, 0.8, 0.3], [0.2, 0.3, 0.9]],
                                   dtype=torch.float)
    expect = ["B-T", "I-T", "O"]
    sequence_logits_list.append(sequence_logits)
    expect_list.append(expect)

    vocabulary = LabelVocabulary([["B-T", "B-T", "B-T", "I-T", "I-T", "O"]],
                                 padding=LabelVocabulary.PADDING)

    b_index = vocabulary.index("B-T")
    ASSERT.assertEqual(0, b_index)
    i_index = vocabulary.index("I-T")
    ASSERT.assertEqual(1, i_index)
    o_index = vocabulary.index("O")
    ASSERT.assertEqual(2, o_index)

    for sequence_logits, expect in zip(sequence_logits_list, expect_list):
        sequence_label, sequence_label_indices = BIO.decode_one_sequence_logits_to_label(
            sequence_logits=sequence_logits,
            vocabulary=vocabulary)

        ASSERT.assertListEqual(sequence_label, expect)
        expect_indices = [vocabulary.index(label) for label in expect]
        ASSERT.assertListEqual(sequence_label_indices, expect_indices)
Example #30
def test_vocabulary():
    """

    :return:
    """

    batch_tokens = [["我", "和", "你"], ["在", "我"]]
    vocabulary = Vocabulary(batch_tokens,
                            padding="",
                            unk="",
                            special_first=True,
                            min_frequency=1,
                            max_size=None)

    ASSERT.assertEqual(vocabulary.size, 4)

    ASSERT.assertTrue(not vocabulary.padding)
    ASSERT.assertTrue(not vocabulary.unk)

    ASSERT.assertEqual(vocabulary.index("我"), 0)
    ASSERT.assertEqual(vocabulary.index("和"), 1)