Example #1
def test_masked_softmax():
    """
    Test masked_softmax.
    """

    vector = torch.FloatTensor([[1., 2., 3.], [4., 5., 6.]])

    mask = torch.ByteTensor([[1, 1, 0], [1, 1, 1]])
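    # in the mask, 1 means attend and 0 means mask out (a byte mask, per older PyTorch)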

    result = masked_softmax(vector=vector, mask=mask)

    expect1 = np.exp(np.array([1., 2.]))

    expect1 = expect1 / np.sum(expect1)
    expect1 = np.concatenate([expect1, np.array([0.])], axis=-1).tolist()

    result1 = result[0].tolist()

    ASSERT.assertEqual(len(expect1), len(result1))

    for expect_data, result_data in zip(expect1, result1):
        ASSERT.assertAlmostEqual(expect_data, result_data)

    expect2 = np.exp(np.array([4., 5., 6.]))
    expect2 = expect2 / np.sum(expect2)
    expect2 = expect2.tolist()

    result2 = result[1].tolist()

    ASSERT.assertEqual(len(expect2), len(result2))

    for expect_data, result_data in zip(expect2, result2):
        ASSERT.assertAlmostEqual(expect_data, result_data)
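
The test assumes a masked_softmax helper: masked-out positions get probability 0 and the remaining positions are softmax-normalized over the last dimension. A minimal sketch consistent with the expected values (the signature comes from the call above; the internals are assumptions):

import torch

def masked_softmax(vector: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
    # Softmax over the last dimension, forcing masked-out positions to 0.
    filled = vector.masked_fill(~mask.to(dtype=torch.bool), float("-inf"))
    return torch.softmax(filled, dim=-1)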
Example #2
def test_multi_input_lstm_cell():
    """
    Test MultiInputLSTMCell.
    """

    input_size = 2
    hidden_size = 3

    cell = MultiInputLSTMCell(input_size=input_size,
                              hidden_size=hidden_size,
                              bias=True)

    with torch.no_grad():
        weight_ih_value = list()

        for i in range(input_size):
            weight_ih_value.append([
                j * 0.37
                for j in range(i * hidden_size * 3, (i + 1) * hidden_size * 3)
            ])

        cell.weight_ih.copy_(torch.tensor(weight_ih_value, dtype=torch.float))

        alpha_weight_ih_value = list()

        for i in range(input_size):
            alpha_weight_ih_value.append([
                j * 0.23 for j in range(i * hidden_size, (i + 1) * hidden_size)
            ])

        cell.alpha_weight_ih.copy_(
            torch.tensor(alpha_weight_ih_value, dtype=torch.float))

        torch.nn.init.constant_(cell.bias, val=1.0)
        torch.nn.init.constant_(cell.alpha_bias, val=0.5)

    char_input = torch.tensor([[0.2, 0.4]], dtype=torch.float)

    h = torch.tensor([[0.2, 0.11, 0.15]], dtype=torch.float)
    c = torch.tensor([[0.5, 0.6, 0.7]], dtype=torch.float)

    word_c_input = [
        torch.tensor([[0.7, 0.5, 0.2]], dtype=torch.float),
        torch.tensor([[0.3, 0.4, 1.5]], dtype=torch.float)
    ]
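    # word_c_input is taken to be the cell states contributed by word-level paths
    # ending at the current character (a lattice-LSTM reading; an assumption here).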

    output_hc = cell(input_=char_input, c_input=word_c_input, hx=(h, c))

    expect_size = (1, hidden_size)

    ASSERT.assertEqual(expect_size, output_hc[0].size())
    ASSERT.assertEqual(expect_size, output_hc[1].size())

    expects = [[0.5728, 0.5523, 0.7130], [0.6873, 0.6506, 0.9345]]

    for expect, hc in zip(expects, output_hc):

        for e_i, hc_i in zip(expect, hc[0].tolist()):
            ASSERT.assertAlmostEqual(e_i, hc_i, places=4)
Example #3
def test_acc_metric():

    # argmax gives predicted labels [1, 1, 0, 1]
    logits = torch.tensor([[1., 2.], [3., 4.], [5., 4.], [3., 7.]], dtype=torch.float)
    prediction_labels = torch.argmax(logits, dim=-1)

    golden_labels = torch.tensor([0, 1, 1, 0], dtype=torch.long)

    acc_metric = AccMetric()

    expect = 1/4

    acc = acc_metric(prediction_labels=prediction_labels,
                     gold_labels=golden_labels,
                     mask=None)

    ASSERT.assertAlmostEqual(expect, acc[acc_metric.ACC])
    ASSERT.assertAlmostEqual(expect, acc_metric.metric[acc_metric.ACC])

    # argmax gives predicted labels [0, 1, 0, 1]
    logits = torch.tensor([[3., 2.], [4., 6.], [5., 4.], [3., 7.]], dtype=torch.float)
    prediction_labels = torch.argmax(logits, dim=-1)

    golden_labels = torch.tensor([0, 1, 1, 0], dtype=torch.long)

    acc = acc_metric(prediction_labels=prediction_labels, gold_labels=golden_labels, mask=None)

    expect = 2 / 4
    ASSERT.assertAlmostEqual(expect, acc[acc_metric.ACC])

    # the metric below aggregates the counts from both calls
    expect = (1+2)/(4+4)
    ASSERT.assertAlmostEqual(expect, acc_metric.metric[acc_metric.ACC])
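
The assertions above rely on AccMetric returning the per-batch accuracy from __call__ while its metric property reads counts accumulated across calls. A minimal sketch with that behavior (only the names used by the test are taken from it; the internals are assumptions):

import torch

class AccMetricSketch:
    ACC = "acc"

    def __init__(self):
        self._correct = 0
        self._total = 0

    def __call__(self, prediction_labels, gold_labels, mask=None):
        if mask is not None:
            keep = mask.to(dtype=torch.bool)
            prediction_labels = prediction_labels[keep]
            gold_labels = gold_labels[keep]
        correct = (prediction_labels == gold_labels).sum().item()
        total = gold_labels.numel()
        self._correct += correct
        self._total += total
        return {self.ACC: correct / total}  # accuracy of this batch only

    @property
    def metric(self):
        return {self.ACC: self._correct / self._total}  # accuracy over all batches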
Example #4
def test_span_f1_measure_with_mask():

    # [[O, B, I], [B, B, I], [B, I, I], [B, I, O]]
    batch_sequence_logits = torch.tensor([[[0.2, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
                                          [[0.8, 0.3, 0.4], [0.7, 0.2, 0.3], [0.2, 0.3, 0.1]],
                                          [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.1]],
                                          [[0.8, 0.3, 0.4], [0.1, 0.7, 0.3], [0.2, 0.3, 0.5]]],
                                         dtype=torch.float)

    batch_sequence_labels = [["O", "B-T", "I-T"],
                             ["B-T", "B-T", "I-T"],
                             ["B-T", "I-T", "I-T"],
                             ["B-T", "I-T", "O"]]
    sequence_label_indices = list()

    for sequence_label in batch_sequence_labels:
        sequence_label_indices.append([VOCAB.index(label) for label in sequence_label])

    sequence_label_indices = torch.tensor(sequence_label_indices, dtype=torch.long)

    gold = torch.tensor([
        [2, 0, 1],
        [0, 0, 1],
        [0, 1, 1],
        [0, 1, 2]
    ])

    f1 = SpanF1Metric(label_vocabulary=VOCAB)

    mask = torch.tensor([
        [1, 1, 0],
        [1, 1, 1],
        [1, 0, 0],
        [1, 1, 1]
    ], dtype=torch.long)

    f1(prediction_labels=sequence_label_indices, gold_labels=gold, mask=mask)

    metrics = f1.metric

    print(f"metrics: {json.dumps(metrics)}")

    expect = {f"{SpanF1Metric.PRECISION}-T": 1., f"{SpanF1Metric.RECALL}-T": 1., f"{SpanF1Metric.F1}-T": 1.,
              f"{SpanF1Metric.PRECISION_OVERALL}": 1., f"{SpanF1Metric.RECALL_OVERALL}": 1., f"{SpanF1Metric.F1_OVERALL}": 1.}

    for key, _ in expect.items():
        ASSERT.assertAlmostEqual(expect[key], metrics[key])
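
SpanF1Metric compares spans decoded from the BIO tag sequences rather than individual tags; here predictions equal the gold indices, so the spans match wherever the mask keeps them, giving 1.0 everywhere. A minimal BIO decoder illustrating the extraction involved (a sketch; the metric's own decoder may differ):

def decode_bio_spans(labels):
    # ["B-T", "I-T", "O"] -> [("T", 0, 2)] as (tag, start, end_exclusive)
    spans, start, tag = [], None, None
    for i, label in enumerate(labels + ["O"]):  # the sentinel flushes a trailing span
        if label == "O" or label.startswith("B-"):
            if tag is not None:
                spans.append((tag, start, i))
                tag = None
            if label.startswith("B-"):
                start, tag = i, label[2:]
        # an I- tag extends the current span; one without a preceding B- is ignored here
    return spans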
Example #5
def test_cnn_seq2vec():
    """
    Test CnnSeq2Vec.
    """

    encoder = CnnSeq2Vec(embedding_dim=2, num_filters=1, kernel_sizes=(1, 2))

    for name, parameter in encoder.named_parameters():
        parameter.data.fill_(1.)

    tokens = torch.FloatTensor([[[0.7, 0.8], [0.1, 1.5]]])
    vector = encoder(sequence=tokens, mask=None)
    vector = vector.view(-1).tolist()

    expect = torch.tensor([[0.1 + 1.5 + 1.,
                            0.7 + 0.8 + 0.1 + 1.5 + 1.]]).view(-1).tolist()
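    # (With every weight and bias set to 1, each convolution position yields its
    # window sum + 1, and max pooling keeps the largest position, hence these values.)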

    ASSERT.assertEqual(len(expect), len(vector))
    for i in range(len(vector)):
        ASSERT.assertAlmostEqual(expect[i], vector[i])
Example #6
def test_label_f1_metric_with_mask():
    """
    Test LabelF1Metric with a mask.
    """

    predictions = torch.tensor([0, 1, 2, 3])
    gold_labels = torch.tensor([0, 0, 0, 2])
    mask = torch.tensor([1, 1, 1, 0], dtype=torch.long)
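    # with the last position masked out, only the first three predictions and golds count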

    labels = [0, 1, 2, 3]
    f1_metric = LabelF1Metric(labels=labels, label_vocabulary=None)

    metrics = f1_metric(prediction_labels=predictions,
                        gold_labels=gold_labels,
                        mask=mask)

    logging.debug(json2str(metrics))

    ASSERT.assertEqual((len(labels) + 1) * 3, len(metrics))

    precision_0 = metrics[f"{LabelF1Metric.PRECISION}-0"]
    recall_0 = metrics[f"{LabelF1Metric.RECALL}-0"]
    f1_0 = metrics[f"{LabelF1Metric.F1}-0"]

    expect_precision_0 = 1. / 1.
    ASSERT.assertAlmostEqual(expect_precision_0, precision_0)
    expect_recall_0 = 1. / 3.
    ASSERT.assertAlmostEqual(expect_recall_0, recall_0)

    expect_f1_0 = 2. * expect_precision_0 * expect_recall_0 / (
        expect_precision_0 + expect_recall_0)
    ASSERT.assertAlmostEqual(expect_f1_0, f1_0)

    expect_precision_overall = 1. / 3.
    expect_recall_overall = 1. / 3.
    precision_overall = metrics[LabelF1Metric.PRECISION_OVERALL]
    recall_overall = metrics[LabelF1Metric.RECALL_OVERALL]

    ASSERT.assertAlmostEqual(expect_precision_overall, precision_overall)
    ASSERT.assertAlmostEqual(expect_recall_overall, recall_overall)
Example #7
def test_attention_seq2vec_no_mask(inputs):
    """
    Test AttentionSeq2Vec without a mask.
    """

    sequence, mask = inputs

    encoder = AttentionSeq2Vec(input_size=2,
                               query_hidden_size=3,
                               value_hidden_size=None)

    encoder.wk.weight = Parameter(FloatTensor([
        [0.1, 0.2],
        [0.3, 0.4],
        [0.5, 0.6]
    ]))
    encoder.wk.bias = Parameter(FloatTensor([0.2, 0.4, 0.6]))

    encoder.attention.weight = Parameter(FloatTensor(
        [
            [0.6, 0.2, 7]
        ]
    ))

    vec = encoder(sequence=sequence, mask=None)

    print(vec)

    ASSERT.assertEqual((2, 2), vec.size())

    expect = torch.tensor([[4.8455, 5.2867],
                           [5.7232, 3.6037]])

    vec1d = vec.view(-1).tolist()
    expect1d = expect.view(-1).tolist()

    for expect_data, vec_data in zip(expect1d, vec1d):
        ASSERT.assertAlmostEqual(expect_data, vec_data, delta=1e-4)
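
For reference, the forward pass these parameters imply: wk projects each timestep to a query_hidden_size key, the single-output attention layer scores it, and the softmax-weighted sum of the raw inputs is returned (since value_hidden_size=None). A sketch of that computation; the tanh between the two layers is a guess:

import torch

def attention_seq2vec_sketch(sequence, wk, attention):
    # sequence: (batch, seq_len, input_size) -> (batch, input_size)
    keys = torch.tanh(wk(sequence))           # (batch, seq_len, query_hidden_size)
    scores = attention(keys).squeeze(-1)      # (batch, seq_len)
    weights = torch.softmax(scores, dim=-1)   # attention distribution per sequence
    return torch.bmm(weights.unsqueeze(1), sequence).squeeze(1)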
Example #8
def test_word_lstm_cell_with_bias():
    """
    Test WordLSTMCell.
    """

    input_size = 2
    hidden_size = 3
    word_lstm_cell = WordLSTMCell(input_size=input_size,
                                  hidden_size=hidden_size,
                                  bias=True)
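
    # weight_ih is assumed to have shape (input_size, 3 * hidden_size): a
    # lattice-style word cell computes only input/forget/cell gates (no output
    # gate) and returns just the new cell state.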

    value = list()

    for i in range(input_size):
        value.append([
            j * 0.37
            for j in range(i * hidden_size * 3, (i + 1) * hidden_size * 3)
        ])

    with torch.no_grad():
        word_lstm_cell.weight_ih.copy_(torch.tensor(value, dtype=torch.float))
        torch.nn.init.constant_(word_lstm_cell.bias, val=1.0)

    word_input = torch.tensor([[0.2, 0.4]], dtype=torch.float)
    h = torch.tensor([[0.2, 0.11, 0.15]], dtype=torch.float)
    c = torch.tensor([[0.5, 0.6, 0.7]], dtype=torch.float)

    output_c = word_lstm_cell(input_=word_input, hx=(h, c))

    expect_size = (1, hidden_size)
    ASSERT.assertEqual(expect_size, output_c.size())

    expect_output_c = [1.4231, 1.5257, 1.6372]

    for e_i, i in zip(expect_output_c, output_c[0].tolist()):
        ASSERT.assertAlmostEqual(e_i, i, places=3)
Example #9
def test_synchronized_data():
    """
    Test from_synchronized_data and to_synchronized_data.
    """

    acc_metric = AccMetric()

    sync_data, op = acc_metric.to_synchronized_data()

    ASSERT.assertEqual((2,), sync_data.size())
    ASSERT.assertEqual(0, sync_data[0].item())
    ASSERT.assertEqual(0, sync_data[1].item())

    # 对应的label是 [1, 1, 0, 1]
    logits = torch.tensor([[1., 2.], [3., 4.], [5, 4.], [3., 7.]], dtype=torch.float)
    prediction_labels = torch.argmax(logits, dim=-1)

    golden_labels = torch.tensor([0, 1, 1, 0], dtype=torch.long)

    acc_metric(prediction_labels=prediction_labels, gold_labels=golden_labels, mask=None)

    # acc = 1/4

    sync_data, op = acc_metric.to_synchronized_data()
    ASSERT.assertListEqual([1, 4], sync_data.tolist())

    acc_metric.from_synchronized_data(sync_data=sync_data, reduce_op=op)
    acc = acc_metric.metric

    expect = 1/4
    ASSERT.assertAlmostEqual(expect, acc[AccMetric.ACC])

    new_sync_data, op = acc_metric.to_synchronized_data()

    ASSERT.assertListEqual(sync_data.tolist(), new_sync_data.tolist())
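
The to_synchronized_data/from_synchronized_data pair exists so the [correct, total] counts can be combined across workers before computing accuracy. In a real distributed run the round trip would presumably look like this (a sketch assuming torch.distributed is initialized and that op is a torch.distributed.ReduceOp):

import torch.distributed as dist

def synchronize_metric(metric):
    sync_data, op = metric.to_synchronized_data()
    dist.all_reduce(sync_data, op=op)  # e.g. sum the counts over all workers
    metric.from_synchronized_data(sync_data=sync_data, reduce_op=op)
    return metric.metric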
Example #10
def test_event_f1_metric(event_type_vocabulary):
    """
    Test EventF1MetricAdapter.
    """
    f1_metric = EventF1MetricAdapter(
        event_type_vocabulary=event_type_vocabulary)

    # label: [1, 0, 1, 1, 0, 0, 1, 1, 0]
    logits = torch.tensor([0.6, 0.2, 0.7, 0.8, 0.1, 0.2, 0.8, 0.9, 0.3],
                          dtype=torch.float)
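    # the "label" comment above presumably reflects thresholding at 0.5: logit > 0.5 -> 1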

    #                            [1, 0, 1, 1, 0, 0, 1, 1, 0]
    golden_labels = torch.tensor([1, 0, 0, 1, 1, 0, 0, 1, 1], dtype=torch.long)

    event_type = torch.tensor([
        event_type_vocabulary.index("A"),
        event_type_vocabulary.index("A"),
        event_type_vocabulary.index("A"),
        event_type_vocabulary.index("B"),
        event_type_vocabulary.index("B"),
        event_type_vocabulary.index("C"),
        event_type_vocabulary.index("C"),
        event_type_vocabulary.index(event_type_vocabulary.unk),
        event_type_vocabulary.index(event_type_vocabulary.unk)
    ], dtype=torch.long)

    model_outputs = EventModelOutputs(logits=logits, event_type=event_type)

    metric, target_metric = f1_metric(model_outputs=model_outputs,
                                      golden_labels=golden_labels)

    expect_precision_A_1 = 1 / 2
    expect_recall_A_1 = 1 / 1
    expect_f1_A_1 = 2 * expect_precision_A_1 * expect_recall_A_1 / (
        expect_precision_A_1 + expect_recall_A_1)

    ASSERT.assertAlmostEqual(expect_precision_A_1,
                             metric[f"{F1Metric.PRECISION}-A"])
    ASSERT.assertAlmostEqual(expect_recall_A_1, metric[f"{F1Metric.RECALL}-A"])
    ASSERT.assertAlmostEqual(expect_f1_A_1, metric[f"{F1Metric.F1}-A"])

    expect_precision_overall = 2 / 4
    expect_recall_overall = 2 / 3

    expect_f1_overall = 2 * expect_precision_overall * expect_recall_overall / (
        expect_precision_overall + expect_recall_overall)

    ASSERT.assertAlmostEqual(expect_precision_overall,
                             metric[F1Metric.PRECISION_OVERALL])
    ASSERT.assertAlmostEqual(expect_recall_overall,
                             metric[F1Metric.RECALL_OVERALL])
    ASSERT.assertAlmostEqual(expect_f1_overall, metric[F1Metric.F1_OVERALL])

    # add one more batch, since in practice the metric sees multiple batches
    # label: [1, 1, 0]
    logits = torch.tensor([0.6, 0.8, 0.2], dtype=torch.float)
    golden_labels = torch.tensor([1, 1, 1], dtype=torch.long)

    event_type = torch.tensor([
        event_type_vocabulary.index("A"),
        event_type_vocabulary.index("A"),
        event_type_vocabulary.index("A")
    ], dtype=torch.long)

    model_outputs = EventModelOutputs(logits=logits, event_type=event_type)

    metric, target_metric = f1_metric(model_outputs=model_outputs,
                                      golden_labels=golden_labels)

    expect_final_precision_A_1 = (1 + 2) / (2 + 2)
    expect_final_recall_A_1 = (1 + 2) / (1 + 3)
    expect_final_f1_A_1 = 2 * expect_final_precision_A_1 * expect_final_recall_A_1 / (
        expect_final_precision_A_1 + expect_final_recall_A_1)

    ASSERT.assertAlmostEqual(expect_final_precision_A_1,
                             f1_metric.metric[0][f"{F1Metric.PRECISION}-A"])
    ASSERT.assertAlmostEqual(expect_final_recall_A_1,
                             f1_metric.metric[0][f"{F1Metric.RECALL}-A"])
    ASSERT.assertAlmostEqual(expect_final_f1_A_1,
                             f1_metric.metric[0][f"{F1Metric.F1}-A"])

    expect_final_precision_overall = (2 + 2) / (4 + 2)
    expect_final_recall_overall = (2 + 2) / (3 + 3)
    expect_final_f1_overall = 2 * expect_final_precision_overall * expect_final_recall_overall / (
        expect_final_precision_overall + expect_final_recall_overall)
    ASSERT.assertAlmostEqual(expect_final_precision_overall,
                             f1_metric.metric[0][F1Metric.PRECISION_OVERALL])
    ASSERT.assertAlmostEqual(expect_final_recall_overall,
                             f1_metric.metric[0][F1Metric.RECALL_OVERALL])
    ASSERT.assertAlmostEqual(expect_final_f1_overall,
                             f1_metric.metric[0][F1Metric.F1_OVERALL])

    ASSERT.assertEqual(F1Metric.F1_OVERALL, f1_metric.metric[1].name)
    ASSERT.assertAlmostEqual(f1_metric.metric[1].value,
                             expect_final_f1_overall)
Example #11
def test_label_f1_metric():
    """
    Test LabelF1Metric.
    """

    predictions = torch.tensor([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
    gold_labels = torch.tensor([0, 1, 1, 2, 2, 3, 3, 4, 4, 1])

    labels = [0, 1, 2, 3, 4]
    f1_metric = LabelF1Metric(labels=labels, label_vocabulary=None)

    metrics = f1_metric(prediction_labels=predictions,
                        gold_labels=gold_labels,
                        mask=None)

    logging.debug(json2str(metrics))

    ASSERT.assertEqual((len(labels) + 1) * 3, len(metrics))

    precision_0 = metrics[f"{LabelF1Metric.PRECISION}-0"]
    recall_0 = metrics[f"{LabelF1Metric.RECALL}-0"]
    f1_0 = metrics[f"{LabelF1Metric.F1}-0"]

    expect_precision_0 = 1. / 2.
    ASSERT.assertAlmostEqual(expect_precision_0, precision_0)
    expect_recall_0 = 1. / 1.
    ASSERT.assertAlmostEqual(expect_recall_0, recall_0)

    expect_f1_0 = 2. * expect_precision_0 * expect_recall_0 / (
        expect_precision_0 + expect_recall_0)
    ASSERT.assertAlmostEqual(expect_f1_0, f1_0)

    expect_precision_overall = 5. / 10.
    expect_recall_overall = 5. / 10.
    precision_overall = metrics[LabelF1Metric.PRECISION_OVERALL]
    recall_overall = metrics[LabelF1Metric.RECALL_OVERALL]

    ASSERT.assertAlmostEqual(expect_precision_overall, precision_overall)
    ASSERT.assertAlmostEqual(expect_recall_overall, recall_overall)

    predictions = torch.tensor([0, 2])
    gold_labels = torch.tensor([0, 1])
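
    # this second call accumulates with the first; f1_metric.metric reads the combined counts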

    f1_metric(prediction_labels=predictions,
              gold_labels=gold_labels,
              mask=None)

    precision_0 = f1_metric.metric[f"{LabelF1Metric.PRECISION}-0"]
    recall_0 = f1_metric.metric[f"{LabelF1Metric.RECALL}-0"]
    f1_0 = f1_metric.metric[f"{LabelF1Metric.F1}-0"]

    expect_precision_0 = (1. + 1.) / (2. + 1.)
    ASSERT.assertAlmostEqual(expect_precision_0, precision_0)
    expect_recall_0 = (1. + 1.) / (1. + 1.)
    ASSERT.assertAlmostEqual(expect_recall_0, recall_0)
    expect_f1_0 = 2. * expect_precision_0 * expect_recall_0 / (
        expect_precision_0 + expect_recall_0)
    ASSERT.assertAlmostEqual(expect_f1_0, f1_0)