Example #1
def test_bert_pretrained_batch_list():
    # (batch_size, seq_len)
    batch_size, seq_len = 2, 4
    model_path = config.get("pretrained").get("bert")

    batch = [[1, 2, 3, 4], [0, 1, 2, 3]]
    pretrain = Pretrained("bert", model_path)
    outputs = pretrain.node.call_batch(inputs=batch)

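    # Expect two outputs: the per-token sequence output and the pooled output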
    assert len(outputs) == 2
    assert outputs[0].shape == torch.Size(
        [batch_size, seq_len, pretrain.node.model.config.hidden_size])
    assert outputs[1].shape == torch.Size(
        [batch_size, pretrain.node.model.config.hidden_size])
Example #2
def test_bert_pretrained_return_all_attentions():
    batch_size, seq_len = 8, 10
    model_path = config.get("pretrained").get("bert")

    input_ids = torch.randint(100, 1000, (batch_size, seq_len))
    pretrain = Pretrained("bert", model_path, output_attentions=True)
    outputs = pretrain.node.call_batch(input_ids)

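    # output_attentions=True adds a third output: one attention map per layer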
    assert len(outputs) == 3
    assert len(outputs[2]) == pretrain.node.model.config.num_hidden_layers
    assert outputs[2][0].shape == torch.Size([
        batch_size, pretrain.node.model.config.num_attention_heads, seq_len,
        seq_len
    ])
Example #3
def test_bert_pretrained_with_pipe():
    batch_size, seq_len = 8, 10
    model_path = config.get("pretrained").get("bert")

    input_ids = torch.randint(100, 1000, (batch_size, seq_len)).tolist()

    pipe = (PretrainedProcessor(name="pretrained") >> Pretrained(
        "bert", model_path, True))
    outputs = pipe.run(input_ids)

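    # Each pipeline result should be a (sequence output, pooled output) pair with hidden size 768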
    for output in outputs:
        assert len(output) == 2
        assert output[0].shape == torch.Size([batch_size, seq_len, 768])
        assert output[1].shape == torch.Size([batch_size, 768])
Example #4
def test_bert_pretrained_return_all_hidden_states():
    batch_size, seq_len = 8, 10
    model_path = config.get("pretrained").get("bert")

    input_ids = torch.randint(100, 1000, (batch_size, seq_len))
    pretrain = Pretrained("bert",
                          model_path,
                          is_training=True,
                          output_hidden_states=True)
    outputs = pretrain.node.call_batch(input_ids)

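    # output_hidden_states=True adds a third output: the embedding output plus one hidden state per layer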
    assert len(outputs) == 3
    assert len(outputs[2]) == pretrain.node.model.config.num_hidden_layers + 1
    assert outputs[2][0].shape == torch.Size(
        [batch_size, seq_len, pretrain.node.model.config.hidden_size])
Example #5
def test_bert_pretrained_batch():
    # (batch_size, seq_len)
    batch_size, seq_len = 8, 10
    model_path = config.get("pretrained").get("bert")

    input_ids = torch.randint(100, 1000, (batch_size, seq_len))
    pretrain = Pretrained("bert", model_path)
    outputs = pretrain.node.call_batch(inputs=input_ids)

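    # Inference mode: standard output shapes and no gradient tracking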
    assert len(outputs) == 2
    assert outputs[0].shape == torch.Size(
        [batch_size, seq_len, pretrain.node.model.config.hidden_size])
    assert outputs[1].shape == torch.Size(
        [batch_size, pretrain.node.model.config.hidden_size])
    assert not outputs[0].requires_grad
    assert not outputs[1].requires_grad
Example #6
def test_bert_pretrained_training():
    # (batch_size, seq_len)
    batch_size, seq_len = 8, 10
    model_path = config.get("pretrained").get("bert")

    input_ids = torch.randint(100, 1000, (batch_size, seq_len))
    pretrain = Pretrained("bert", model_path, is_training=True)
    outputs = pretrain(inputs=[input_ids])

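    # Training mode: outputs keep requires_grad so gradients can flow through BERT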
    for output in outputs:
        assert len(output) == 2
        assert output[0].shape == torch.Size(
            [batch_size, seq_len, pretrain.node.model.config.hidden_size])
        assert output[1].shape == torch.Size(
            [batch_size, pretrain.node.model.config.hidden_size])
        assert output[0].requires_grad
        assert output[1].requires_grad
Example #7
def test_bert_pretrained():
    # (batch_size, seq_len)
    batch_size, seq_len = 8, 10
    model_path = config.get("pretrained").get("bert")

    input_ids = torch.randint(100, 1000, (batch_size, seq_len)).tolist()

    pretrained_processor = PretrainedProcessor()
    pretrained_model = Pretrained("bert", model_path, True)

    outputs = pretrained_model(pretrained_processor(input_ids))

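    # Processor output feeds straight into the model; check the sequence and pooled output shapes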
    for output in outputs:
        assert len(output) == 2
        assert output[0].shape == torch.Size([
            batch_size, seq_len, pretrained_model.node.model.config.hidden_size
        ])
        assert output[1].shape == torch.Size(
            [batch_size, pretrained_model.node.model.config.hidden_size])