def from_torch(model: TorchRobertaModel,
               device: Optional[torch.device] = None):
    """Build a ``RobertaModel`` from an existing PyTorch RoBERTa model.

    Args:
        model: Source model; its ``embeddings``, ``encoder``, ``pooler`` and
            ``config`` attributes are read.
        device: Optional target device. The source model is moved to it only
            when its type contains ``'cuda'`` and CUDA is available.

    Returns:
        A ``RobertaModel`` assembled from the source embeddings and config,
        plus the encoder and pooler converted through
        ``BertEncoder.from_torch`` and ``BertPooler.from_torch``.
    """
    cuda_requested = device is not None and 'cuda' in device.type
    if cuda_requested and torch.cuda.is_available():
        # Move the source model first so the conversions below read its
        # sub-modules from the requested device.
        model.to(device)

    converted_encoder = BertEncoder.from_torch(model.encoder)
    converted_pooler = BertPooler.from_torch(model.pooler)
    return RobertaModel(model.embeddings, converted_encoder,
                        converted_pooler, model.config)
Пример #2
0
class TestRobertaModel(unittest.TestCase):
    """Check that the turbo RobertaModel matches the PyTorch RobertaModel."""

    def init_data(self, use_cuda) -> None:
        """Create the PyTorch model and its turbo conversion.

        Turns off gradient tracking, sets both libraries to 4 threads, and
        stores the device, config and both models on ``self``.
        """
        torch.set_grad_enabled(False)
        torch.set_num_threads(4)
        turbo_transformers.set_num_threads(4)

        self.test_device = torch.device('cuda:0' if use_cuda else 'cpu:0')

        self.cfg = RobertaConfig()
        self.torch_model = RobertaModel(self.cfg)
        self.torch_model.eval()

        if torch.cuda.is_available():
            self.torch_model.to(self.test_device)

        self.turbo_model = turbo_transformers.RobertaModel.from_torch(
            self.torch_model, self.test_device)

    def check_torch_and_turbo(self, use_cuda):
        """Run both models on the same random ids and compare the outputs.

        Each model is driven through ``test_helper.run_model`` for 20
        iterations, its QPS is printed, and the first element of each result
        is required to agree within ``atol=1e-3`` / ``rtol=1e-3``.
        """
        self.init_data(use_cuda)
        iterations = 20
        device_name = "GPU" if use_cuda else "CPU"
        token_ids = torch.randint(
            low=0,
            high=self.cfg.vocab_size - 1,
            size=(1, 10),
            dtype=torch.long,
            device=self.test_device,
        )

        def run_torch():
            return self.torch_model(token_ids)

        def run_turbo():
            return self.turbo_model(token_ids)

        torch_result, torch_qps, _ = test_helper.run_model(
            run_torch, use_cuda, iterations)
        print(f'RobertaModel PyTorch({device_name}) QPS {torch_qps}')

        # The turbo run is wrapped in the library's perf guard, as in the
        # PyTorch run above only the result and QPS are used afterwards.
        with turbo_transformers.pref_guard("roberta_perf"):
            turbo_result, turbo_qps, _ = test_helper.run_model(
                run_turbo, use_cuda, iterations)
        print(f'RobertaModel TurboTransformer({device_name}) QPS {turbo_qps}')

        expected = torch_result[0].cpu().numpy()
        actual = turbo_result[0].cpu().numpy()
        outputs_match = numpy.allclose(expected,
                                       actual,
                                       atol=1e-3,
                                       rtol=1e-3)
        self.assertTrue(outputs_match)

    def test_Roberta_model(self):
        """Run the comparison on GPU when usable, then always on CPU."""
        gpu_usable = (torch.cuda.is_available()
                      and turbo_transformers.config.is_compiled_with_cuda())
        if gpu_usable:
            self.check_torch_and_turbo(use_cuda=True)
        self.check_torch_and_turbo(use_cuda=False)
Пример #3
0
def test(use_cuda):
    """Compare one forward pass of the PyTorch and turbo RoBERTa models.

    Builds a default-config PyTorch ``RobertaModel``, converts it with
    ``turbo_transformers.RobertaModel.from_torch``, feeds both the same
    random ``(1, 10)`` batch of token ids, and asserts the outputs agree
    within ``atol=1e-3`` / ``rtol=1e-3``.

    Args:
        use_cuda: When true the tensors and models target ``cuda:0``,
            otherwise ``cpu:0``.
    """
    torch.set_grad_enabled(False)
    torch.set_num_threads(4)
    turbo_transformers.set_num_threads(4)

    device = torch.device('cuda:0' if use_cuda else 'cpu:0')

    config = RobertaConfig()
    reference_model = RobertaModel(config)
    reference_model.eval()
    if torch.cuda.is_available():
        reference_model.to(device)

    accelerated_model = turbo_transformers.RobertaModel.from_torch(
        reference_model, device)

    token_ids = torch.randint(
        low=0,
        high=config.vocab_size - 1,
        size=(1, 10),
        dtype=torch.long,
        device=device,
    )

    reference_output = reference_model(token_ids)
    # Only index 0 along the second axis of the first PyTorch output is
    # compared; the turbo output is used whole.
    # NOTE(review): presumably the first-token hidden state -- confirm.
    expected = reference_output[0][:, 0].cpu().numpy()

    accelerated_output = accelerated_model(token_ids)
    actual = accelerated_output[0].cpu().numpy()

    outputs_match = numpy.allclose(expected, actual, atol=1e-3, rtol=1e-3)
    assert outputs_match