def init_data(self, use_cuda: bool):
    test_device = torch.device('cuda:0') if use_cuda else \
        torch.device('cpu:0')
    torch.set_grad_enabled(False)
    cfg = AlbertConfig()
    self.torch_embedding = AlbertEmbeddings(cfg)

    self.torch_embedding.eval()

    if use_cuda:
        self.torch_embedding.to(test_device)

    self.turbo_embedding = turbo_transformers.AlbertEmbeddings.from_torch(
        self.torch_embedding)

    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_length),
                              dtype=torch.long,
                              device=test_device)
    position_ids = torch.arange(seq_length,
                                dtype=torch.long,
                                device=input_ids.device)
    position_ids = position_ids.repeat(batch_size, 1)
    token_type_ids = torch.zeros_like(input_ids, dtype=torch.long)
    return input_ids, position_ids, token_type_ids

# Unit test: checks that the turbo_transformers AlbertEmbeddings matches the
# PyTorch reference implementation and reports the throughput (QPS) of both.
class TestAlbertEmbedding(unittest.TestCase):
    def init_data(self, use_cuda: bool):
        test_device = torch.device('cuda:0') if use_cuda else \
            torch.device('cpu:0')
        torch.set_grad_enabled(False)
        cfg = AlbertConfig(hidden_size=768,
                           num_attention_heads=12,
                           intermediate_size=3072)
        self.torch_embedding = AlbertEmbeddings(cfg)

        self.torch_embedding.eval()

        if use_cuda:
            self.torch_embedding.to(test_device)

        self.turbo_embedding = turbo_transformers.AlbertEmbeddings.from_torch(
            self.torch_embedding)

        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_length),
                                  dtype=torch.long,
                                  device=test_device)
        position_ids = torch.arange(seq_length,
                                    dtype=torch.long,
                                    device=input_ids.device)
        position_ids = position_ids.repeat(batch_size, 1)
        token_type_ids = torch.zeros_like(input_ids, dtype=torch.long)
        return input_ids, position_ids, token_type_ids

    def check_torch_and_turbo(self, use_cuda):
        input_ids, position_ids, token_type_ids = self.init_data(use_cuda)

        device = "GPU" if use_cuda else "CPU"
        num_iter = 100

        torch_model = lambda: self.torch_embedding(input_ids, token_type_ids,
                                                   position_ids)
        torch_result, torch_qps, torch_time = test_helper.run_model(
            torch_model, use_cuda, num_iter)
        print(f"AlbertEmbeddings \"({batch_size},{seq_length:03})\" ",
              f"{device} Torch QPS, {torch_qps}, time, {torch_time}")

        turbo_model = lambda: self.turbo_embedding(input_ids, position_ids,
                                                   token_type_ids)
        turbo_result, turbo_qps, turbo_time = test_helper.run_model(
            turbo_model, use_cuda, num_iter)
        print(f"AlbertEmbeddings \"({batch_size},{seq_length:03})\" ",
              f"{device} Turbo QPS, {turbo_qps}, time, {turbo_time}")

        # The two implementations must agree elementwise to within 1e-5.
        self.assertTrue(
            torch.max(torch.abs(torch_result - turbo_result)) < 1e-5)

    def test_embedding(self):
        # Always test on CPU; test on GPU only when both PyTorch and
        # turbo_transformers were built with CUDA support.
        self.check_torch_and_turbo(use_cuda=False)
        if torch.cuda.is_available() and \
                turbo_transformers.config.is_compiled_with_cuda():
            self.check_torch_and_turbo(use_cuda=True)

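# A minimal sketch of the module-level scaffolding the test above assumes;
# these lines would sit at the top of the test file, before the class.
# `batch_size`, `seq_length`, and `test_helper` are referenced but not defined
# in the snippet, so the values and import paths below are illustrative
# assumptions (the AlbertEmbeddings import path in particular depends on the
# installed transformers version).
import unittest

import torch
import turbo_transformers
from transformers import AlbertConfig
from transformers.models.albert.modeling_albert import AlbertEmbeddings

import test_helper  # project-local helper; run_model(fn, use_cuda, num_iter)
                    # is assumed to return (result, qps, elapsed_time)

batch_size = 2   # hypothetical benchmark shape
seq_length = 40  # hypothetical benchmark shape

if __name__ == '__main__':
    unittest.main()
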
# ESIM (Enhanced Sequential Inference Model) built on top of AlbertEmbeddings:
# encode both sequences, soft-align them, compose the enhanced features, and
# classify the pooled result.
def __init__(self, config):
    super(ESIM, self).__init__(config)

    self.num_labels = config.num_labels
    self.dropout = config.dropout

    self._word_embedding = AlbertEmbeddings(config)

    # Input encoding: bidirectional LSTM over the embedded tokens.
    self._encoding = Seq2SeqEncoder(nn.LSTM,
                                    config.embedding_size,
                                    config.hidden_size,
                                    bidirectional=True)

    if self.dropout:
        self._rnn_dropout = RNNDropout(p=config.dropout)

    # Local inference: soft alignment between the two input sequences.
    self._attention = SoftmaxAttention()

    # Project the concatenated enhanced features (4 x bidirectional hidden)
    # back down to hidden_size.
    self._projection = nn.Sequential(
        nn.Linear(4 * 2 * config.hidden_size, config.hidden_size),
        nn.ReLU())

    # Inference composition: second bidirectional LSTM.
    self._composition = Seq2SeqEncoder(nn.LSTM,
                                       config.hidden_size,
                                       config.hidden_size,
                                       bidirectional=True)

    # MLP classifier over the pooled composition features.
    self._classification = nn.Sequential(
        nn.Dropout(p=config.dropout),
        nn.Linear(2 * 4 * config.hidden_size, config.hidden_size),
        nn.ReLU(),
        nn.Dropout(p=config.dropout),
        nn.Linear(config.hidden_size, config.num_labels))

    # Initialize all weights and biases in the model.
    self.apply(_init_esim_weights)

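# A hedged construction sketch for the ESIM module above. The constructor only
# reads num_labels, dropout, embedding_size, and hidden_size from `config`
# (plus whatever AlbertEmbeddings needs), so reusing AlbertConfig here is an
# assumption for illustration; the project may define its own config class,
# and `dropout` is not a standard AlbertConfig field.
from transformers import AlbertConfig

config = AlbertConfig(embedding_size=128, hidden_size=256, num_labels=3)
config.dropout = 0.5  # assumed field, read by the __init__ above

model = ESIM(config)  # builds embedding, encoding, attention,
                      # composition, and classification stages
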
# ALBERT backbone: shared embeddings, transformer encoder, and a tanh pooler
# applied to the first token's hidden state.
def __init__(self, config):
    super(AlbertModel, self).__init__(config)

    self.config = config
    self.embeddings = AlbertEmbeddings(config)
    self.encoder = AlbertTransformer(config)
    self.pooler = nn.Linear(config.hidden_size, config.hidden_size)
    self.pooler_activation = nn.Tanh()

    self.init_weights()

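# A hedged usage sketch for the AlbertModel above, assuming the Hugging Face
# forward signature; the exact return type (plain tuple vs. ModelOutput)
# varies across transformers versions, but index access works for both.
import torch
from transformers import AlbertConfig, AlbertModel

config = AlbertConfig(hidden_size=768, num_attention_heads=12,
                      intermediate_size=3072)
model = AlbertModel(config)
model.eval()

input_ids = torch.randint(0, config.vocab_size, (2, 16), dtype=torch.long)
with torch.no_grad():
    outputs = model(input_ids)

sequence_output = outputs[0]  # (batch, seq_len, hidden_size) token states
pooled_output = outputs[1]    # (batch, hidden_size) tanh-pooled first token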