# Imports needed by this test (transformers paths follow recent releases;
# batch_size and seq_length are module-level settings in the original harness).
import unittest

import torch
import turbo_transformers
from transformers.models.bert.modeling_bert import BertConfig, BertOutput

import test_helper


class TestBertOut(unittest.TestCase):
    def init_data(self, use_cuda) -> None:
        test_device = torch.device('cuda:0') if use_cuda else \
            torch.device('cpu:0')
        if not use_cuda:
            torch.set_num_threads(1)

        torch.set_grad_enabled(False)
        self.cfg = BertConfig()
        self.intermediate_size = self.cfg.intermediate_size  # 3072
        self.hidden_size = self.cfg.hidden_size  # 768
        self.torch_bertout = BertOutput(self.cfg)
        self.torch_bertout.eval()
        if use_cuda:
            self.torch_bertout.to(test_device)

        self.turbo_bertout = turbo_transformers.BertOutput.from_torch(
            self.torch_bertout)

        self.intermediate_output = torch.rand(
            size=(batch_size, seq_length, self.intermediate_size),
            dtype=torch.float32,
            device=test_device)
        self.attention_output = torch.rand(
            size=(batch_size, seq_length, self.hidden_size),
            dtype=torch.float32,
            device=test_device)

    def check_torch_and_turbo(self, use_cuda):
        self.init_data(use_cuda)
        num_iter = 2
        device = "GPU" if use_cuda else "CPU"

        torch_model = lambda: self.torch_bertout(self.intermediate_output,
                                                 self.attention_output)
        torch_result, torch_qps, torch_time = \
            test_helper.run_model(torch_model, use_cuda, num_iter)
        print(f'Bert Output Plain PyTorch({device}) QPS {torch_qps}')

        turbo_model = lambda: self.turbo_bertout(self.intermediate_output,
                                                 self.attention_output)
        turbo_result, turbo_qps, turbo_time = \
            test_helper.run_model(turbo_model, use_cuda, num_iter)
        print(f'Bert Output Plain TurboTransformer({device}) QPS {turbo_qps}')

        # The CUDA path tolerates a larger error because Tensor Cores run at
        # lower precision.
        tolerance = 1e-2 if use_cuda else 1e-4
        self.assertTrue(
            torch.max(torch.abs(torch_result - turbo_result)) < tolerance)

    def test_bertout(self):
        self.check_torch_and_turbo(use_cuda=False)
        if torch.cuda.is_available() and \
                turbo_transformers.config.is_compiled_with_cuda():
            self.check_torch_and_turbo(use_cuda=True)
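# For context, the torch_bertout module benchmarked above projects the
# intermediate activation back to hidden_size and applies dropout plus a
# residual LayerNorm. A minimal sketch, assuming the usual HuggingFace
# BertOutput definition (ReferenceBertOutput is an illustrative name, not part
# of the original test):
import torch
import torch.nn as nn


class ReferenceBertOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)      # 3072 -> 768
        hidden_states = self.dropout(hidden_states)
        # Residual connection with the attention output, then LayerNorm.
        return self.LayerNorm(hidden_states + input_tensor)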
def __init__(self, config):
    super(BertConnectionLayer, self).__init__()
    self.biattention = BertBiAttention(config)
    self.biOutput = BertBiOutput(config)

    v_config = BertConfig.from_dict(config.v_config)
    self.v_intermediate = BertIntermediate(v_config)
    self.v_output = BertOutput(v_config)

    t_config = BertConfig.from_dict(config.t_config)
    self.t_intermediate = BertIntermediate(t_config)
    self.t_output = BertOutput(t_config)
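# The connection layer above builds separate BertConfig objects for the visual
# and textual streams from nested dicts on the parent config, so each stream's
# BertIntermediate/BertOutput pair can use its own sizes. A sketch of what
# those nested configs might carry (field values are illustrative assumptions,
# not taken from the original code):
from transformers import BertConfig

v_config = BertConfig.from_dict(
    {"hidden_size": 1024, "intermediate_size": 1024, "num_attention_heads": 8})
t_config = BertConfig.from_dict(
    {"hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12})
print(v_config.intermediate_size, t_config.intermediate_size)  # 1024 3072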
def __init__(self, config):
    super().__init__()
    # The cross-attention layer
    self.visual_attention = BertCrossattLayer(config)

    # Self-attention layers
    self.lang_self_att = BertAttention(config)
    self.visn_self_att = BertAttention(config)

    # Intermediate and output layers (FFNs)
    self.lang_inter = BertIntermediate(config)
    self.lang_output = BertOutput(config)
    self.visn_inter = BertIntermediate(config)
    self.visn_output = BertOutput(config)
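# In the cross-modality layer above, the two FFN branches are applied
# symmetrically to the language and visual streams, with BertOutput handling
# the residual add and LayerNorm for each. A hedged sketch of that step
# (method name and signature are assumptions, not quoted from the source):
def output_fc(self, lang_input, visn_input):
    # Language-side feed-forward
    lang_inter_output = self.lang_inter(lang_input)
    lang_output = self.lang_output(lang_inter_output, lang_input)
    # Vision-side feed-forward, structurally identical
    visn_inter_output = self.visn_inter(visn_input)
    visn_output = self.visn_output(visn_inter_output, visn_input)
    return lang_output, visn_output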
def __init__(self, config):
    super(BertGraphLayer, self).__init__()
    self.attention = BertGraphAttention(config)
    self.is_decoder = config.is_decoder
    if self.is_decoder:
        self.crossattention = BertGraphAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def __init__(self, config):
    super(SpanAttentionLayer, self).__init__()
    # create modules
    self.attention = SpanAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
    # initialize weights
    self.init_weights()
def __init__(self, config):
    super().__init__()
    self.chunk_size_feed_forward = config.chunk_size_feed_forward
    self.seq_len_dim = 1
    self.attention = BertAttention(config)
    self.is_decoder = config.is_decoder
    self.add_cross_attention = config.add_cross_attention
    if self.add_cross_attention:
        assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
        self.crossattention = BertAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
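# Layers like the one above pair BertIntermediate and BertOutput into the
# feed-forward half of the block, usually chunked along the sequence dimension
# (seq_len_dim = 1) in chunks of chunk_size_feed_forward. A sketch of that
# composition, following the usual HuggingFace feed_forward_chunk pattern
# (a sketch, not quoted from this file):
def feed_forward_chunk(self, attention_output):
    # Expand to intermediate_size (GELU inside BertIntermediate), then project
    # back to hidden_size with dropout + residual LayerNorm inside BertOutput.
    intermediate_output = self.intermediate(attention_output)
    layer_output = self.output(intermediate_output, attention_output)
    return layer_output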
def __init__(self, config):
    super(BertLayerOracleSparse, self).__init__()
    logger.info(
        f"Set Oracle Sparse with key_c:{config.key_c} and query_c:{config.query_c}!"
    )
    self.attention = BertAttention(config)
    self.attention.self.output_attentions = True
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
    self.key_c = config.key_c
    self.query_c = config.query_c
    self.num_heads = config.num_attention_heads
def __init__(self, config):
    super(EntityAwareLayer, self).__init__()
    self.attention = EntityAwareAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def __init__(self, config):
    super(BertScanLayer, self).__init__()
    self.attention = BertAttention(config)
    self.scan_attention = BertScanAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def __init__(self, config):
    super().__init__()
    self.attention = BertAttentionJit(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def __init__(self, config):
    super(CaptionBertLayer, self).__init__(config)
    self.attention = CaptionBertAttention(config)
    self.intermediate = BertIntermediate(config)
    self.output = BertOutput(config)
def __init__(self, config):
    super(GramBertLayer, self).__init__()
    self.attention = GramBertAttention(
        config)  # attention + linear + dropout + residual connection + norm
    self.intermediate = BertIntermediate(config)  # linear
    self.output = BertOutput(config)  # linear + dropout + residual connection + norm
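# A quick way to sanity-check the attention -> intermediate -> output data
# flow that all of the layers above share, using the stock HuggingFace
# building blocks (import path assumes a recent transformers release; batch
# and sequence sizes are illustrative):
import torch
from transformers import BertConfig
from transformers.models.bert.modeling_bert import (BertAttention,
                                                    BertIntermediate,
                                                    BertOutput)

config = BertConfig()                                   # hidden 768, intermediate 3072
attention = BertAttention(config)
intermediate = BertIntermediate(config)
output = BertOutput(config)

hidden_states = torch.rand(2, 16, config.hidden_size)   # (batch, seq_len, hidden)
with torch.no_grad():
    attn_out = attention(hidden_states)[0]              # self-attention + residual norm
    inter_out = intermediate(attn_out)                  # 768 -> 3072 with GELU
    layer_out = output(inter_out, attn_out)             # 3072 -> 768, residual + norm
print(layer_out.shape)                                  # torch.Size([2, 16, 768])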