def test_shape_on_random_data(self):
    """Check the shape of the first four model outputs on random token ids."""
    set_seed(42)

    bs, src_len, tgt_len = 3, 5, 7
    # The pointer range is deliberately larger than the actual source length.
    max_pointer = src_len + 3

    # Tiny single-layer BERT settings shared by the encoder and the decoder.
    tiny = dict(
        hidden_size=11,
        intermediate_size=44,
        num_hidden_layers=1,
        num_attention_heads=1,
    )

    encoder_config = transformers.BertConfig(vocab_size=17, **tiny)
    encoder = transformers.BertModel(encoder_config)

    # decoder accepts vocabulary of schema vocab + pointer embeddings
    decoder_config = transformers.BertConfig(
        vocab_size=23, is_decoder=True, **tiny)
    decoder = transformers.BertModel(decoder_config)

    # logits are projected into schema vocab and combined with pointer scores
    model = EncoderDecoderWPointerModel(
        encoder=encoder, decoder=decoder, max_src_len=max_pointer)

    x_enc = torch.randint(0, encoder_config.vocab_size, size=(bs, src_len))
    x_dec = torch.randint(0, decoder_config.vocab_size, size=(bs, tgt_len))

    out = model(input_ids=x_enc, decoder_input_ids=x_dec)

    # different encoders return different number of outputs
    # e.g. BERT returns two, but DistillBERT only one
    self.assertGreaterEqual(len(out), 4)

    schema_vocab = decoder_config.vocab_size - max_pointer
    expected_shapes = (
        # out[0]: combined logits over schema vocab + source positions
        (bs, tgt_len, schema_vocab + src_len),
        # out[1]: decoder hidden states
        (bs, tgt_len, decoder_config.hidden_size),
        # out[2]: one decoder-hidden-sized vector per example
        (bs, decoder_config.hidden_size),
        # out[3]: encoder hidden states
        (bs, src_len, encoder_config.hidden_size),
    )
    for position, expected_shape in enumerate(expected_shapes):
        self.assertEqual(out[position].shape, expected_shape)
def __init__(self, code_token_counter, query_token_counter):
    """Create one randomly initialised BERT model per token vocabulary.

    Args:
        code_token_counter: Token counter for the code vocabulary; its
            length is used as the code model's vocabulary size.
        query_token_counter: Token counter for the query vocabulary; its
            length is used as the query model's vocabulary size.

    Both counters must map to an index table (via ``get_counter_map``)
    that contains a ``"[PAD]"`` entry, otherwise a lookup error is raised.
    """
    # Build each index map exactly once and reuse it for the pad id.
    # NOTE(review): assumes get_counter_map has no side effects that
    # required the previous extra, discarded call - confirm.
    code_index = get_counter_map(code_token_counter)
    self.code_token_counter = code_token_counter
    self.code_config = transformers.BertConfig(
        vocab_size=len(code_token_counter),
        pad_token_id=code_index["[PAD]"],
    )
    self.code_model = transformers.BertModel(self.code_config)

    query_index = get_counter_map(query_token_counter)
    self.query_token_counter = query_token_counter
    self.query_config = transformers.BertConfig(
        vocab_size=len(query_token_counter),
        pad_token_id=query_index["[PAD]"],
    )
    self.query_model = transformers.BertModel(self.query_config)
def test_loss_computation(self):
    """Check that passing labels yields a positive scalar float32 loss."""
    torch.manual_seed(42)

    src_vocab_size = 17
    tgt_vocab_size = 23
    max_position = 7

    # Tiny single-layer BERT settings shared by the encoder and the decoder.
    tiny = dict(
        hidden_size=11,
        intermediate_size=44,
        num_hidden_layers=1,
        num_attention_heads=1,
    )

    encoder = transformers.BertModel(
        transformers.BertConfig(vocab_size=src_vocab_size, **tiny))

    # The decoder vocabulary holds the target vocab plus one id per pointer.
    decoder = transformers.BertModel(
        transformers.BertConfig(
            vocab_size=tgt_vocab_size + max_position,
            is_decoder=True,
            **tiny,
        ))

    model = EncoderDecoderWPointerModel(
        encoder=encoder, decoder=decoder, max_src_len=max_position)

    # similar to real data
    src_seq = torch.LongTensor([
        [1, 6, 12, 15, 2, 0, 0],
        [1, 6, 12, 15, 5, 3, 2],
    ])
    tgt_seq = torch.LongTensor([
        [8, 6, 4, 10, 11, 8, 5, 1, 12, 7, 7, 0, 0],
        [8, 6, 4, 10, 11, 8, 5, 1, 12, 13, 14, 7, 7],
    ])
    mask = torch.FloatTensor([
        [0, 1, 1, 1, 0, 0, 0],
        [0, 1, 1, 1, 1, 1, 0],
    ])

    outputs = model(
        input_ids=src_seq,
        decoder_input_ids=tgt_seq,
        pointer_mask=mask,
        labels=tgt_seq,
    )
    # With labels supplied, the first output is the loss.
    loss = outputs[0]

    self.assertEqual(loss.shape, torch.Size([]))
    self.assertEqual(loss.dtype, torch.float32)
    self.assertGreater(loss, 0)
def generate_onnx_model(model_name: str, filename: str, seq_len: int,
                        batch_size: int, backend: str):
    """Export a randomly initialised transformer model to an ONNX file.

    Args:
        model_name: One of ``"bert"``, ``"albert"`` or ``"roberta"``.
        filename: Path of the ONNX file to write.
        seq_len: Sequence length of the dummy input used for the export.
        batch_size: Batch size of the dummy input used for the export.
        backend: ``"GPU"`` places the model and input on ``cuda:0``; any
            other value uses ``cpu:0``.

    Returns:
        The vocabulary size taken from the model's config.

    Raises:
        ValueError: If ``model_name`` is not supported.

    Note:
        Gradient tracking is disabled globally via
        ``torch.set_grad_enabled(False)``.
    """
    # Validate before the heavy imports. Raising a bare string (as done
    # previously) is itself a TypeError in Python 3.
    if model_name not in ("bert", "albert", "roberta"):
        raise ValueError(f"benchmark does not support {model_name}")

    import transformers
    import torch

    test_device = torch.device('cuda:0') if backend == "GPU" else torch.device(
        'cpu:0')
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)

    model.eval()
    model.to(test_device)

    cfg = model.config
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)
    with open(filename, 'wb') as outf:
        torch.onnx.export(model=model, args=(input_ids, ), f=outf)
        outf.flush()
    return cfg.vocab_size
def __init__(self, hp: Optional[ModelParams] = None, max_seq_len=1024):
    """Build an embeddings-only BERT model (zero transformer layers).

    Args:
        hp: Model hyper-parameters. ``None`` (the default) creates a fresh
            ``ModelParams()`` per instance instead of sharing one object
            evaluated at function-definition time.
        max_seq_len: Size of the position-embedding table.
    """
    super().__init__()
    self.hp = ModelParams() if hp is None else hp

    config = transformers.BertConfig(
        hidden_size=self.hp.dim,
        num_hidden_layers=0,
        num_attention_heads=1,
        intermediate_size=0,
        max_position_embeddings=max_seq_len,
        output_attentions=False,
        output_hidden_states=False,
        return_dict=True,
    )
    self.e = transformers.BertModel(config, add_pooling_layer=False)

    # param names
    #   embeddings.word_embeddings.weight
    #   embeddings.position_embeddings.weight
    #   embeddings.token_type_embeddings.weight
    #   embeddings.LayerNorm.weight
    #   embeddings.LayerNorm.bias
    for name, param in self.e.named_parameters():
        # Position embeddings have their own trainability switch.
        if 'position_embeddings' in name:
            param.requires_grad = self.hp.position_embedding_requires_grad
        else:
            param.requires_grad = self.hp.requires_grad
def get_torch_model(
    model_name: str,
    input_shape: Tuple[int, ...],
    output_shape: Tuple[int, int],  # pylint: disable=unused-argument
    dtype: str = "float32",
) -> Tuple[IRModule, Dict[str, NDArray]]:
    """Build a PyTorch model, trace it and convert it to a Relay module.

    Parameters
    ----------
    model_name : str
        The name of the model to load; looked up in ``MODEL_TYPES``.
    input_shape: Tuple[int, ...]
        Shape of the example input used for tracing.
    output_shape: Tuple[int, int]
        Tuple for output shape (unused).
    dtype: str
        Tensor data type; only "float32" is accepted.

    Returns
    -------
    mod, params
        The result of ``relay.frontend.from_pytorch`` on the traced model.

    Raises
    ------
    ValueError
        If the model's type is neither text nor image classification.
    """
    assert dtype == "float32"

    import torch  # type: ignore # pylint: disable=import-error,import-outside-toplevel
    from torchvision import models  # type: ignore # pylint: disable=import-error,import-outside-toplevel
    import transformers  # type: ignore # pylint: disable=import-error,import-outside-toplevel
    import os  # type: ignore # pylint: disable=import-error,import-outside-toplevel

    model_type = MODEL_TYPES[model_name]

    if model_type == MODEL_TYPE.TEXT_CLASSIFICATION:
        os.environ["TOKENIZERS_PARALLELISM"] = "false"
        bert_config = transformers.BertConfig(
            num_hidden_layers=12,
            hidden_size=768,
            intermediate_size=3072,
            num_attention_heads=12,
            return_dict=False,
        )
        model = transformers.BertModel(bert_config)
        model.eval()
        # Random token ids in [0, 10000) serve as the tracing input.
        input_data = torch.randint(10000, input_shape)
        shape_list = [("input_ids", input_shape)]
        scripted_model = torch.jit.trace(model, [input_data], strict=False)
    elif model_type == MODEL_TYPE.IMAGE_CLASSIFICATION:
        # Load model from torchvision
        model = getattr(models, model_name)()
        # Setup input
        input_data = torch.randn(input_shape).type(torch.float32)
        shape_list = [("input0", input_shape)]
        # Trace in eval mode and keep the traced module in eval mode too.
        model.eval()
        scripted_model = torch.jit.trace(model, input_data)
        scripted_model.eval()
    else:
        raise ValueError("Unsupported model in Torch model zoo.")

    # Convert torch model to relay module
    mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
    return mod, params
def __init__(self,
             vocab_size,
             hidden_size,
             dropout,
             n_layers=1,
             vocab_file='./data/vocab.txt'):
    """Set up a randomly initialised BERT encoder with its tokenizer.

    Args:
        vocab_size: Vocabulary size for the embedding table and BERT.
        hidden_size: Embedding / hidden dimension.
        dropout: Dropout probability, used for the standalone dropout
            layer and for both BERT dropout settings.
        n_layers: Number of BERT hidden layers.
        vocab_file: Vocabulary file handed to ``BertTokenizer``.
    """
    super(UntrainedEncoderBERT, self).__init__()
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size
    self.dropout = dropout
    self.dropout_layer = nn.Dropout(dropout)

    self.embedding = nn.Embedding(vocab_size,
                                  hidden_size,
                                  padding_idx=PAD_token)
    self.embedding.weight.data.normal_(0, 0.1)

    self.config = transformers.BertConfig(
        vocab_size=self.vocab_size,
        hidden_size=self.hidden_size,
        num_hidden_layers=n_layers,
        hidden_dropout_prob=dropout,
        # The BertConfig field is `attention_probs_dropout_prob`; the
        # shorter `attention_probs_dropout` is not a BertConfig setting,
        # which left attention dropout at its library default.
        attention_probs_dropout_prob=dropout,
        num_attention_heads=16,
        output_hidden_states=True,
        max_position_embeddings=1024)
    self.tokenizer = transformers.BertTokenizer(vocab_file,
                                                pad_token='PAD',
                                                unk_token='UNK',
                                                sep_token='EOS')
    self.BERT = transformers.BertModel(self.config)
    self.training = True
def __init__(self, L=30, model_state=None):
    """Create the key encoder and classifier and snapshot the base weights.

    Args:
        L: Number of local adaptation steps.
        model_state: Optional dict with ``'classifier'`` and
            ``'key_encoder'`` state dicts. When ``None``, both models are
            loaded from the pretrained ``bert-base-uncased`` checkpoint.
    """
    super(MbPA, self).__init__()

    if model_state is None:
        # Key network to find key representation of content
        self.key_encoder = transformers.BertModel.from_pretrained(
            'bert-base-uncased')
        # Bert model for text classification
        self.classifier = transformers.BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', num_labels=33)
    else:
        cls_config = transformers.BertConfig.from_pretrained(
            'bert-base-uncased', num_labels=33)
        self.classifier = transformers.BertForSequenceClassification(
            cls_config)
        self.classifier.load_state_dict(model_state['classifier'])

        key_config = transformers.BertConfig.from_pretrained(
            'bert-base-uncased')
        self.key_encoder = transformers.BertModel(key_config)
        self.key_encoder.load_state_dict(model_state['key_encoder'])

    # load base model weights
    # we need to detach since parameters() method returns reference to the
    # original parameters. parameters() yields an iterator, so each tensor
    # is cloned individually (an iterator has no .clone()).
    device = "cuda" if torch.cuda.is_available() else "cpu"
    self.base_weights = [
        param.clone().detach().to(device)
        for param in self.classifier.parameters()
    ]

    # local adaptation learning rate - 1e-3 or 5e-3
    self.loc_adapt_lr = 1e-3
    # Number of local adaptation steps
    self.L = L
def benchmark_torch(model_name: str, seq_len: int, batch_size: int, n: int,
                    num_threads: int):
    """Benchmark a randomly initialised PyTorch model on random token ids.

    Args:
        model_name: One of ``"bert"``, ``"albert"`` or ``"roberta"``.
        seq_len: Sequence length of the random input.
        batch_size: Batch size of the random input.
        n: Forwarded to ``benchmark_helper.run_model``.
        num_threads: Number of torch intra-op threads.

    Raises:
        ValueError: If ``model_name`` is not supported.

    Note:
        Changes global torch state: thread count and gradient tracking.
    """
    # Validate before the heavy imports. Raising a bare string (as done
    # previously) is itself a TypeError in Python 3.
    if model_name not in ("bert", "albert", "roberta"):
        raise ValueError(f"benchmark does not support {model_name}")

    import torch
    import transformers
    import benchmark_helper

    torch.set_num_threads(num_threads)
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)

    model.eval()
    cfg = model.config

    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long)
    benchmark_helper.run_model(lambda: model(input_ids), False, n, batch_size,
                               seq_len, "torch", num_threads)
def generate_onnx_model(model_name: str,
                        use_gpu: bool,
                        filename: str,
                        seq_len: int,
                        batch_size: int,
                        backend: str,
                        use_dynamic_axes: bool = False):
    """Export a transformer model to an ONNX file.

    Args:
        model_name: One of ``"bert"``, ``"albert"`` or ``"roberta"``.
        use_gpu: Together with ``backend == "GPU"`` selects ``cuda:0``.
        filename: Path of the ONNX file to write.
        seq_len: Sequence length of the dummy input used for the export.
        batch_size: Batch size of the dummy input used for the export.
        backend: ``"GPU"`` (with ``use_gpu``) exports on ``cuda:0``;
            otherwise ``cpu:0`` is used.
        use_dynamic_axes: Export with dynamic axes 0 and 1 on the input
            and output.

    Returns:
        Tuple ``(vocab_size, cfg)`` taken from the model's config.

    Raises:
        ValueError: If ``model_name`` is not supported.

    Note:
        Gradient tracking is disabled globally via
        ``torch.set_grad_enabled(False)``.
    """
    # Validate before the heavy imports. Raising a bare string (as done
    # previously) is itself a TypeError in Python 3.
    if model_name not in ("bert", "albert", "roberta"):
        raise ValueError(f"benchmark does not support {model_name}")

    import transformers
    import torch

    test_device = torch.device(
        'cuda:0') if backend == "GPU" and use_gpu else torch.device('cpu:0')
    torch.set_grad_enabled(False)

    if model_name == "bert":
        # use a real model to check the correctness
        # (`checkonnxrest` is a flag defined outside this function)
        if checkonnxrest:
            model = transformers.BertModel.from_pretrained("bert-base-uncased")
        else:
            cfg = transformers.BertConfig()
            model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)

    model.eval()
    model.to(test_device)

    cfg = model.config
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)

    with open(filename, 'wb') as outf:
        if not use_dynamic_axes:
            torch.onnx.export(model=model, args=(input_ids, ), f=outf)
        else:
            torch.onnx.export(model=model,
                              args=(input_ids, ),
                              f=outf,
                              input_names=['input'],
                              output_names=['output'],
                              dynamic_axes={
                                  'input': [0, 1],
                                  'output': [0, 1]
                              })
        # If not intended to make onnxruntime support variable batch size
        # and sequence length, you can unset the parameter `dynamic_axes`.
        # For some model, you have to try `opset_version=12`
        outf.flush()
    return cfg.vocab_size, cfg
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int, enable_random: bool,
                                 max_seq_len: int, min_seq_len: int,
                                 num_threads: int, use_gpu: bool,
                                 enable_mem_opt: bool):
    """Benchmark a turbo_transformers model converted from a torch model.

    Args:
        model_name: ``"bert"``, ``"albert"``, ``"roberta"`` or
            ``"distilbert"``.
        seq_len: Sequence length for the fixed-shape benchmark.
        batch_size: Batch size for the fixed-shape benchmark.
        n: Forwarded to the benchmark helper.
        enable_random: Use ``benchmark_helper.run_variable_model`` with
            ``max_seq_len`` / ``min_seq_len`` instead of one fixed input.
        max_seq_len: Upper bound passed to ``run_variable_model``.
        min_seq_len: Lower bound passed to ``run_variable_model``.
        num_threads: Thread count for turbo_transformers.
        use_gpu: Run on ``cuda:0`` instead of ``cpu:0``.
        enable_mem_opt: With ``enable_random``, switch the allocator schema
            to ``"model-aware"`` for the run and back to ``"naive"`` after.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    # Validate before the heavy imports. Raising a bare string (as done
    # previously) is itself a TypeError in Python 3.
    if model_name not in ("bert", "albert", "roberta", "distilbert"):
        raise ValueError(f"benchmark does not support {model_name}")

    import torch
    import transformers
    import turbo_transformers
    import benchmark_helper

    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    cfg = None
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model, backend="turbo")
    elif model_name == "albert":
        cfg = transformers.AlbertConfig(hidden_size=768,
                                        num_attention_heads=12,
                                        intermediate_size=3072)
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)
    elif model_name == "distilbert":
        cfg = transformers.DistilBertConfig()
        model = transformers.DistilBertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.DistilBertModel.from_torch(model)

    turbo_transformers.set_num_threads(num_threads)

    if enable_random:
        if enable_mem_opt:
            turbo_transformers.reset_allocator_schema("model-aware")
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "turbo", num_threads,
                                            cfg, enable_mem_opt, model_name)
        if enable_mem_opt:
            turbo_transformers.reset_allocator_schema("naive")
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "turbo", num_threads,
                                   enable_mem_opt, model_name)
def test_shape_on_real_data_batched(self):
    """Check the combined-logits shape for a padded batch of two examples."""
    set_seed(42)

    src_vocab_size = 17
    tgt_vocab_size = 23
    max_position = 7

    # Tiny single-layer BERT settings shared by the encoder and the decoder.
    tiny = dict(
        hidden_size=11,
        intermediate_size=44,
        num_hidden_layers=1,
        num_attention_heads=1,
    )

    encoder = transformers.BertModel(
        transformers.BertConfig(vocab_size=src_vocab_size, **tiny))

    # The decoder vocabulary holds the target vocab plus one id per pointer.
    decoder = transformers.BertModel(
        transformers.BertConfig(
            vocab_size=tgt_vocab_size + max_position,
            is_decoder=True,
            **tiny,
        ))

    model = EncoderDecoderWPointerModel(
        encoder=encoder, decoder=decoder, max_src_len=max_position)

    # similar to real data
    src_seq = torch.LongTensor([
        [1, 6, 12, 15, 2, 0, 0],
        [1, 6, 12, 15, 5, 3, 2],
    ])
    tgt_seq = torch.LongTensor([
        [8, 6, 4, 10, 11, 8, 5, 1, 12, 7, 7, 0, 0],
        [8, 6, 4, 10, 11, 8, 5, 1, 12, 13, 14, 7, 7],
    ])
    mask = torch.FloatTensor([
        [0, 1, 1, 1, 0, 0, 0],
        [0, 1, 1, 1, 1, 1, 0],
    ])

    outputs = model(
        input_ids=src_seq, decoder_input_ids=tgt_seq, pointer_mask=mask)
    combined_logits = outputs[0]

    batch_size, src_len = src_seq.shape
    tgt_len = tgt_seq.shape[1]
    self.assertEqual(
        combined_logits.shape,
        (batch_size, tgt_len, tgt_vocab_size + src_len),
    )
def __init__(self, config):
    """Assemble BERT, dropout and a single-output linear head from config."""
    super().__init__(config)
    self.num_labels = config.num_labels

    self.bert = ptt.BertModel(config)
    self.dropout = nn.Dropout(p=config.hidden_dropout_prob)
    # One output unit regardless of config.num_labels.
    self.classifier = nn.Linear(in_features=config.hidden_size,
                                out_features=1)

    self.apply(self.init_weights)
def test_shape_on_real_data(self):
    """Check the combined-logits shape for a single realistic example."""
    set_seed(42)

    src_vocab_size = 17
    tgt_vocab_size = 23
    max_position = 5

    # Tiny single-layer BERT settings shared by the encoder and the decoder.
    tiny = dict(
        hidden_size=11,
        intermediate_size=44,
        num_hidden_layers=1,
        num_attention_heads=1,
    )

    encoder = transformers.BertModel(
        transformers.BertConfig(vocab_size=src_vocab_size, **tiny))

    # The decoder vocabulary holds the target vocab plus one id per pointer.
    decoder = transformers.BertModel(
        transformers.BertConfig(
            vocab_size=tgt_vocab_size + max_position,
            is_decoder=True,
            **tiny,
        ))

    model = EncoderDecoderWPointerModel(
        encoder=encoder, decoder=decoder, max_src_len=max_position)

    # similar to real data
    # e.g. '[CLS] Directions to Lowell [SEP]'
    src_seq = torch.LongTensor([[1, 6, 12, 15, 2]])
    # e.g. '[IN:GET_DIRECTIONS Directions to [SL:DESTINATION Lowell]]'
    tgt_seq = torch.LongTensor([[8, 6, 4, 10, 11, 8, 5, 1, 12, 7, 7]])
    mask = torch.FloatTensor([[0, 1, 1, 1, 0]])

    outputs = model(
        input_ids=src_seq, decoder_input_ids=tgt_seq, pointer_mask=mask)
    combined_logits = outputs[0]

    batch_size, src_len = src_seq.shape
    tgt_len = tgt_seq.shape[1]
    self.assertEqual(
        combined_logits.shape,
        (batch_size, tgt_len, tgt_vocab_size + src_len),
    )
def test_smart_batch(use_cuda: bool):
    """Compare serial torch BERT inference with turbo smart-batch inference.

    Asserts that the largest absolute difference between the two results is
    below 1e-2, then prints the wall time of ten runs of each path.
    """
    test_device = torch.device('cuda:0' if use_cuda else 'cpu:0')

    # Dropout is zeroed in the config used to build the reference model.
    cfg = transformers.BertConfig(attention_probs_dropout_prob=0.0,
                                  hidden_dropout_prob=0.0)
    torch_model = transformers.BertModel(cfg)
    # model_id = "bert-base-uncased"
    # torch_model = transformers.BertModel.from_pretrained(model_id)
    torch_model.eval()
    torch_model.to(test_device)
    torch.set_grad_enabled(False)

    cfg = torch_model.config

    # use 4 threads for computing
    if not use_cuda:
        turbo_transformers.set_num_threads(4)

    # Initialize a turbo BertModel with smart batching from torch model.
    turbo_model = turbo_transformers.BertModelSmartBatch.from_torch(
        torch_model)

    # a batch of queries with different lengths.
    query_seq_len_list = [18, 2, 3, 51]

    # generate random inputs. Of course you can use real data.
    input_list = [
        torch.randint(low=0,
                      high=cfg.vocab_size - 1,
                      size=(1, query_seq_len),
                      dtype=torch.long,
                      device=test_device)
        for query_seq_len in query_seq_len_list
    ]

    def run_serial():
        return serial_bert_inference(torch_model, input_list)

    def run_batched():
        return batch_bert_inference(turbo_model, input_list,
                                    query_seq_len_list)

    def elapsed_for_ten_runs(run):
        began = time.time()
        for _ in range(10):
            run()
        return time.time() - began

    # start inference
    s_res = run_serial()
    b_res = run_batched()
    max_abs_diff = torch.max(torch.abs(b_res - s_res))
    print(max_abs_diff)
    assert (max_abs_diff < 1e-2)

    print("\ntorch time consum: {}".format(elapsed_for_ten_runs(run_serial)))
    print("\nturbo time consum: {}".format(elapsed_for_ten_runs(run_batched)))
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int, enable_random: bool,
                                 max_seq_len: int, min_seq_len: int,
                                 num_threads: int, use_gpu: bool):
    """Benchmark a turbo_transformers model converted from a torch model.

    Args:
        model_name: One of ``"bert"``, ``"albert"`` or ``"roberta"``.
        seq_len: Sequence length for the fixed-shape benchmark.
        batch_size: Batch size for the fixed-shape benchmark.
        n: Forwarded to the benchmark helper.
        enable_random: Use ``benchmark_helper.run_variable_model`` with
            ``max_seq_len`` / ``min_seq_len`` instead of one fixed input.
        max_seq_len: Upper bound passed to ``run_variable_model``.
        min_seq_len: Lower bound passed to ``run_variable_model``.
        num_threads: Thread count for turbo_transformers.
        use_gpu: Run on ``cuda:0`` instead of ``cpu:0``.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    # Validate before the heavy imports. Raising a bare string (as done
    # previously) is itself a TypeError in Python 3.
    if model_name not in ("bert", "albert", "roberta"):
        raise ValueError(f"benchmark does not support {model_name}")

    import torch
    import transformers
    import turbo_transformers
    import benchmark_helper

    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    if use_gpu:
        print("using GPU")
    else:
        print("using CPU")

    cfg = None
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)

    turbo_transformers.set_num_threads(num_threads)

    if enable_random:
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "turbo", num_threads,
                                            cfg)
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "turbo", num_threads)
def __init__(self,
             emb=False,
             pretrained='bert-large-uncased',
             finetune=True):
    """Load a BERT model and its tokenizer.

    Args:
        emb: When true, build the model from the pretrained *config* only
            (no pretrained weights); otherwise load the pretrained model.
        pretrained: Checkpoint name for the config/model and tokenizer.
        finetune: Accepted but not used in this constructor.
    """
    super().__init__()

    self.model = (pt.BertModel(pt.BertConfig.from_pretrained(pretrained))
                  if emb else pt.BertModel.from_pretrained(pretrained))
    self.tokenizer = pt.BertTokenizer.from_pretrained(pretrained)

    # Output width of the final encoder layer's dense projection.
    last_layer = self.model.encoder.layer[-1]
    self.hidden_dim = last_layer.output.dense.out_features
def __init__(self, config):
    """Assemble BERT, a span extractor and a classifier over all spans."""
    super().__init__(config)
    self.num_labels = config.num_labels
    self.num_spans = config.num_spans

    self.bert = ptt.BertModel(config)
    self.dropout = nn.Dropout(p=config.hidden_dropout_prob)
    self.span_attention_extractor = SelfAttentiveSpanExtractor(
        config.hidden_size)

    # The classifier input is hidden_size features for each of num_spans.
    classifier_in = config.hidden_size * self.num_spans
    self.classifier = nn.Linear(classifier_in, self.num_labels)

    self.apply(self.init_weights)
def __init__(self, config, initialize_wBERT=False):
    """Build the BERT encoder with an optional projection head.

    Args:
        config: Model config; reads ``projection`` and
            ``indexing_dimension``.
        initialize_wBERT: Load pretrained ``bert-base-uncased`` weights
            instead of building a fresh model from ``config``.
    """
    super().__init__(config)
    assert config.projection or config.indexing_dimension == 768, \
        'If no projection then indexing dimension must be equal to 768'
    self.config = config

    self.model = (
        transformers.BertModel.from_pretrained('bert-base-uncased')
        if initialize_wBERT else transformers.BertModel(config))

    if self.config.projection:
        out_dim = self.config.indexing_dimension
        # Linear projection to the indexing dimension, then LayerNorm.
        self.proj = nn.Linear(self.model.config.hidden_size, out_dim)
        self.norm = nn.LayerNorm(out_dim)

    self.loss_fct = torch.nn.KLDivLoss()
def __init__(self, config):
    """Build BERT, a context projection and an entity embedding table.

    Reads ``hidden_size``, ``entity_embedding_dim``, ``entity_vocab_size``,
    ``hidden_dropout_prob``, ``use_batch_negatives`` and
    ``random_negatives`` from ``config``.
    """
    super().__init__(config)

    # Negative-sampling switches, copied from the config.
    self.use_batch_negatives = config.use_batch_negatives
    self.random_negatives = config.random_negatives

    entity_dim = config.entity_embedding_dim

    self.bert = transformers.BertModel(config)
    # Maps BERT hidden states into the entity-embedding dimension.
    self.context_projection = torch.nn.Linear(config.hidden_size, entity_dim)
    self.dropout = torch.nn.Dropout(p=config.hidden_dropout_prob)
    self.scaling_constant = torch.nn.Parameter(torch.tensor(1.0))

    self.entity_embeddings = torch.nn.Embedding(config.entity_vocab_size,
                                                entity_dim)
    # Std. from Nick
    torch.nn.init.normal_(self.entity_embeddings.weight, std=0.02)
def benchmark_torch_jit(model_name: str, seq_len: int, batch_size: int,
                        n: int, enable_random: bool, max_seq_len: int,
                        min_seq_len: int, num_threads: int, use_gpu: bool,
                        enable_mem_opt: bool):
    """Benchmark a ``torch.jit``-traced model and print a JSON result line.

    Args:
        model_name: One of ``"bert"``, ``"albert"`` or ``"roberta"``.
        seq_len: Sequence length of the random input.
        batch_size: Batch size of the random input.
        n: Number of timed forward passes.
        enable_random: Not used by this function.
        max_seq_len: Not used by this function.
        min_seq_len: Not used by this function.
        num_threads: Number of torch intra-op threads.
        use_gpu: Not used by this function.
        enable_mem_opt: Not used by this function.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    # Validate before the heavy imports. Raising a bare string (as done
    # previously) is itself a TypeError in Python 3.
    if model_name not in ("bert", "albert", "roberta"):
        raise ValueError(f"benchmark does not support {model_name}")

    # json is used for the report below; imported locally so the function
    # does not depend on a module-level import.
    import json
    import transformers
    import contexttimer
    import torch.jit

    torch.set_num_threads(num_threads)
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)

    model.eval()

    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long)

    model = torch.jit.trace(model, (input_ids, ))

    with torch.jit.optimized_execution(True):
        # One untimed call before the measured loop.
        model(input_ids)
        with contexttimer.Timer() as t:
            for _ in range(n):
                model(input_ids)

    print(
        json.dumps({
            "QPS": n / t.elapsed,
            "elapsed": t.elapsed,
            "n": n,
            "batch_size": batch_size,
            "seq_len": seq_len,
            "framework": "torch_jit",
            "n_threads": num_threads,
            "model_name": model_name
        }))
def benchmark_torch(model_name: str, seq_len: int, batch_size: int, n: int,
                    enable_random: bool, max_seq_len: int, min_seq_len: int,
                    num_threads: int, use_gpu: bool, enable_mem_opt: bool):
    """Benchmark a randomly initialised PyTorch model.

    Args:
        model_name: ``"bert"``, ``"albert"``, ``"roberta"`` or
            ``"distilbert"``.
        seq_len: Sequence length for the fixed-shape benchmark.
        batch_size: Batch size for the fixed-shape benchmark.
        n: Forwarded to the benchmark helper.
        enable_random: Use ``benchmark_helper.run_variable_model`` with
            ``max_seq_len`` / ``min_seq_len`` instead of one fixed input.
        max_seq_len: Upper bound passed to ``run_variable_model``.
        min_seq_len: Lower bound passed to ``run_variable_model``.
        num_threads: Number of torch intra-op threads.
        use_gpu: Run on ``cuda:0`` instead of ``cpu:0``.
        enable_mem_opt: Forwarded to the benchmark helper.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    # Validate before the heavy imports. Raising a bare string (as done
    # previously) is itself a TypeError in Python 3.
    if model_name not in ("bert", "albert", "roberta", "distilbert"):
        raise ValueError(f"benchmark does not support {model_name}")

    import torch
    import transformers
    import benchmark_helper

    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    torch.set_grad_enabled(False)
    torch.set_num_threads(num_threads)

    cfg = None
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    elif model_name == "distilbert":
        cfg = transformers.DistilBertConfig()
        model = transformers.DistilBertModel(cfg)

    model.eval()
    model.to(test_device)

    if enable_random:
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "torch", num_threads,
                                            cfg, enable_mem_opt, model_name)
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "torch", num_threads,
                                   enable_mem_opt, model_name)
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int):
    """Benchmark a turbo_transformers model on ``cuda:0``.

    If CUDA is not available to torch, a message is printed and the
    function returns without running anything.

    Args:
        model_name: One of ``"bert"``, ``"albert"`` or ``"roberta"``.
        seq_len: Sequence length of the random input.
        batch_size: Batch size of the random input.
        n: Forwarded to ``benchmark_helper.run_model``.

    Raises:
        ValueError: If ``model_name`` is not supported. This is checked
            first, so a bad name is reported even on machines without CUDA.
    """
    # Validate before the heavy imports. Raising a bare string (as done
    # previously) is itself a TypeError in Python 3.
    if model_name not in ("bert", "albert", "roberta"):
        raise ValueError(f"benchmark does not support {model_name}")

    import torch
    import transformers
    import turbo_transformers
    import benchmark_helper

    if not torch.cuda.is_available():
        print("cuda is not available for torch")
        return
    test_device = torch.device('cuda:0')

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)

    # NOTE(review): this reads `.config` from the converted turbo model -
    # confirm the turbo wrapper exposes it.
    cfg = model.config
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)

    benchmark_helper.run_model(lambda: model(input_ids), True, n, batch_size,
                               seq_len, "turbo")
def __init__(self, hp: Optional[ModelParams] = None):
    """Build an embeddings-only BERT model (zero transformer layers).

    Args:
        hp: Model hyper-parameters. ``None`` (the default) creates a fresh
            ``ModelParams()`` per instance instead of sharing one object
            evaluated at function-definition time.
    """
    super().__init__()
    self.hp = ModelParams() if hp is None else hp

    config = transformers.BertConfig(
        hidden_size=self.hp.dim,
        num_hidden_layers=0,
        num_attention_heads=1,
        intermediate_size=0,
        max_position_embeddings=self.hp.max_seq_len,
        output_attentions=False,
        output_hidden_states=False,
        return_dict=True,
    )
    self.e = transformers.BertModel(config, add_pooling_layer=False)

    for name, param in self.e.named_parameters():
        # named_parameters() yields dotted names such as
        # "embeddings.position_embeddings.weight", so an equality test
        # against 'position_embeddings' never matches; use a substring
        # test so the position embeddings are actually frozen.
        if 'position_embeddings' in name:
            param.requires_grad = False
        else:
            param.requires_grad = self.hp.requires_grad
def __init__(self, device, cfg):
    """Set up a BERT tokenizer and model according to ``cfg``.

    Args:
        device: Device the model is moved to.
        cfg: Settings object; reads ``tokens_pretrained``, ``vocab_path``,
            ``merge_path``, ``embeddings_pretrained``, ``max_seq_len`` and
            ``embeddings_trainable``.
    """
    super().__init__()

    if cfg.tokens_pretrained:
        self.tokenizer = transformers.BertTokenizer.from_pretrained(
            'bert-base-uncased')
    else:
        # NOTE(review): BertTokenizer's second positional parameter is
        # `do_lower_case`, not a merges file - confirm that passing
        # cfg.merge_path here is intended.
        self.tokenizer = transformers.BertTokenizer(
            cfg.vocab_path, cfg.merge_path)

    if cfg.embeddings_pretrained:
        self.model = transformers.BertModel.from_pretrained(
            'bert-base-uncased')
    else:
        # BertModel's constructor expects a config object, not a checkpoint
        # name: load the bert-base-uncased config and build an untrained
        # model from it.
        self.model = transformers.BertModel(
            transformers.BertConfig.from_pretrained('bert-base-uncased'))
    self.model = self.model.to(device)

    self.pad_token = 'pad_token'
    self.device = device
    self.max_len = cfg.max_seq_len
    self.trainable = cfg.embeddings_trainable
def _get_network(
    args: Tuple[str, List[int]]
) -> Tuple[IRModule, bytearray, Tuple[str, List[int], str]]:
    """Build the Relay module for a named workload and serialise its params.

    ``args`` is a ``(name, input_shape)`` pair. Supported names are the
    torchvision models listed below, ``resnet3d_18``, the ``bert_*``
    variants and ``dcgan``.

    Returns ``(mod, params_bytearray, inputs)`` where ``inputs`` is an
    ``(input_name, input_shape, dtype)`` triple.

    Raises ``ValueError`` for any other name.
    """
    name: str
    input_shape: List[int]
    name, input_shape = args

    mod: IRModule

    # Workload name -> torchvision constructor name.
    torchvision_ctor_names = {
        "resnet_18": "resnet18",
        "resnet_50": "resnet50",
        "wide_resnet_50": "wide_resnet50_2",
        "resnext_50": "resnext50_32x4d",
        "mobilenet_v2": "mobilenet_v2",
        "mobilenet_v3": "mobilenet_v3_large",
        "inception_v3": "inception_v3",
        "densenet_121": "densenet121",
        "vgg_16": "vgg16",
    }
    # Workload name -> (num_hidden_layers, hidden_size, intermediate_size,
    # num_attention_heads).
    bert_dimensions = {
        "bert_tiny": (6, 512, 2048, 8),
        "bert_base": (12, 768, 3072, 12),
        "bert_medium": (12, 1024, 4096, 16),
        "bert_large": (24, 1024, 4096, 16),
    }

    if name in torchvision_ctor_names or name == "resnet3d_18":
        import torch  # type: ignore
        from torchvision import models  # type: ignore

        if name == "resnet3d_18":
            model = models.video.r3d_18(pretrained=False)
        elif name == "inception_v3":
            model = getattr(models, torchvision_ctor_names[name])(
                pretrained=False, aux_logits=False)
        else:
            model = getattr(models,
                            torchvision_ctor_names[name])(pretrained=False)

        dtype = "float32"
        input_data = torch.randn(input_shape).type(  # pylint: disable=no-member
            torch.float32)  # pylint: disable=no-member
        scripted_model = torch.jit.trace(model, input_data).eval()

        input_name = "input0"
        shape_list = [(input_name, input_shape)]
        mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)

        # Convert conv/pool ops to channels-last layouts.
        layout_passes = tvm.transform.Sequential([
            relay.transform.RemoveUnusedFunctions(),
            relay.transform.ConvertLayout({
                "nn.conv2d": ["NHWC", "default"],
                "nn.conv3d": ["NDHWC", "default"],
                "nn.max_pool2d": ["NHWC", "default"],
                "nn.avg_pool2d": ["NHWC", "default"],
            }),
        ])
        with tvm.transform.PassContext(opt_level=3):
            mod = layout_passes(mod)
        inputs = (input_name, input_shape, dtype)
    elif name in bert_dimensions:
        os.environ["TOKENIZERS_PARALLELISM"] = "false"
        # pip3 install transformers==3.5 torch==1.7
        import torch  # type: ignore
        import transformers  # type: ignore

        config_dict = {
            variant: transformers.BertConfig(
                num_hidden_layers=layers,
                hidden_size=hidden,
                intermediate_size=intermediate,
                num_attention_heads=heads,
                return_dict=False,
            )
            for variant, (layers, hidden, intermediate,
                          heads) in bert_dimensions.items()
        }
        model = transformers.BertModel(config_dict[name])

        input_name = "input_ids"
        input_dtype = "int64"
        # Random token ids in [0, 10000) serve as the tracing input.
        token_ids = torch.randint(10000, input_shape)  # pylint: disable=no-member

        model.eval()
        scripted_model = torch.jit.trace(model, [token_ids], strict=False)

        shape_list = [(input_name, input_shape)]
        mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
        mod = relay.transform.FastMath()(mod)
        mod = relay.transform.CombineParallelBatchMatmul()(mod)
        inputs = (input_name, input_shape, input_dtype)
    elif name == "dcgan":
        # For dcgan the given shape is the *output* shape: (batch, *oshape).
        output_shape = input_shape
        mod, params = relay.testing.dcgan.get_workload(
            batch_size=output_shape[0],
            oshape=output_shape[1:],
            layout="NHWC",
        )
        inputs = ("data", [100], "float32")
    else:
        raise ValueError("Invalid name: " + name)

    params_bytearray: bytearray = save_param_dict(params)
    return mod, params_bytearray, inputs
def __init__(self, config: transformers.BertConfig):
    """Initialise the base class with ``config`` and wrap a BertModel."""
    super().__init__(config)
    # The wrapped model is built from the same config.
    self.bert = transformers.BertModel(config)
def __init__(self, config):
    """Build the encoder and a single-output linear head from ``config``."""
    super().__init__()
    # The attribute is named `roberta` but holds a BertModel instance.
    self.roberta = transformers.BertModel(config)
    # Maps a hidden_size vector to one scalar.
    self.fc = torch.nn.Linear(in_features=config.hidden_size,
                              out_features=1)
def __init__(self, config: transformers.BertConfig):
    """Initialise the base class, a BertModel and a fixed 0.1 dropout."""
    super().__init__(config)
    self.bert = transformers.BertModel(config)
    # The dropout rate is fixed at 0.1 rather than read from the config.
    self.dropout = torch.nn.Dropout(p=0.1)
import torch import transformers import turbo_transformers from turbo_transformers.layers.utils import convert2tt_tensor, try_convert, convert_returns_as_type, ReturnType import time cfg = transformers.BertConfig() model = transformers.BertModel(cfg) model.eval() torch.set_grad_enabled(False) intermediate = torch.quantization.quantize_dynamic(model.encoder.layer[0].intermediate) qintermediate = turbo_transformers.QBertIntermediate.from_torch(model.encoder.layer[0].intermediate) lens = [10,20,40,60,80,100,200,300] loops = 1 for l in lens: input = torch.rand(1, l, 768) print("seq length =", l) start = time.time() for i in range(loops): res = intermediate(input) end = time.time() print("torch int8 layer QPS =", loops/(end-start)) start = time.time() for i in range(loops):