def masked_mlm():
    import torch
    from transformers import ReformerConfig, ReformerForMaskedLM

    # google/reformer-enwik8 is a causal-LM checkpoint; turn off the decoder
    # flag so the weights can be used with bidirectional (masked) attention.
    config = ReformerConfig.from_pretrained('google/reformer-enwik8')
    config.is_decoder = False
    model = ReformerForMaskedLM.from_pretrained('google/reformer-enwik8', config=config)

    sentence = sentence2 = "The quick brown fox jumps over the lazy dog."
    input_ids, attention_masks = encode([sentence])

    mask_positions = True  # set to False to run without masking
    if mask_positions:
        _input_ids, a = input_ids.clone(), attention_masks.clone()
        # hide a few characters from the attention mask and mark them
        # with "%" in the display string
        for i in [19, 27, 37]:
            a[0, i] = 0
            sentence2 = sentence2[:i] + "%" + sentence2[i + 1:]
    else:
        _input_ids, a = input_ids, attention_masks

    f = model(
        input_ids=_input_ids,
        position_ids=None,
        attention_mask=a,
        head_mask=None,
        inputs_embeds=None,
        num_hashes=None,
        labels=_input_ids,
    )
    prediction = decode(torch.argmax(f.logits, 2))[0]
    print(sentence2)
    print(prediction)
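# The snippet above relies on `encode`/`decode` helpers that are not defined
# here. A minimal char-level sketch, following the convention used on the
# google/reformer-enwik8 model card (byte values shifted by 2 so that ids
# 0 and 1 stay reserved for special tokens) -- an assumption, not part of
# the transformers API:
import torch

def encode(list_of_strings, pad_token_id=0):
    max_length = max(len(string) for string in list_of_strings)
    attention_masks = torch.zeros((len(list_of_strings), max_length), dtype=torch.long)
    input_ids = torch.full((len(list_of_strings), max_length), pad_token_id, dtype=torch.long)
    for idx, string in enumerate(list_of_strings):
        if not isinstance(string, bytes):
            string = str.encode(string)
        input_ids[idx, :len(string)] = torch.tensor([x + 2 for x in string])
        attention_masks[idx, :len(string)] = 1
    return input_ids, attention_masks

def decode(outputs_ids):
    # drop padding/special ids and shift byte values back
    return ["".join(chr(x - 2) for x in output_ids if x > 1)
            for output_ids in outputs_ids.tolist()]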
def __init__(self, n_labels, hidden_size, dropout=0.2, label_ignore_idx=0,
             max_seq_length=128, batch_size=32, head_init_range=0.04,
             device='cuda', vocab_size=320):
    super().__init__()

    self.n_labels = n_labels
    self.linear_1 = nn.Linear(hidden_size, hidden_size)
    self.classification_head = nn.Linear(hidden_size, n_labels)
    self.label_ignore_idx = label_ignore_idx

    self.tokenizer = ReformerTokenizer.from_pretrained(
        'google/reformer-crime-and-punishment')
    config = ReformerConfig(
        axial_pos_shape=[batch_size, int(max_seq_length / batch_size)])
    self.model = ReformerModel(config)

    self.dropout = nn.Dropout(dropout)
    self.device = device

    # initializing classification head
    self.classification_head.weight.data.normal_(mean=0.0, std=head_init_range)
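# Note on the ReformerConfig above: Reformer's axial position embeddings
# require the (padded) input length to equal the product of axial_pos_shape,
# so [batch_size, max_seq_length / batch_size] only works when batch_size
# evenly divides max_seq_length. A minimal sanity check, assuming the
# default arguments (32 * 4 == 128):
from transformers import ReformerConfig

_config = ReformerConfig(axial_pos_shape=[32, 128 // 32])
assert _config.axial_pos_shape[0] * _config.axial_pos_shape[1] == 128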
def get_reformer(vocab_size=77, n_layer=12, n_embd=768, n_head=12,
                 n_positions=512, local_window_size=50, num_buckets=None,
                 num_hashes=1):
    attn_layers = ["local", "local", "lsh", "local", "local", "local",
                   "lsh", "local", "local", "local", "lsh", "local"]
    # attn_layers = ["local", "lsh", "local", "lsh", "local", "lsh",
    #                "local", "lsh", "local", "lsh", "local", "lsh"]
    config = ReformerConfig(
        hash_seed=None,
        attn_layers=attn_layers[:n_layer],
        # attention_head_size=128,
        hidden_size=n_embd,
        max_position_embeddings=350,
        feed_forward_size=3072,
        vocab_size=vocab_size,
        is_decoder=True,
        axial_pos_embds_dim=[256, 512],
        axial_pos_shape=[14, 25],
        num_hashes=num_hashes,
        num_buckets=num_buckets,
        local_attn_chunk_length=local_window_size,
        lsh_attn_chunk_length=local_window_size,
        num_attention_heads=n_head,
        # lsh_attention_probs_dropout_prob=0.1,
        # local_attention_probs_dropout_prob=0.1,
        # hidden_dropout_prob=0.1,
        chunk_size_feed_forward=0,
        chunk_size_lm_head=0,
        eos_token_id=2,
        hidden_act='relu',
    )
    return ReformerModelWithLMHead(config=config)
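# A minimal smoke test for get_reformer() (the dummy input is an assumption).
# With axial position embeddings, training-time inputs must have length
# equal to prod(axial_pos_shape) == 14 * 25 == 350, matching
# max_position_embeddings above:
import torch

model = get_reformer()
input_ids = torch.randint(0, 77, (1, 350))    # batch of 1, seq len 350
outputs = model(input_ids, labels=input_ids)  # returns LM loss and logits
print(outputs.loss)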
def create_reformer_config():
    # define config of reformer model
    return ReformerConfig(**{
        "attention_head_size": 64,
        "attn_layers": ["local", "lsh", "local", "lsh", "local", "lsh"],
        "axial_pos_embds": True,
        "sinusoidal_pos_embds": False,
        "axial_pos_embds_dim": [64, 192],
        "axial_pos_shape": [512, 1024],
        "lsh_attn_chunk_length": 64,
        "local_attn_chunk_length": 64,
        "feed_forward_size": 512,
        "hidden_act": "relu",
        "hidden_size": 256,
        "is_decoder": True,
        "max_position_embeddings": 524288,
        "num_attention_heads": 2,
        "num_buckets": [64, 128],
        "num_hashes": 1,
        "vocab_size": 320,
        "lsh_attention_probs_dropout_prob": 0.0,
        "lsh_num_chunks_before": 1,
        "lsh_num_chunks_after": 0,
        "local_num_chunks_before": 1,
        "local_num_chunks_after": 0,
        "local_attention_probs_dropout_prob": 0.05,
        "hidden_dropout_prob": 0.05,
        "seed": None,  # that parameter is only needed for testing and will be removed soon
    })
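# Consistency notes for create_reformer_config() (a sketch, not exhaustive):
# the factorized axial shape must multiply out to the maximum sequence
# length (512 * 1024 == 524288 == max_position_embeddings), and the axial
# embedding dims must sum to hidden_size (64 + 192 == 256):
config = create_reformer_config()
assert config.axial_pos_shape[0] * config.axial_pos_shape[1] == config.max_position_embeddings
assert sum(config.axial_pos_embds_dim) == config.hidden_size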
def load(out_dir: str):
    text_processor = TextProcessor(tok_model_path=out_dir)
    with open(os.path.join(out_dir, "config"), "rb") as fp:
        config = pickle.load(fp)
    if isinstance(config, dict):
        # For older configs
        config = ReformerConfig(**config)
    lm = ReformerLM(text_processor=text_processor, config=config)
    lm.load_state_dict(
        torch.load(os.path.join(out_dir, "model.state_dict")))
    return lm
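# Illustrative use of load() -- the directory name is a placeholder and must
# contain the tokenizer model, the pickled "config", and the
# "model.state_dict" file the training code would have written:
lm = load("path/to/checkpoint_dir")
lm.encoder.eval()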
def test_local_layer_forward_complex(self):
    config = self._get_basic_config_and_input()
    config["attn_layers"] = ["local"]
    attn_mask = self._get_attn_mask()
    hidden_states = self._get_hidden_states()
    torch.manual_seed(0)
    layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
    layer.eval()
    reformer_output = layer(
        prev_attn_output=hidden_states,
        hidden_states=hidden_states,
        attention_mask=attn_mask,
    )
    output_slice = reformer_output.hidden_states[0, 0, :5]
    expected_output_slice = torch.tensor(
        [1.5476, -1.9020, -0.9902, 1.5013, -0.1950],
        dtype=torch.float,
        device=torch_device,
    )
    self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
def test_local_layer_forward(self):
    config = self._get_basic_config_and_input()
    config["attn_layers"] = ["local"]
    config["is_decoder"] = False
    hidden_states = self._get_hidden_states()
    torch.manual_seed(0)
    layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
    layer.eval()
    reformer_output = layer(prev_attn_output=hidden_states, hidden_states=hidden_states)
    output_slice = reformer_output.hidden_states[0, 0, :5]
    expected_output_slice = torch.tensor(
        [1.4212, -2.0576, -0.9688, 1.4599, -0.1344],
        dtype=torch.float,
        device=torch_device,
    )
    self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
def test_lsh_layer_forward(self):
    config = self._get_basic_config_and_input()
    config["attn_layers"] = ["lsh"]
    config["is_decoder"] = False
    hidden_states = self._get_hidden_states()
    torch.manual_seed(0)
    layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
    layer.eval()
    reformer_output = layer(prev_attn_output=hidden_states.clone(), hidden_states=hidden_states)
    output_slice = reformer_output.hidden_states[0, 0, :5]
    expected_output_slice = torch.tensor(
        [1.6879, -1.3083, -0.4708, 1.3555, -0.6292],
        dtype=torch.float,
        device=torch_device,
    )
    self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
def convert_trax_checkpoint_to_pytorch(trax_model_pkl_path, config_file, pytorch_dump_path):
    # Initialise PyTorch model
    config = ReformerConfig.from_json_file(config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = ReformerModelWithLMHead(config)

    with open(trax_model_pkl_path, "rb") as f:
        model_weights = pickle.load(f)["weights"]

    set_model_weights_in_torch(model_weights, model, config.hidden_size)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
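# A plausible CLI entry point for the converter (a sketch; the flag names
# simply mirror the function parameters and are assumptions, not taken from
# any particular script):
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--trax_model_pkl_path", type=str, required=True,
                        help="Path to the pickled trax Reformer checkpoint.")
    parser.add_argument("--config_file", type=str, required=True,
                        help="Path to the ReformerConfig json file.")
    parser.add_argument("--pytorch_dump_path", type=str, required=True,
                        help="Where to write the converted state dict.")
    args = parser.parse_args()
    convert_trax_checkpoint_to_pytorch(
        args.trax_model_pkl_path, args.config_file, args.pytorch_dump_path)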
def test_lsh_layer_forward_complex(self):
    config = self._get_basic_config_and_input()
    config["attn_layers"] = ["lsh"]
    config["num_buckets"] = [2, 4]
    attn_mask = self._get_attn_mask()
    hidden_states = self._get_hidden_states()
    torch.manual_seed(0)
    layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
    layer.eval()
    reformer_output = layer(
        prev_attn_output=hidden_states.clone(),
        hidden_states=hidden_states,
        attention_mask=attn_mask,
    )
    output_slice = reformer_output.hidden_states[0, 0, :5]
    expected_output_slice = torch.tensor(
        [1.6439, -1.2306, -0.5108, 1.3006, -0.6537],
        dtype=torch.float,
        device=torch_device,
    )
    self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
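# The tests above rely on helper methods from the surrounding test case.
# A rough sketch of their shape (illustrative only -- the real helpers in
# transformers' Reformer tests return fixed config values and tensors, and
# the expected output slices above are tied to those exact values, so this
# sketch will NOT reproduce them):
def _get_basic_config_and_input(self):
    return {
        "vocab_size": 320,
        "hidden_size": 16,
        "num_attention_heads": 2,
        "is_decoder": True,
        # ... remaining Reformer hyperparameters elided ...
    }

def _get_hidden_states(self):
    # small fixed float tensor of shape (batch, seq_len, hidden_size)
    return torch.ones(1, 8, 16, dtype=torch.float, device=torch_device)

def _get_attn_mask(self):
    # binary mask of shape (batch, seq_len)
    return torch.tensor([[1, 1, 1, 1, 0, 0, 0, 0]], dtype=torch.long, device=torch_device)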
def __init__(self, text_processor: TextProcessor, config: ReformerConfig = None, size: int = 1):
    """
    :param size: config size: 1 = small, 2 = medium, 3 = base.
    """
    super(ReformerLM, self).__init__()
    self.text_processor: TextProcessor = text_processor

    if config is not None:
        self.config = config
    else:
        config_func = _small_config if size == 1 else (
            _base_config if size == 3 else _medium_config)
        self.config = config_func(
            vocab_size=text_processor.tokenizer.get_vocab_size(),
            pad_token_id=text_processor.pad_token_id(),
            eos_token_id=text_processor.sep_token_id())
        self.config = ReformerConfig(**self.config)

    reformer = ReformerModelWithLMHead(self.config)
    self.lm_head: ReformerOnlyLMHead = reformer.lm_head
    self.encoder: ReformerModel = reformer.reformer
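# Illustrative construction (a sketch; TextProcessor comes from the
# surrounding project, and "tokenizer/" is a placeholder path):
tp = TextProcessor(tok_model_path="tokenizer/")
lm = ReformerLM(text_processor=tp, size=1)  # size=1 selects _small_config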
def _compute_pytorch(model_names, batch_sizes, slice_sizes, dictionary,
                     average_over, device, torchscript, fp16, no_speed,
                     no_memory, verbose, num_hashes):
    hidden_size = 64
    num_attention_heads = 2
    intermediate_size = 128
    chunk_length = 64
    hidden_states = floats_tensor((1, 2 ** 16, hidden_size))

    for c, model_name in enumerate(model_names):
        print(f"{c + 1} / {len(model_names)}")
        dictionary[model_name] = {
            "bs": batch_sizes,
            "ss": slice_sizes,
            "results": {},
            "memory": {},
        }
        dictionary[model_name]["results"] = {i: {} for i in batch_sizes}
        dictionary[model_name]["memory"] = {i: {} for i in batch_sizes}

        for batch_size in batch_sizes:
            for slice_size in slice_sizes:
                num_buckets = int(2 * slice_size / chunk_length)
                if num_buckets > chunk_length:
                    # factorize the bucket count once it grows too large
                    factorized_num_buckets = num_buckets // 32
                    num_buckets = [32, factorized_num_buckets]
                bert_config = BertConfig(
                    hidden_size=hidden_size,
                    num_attention_heads=num_attention_heads,
                    intermediate_size=intermediate_size,
                    hidden_dropout_prob=0.0,
                    attention_probs_dropout_prob=0.0,
                )
                reformer_config = ReformerConfig(
                    hidden_size=hidden_size,
                    num_attention_heads=num_attention_heads,
                    intermediate_size=intermediate_size,
                    chunk_length=chunk_length,
                    num_hashes=num_hashes,
                    num_buckets=num_buckets,
                )
                layers = {
                    "ReformerLayer": ReformerLayer(reformer_config),
                    "BertLayer": BertLayer(bert_config),
                }
                model = layers[model_name]
                if fp16:
                    model.half()
                model.to(device)
                model.eval()

                sequence = (
                    hidden_states[0, :slice_size, :]
                    .to(device=device)
                    .repeat(batch_size, 1, 1)
                )
                try:
                    if torchscript:
                        print("Tracing model with sequence size", sequence.shape)
                        inference = torch.jit.trace(model, sequence)
                        inference(sequence)
                    else:
                        inference = model

                    # ReformerLayer takes (prev_attn_output, hidden_states)
                    if model_name == "ReformerLayer":
                        inference(sequence, sequence)
                    else:
                        inference(sequence)

                    if not no_memory:
                        # model.add_memory_hooks()
                        # Forward method tracing (only for PyTorch models)
                        trace = start_memory_tracing("transformers")
                        if model_name == "ReformerLayer":
                            inference(sequence, sequence)
                        else:
                            inference(sequence)
                        summary = stop_memory_tracing(trace)
                        if verbose:
                            print_summary_statistics(summary)
                        dictionary[model_name]["memory"][batch_size][slice_size] = str(summary.total)
                    else:
                        dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"

                    if not no_speed:
                        print("Going through model with sequence of shape", sequence.shape)
                        if model_name == "ReformerLayer":
                            runtimes = timeit.repeat(
                                lambda: inference(sequence, sequence),
                                repeat=average_over, number=3)
                        else:
                            runtimes = timeit.repeat(
                                lambda: inference(sequence),
                                repeat=average_over, number=3)
                        average_time = sum(runtimes) / float(len(runtimes)) / 3.0
                        dictionary[model_name]["results"][batch_size][slice_size] = average_time
                    else:
                        dictionary[model_name]["results"][batch_size][slice_size] = "N/A"
                except RuntimeError as e:
                    print("Doesn't fit on GPU.", e)
                    torch.cuda.empty_cache()
                    dictionary[model_name]["results"][batch_size][slice_size] = "N/A"
                    dictionary[model_name]["memory"][batch_size][slice_size] = "N/A"
    return dictionary
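# Illustrative invocation of the benchmark (assumed argument values;
# floats_tensor and the memory-tracing utilities come from the surrounding
# benchmark script):
import torch

results = _compute_pytorch(
    model_names=["ReformerLayer", "BertLayer"],
    batch_sizes=[1],
    slice_sizes=[512, 1024],
    dictionary={},
    average_over=3,
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    torchscript=False,
    fp16=False,
    no_speed=False,
    no_memory=False,
    verbose=True,
    num_hashes=1,
)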
def cycle(loader):
    # endlessly re-iterate over a DataLoader (the excerpt originally began
    # mid-function, at the `yield data` line)
    while True:
        for data in loader:
            yield data

train_dataset = SequenceDataset.prepare_from_file("data/yeast/yeast_train.txt", tokenizer)
val_dataset = SequenceDataset.prepare_from_file("data/yeast/yeast_val.txt", tokenizer)
train_loader = cycle(DataLoader(train_dataset, batch_size=BATCH_SIZE))
val_loader = cycle(DataLoader(val_dataset, batch_size=BATCH_SIZE))

# configuration = ReformerConfig.from_pretrained("google/reformer-crime-and-punishment")
# configuration.axial_pos_shape = (64, 72)
# configuration.max_position_embeddings = SEQ_LEN
# configuration.vocab_size = tokenizer.vocab_size
# configuration.save_pretrained('model/config/')
configuration = ReformerConfig.from_pretrained('model/config/')
model = ReformerModelWithLMHead(configuration)
model.cuda()

NUM_BATCHES = len(train_dataset) // BATCH_SIZE

from transformers import AdamW
optimizer = AdamW(params=model.parameters(), lr=LEARNING_RATE)

from collections import OrderedDict
import json

all_training_loss = OrderedDict()
all_val_loss = OrderedDict()

for x in range(1):
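    # One plausible body for the truncated training loop above (a sketch;
    # the original excerpt ends at `for x in range(1):`, and VALIDATE_EVERY
    # is an assumed constant like BATCH_SIZE and LEARNING_RATE):
    for i in range(NUM_BATCHES):
        model.train()
        batch = next(train_loader).cuda()
        outputs = model(batch, labels=batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        all_training_loss[i] = loss.item()

        if i % VALIDATE_EVERY == 0:
            model.eval()
            with torch.no_grad():
                val_batch = next(val_loader).cuda()
                all_val_loss[i] = model(val_batch, labels=val_batch).loss.item()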
from transformers import ReformerConfig, ReformerTokenizer, ReformerModel
import sentencepiece as spm
import os

assert os.path.exists('protein_reformer/training_vocab.txt'), \
    f'build a lower-case amino acid txt file to train the tokenizer. content should be: {"ARNDCQEGHILKMFPSTWYVOUBZX".lower()}'

MODEL_MAX_LENGTH = 4608

spm.SentencePieceTrainer.Train(
    "--input=protein_reformer/training_vocab.txt --model_prefix=spiece "
    "--vocab_size=30 --pad_id=29 --character_coverage=1.0"
)
os.system("mv spiece.model spiece.vocab protein_reformer")

tokenizer = ReformerTokenizer(vocab_file="protein_reformer/spiece.model",
                              do_lower_case=True,
                              model_max_length=MODEL_MAX_LENGTH)
tokenizer.save_pretrained("protein_reformer")

configuration = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment")
configuration.axial_pos_shape = (64, 72)  # 64 * 72 == 4608 == MODEL_MAX_LENGTH
configuration.max_position_embeddings = MODEL_MAX_LENGTH
configuration.vocab_size = tokenizer.vocab_size
configuration.pad_token_id = tokenizer.pad_token_id
# configuration.attn_layers = ["local", "lsh", "local", "lsh"]
configuration.output_hidden_states = True
configuration.save_pretrained('protein_reformer/')
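# With the tokenizer and config saved, both can be reloaded from the
# directory to build a model (a sketch that just verifies the artifacts
# round-trip):
tokenizer = ReformerTokenizer.from_pretrained("protein_reformer")
configuration = ReformerConfig.from_pretrained("protein_reformer/")
model = ReformerModel(configuration)
print(model.config.axial_pos_shape, model.config.vocab_size)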