def predict_minibatch(self, inputs):
  # Preprocess to ids and masks, and make the input batch.
  encoded_input = self.tokenizer.batch_encode_plus(
      [ex["sentence"] for ex in inputs],
      return_tensors="pt",
      add_special_tokens=True,
      max_length=128,
      pad_to_max_length=True)
  # Run a forward pass.
  with torch.no_grad():  # remove this if you need gradients.
    logits, embs, unused_attentions = self.model(**encoded_input)
  # Post-process outputs.
  batched_outputs = {
      "probas": torch.nn.functional.softmax(logits, dim=-1),
      "input_ids": encoded_input["input_ids"],
      "ntok": torch.sum(encoded_input["attention_mask"], dim=1),
      "cls_emb": embs[-1][:, 0],  # last layer, first token
  }
  # Return as NumPy for further processing.
  detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()}
  # Unbatch outputs so we get one record per input example.
  for output in utils.unbatch_preds(detached_outputs):
    ntok = output.pop("ntok")
    output["tokens"] = self.tokenizer.convert_ids_to_tokens(
        output.pop("input_ids")[1:ntok - 1])
    yield output
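# All of the predict_minibatch variants in this file rely on
# utils.unbatch_preds to split a dict of batched arrays into one dict per
# example. The helper ships with LIT; the function below is only a minimal
# sketch of its assumed behavior, included here for reference.
def unbatch_preds_sketch(preds):
  """Yield one {key: row} dict per example from a dict of batched arrays."""
  batch_size = len(next(iter(preds.values())))
  for i in range(batch_size):
    yield {key: value[i] for key, value in preds.items()}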
def _predict_minibatch_internal(self, inputs):
  """Run model on a single batch.

  Args:
    inputs: List[Dict] with fields as described by input_spec()

  Returns:
    outputs: List[Dict] with fields as described by output_spec()
  """
  # Text as sequence of sentencepiece IDs.
  encoded_inputs = self._encode_texts([
      self.config.input_prefix + ex["input_text"] + " </s>" for ex in inputs
  ])
  encoded_targets = self._encode_texts(
      [ex.get("target_text", "") for ex in inputs])
  ##
  # Force-decode on target text, and also get encoder embs and attention.
  batched_outputs = self._force_decode(encoded_inputs, encoded_targets)
  # Get the conditional generation from the model.
  # Workaround for output_hidden not being compatible with generate.
  # See https://github.com/huggingface/transformers/issues/8361
  self.model.encoder.output_hidden_states = False
  self.model.decoder.output_hidden_states = False
  batched_outputs["generated_ids"] = self.model.generate(
      encoded_inputs["input_ids"],
      attention_mask=encoded_inputs["attention_mask"],
      max_length=self.config.max_gen_length)
  self.model.encoder.output_hidden_states = True
  self.model.decoder.output_hidden_states = True
  # Convert to numpy for post-processing.
  detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()}
  # Split up batched outputs, then post-process each example.
  unbatched_outputs = utils.unbatch_preds(detached_outputs)
  return map(self._postprocess, unbatched_outputs)
def predict_minibatch(self, inputs): """Predict on a single minibatch of examples.""" tokens_and_offsets = [ retokenize.subtokenize(ex['tokens'], self.tokenizer.tokenize) for ex in inputs ] tokenized_texts, offsets = zip(*tokens_and_offsets) # Process to ids, add special tokens, and compute segment ids and masks. encoded_input = self.tokenizer.batch_encode_plus( list(tokenized_texts), is_split_into_words=True, return_tensors='tf', add_special_tokens=True, max_length=self.max_seq_length, padding='longest', truncation='longest_first') out: transformers.modeling_tf_outputs.TFMaskedLMOutput = \ self.model(encoded_input) batched_outputs = { 'input_ids': encoded_input['input_ids'].numpy(), 'ntok': tf.reduce_sum(encoded_input['attention_mask'], axis=1).numpy(), 'top_layer_embs': out.hidden_states[-1].numpy(), # last layer, all tokens } # List of dicts, one per example. unbatched_outputs = list(utils.unbatch_preds(batched_outputs)) # Postprocess to remove padding and add offsets. ret = [self._postprocess(ubo) for ubo in unbatched_outputs] for preds, offset_indices in zip(ret, offsets): preds['offsets'] = offset_indices return ret
def predict_minibatch(self, inputs): """Predict on a single minibatch of examples.""" # If input has a 'tokens' field, use that. Otherwise tokenize the text. tokenized_texts = [ ex.get("tokens") or self.tokenizer.tokenize(ex["text"]) for ex in inputs ] encoded_input = batch_encode_pretokenized(self.tokenizer, tokenized_texts) # out.logits is a single tensor # <float32>[batch_size, num_tokens, vocab_size] # out.hidden_states is a list of num_layers + 1 tensors, each # <float32>[batch_size, num_tokens, h_dim] out: transformers.modeling_tf_outputs.TFMaskedLMOutput = \ self.model(encoded_input) batched_outputs = { "probas": tf.nn.softmax(out.logits, axis=-1).numpy(), "input_ids": encoded_input["input_ids"].numpy(), "ntok": tf.reduce_sum(encoded_input["attention_mask"], axis=1).numpy(), # last layer, first token "cls_emb": out.hidden_states[-1][:, 0].numpy(), } # List of dicts, one per example. unbatched_outputs = utils.unbatch_preds(batched_outputs) # Postprocess to remove padding and decode predictions. return map(self._postprocess, unbatched_outputs)
def predict_minibatch(self, inputs):
  # Preprocess to ids and masks, and make the input batch.
  encoded_input = self.tokenizer.batch_encode_plus(
      [ex["sentence"] for ex in inputs],
      return_tensors="tf",
      add_special_tokens=True,
      max_length=128,
      pad_to_max_length=True)
  # Run a forward pass.
  logits, embs, unused_attentions = self.model(encoded_input, training=False)
  # Post-process outputs.
  batched_outputs = {
      "probas": tf.nn.softmax(logits, axis=-1).numpy(),
      "input_ids": encoded_input["input_ids"].numpy(),
      "ntok": tf.reduce_sum(encoded_input["attention_mask"], axis=1).numpy(),
      "cls_emb": embs[-1][:, 0].numpy(),  # last layer, first token
  }
  # Unbatch outputs so we get one record per input example.
  for output in utils.unbatch_preds(batched_outputs):
    ntok = output.pop("ntok")
    output["tokens"] = self.tokenizer.convert_ids_to_tokens(
        output.pop("input_ids")[1:ntok - 1])
    yield output
def predict_minibatch(self, inputs):
  # Preprocess to ids and masks, and make the input batch.
  encoded_input = self.tokenizer.batch_encode_plus(
      [ex["sentence"] for ex in inputs],
      return_tensors="tf",
      add_special_tokens=True,
      max_length=128,
      padding="longest",
      truncation="longest_first")
  # Run a forward pass.
  out: transformers.modeling_tf_outputs.TFSequenceClassifierOutput = \
      self.model(encoded_input, training=False)
  # Post-process outputs.
  batched_outputs = {
      "probas": tf.nn.softmax(out.logits, axis=-1),
      "input_ids": encoded_input["input_ids"],
      "ntok": tf.reduce_sum(encoded_input["attention_mask"], axis=1),
      "cls_emb": out.hidden_states[-1][:, 0],  # last layer, first token
  }
  # Return as NumPy for further processing.
  detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()}
  # Unbatch outputs so we get one record per input example.
  for output in utils.unbatch_preds(detached_outputs):
    ntok = output.pop("ntok")
    output["tokens"] = self.tokenizer.convert_ids_to_tokens(
        output.pop("input_ids")[1:ntok - 1])
    yield output
def predict_minibatch(self, inputs):
  # Preprocess to ids and masks, and make the input batch.
  encoded_input = self.tokenizer.batch_encode_plus(
      [ex["sentence"] for ex in inputs],
      return_tensors="pt",
      add_special_tokens=True,
      max_length=128,
      padding="longest",
      truncation="longest_first")
  # Check and send to cuda (GPU) if available
  if torch.cuda.is_available():
    self.model.cuda()
    for tensor in encoded_input:
      encoded_input[tensor] = encoded_input[tensor].cuda()
  # Run a forward pass.
  with torch.no_grad():  # remove this if you need gradients.
    out: transformers.modeling_outputs.SequenceClassifierOutput = \
        self.model(**encoded_input)
  # Post-process outputs.
  batched_outputs = {
      "probas": torch.nn.functional.softmax(out.logits, dim=-1),
      "input_ids": encoded_input["input_ids"],
      "ntok": torch.sum(encoded_input["attention_mask"], dim=1),
      "cls_emb": out.hidden_states[-1][:, 0],  # last layer, first token
  }
  # Return as NumPy for further processing.
  detached_outputs = {k: v.cpu().numpy() for k, v in batched_outputs.items()}
  # Unbatch outputs so we get one record per input example.
  for output in utils.unbatch_preds(detached_outputs):
    ntok = output.pop("ntok")
    output["tokens"] = self.tokenizer.convert_ids_to_tokens(
        output.pop("input_ids")[1:ntok - 1])
    yield output
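# Example usage of the PyTorch classifier wrapper above (a sketch: the class
# name, label set, and example sentences are illustrative assumptions, not
# part of the snippet):
#   model = SentenceClassifierWrapper(...)  # any class exposing this method
#   examples = [{"sentence": "a great movie"}, {"sentence": "a dull movie"}]
#   for pred in model.predict_minibatch(examples):
#     print(pred["tokens"], pred["probas"])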
def predict_minibatch(self, inputs, config=None):
  """Predict on a single minibatch of examples."""
  # If input has a 'tokens' field, use that. Otherwise tokenize the text.
  tokenized_texts = [
      ex.get("tokens") or self.tokenizer.tokenize(ex["text"]) for ex in inputs
  ]
  # Process to ids, add special tokens, and compute segment ids and masks.
  encoded_input = self.tokenizer.batch_encode_plus(
      tokenized_texts,
      is_pretokenized=True,
      return_tensors="tf",
      add_special_tokens=True,
      max_length=self.max_seq_length,
      pad_to_max_length=True)
  # We have to set max_length explicitly above so that
  # max_tokens <= model_max_length, in order to avoid indexing errors. But
  # the combination of max_length=<integer> and pad_to_max_length=True means
  # that if the max is < model_max_length, we end up with extra padding.
  # The lines below strip this off.
  # TODO(lit-dev): submit a PR to make this possible with tokenizer options?
  max_tokens = tf.reduce_max(
      tf.reduce_sum(encoded_input["attention_mask"], axis=1))
  encoded_input = {k: v[:, :max_tokens] for k, v in encoded_input.items()}
  # logits is a single tensor
  #    <float32>[batch_size, num_tokens, vocab_size]
  # embs is a list of num_layers + 1 tensors, each
  #    <float32>[batch_size, num_tokens, h_dim]
  # attentions is a list of num_layers tensors, each
  #    <float32>[batch_size, num_heads, num_tokens, num_tokens]
  logits, embs, unused_attentions = self.model(encoded_input)
  batched_outputs = {
      "probas": tf.nn.softmax(logits, axis=-1).numpy(),
      "input_ids": encoded_input["input_ids"].numpy(),
      "ntok": tf.reduce_sum(encoded_input["attention_mask"], axis=1).numpy(),
      "cls_emb": embs[-1][:, 0].numpy(),  # last layer, first token
  }
  # List of dicts, one per example.
  unbatched_outputs = utils.unbatch_preds(batched_outputs)
  # Postprocess to remove padding and decode predictions.
  return map(self._postprocess, unbatched_outputs)
def predict_minibatch(self, inputs, config=None):
  """Predict on a single minibatch of examples."""
  # Preprocess inputs.
  texts = [ex["text"] for ex in inputs]
  encoded_inputs = self.tokenizer.batch_encode_plus(
      texts,
      return_tensors="tf",
      add_special_tokens=True,
      add_prefix_space=True,
      pad_to_max_length=True)
  # Get the predictions.
  batched_outputs = self._pred(encoded_inputs)
  # Convert to numpy for post-processing.
  detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()}
  # Split up batched outputs, then post-process each example.
  unbatched_outputs = utils.unbatch_preds(detached_outputs)
  return map(self._postprocess, unbatched_outputs)
def predict_minibatch(self, inputs, config=None):
  """Predict on a single minibatch of examples."""
  tokens_and_offsets = [
      retokenize.subtokenize(ex['tokens'], self.tokenizer.tokenize)
      for ex in inputs
  ]
  tokenized_texts, offsets = zip(*tokens_and_offsets)
  # Process to ids, add special tokens, and compute segment ids and masks.
  encoded_input = self.tokenizer.batch_encode_plus(
      tokenized_texts,
      is_pretokenized=True,
      return_tensors='tf',
      add_special_tokens=True,
      max_length=self.max_seq_length,
      pad_to_max_length=True)
  # We have to set max_length explicitly above so that
  # max_tokens <= model_max_length, in order to avoid indexing errors. But
  # the combination of max_length=<integer> and pad_to_max_length=True means
  # that if the max is < model_max_length, we end up with extra padding.
  # The lines below strip this off.
  # TODO(lit-dev): submit a PR to make this possible with tokenizer options?
  max_tokens = tf.reduce_max(
      tf.reduce_sum(encoded_input['attention_mask'], axis=1))
  encoded_input = {k: v[:, :max_tokens] for k, v in encoded_input.items()}
  # logits is a single tensor
  #    <float32>[batch_size, num_tokens, vocab_size]
  # embs is a list of num_layers + 1 tensors, each
  #    <float32>[batch_size, num_tokens, h_dim]
  unused_logits, embs = self.model(encoded_input)
  batched_outputs = {
      'input_ids': encoded_input['input_ids'].numpy(),
      'ntok': tf.reduce_sum(encoded_input['attention_mask'], axis=1).numpy(),
      'top_layer_embs': embs[-1].numpy(),  # last layer, all tokens
  }
  # List of dicts, one per example.
  unbatched_outputs = list(utils.unbatch_preds(batched_outputs))
  # Postprocess to remove padding and add offsets.
  ret = [self._postprocess(ubo) for ubo in unbatched_outputs]
  for preds, offset_indices in zip(ret, offsets):
    preds['offsets'] = offset_indices
  return ret
def predict_minibatch(self, inputs: Iterable[JsonDict]):
  # Use watch_accessed_variables to save memory by having the tape do nothing
  # if we don't need gradients.
  with tf.GradientTape(
      watch_accessed_variables=self.config.compute_grads) as tape:
    encoded_input = self._preprocess(inputs)
    logits, embs, attentions = self.model(encoded_input, training=False)
    batched_outputs = {
        "input_ids": encoded_input["input_ids"],
        "ntok": tf.reduce_sum(encoded_input["attention_mask"], axis=1),
        "cls_emb": embs[-1][:, 0],  # last layer, first token
    }
    assert len(attentions) == self.model.config.num_hidden_layers
    for i, layer_attention in enumerate(attentions):
      batched_outputs[f"layer_{i}/attention"] = layer_attention
    if self.is_regression:
      # <tf.float32>[batch_size]
      batched_outputs["score"] = tf.squeeze(logits, axis=-1)
      scalar_pred_for_gradients = batched_outputs["score"]
    else:
      # <tf.float32>[batch_size, num_labels]
      batched_outputs["probas"] = tf.nn.softmax(logits, axis=-1)
      # <tf.float32>[batch_size]
      scalar_pred_for_gradients = tf.reduce_max(
          batched_outputs["probas"], axis=-1)
  # Request gradients after the tape is run.
  # Note: embs[0] includes position and segment encodings, as well as subword
  # embeddings.
  if self.config.compute_grads:
    # <tf.float32>[batch_size, num_tokens, emb_dim]
    batched_outputs["input_emb_grad"] = tape.gradient(
        scalar_pred_for_gradients, embs[0])
  detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()}
  # Sequence of dicts, one per example.
  unbatched_outputs = utils.unbatch_preds(detached_outputs)
  return map(self._postprocess, unbatched_outputs)
def predict_minibatch(self, inputs): """Run model on a single batch. Args: inputs: List[Dict] with fields as described by input_spec() Returns: outputs: List[Dict] with fields as described by output_spec() """ # Text as sequence of sentencepiece ID"s. encoded_inputs = self._encode_texts( [ex["input_text"] for ex in inputs]) encoded_targets = self._encode_texts( [ex.get("target_text", "") for ex in inputs]) ## # Force-decode on target text, and also get encoder embs and attention. batched_outputs = self._force_decode(encoded_inputs, encoded_targets) # Get the conditional generation from the model. # Workaround for output_hidden not being compatible with generate. # See https://github.com/huggingface/transformers/issues/8361 self.model.config.output_hidden_states = False generated_ids = self.model.generate( encoded_inputs.input_ids, num_beams=self.config.beam_size, attention_mask=encoded_inputs.attention_mask, max_length=self.config.max_gen_length, num_return_sequences=self.config.num_to_generate) # [batch_size*num_return_sequences, num_steps] # -> [batch_size, num_return_sequences, num_steps] batched_outputs["generated_ids"] = tf.reshape( generated_ids, [-1, self.config.num_to_generate, generated_ids.shape[-1]]) self.model.config.output_hidden_states = True # Convert to numpy for post-processing. detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()} # Split up batched outputs, then post-process each example. unbatched_outputs = utils.unbatch_preds(detached_outputs) return list(map(self._postprocess, unbatched_outputs))
def predict_minibatch(self, inputs):
  # Preprocess to ids and masks, and make the input batch.
  encoded_input = self.tokenizer.batch_encode_plus(
      [ex["sentence"] for ex in inputs],
      return_tensors="pt",
      add_special_tokens=True,
      max_length=128,
      padding="longest",
      truncation="longest_first")
  # Check and send to cuda (GPU) if available
  if torch.cuda.is_available():
    self.model.cuda()
    for tensor in encoded_input:
      encoded_input[tensor] = encoded_input[tensor].cuda()
  # Run a forward pass.
  with torch.set_grad_enabled(self.compute_grads):
    out: transformers.modeling_outputs.SequenceClassifierOutput = \
        self.model(**encoded_input)
  # Post-process outputs.
  batched_outputs = {
      "probas": torch.nn.functional.softmax(out.logits, dim=-1),
      "input_ids": encoded_input["input_ids"],
      "ntok": torch.sum(encoded_input["attention_mask"], dim=1),
      "cls_emb": out.hidden_states[-1][:, 0],  # last layer, first token
  }
  # Add attention layers to batched_outputs
  assert len(out.attentions) == self.model.config.num_hidden_layers
  for i, layer_attention in enumerate(out.attentions):
    batched_outputs[f"layer_{i}/attention"] = layer_attention
  # Request gradients after the forward pass.
  # Note: hidden_states[0] includes position and segment encodings, as well
  # as subword embeddings.
  if self.compute_grads:
    scalar_pred_for_gradients = torch.max(
        batched_outputs["probas"], dim=1, keepdim=False, out=None)[0]
    # <torch.float32>[batch_size, num_tokens, emb_dim]
    batched_outputs["input_emb_grad"] = torch.autograd.grad(
        scalar_pred_for_gradients,
        out.hidden_states[0],
        grad_outputs=torch.ones_like(scalar_pred_for_gradients))[0]
  # Return as NumPy for further processing.
  detached_outputs = {
      k: v.cpu().detach().numpy() for k, v in batched_outputs.items()
  }
  # Unbatch outputs so we get one record per input example.
  for output in utils.unbatch_preds(detached_outputs):
    ntok = output.pop("ntok")
    output["tokens"] = self.tokenizer.convert_ids_to_tokens(
        output.pop("input_ids")[:ntok])
    # set token gradients
    if self.compute_grads:
      output["token_grad_sentence"] = output["input_emb_grad"][:ntok]
    # Process attention.
    for key in output:
      if not re.match(r"layer_(\d+)/attention", key):
        continue
      # Select only real tokens, since most of this matrix is padding.
      # <float32>[num_heads, max_seq_length, max_seq_length]
      # -> <float32>[num_heads, num_tokens, num_tokens]
      output[key] = output[key][:, :ntok, :ntok].transpose((0, 2, 1))
      # Make a copy of this array to avoid memory leaks, since NumPy otherwise
      # keeps a pointer around that prevents the source array from being GCed.
      output[key] = output[key].copy()
    yield output
def predict_minibatch(self, inputs): """Make predictions for the given batch of inputs.""" # Preprocess to ids and masks, and make the input batch. encoded_input = self.sentiment_model.tokenize([inp["tweet"] for inp in inputs]) # Check and send to cuda (GPU) if available if torch.cuda.is_available(): self.model.cuda() for tensor in encoded_input: encoded_input[tensor] = encoded_input[tensor].cuda() # Run a forward pass. with torch.set_grad_enabled(self.compute_grads): logits, embs, unused_attentions = self.model(**encoded_input).values() # Post-process outputs. batched_outputs = { "probas": softmax(logits, dim=-1), "input_ids": encoded_input["input_ids"], "ntok": torch.sum(encoded_input["attention_mask"], dim=1), "cls_emb": embs[-1][:, 0], # last layer, first token (is the cls token that's used for classification) } # Add attention layers to batched_outputs for i, layer_attention in enumerate(unused_attentions): batched_outputs[f"layer_{i}/attention"] = layer_attention # Request gradients after the forward pass. # Note: embs[0] includes position and segment encodings, as well as sub-word embeddings. if self.compute_grads: # <torch.float32>[batch_size, num_tokens, emb_dim] scalar_pred_for_gradients = torch.max( batched_outputs["probas"], dim=1, keepdim=False, out=None, )[0] batched_outputs["input_emb_grad"] = torch.autograd.grad( scalar_pred_for_gradients, embs[0], grad_outputs=torch.ones_like(scalar_pred_for_gradients), )[0] # Return as NumPy for further processing. detached_outputs = {k: v.cpu().detach().numpy() for k, v in batched_outputs.items()} # Unbatch outputs so we get one record per input example. for output in utils.unbatch_preds(detached_outputs): ntok = output.pop("ntok") output["tokens"] = self.tokenizer.convert_ids_to_tokens( output.pop("input_ids")[:ntok]) # set token gradients if self.compute_grads: output["token_grad_sentence"] = output["input_emb_grad"][:ntok] # Process attention. for key in output: if not re.match(r"layer_(\d+)/attention", key): continue # Select only real tokens, since most of this matrix is padding. # <float32>[num_heads, max_seq_length, max_seq_length] # -> <float32>[num_heads, num_tokens, num_tokens] output[key] = output[key][:, :ntok, :ntok].transpose((0, 2, 1)) # Make a copy of this array to avoid memory leaks, since NumPy otherwise # keeps a pointer around that prevents the source array from being GCed. output[key] = output[key].copy() yield output
def predict_minibatch(self, inputs: Iterable[JsonDict]):
  # Use watch_accessed_variables to save memory by having the tape do nothing
  # if we don't need gradients.
  with tf.GradientTape(
      watch_accessed_variables=self.config.compute_grads) as tape:
    encoded_input = self._preprocess(inputs)

    # Gathers word embeddings from BERT model embedding layer using input ids
    # of the tokens.
    input_ids = encoded_input["input_ids"]
    word_embeddings = self.model.bert.embeddings.word_embeddings
    # <tf.float32>[batch_size, num_tokens, emb_size]
    input_embs = tf.gather(word_embeddings, input_ids)

    # Scatter in any passed in embeddings.
    # <tf.float32>[batch_size, num_tokens, emb_size]
    input_embs = self.scatter_all_embeddings(inputs, input_embs)

    tape.watch(input_embs)  # Watch input_embs for gradient calculation.

    model_inputs = encoded_input.copy()
    model_inputs["input_ids"] = None
    out: transformers.modeling_tf_outputs.TFSequenceClassifierOutput = \
        self.model(model_inputs,
                   inputs_embeds=input_embs,
                   training=False,
                   output_hidden_states=True,
                   output_attentions=True,
                   return_dict=True)

    batched_outputs = {
        "input_ids": encoded_input["input_ids"],
        "ntok": tf.reduce_sum(encoded_input["attention_mask"], axis=1),
        "cls_emb": out.hidden_states[-1][:, 0],  # last layer, first token
        "input_embs": input_embs,
    }

    # First entry is embeddings, then output from each transformer layer.
    assert len(out.hidden_states) == self.model.config.num_hidden_layers + 1
    # <float32>[batch_size, num_tokens, 1]
    token_mask = tf.expand_dims(
        tf.cast(encoded_input["attention_mask"], tf.float32), axis=2)
    # <float32>[batch_size, 1]
    denom = tf.reduce_sum(token_mask, axis=1)
    for i, layer_output in enumerate(out.hidden_states):
      # layer_output is <float32>[batch_size, num_tokens, emb_dim]
      # average over tokens to get <float32>[batch_size, emb_dim]
      batched_outputs[f"layer_{i}/avg_emb"] = tf.reduce_sum(
          layer_output * token_mask, axis=1) / denom

    assert len(out.attentions) == self.model.config.num_hidden_layers
    for i, layer_attention in enumerate(out.attentions):
      batched_outputs[f"layer_{i+1}/attention"] = layer_attention

    if self.is_regression:
      # <tf.float32>[batch_size]
      batched_outputs["score"] = tf.squeeze(out.logits, axis=-1)
      scalar_pred_for_gradients = batched_outputs["score"]
    else:
      # <tf.float32>[batch_size, num_labels]
      batched_outputs["probas"] = tf.nn.softmax(out.logits, axis=-1)

      # If a class for the gradients has been specified in the input,
      # calculate gradients for that class. Otherwise, calculate gradients for
      # the arg_max class.
      arg_max = tf.math.argmax(batched_outputs["probas"], axis=-1).numpy()
      grad_classes = [
          ex.get("grad_class", arg_max[i]) for (i, ex) in enumerate(inputs)
      ]
      # Convert the class names to indices if needed.
      grad_classes = [
          self.config.labels.index(label) if isinstance(label, str) else label
          for label in grad_classes
      ]
      gather_indices = list(enumerate(grad_classes))
      # <tf.float32>[batch_size]
      scalar_pred_for_gradients = tf.gather_nd(batched_outputs["probas"],
                                               gather_indices)
      if self.config.compute_grads:
        batched_outputs["grad_class"] = tf.convert_to_tensor(grad_classes)

  # Request gradients after the tape is run.
  # Note: embs[0] includes position and segment encodings, as well as subword
  # embeddings.
  if self.config.compute_grads:
    # <tf.float32>[batch_size, num_tokens, emb_dim]
    batched_outputs["input_emb_grad"] = tape.gradient(
        scalar_pred_for_gradients, input_embs)

  detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()}
  # Sequence of dicts, one per example.
  unbatched_outputs = utils.unbatch_preds(detached_outputs)
  return map(self._postprocess, unbatched_outputs)
def predict_minibatch(self, inputs):
  features = self._make_feature_columns(inputs)
  probas = self.model(features)  # <tf.float32>[batch_size, 1]
  preds = {'proba': tf.squeeze(probas, axis=-1).numpy()}
  return list(utils.unbatch_preds(preds))
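# To run any of these wrappers over a full dataset, examples are typically
# chunked into fixed-size minibatches first. A minimal driver sketch (the
# batch size and the model object are assumptions, not part of the snippets
# above):
def predict_all_sketch(model, examples, batch_size=16):
  """Yield per-example predictions by calling predict_minibatch on chunks."""
  for start in range(0, len(examples), batch_size):
    minibatch = examples[start:start + batch_size]
    for prediction in model.predict_minibatch(minibatch):
      yield prediction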