def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             contextualizer: Seq2SeqEncoder,
             layer_norm: Optional[MaskedLayerNorm] = None,
             dropout: float = None,
             loss_scale: Union[float, str] = 1.0,
             remove_bos_eos: bool = True) -> None:
    super().__init__(vocab)
    self._text_field_embedder = text_field_embedder
    self._layer_norm = layer_norm or (lambda x: x)

    if not contextualizer.is_bidirectional():
        raise ConfigurationError("contextualizer must be bidirectional")

    self._contextualizer = contextualizer
    # The dimension for making predictions just in the forward
    # (or backward) direction.
    self._forward_dim = contextualizer.get_output_dim() // 2

    # TODO(joelgrus): Allow SampledSoftmaxLoss here by configuration
    self._softmax_loss = _SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                      embedding_dim=self._forward_dim)

    self.register_buffer('_last_average_loss', torch.zeros(1))

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    self._loss_scale = loss_scale
    self._remove_bos_eos = remove_bos_eos
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    contextualizer: Seq2SeqEncoder,
    dropout: float = None,
    num_samples: int = None,
    sparse_embeddings: bool = False,
    bidirectional: bool = False,
    initializer: InitializerApplicator = None,
    **kwargs,
) -> None:
    super().__init__(vocab, **kwargs)
    self._text_field_embedder = text_field_embedder

    if contextualizer.is_bidirectional() is not bidirectional:
        raise ConfigurationError(
            "Bidirectionality of contextualizer must match bidirectionality of "
            "language model. "
            f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
            f"language model bidirectional: {bidirectional}"
        )

    self._contextualizer = contextualizer
    self._bidirectional = bidirectional

    # The dimension for making predictions just in the forward
    # (or backward) direction.
    if self._bidirectional:
        self._forward_dim = contextualizer.get_output_dim() // 2
    else:
        self._forward_dim = contextualizer.get_output_dim()

    if num_samples is not None:
        self._softmax_loss = SampledSoftmaxLoss(
            num_words=vocab.get_vocab_size("transactions"),
            embedding_dim=self._forward_dim,
            num_samples=num_samples,
            sparse=sparse_embeddings,
        )
    else:
        self._softmax_loss = SoftmaxLoss(
            num_words=vocab.get_vocab_size("transactions"),
            embedding_dim=self._forward_dim,
        )

    # This buffer is now unused and exists only for backwards compatibility reasons.
    self.register_buffer("_last_average_loss", torch.zeros(1))

    self._perplexity = Perplexity()

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    if initializer is not None:
        initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             contextualizer: Seq2SeqEncoder,
             dropout: float = None,
             loss_scale: Union[float, str] = 1.0,
             num_samples: int = None,
             sparse_embeddings: bool = False,
             bidirectional: bool = False,
             initializer: InitializerApplicator = None) -> None:
    super().__init__(vocab)
    self._text_field_embedder = text_field_embedder

    if contextualizer.is_bidirectional() is not bidirectional:
        raise ConfigurationError(
            "Bidirectionality of contextualizer must match bidirectionality of "
            "language model. "
            f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
            f"language model bidirectional: {bidirectional}")

    self._contextualizer = contextualizer
    self._bidirectional = bidirectional

    # The dimension for making predictions just in the forward
    # (or backward) direction.
    if self._bidirectional:
        self._forward_dim = contextualizer.get_output_dim() // 2
    else:
        self._forward_dim = contextualizer.get_output_dim()

    # TODO(joelgrus): more sampled softmax configuration options, as needed.
    if num_samples is not None:
        self._softmax_loss = SampledSoftmaxLoss(num_words=vocab.get_vocab_size(),
                                                embedding_dim=self._forward_dim,
                                                num_samples=num_samples,
                                                sparse=sparse_embeddings)
    else:
        self._softmax_loss = _SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                          embedding_dim=self._forward_dim)

    # TODO(brendanr): Output perplexity here. e^loss
    self.register_buffer('_last_average_loss', torch.zeros(1))

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    self._loss_scale = loss_scale

    if initializer is not None:
        initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             contextualizer: Seq2SeqEncoder,
             dropout: float = None,
             num_samples: int = None,
             sparse_embeddings: bool = False,
             bidirectional: bool = False,
             initializer: InitializerApplicator = None) -> None:
    super().__init__(vocab)
    self._text_field_embedder = text_field_embedder

    if contextualizer.is_bidirectional() is not bidirectional:
        raise ConfigurationError(
            "Bidirectionality of contextualizer must match bidirectionality of "
            "language model. "
            f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
            f"language model bidirectional: {bidirectional}")

    self._contextualizer = contextualizer
    self._bidirectional = bidirectional

    # The dimension for making predictions just in the forward
    # (or backward) direction.
    if self._bidirectional:
        self._forward_dim = contextualizer.get_output_dim() // 2
    else:
        self._forward_dim = contextualizer.get_output_dim()

    # TODO(joelgrus): more sampled softmax configuration options, as needed.
    if num_samples is not None:
        self._softmax_loss = SampledSoftmaxLoss(num_words=vocab.get_vocab_size(),
                                                embedding_dim=self._forward_dim,
                                                num_samples=num_samples,
                                                sparse=sparse_embeddings)
    else:
        self._softmax_loss = _SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                          embedding_dim=self._forward_dim)

    # TODO(brendanr): Output perplexity here. e^loss
    self.register_buffer('_last_average_loss', torch.zeros(1))

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    if initializer is not None:
        initializer(self)
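# The `// 2` branches in the constructors above assume the contextualizer reports
# its total output dimension and, when bidirectional, splits it evenly between the
# forward and backward directions. A minimal sketch of that arithmetic, assuming an
# AllenNLP-style PytorchSeq2SeqWrapper around a bidirectional LSTM (the sizes below
# are illustrative, not taken from the original source):
import torch
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper

contextualizer = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(input_size=128, hidden_size=256, batch_first=True, bidirectional=True)
)
assert contextualizer.is_bidirectional()
# Forward and backward states are concatenated, so the output dim is 2 * 256.
assert contextualizer.get_output_dim() == 512
# Predictions in a single direction therefore use half of the output dim,
# mirroring `self._forward_dim = contextualizer.get_output_dim() // 2`.
forward_dim = contextualizer.get_output_dim() // 2
assert forward_dim == 256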
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    contextualizer: Seq2SeqEncoder,
    hparams: Dict,
) -> None:
    super().__init__(vocab)
    self.text_field_embedder = text_field_embedder
    self.contextualizer = contextualizer
    self.bidirectional = contextualizer.is_bidirectional()

    if self.bidirectional:
        self.forward_dim = contextualizer.get_output_dim() // 2
    else:
        self.forward_dim = contextualizer.get_output_dim()

    dropout = hparams["dropout"]
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = lambda x: x

    self.hidden2chord = torch.nn.Sequential(
        torch.nn.Linear(self.forward_dim, hparams["fc_hidden_dim"]),
        torch.nn.ReLU(True),
        torch.nn.Linear(hparams["fc_hidden_dim"], vocab.get_vocab_size()),
    )

    self.perplexity = PerplexityCustom()
    self.accuracy = CategoricalAccuracy()
    self.real_loss = Average()

    self.similarity_matrix = hparams["similarity_matrix"]
    self.training_mode = hparams["training_mode"]
    self.T_initial = hparams["T_initial"]
    self.T = self.T_initial
    self.decay_rate = hparams["decay_rate"]
    self.batches_per_epoch = hparams["batches_per_epoch"]
    self.epoch = 0
    self.batch_counter = 0
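# The constructor above pulls all of its configuration from a plain `hparams` dict.
# A hedged sketch of such a dict: the keys come from the code above, every value is
# an illustrative assumption.
import torch

example_hparams = {
    "dropout": 0.2,                       # wrapped in torch.nn.Dropout when truthy
    "fc_hidden_dim": 512,                 # hidden size of the hidden2chord MLP
    "similarity_matrix": torch.eye(100),  # assumed: label-by-label similarity matrix
    "training_mode": "softmax",           # assumed: selects the training objective
    "T_initial": 1.0,                     # initial temperature
    "decay_rate": 0.95,                   # temperature decay rate
    "batches_per_epoch": 1000,            # used to advance the epoch counter
}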
def embed_and_encode_ques_contexts(
    text_field_embedder: TextFieldEmbedder,
    qencoder: Seq2SeqEncoder,
    batch_size: int,
    question: Dict[str, torch.LongTensor],
    contexts: Dict[str, torch.LongTensor],
):
    """
    Embed and encode the question and the contexts.

    Parameters
    ----------
    text_field_embedder: ``TextFieldEmbedder``
    qencoder: ``Seq2SeqEncoder``
    question: Dict[str, torch.LongTensor]
        Output of a TextField. Should yield tensors of shape (B, ques_length, D)
    contexts: Dict[str, torch.LongTensor]
        Output of a TextField. Should yield tensors of shape (B, num_contexts, context_length, D)

    Returns
    -------
    embedded_questions: List[(ques_length, D)]
        Batch-sized list of embedded questions from the text_field_embedder
    encoded_questions: List[(ques_length, D)]
        Batch-sized list of encoded questions from the qencoder
    questions_mask: List[(ques_length)]
        Batch-sized list of question masks
    encoded_ques_tensor: Shape: (batch_size, ques_length, D)
        Output of the qencoder
    questions_mask_tensor: Shape: (batch_size, ques_length)
        Question mask as a tensor
    ques_encoded_final_state: Shape: (batch_size, D)
        For each question, the final state of the qencoder
    embedded_contexts: List[(num_contexts, context_length, D)]
        Batch-sized list of embedded contexts for each instance from the text_field_embedder
    contexts_mask: List[(num_contexts, context_length)]
        Batch-sized list of masks for each context in the instance
    """
    # Shape: (B, ques_length, D)
    embedded_questions_tensor = text_field_embedder(question)
    # Shape: (B, ques_length)
    questions_mask_tensor = allenutil.get_text_field_mask(question).float()

    embedded_questions = [embedded_questions_tensor[i] for i in range(batch_size)]
    questions_mask = [questions_mask_tensor[i] for i in range(batch_size)]

    # Shape: (B, ques_length, D)
    encoded_ques_tensor = qencoder(embedded_questions_tensor, questions_mask_tensor)
    # Shape: (B, D)
    ques_encoded_final_state = allenutil.get_final_encoder_states(
        encoded_ques_tensor, questions_mask_tensor, qencoder.is_bidirectional()
    )
    encoded_questions = [encoded_ques_tensor[i] for i in range(batch_size)]

    # # contexts is a (B, num_contexts, context_length, *) tensor
    # (tokenindexer, indexed_tensor) = next(iter(contexts.items()))
    # num_contexts = indexed_tensor.size()[1]
    # # Making a separate batched token_indexer dict for each context -- [{token_indexer: (C, T, *)}]
    # contexts_indices_list: List[Dict[str, torch.LongTensor]] = [{} for _ in range(batch_size)]
    # for token_indexer_name, token_indices_tensor in contexts.items():
    #     print(f"{token_indexer_name}: {token_indices_tensor.size()}")
    #     for i in range(batch_size):
    #         contexts_indices_list[i][token_indexer_name] = token_indices_tensor[i, ...]
    #
    # # Each tensor of shape (num_contexts, context_length, D)
    # embedded_contexts = []
    # contexts_mask = []
    # # Shape: (num_contexts, context_length, D)
    # for i in range(batch_size):
    #     embedded_contexts_i = text_field_embedder(contexts_indices_list[i])
    #     embedded_contexts.append(embedded_contexts_i)
    #     contexts_mask_i = allenutil.get_text_field_mask(contexts_indices_list[i]).float()
    #     contexts_mask.append(contexts_mask_i)

    embedded_contexts_tensor = text_field_embedder(contexts, num_wrapping_dims=1)
    contexts_mask_tensor = allenutil.get_text_field_mask(contexts, num_wrapping_dims=1).float()

    embedded_contexts = [embedded_contexts_tensor[i] for i in range(batch_size)]
    contexts_mask = [contexts_mask_tensor[i] for i in range(batch_size)]

    return (
        embedded_questions,
        encoded_questions,
        questions_mask,
        encoded_ques_tensor,
        questions_mask_tensor,
        ques_encoded_final_state,
        embedded_contexts,
        contexts_mask,
    )
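# A minimal usage sketch for embed_and_encode_ques_contexts, assuming the pre-1.0
# AllenNLP flat TextField dict format (a single "tokens" indexer) that the function
# above already relies on. All modules, sizes, and names below are illustrative
# assumptions, not part of the original source.
import torch
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper

batch_size, num_contexts, ques_len, context_len, vocab_size = 2, 3, 7, 11, 100

embedder = BasicTextFieldEmbedder(
    {"tokens": Embedding(num_embeddings=vocab_size, embedding_dim=16)}
)
qencoder = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(input_size=16, hidden_size=16, batch_first=True, bidirectional=True)
)

# Dummy TextField outputs; index 0 is treated as padding by the mask utilities.
question = {"tokens": torch.randint(1, vocab_size, (batch_size, ques_len))}
contexts = {"tokens": torch.randint(1, vocab_size, (batch_size, num_contexts, context_len))}

(embedded_questions, encoded_questions, questions_mask,
 encoded_ques_tensor, questions_mask_tensor, ques_encoded_final_state,
 embedded_contexts, contexts_mask) = embed_and_encode_ques_contexts(
    embedder, qencoder, batch_size, question, contexts
)

# encoded_ques_tensor: (batch_size, ques_len, 32) -- the LSTM is bidirectional.
# ques_encoded_final_state: (batch_size, 32).
# embedded_contexts[i]: (num_contexts, context_len, 16) for each instance i.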
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    contextualizer: Seq2SeqEncoder,
    aux_contextualizer: Seq2SeqEncoder,
    dropout: float = None,
    num_samples: int = None,
    sparse_embeddings: bool = False,
    bidirectional: bool = False,
    initializer: InitializerApplicator = None,
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder

    if contextualizer.is_bidirectional() is not bidirectional:
        raise ConfigurationError(
            "Bidirectionality of contextualizer must match bidirectionality of "
            "language model. "
            f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
            f"language model bidirectional: {bidirectional}")

    self._contextualizer_lang1 = aux_contextualizer
    self._contextualizer_lang2 = copy.deepcopy(aux_contextualizer)
    self._contextualizer = contextualizer
    self._bidirectional = bidirectional
    self._bidirectional_aux = aux_contextualizer.is_bidirectional()

    # The dimension for making predictions just in the forward
    # (or backward) direction.
    # Main contextualizer forward dim.
    if self._bidirectional:
        self._forward_dim = contextualizer.get_output_dim() // 2
    else:
        self._forward_dim = contextualizer.get_output_dim()

    # Auxiliary contextualizer forward dim.
    if self._bidirectional_aux:
        self._forward_dim_aux = aux_contextualizer.get_output_dim() // 2
    else:
        self._forward_dim_aux = aux_contextualizer.get_output_dim()

    # TODO(joelgrus): more sampled softmax configuration options, as needed.
    if num_samples is not None:
        self._lang1_softmax_loss = SampledSoftmaxLoss(
            num_words=vocab.get_vocab_size(),
            embedding_dim=self._forward_dim_aux,
            num_samples=num_samples,
            sparse=sparse_embeddings,
        )
        self._lang2_softmax_loss = SampledSoftmaxLoss(
            num_words=vocab.get_vocab_size(),
            embedding_dim=self._forward_dim_aux,
            num_samples=num_samples,
            sparse=sparse_embeddings,
        )
        self._cm_softmax_loss = SampledSoftmaxLoss(
            num_words=vocab.get_vocab_size(),
            embedding_dim=self._forward_dim,
            num_samples=num_samples,
            sparse=sparse_embeddings,
        )
    else:
        self._lang1_softmax_loss = _SoftmaxLoss(
            num_words=vocab.get_vocab_size(), embedding_dim=self._forward_dim_aux)
        self._lang2_softmax_loss = _SoftmaxLoss(
            num_words=vocab.get_vocab_size(), embedding_dim=self._forward_dim_aux)
        self._cm_softmax_loss = _SoftmaxLoss(
            num_words=vocab.get_vocab_size(), embedding_dim=self._forward_dim)

    # This buffer is now unused and exists only for backwards compatibility reasons.
    self.register_buffer("_last_average_loss", torch.zeros(1))

    self._lang1_perplexity = Perplexity()
    self._lang2_perplexity = Perplexity()
    self._cm_perplexity = Perplexity()

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    if initializer is not None:
        initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             contextualizer: Seq2SeqEncoder,
             labeler: Seq2SeqEncoder,
             projection_size: int,
             bidirectional: bool = False,
             use_hypothesis: bool = True,
             attention: str = "",  # "" - none / cosine / bilinear
             initializer: InitializerApplicator = None,
             classifier_dir="",
             del_perc_lambda=1,
             del_perc=0.3,
             del_metric_threshold=0.1,
             teacher_lambda=0.0,
             coverage_lambda=0.0,
             transition_lamb=0.0,
             gumbel=True,
             neutral_label="") -> None:
    super().__init__(vocab)
    self._text_field_embedder = text_field_embedder

    if contextualizer.is_bidirectional() is not bidirectional:
        raise ConfigurationError(
            "Bidirectionality of contextualizer must match bidirectionality of "
            "language model. "
            f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
            f"language model bidirectional: {bidirectional}")

    self.classifier_dir = classifier_dir
    self.classifier = None
    self.coverage_lambda = coverage_lambda
    self.del_perc_lambda = del_perc_lambda
    self.del_perc = del_perc
    self.teacher_lambda = teacher_lambda
    self.transition_lamb = transition_lamb
    self.gumbel = gumbel

    if classifier_dir != "":
        overrides = '{"model": {"dropout": 0, "output_feedforward": {"dropout": 0}}}'
        overrides = ""
        archive = load_archive(classifier_dir, overrides=overrides)
        self.classifier = archive.model

        # Freeze parameters
        for p in self.classifier.parameters():
            p.requires_grad = False

        # A hack that prevents allennlp from crashing when running extend on all submodules.
        def foo(*x, **y):
            return 1

        self.classifier._text_field_embedder.token_embedder_tokens.extend_vocab = foo
        self.classifier.eval()

        # Get the index of the neutral label.
        self.neutral_ind = self.classifier.vocab.get_token_index(neutral_label, 'labels')

    self.criterion = torch.nn.CrossEntropyLoss()

    self._contextualizer = contextualizer
    self._labeler = labeler
    self._bidirectional = bidirectional
    self.use_hypothesis = use_hypothesis
    self.attention = attention
    self.projection_size = projection_size

    # Hypothesis aggregation
    self.w_prem = torch.nn.Linear(contextualizer.get_output_dim(), projection_size)
    if use_hypothesis:
        self.w_hyp = torch.nn.Linear(contextualizer.get_output_dim(), projection_size)

    self._contextual_dim = contextualizer.get_output_dim()
    # The dimension for making predictions just in the forward
    # (or backward) direction.
    if self._bidirectional:
        self._forward_dim = self._contextual_dim // 2
    else:
        self._forward_dim = self._contextual_dim

    if self.attention:
        if self.attention == "cosine":
            self.attention_mat = CosineMatrixAttention()
        elif self.attention == "bilinear":
            self.attention_mat = BilinearMatrixAttention(self._forward_dim, self._forward_dim)
        else:
            raise ConfigurationError("Undefined attention type")

    self.mask_linear = torch.nn.Linear(self._labeler.get_output_dim(), 2)

    self._accuracy = CategoricalAccuracy()
    self._avg_perc_masked = Average()
    self._avg_transition = Average()
    self._acc_vs_del = AccuracyVSDeletion(del_threshold=del_metric_threshold)
    self._acc_plus_del = AccuracyVSDeletion(del_threshold=0, aggr="sum")
    self._f1_deletions = F1SequenceMeasure(positive_label=1)

    if initializer is not None:
        initializer(self)
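# The `attention` flag above selects between two AllenNLP matrix attentions.
# A small sketch of the shapes involved; the dimensions and tensors below are
# illustrative assumptions.
import torch
from allennlp.modules.matrix_attention import BilinearMatrixAttention, CosineMatrixAttention

forward_dim = 8
premise = torch.randn(2, 5, forward_dim)     # (batch, premise_len, forward_dim)
hypothesis = torch.randn(2, 4, forward_dim)  # (batch, hypothesis_len, forward_dim)

# "cosine": no learned parameters, normalized dot products between timesteps.
cosine_scores = CosineMatrixAttention()(premise, hypothesis)  # (2, 5, 4)

# "bilinear": a learned forward_dim x forward_dim weight, matching
# BilinearMatrixAttention(self._forward_dim, self._forward_dim) above.
bilinear = BilinearMatrixAttention(forward_dim, forward_dim)
bilinear_scores = bilinear(premise, hypothesis)  # (2, 5, 4)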