def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             contextualizer: Seq2SeqEncoder,
             layer_norm: Optional[MaskedLayerNorm] = None,
             dropout: float = None,
             loss_scale: Union[float, str] = 1.0,
             remove_bos_eos: bool = True) -> None:
    super().__init__(vocab)
    self._text_field_embedder = text_field_embedder
    self._layer_norm = layer_norm or (lambda x: x)

    if not contextualizer.is_bidirectional():
        raise ConfigurationError("contextualizer must be bidirectional")

    self._contextualizer = contextualizer
    # The dimension for making predictions just in the forward
    # (or backward) direction.
    self._forward_dim = contextualizer.get_output_dim() // 2

    # TODO(joelgrus): Allow SampledSoftmaxLoss here by configuration
    self._softmax_loss = _SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                      embedding_dim=self._forward_dim)

    self.register_buffer('_last_average_loss', torch.zeros(1))

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    self._loss_scale = loss_scale
    self._remove_bos_eos = remove_bos_eos

def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             vocab: Vocabulary,
             positive_label: int = 1) -> None:
    super().__init__(vocab)
    # We need the embeddings to convert word IDs to their vector representations.
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    # After converting a sequence of vectors to a single vector, we feed it into
    # a fully-connected linear layer to reduce the dimension to the total number of labels.
    self.linear = torch.nn.Linear(
        in_features=encoder.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    # Monitor the metrics: we use accuracy, as well as precision, recall, and F1
    # for the positive label (e.g., label 4, "very positive", in SST).
    self.accuracy = CategoricalAccuracy()
    self.f1_measure = F1Measure(positive_label)
    # We use cross-entropy loss because this is a classification task.
    # Note that PyTorch's CrossEntropyLoss combines log-softmax and negative
    # log-likelihood loss, which makes a separate softmax layer unnecessary.
    self.loss_function = torch.nn.CrossEntropyLoss()
    self.W = nn.Parameter(
        torch.zeros(size=(2 * encoder.get_output_dim(), 1)))
    nn.init.xavier_uniform_(self.W.data)
    self.LeakyReLU = torch.nn.LeakyReLU(0.1)

def __init__(self,
             word_embeddings: TextFieldEmbedder,
             text_encoder: Seq2SeqEncoder,
             relation_encoder: Seq2VecEncoder,
             vocab: Vocabulary,
             encoder_dropout: float = 0.5) -> None:
    # We have to pass the vocabulary to the constructor.
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    self.encoder_dropout = torch.nn.Dropout(p=encoder_dropout)

    self.text_encoder = text_encoder
    self.text_attn = LinearAttention(
        input_dim=text_encoder.get_output_dim())

    self.relation_encoder = relation_encoder
    self.relation_attn = BilinearAttention(
        vector_dim=text_encoder.get_output_dim(),
        matrix_dim=relation_encoder.get_output_dim())

    hidden_dim = (text_encoder.get_output_dim()
                  + relation_encoder.get_output_dim())
    self.output = torch.nn.Linear(in_features=hidden_dim, out_features=1)

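# A shape-contract sketch for the BilinearAttention used above -- a minimal
# sketch assuming allennlp's BilinearAttention; the concrete dimensions are
# illustrative assumptions, not values from the snippet. A (batch, vector_dim)
# query scored against a (batch, rows, matrix_dim) matrix yields (batch, rows)
# normalized weights, which is why vector_dim is tied to the text encoder and
# matrix_dim to the relation encoder.
import torch
from allennlp.modules.attention import BilinearAttention

attn = BilinearAttention(vector_dim=4, matrix_dim=6)
weights = attn(torch.randn(2, 4), torch.randn(2, 5, 6))  # (query, matrix)
assert weights.shape == (2, 5)  # one normalized weight per matrix row
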
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             contextualizer: Seq2SeqEncoder = None,
             dropout: float = 0.0,
             num_samples: int = None,
             sparse_embeddings: bool = False,
             bidirectional: bool = False,
             initializer=InitializerApplicator(),
             **kwargs) -> None:
    super().__init__(vocab, **kwargs)
    self._text_field_embedder = text_field_embedder
    self._contextualizer = contextualizer
    self._bidirectional = bidirectional

    if self._bidirectional:
        self._forward_dim = contextualizer.get_output_dim() // 2
    else:
        self._forward_dim = contextualizer.get_output_dim()

    self._softmax_loss = SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                     embedding_dim=self._forward_dim)
    self._perplexity = Perplexity()

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    if initializer is not None:
        initializer(self)

def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    contextualizer: Seq2SeqEncoder,
    dropout: float = None,
    num_samples: int = None,
    sparse_embeddings: bool = False,
    bidirectional: bool = False,
    initializer: InitializerApplicator = None,
    **kwargs,
) -> None:
    super().__init__(vocab, **kwargs)
    self._text_field_embedder = text_field_embedder

    if contextualizer.is_bidirectional() is not bidirectional:
        raise ConfigurationError(
            "Bidirectionality of contextualizer must match bidirectionality of "
            "language model. "
            f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
            f"language model bidirectional: {bidirectional}")

    self._contextualizer = contextualizer
    self._bidirectional = bidirectional

    # The dimension for making predictions just in the forward
    # (or backward) direction.
    if self._bidirectional:
        self._forward_dim = contextualizer.get_output_dim() // 2
    else:
        self._forward_dim = contextualizer.get_output_dim()

    if num_samples is not None:
        self._softmax_loss = SampledSoftmaxLoss(
            num_words=vocab.get_vocab_size("transactions"),
            embedding_dim=self._forward_dim,
            num_samples=num_samples,
            sparse=sparse_embeddings,
        )
    else:
        self._softmax_loss = SoftmaxLoss(
            num_words=vocab.get_vocab_size("transactions"),
            embedding_dim=self._forward_dim,
        )

    # This buffer is now unused and exists only for backwards compatibility reasons.
    self.register_buffer("_last_average_loss", torch.zeros(1))

    self._perplexity = Perplexity()

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    if initializer is not None:
        initializer(self)

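# A minimal sketch (assuming allennlp's PytorchSeq2SeqWrapper; the sizes are
# illustrative assumptions) of the forward-dimension arithmetic above: a
# bidirectional contextualizer reports hidden_size * 2 from get_output_dim(),
# so each direction predicts from get_output_dim() // 2 features.
import torch
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper

contextualizer = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(input_size=16, hidden_size=8,
                  batch_first=True, bidirectional=True))
assert contextualizer.is_bidirectional()
assert contextualizer.get_output_dim() == 16  # 8 per direction, concatenated
forward_dim = contextualizer.get_output_dim() // 2  # 8, as in __init__ above
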
def __init__(
    self,
    highway_encoder: Seq2SeqEncoder,
    transform_gate_encoder: Seq2SeqEncoder,
    carry_gate_encoder: Optional[Seq2SeqEncoder] = None,
    projection: bool = True,
) -> None:
    stateful = highway_encoder.stateful or transform_gate_encoder.stateful
    check_dimensions_match(
        highway_encoder.get_input_dim(),
        transform_gate_encoder.get_input_dim(),
        "highway_encoder input dim",
        "transform_gate_encoder input dim",
    )
    if carry_gate_encoder is not None:
        stateful = stateful or carry_gate_encoder.stateful
        check_dimensions_match(
            highway_encoder.get_input_dim(),
            carry_gate_encoder.get_input_dim(),
            "highway_encoder input dim",
            "carry_gate_encoder input dim",
        )
    super().__init__(stateful=stateful)
    self._input_dim = highway_encoder.get_input_dim()
    self._highway_encoder = highway_encoder
    self._transform_gate_encoder = transform_gate_encoder
    self._carry_gate_encoder = carry_gate_encoder
    self._highway_projection: Optional[torch.nn.Module] = None
    self._transform_gate_projection: Optional[torch.nn.Module] = None
    self._carry_gate_projection: Optional[torch.nn.Module] = None
    if projection:
        self._highway_projection = TimeDistributed(  # type: ignore
            torch.nn.Linear(
                highway_encoder.get_output_dim(),
                highway_encoder.get_input_dim(),
            ))
        self._transform_gate_projection = TimeDistributed(  # type: ignore
            torch.nn.Linear(
                transform_gate_encoder.get_output_dim(),
                transform_gate_encoder.get_input_dim(),
            ),
        )
        if carry_gate_encoder is not None:
            self._carry_gate_projection = TimeDistributed(  # type: ignore
                torch.nn.Linear(
                    carry_gate_encoder.get_output_dim(),
                    carry_gate_encoder.get_input_dim(),
                ),
            )
    else:
        assert highway_encoder.get_output_dim() in (self._input_dim, 1)
        assert transform_gate_encoder.get_output_dim() in (self._input_dim, 1)
        if carry_gate_encoder is not None:
            assert carry_gate_encoder.get_output_dim() in (self._input_dim, 1)

def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    self.dbg_ctr = 0
    self.word_embeddings = word_embeddings
    self.encoder = encoder.cuda(CUDA_DEVICE)
    self.hidden2tag = torch.nn.Linear(
        in_features=encoder.get_output_dim(),
        out_features=vocab.get_vocab_size('labels')).cuda(CUDA_DEVICE)
    self.accuracy = CategoricalAccuracy()
    self.blank_index = vocab.get_token_index("_")

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             contextualizer: Seq2SeqEncoder,
             dropout: float = None,
             loss_scale: Union[float, str] = 1.0,
             num_samples: int = None,
             sparse_embeddings: bool = False,
             bidirectional: bool = False,
             initializer: InitializerApplicator = None) -> None:
    super().__init__(vocab)
    self._text_field_embedder = text_field_embedder

    if contextualizer.is_bidirectional() is not bidirectional:
        raise ConfigurationError(
            "Bidirectionality of contextualizer must match bidirectionality of "
            "language model. "
            f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
            f"language model bidirectional: {bidirectional}")

    self._contextualizer = contextualizer
    self._bidirectional = bidirectional

    # The dimension for making predictions just in the forward
    # (or backward) direction.
    if self._bidirectional:
        self._forward_dim = contextualizer.get_output_dim() // 2
    else:
        self._forward_dim = contextualizer.get_output_dim()

    # TODO(joelgrus): more sampled softmax configuration options, as needed.
    if num_samples is not None:
        self._softmax_loss = SampledSoftmaxLoss(
            num_words=vocab.get_vocab_size(),
            embedding_dim=self._forward_dim,
            num_samples=num_samples,
            sparse=sparse_embeddings)
    else:
        self._softmax_loss = _SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                          embedding_dim=self._forward_dim)

    # TODO(brendanr): Output perplexity here. e^loss
    self.register_buffer('_last_average_loss', torch.zeros(1))

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    self._loss_scale = loss_scale

    if initializer is not None:
        initializer(self)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             contextualizer: Seq2SeqEncoder,
             dropout: float = None,
             num_samples: int = None,
             sparse_embeddings: bool = False,
             bidirectional: bool = False,
             initializer: InitializerApplicator = None) -> None:
    super().__init__(vocab)
    self._text_field_embedder = text_field_embedder

    if contextualizer.is_bidirectional() is not bidirectional:
        raise ConfigurationError(
            "Bidirectionality of contextualizer must match bidirectionality of "
            "language model. "
            f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
            f"language model bidirectional: {bidirectional}")

    self._contextualizer = contextualizer
    self._bidirectional = bidirectional

    # The dimension for making predictions just in the forward
    # (or backward) direction.
    if self._bidirectional:
        self._forward_dim = contextualizer.get_output_dim() // 2
    else:
        self._forward_dim = contextualizer.get_output_dim()

    # TODO(joelgrus): more sampled softmax configuration options, as needed.
    if num_samples is not None:
        self._softmax_loss = SampledSoftmaxLoss(num_words=vocab.get_vocab_size(),
                                                embedding_dim=self._forward_dim,
                                                num_samples=num_samples,
                                                sparse=sparse_embeddings)
    else:
        self._softmax_loss = _SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                          embedding_dim=self._forward_dim)

    # TODO(brendanr): Output perplexity here. e^loss
    self.register_buffer('_last_average_loss', torch.zeros(1))

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    if initializer is not None:
        initializer(self)

def __init__(self,
             #### The embedding layer is specified as an AllenNLP <code>TextFieldEmbedder</code>
             #### which represents a general way of turning tokens into tensors.
             #### (Here we know that we want to represent each unique word with a learned tensor,
             #### but using the general class allows us to easily experiment with different types
             #### of embeddings, for example <a href = "https://allennlp.org/elmo">ELMo</a>.)
             word_embeddings: TextFieldEmbedder,
             #### Similarly, the encoder is specified as a general <code>Seq2SeqEncoder</code>
             #### even though we know we want to use an LSTM. Again, this makes it easy to
             #### experiment with other sequence encoders, for example a Transformer.
             encoder: Seq2SeqEncoder,
             #### Every AllenNLP model also expects a <code>Vocabulary</code>,
             #### which contains the namespaced mappings of tokens to indices and labels to indices.
             vocab: Vocabulary) -> None:
    #### Notice that we have to pass the vocab to the base class constructor.
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    #### The feed forward layer is not passed in as a parameter, but is constructed by us.
    #### Notice that it looks at the encoder to find the correct input dimension and looks
    #### at the vocabulary (and, in particular, at the label -> index mapping) to find the correct output dimension.
    self.hidden2tag = torch.nn.Linear(
        in_features=encoder.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    #### The last thing to notice is that we also instantiate a
    #### <code>CategoricalAccuracy</code> metric, which we'll use to track accuracy
    #### during each training and validation epoch.
    self.accuracy = CategoricalAccuracy()

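# A hedged usage sketch mirroring the AllenNLP tutorial this constructor comes
# from; the hyperparameter values and the LstmTagger class name are
# illustrative assumptions, not part of the snippet above.
import torch
from allennlp.data.vocabulary import Vocabulary
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding

EMBEDDING_DIM = 6
HIDDEN_DIM = 6
vocab = Vocabulary()  # in practice, built from the dataset's instances
token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                            embedding_dim=EMBEDDING_DIM)
word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
lstm = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
model = LstmTagger(word_embeddings, lstm, vocab)  # the class defined above
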
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    self.vocab = vocab
    self.label_vocab = vocab.get_index_to_token_vocabulary(
        namespace='labels')
    inf_vec = torch.Tensor([float('-inf')] * encoder.get_input_dim())
    self.class_avgs = [
        inf_vec.clone() for i in range(len(self.label_vocab))
    ]
    self.accuracy = CategoricalAccuracy()
    self.debug = False
    if self.debug:
        print("===MODEL DEBUG===")
        print(
            "Number of embeddings:",
            self.word_embeddings._token_embedders['tokens'].num_embeddings)
        # print("Token embedders:", self.word_embeddings._token_embedders)
        # print("Embedding weights", self.word_embeddings._token_embedders['tokens'].weight)
        print("vocab:", vocab)
        print("===MODEL DEBUG===")

def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             dropout: float = 0.1,
             ff_dim: int = 100):
    super().__init__(vocab)
    self.embedder = embedder
    self.encoder = encoder
    assert self.embedder.get_output_dim() == self.encoder.get_input_dim()
    self.feedforward = FeedForward(
        encoder.get_output_dim(),
        1,
        hidden_dims=ff_dim,
        activations=Activation.by_name('relu')(),
        dropout=dropout)
    self.out = torch.nn.Linear(
        in_features=self.feedforward.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    self.crf = ConditionalRandomField(vocab.get_vocab_size('labels'))
    self.f1 = FBetaMeasure(average='micro')
    self.accuracy = CategoricalAccuracy()
    self.idx_to_label = vocab.get_index_to_token_vocabulary('labels')

def __init__(
    self,
    vocab: Vocabulary,
    embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    dropout: float = 0.2,
    decoder_hidden_dim: int = 128,
    decoder_ff_dim: int = 128,
    decoder_num_layers: int = 1,
    decoder_num_heads: int = 4,
    teacher_forcing: float = 1.0,
    num_teacher_forcing_steps: int = None,
    num_tags: int = 2,
    label_smoothing: float = None,
):
    super().__init__(vocab)
    # Teacher forcing is how often we choose to force the correct answer.
    self.embedder = embedder
    self.encoder = encoder
    self.laserdecoder = LaserDecoder(
        hidden_dim=decoder_hidden_dim,
        encoder_dim=encoder.get_output_dim(),
        num_layers=decoder_num_layers,
        ff_dim=decoder_ff_dim,
        num_heads=decoder_num_heads,
        num_classes=num_tags,
    )
    self.dropout = torch.nn.Dropout(dropout)
    self.accuracy = CategoricalAccuracy()
    self.f1 = F1Measure(1)
    self.teacher_forcing = teacher_forcing
    self.num_tf_steps = num_teacher_forcing_steps
    self.cur_tf_steps = 0
    self.label_smoothing = label_smoothing

def __init__(self,
             word_embeddings: TextFieldEmbedder,
             sentence_encoder: Seq2SeqEncoder,
             document_encoder: Seq2SeqEncoder,
             vocab: Vocabulary,
             encoder_dropout: float = 0.0) -> None:
    # We have to pass the vocabulary to the constructor.
    super().__init__(vocab)
    self.word_embeddings = word_embeddings

    if encoder_dropout > 0:
        self.encoder_dropout = torch.nn.Dropout(p=encoder_dropout)
    else:
        self.encoder_dropout = lambda x: x

    self.sentence_encoder = sentence_encoder
    self.sentence_attn = LinearSelfAttention(
        input_dim=self.sentence_encoder.get_output_dim(),
        bias=True)

    self.document_encoder = document_encoder
    self.document_attn = LinearSelfAttention(
        input_dim=self.document_encoder.get_output_dim(),
        bias=True)

    self.output = torch.nn.Linear(
        in_features=document_encoder.get_output_dim(),
        out_features=1)

def __init__(self,
             embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    self.embedder = embedder
    self.encoder = encoder
    self.linear = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                  out_features=vocab.get_vocab_size('pos'))
    self.accuracy = CategoricalAccuracy()

def __init__(self,
             vocab: Vocabulary,
             bert_embedder: Optional[PretrainedBertEmbedder] = None,
             encoder: Optional[Seq2SeqEncoder] = None,
             dropout: Optional[float] = None,
             use_crf: bool = True) -> None:
    super().__init__(vocab)
    if bert_embedder:
        self.use_bert = True
        self.bert_embedder = bert_embedder
    else:
        self.use_bert = False
        self.basic_embedder = BasicTextFieldEmbedder({
            "tokens": Embedding(vocab.get_vocab_size(namespace="tokens"), 1024)
        })
        self.rnn = Seq2SeqEncoder.from_params(Params({
            "type": "lstm",
            "input_size": 1024,
            "hidden_size": 512,
            "bidirectional": True,
            "batch_first": True
        }))

    self.encoder = encoder
    # NOTE: this assumes at least one of `encoder` and `bert_embedder` is
    # provided; with neither, the fallback below would fail on a None embedder.
    if encoder:
        hidden2tag_in_dim = encoder.get_output_dim()
    else:
        hidden2tag_in_dim = bert_embedder.get_output_dim()
    self.hidden2tag = TimeDistributed(torch.nn.Linear(
        in_features=hidden2tag_in_dim,
        out_features=vocab.get_vocab_size("labels")))

    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None

    self.use_crf = use_crf
    if use_crf:
        crf_constraints = allowed_transitions(
            constraint_type="BIO",
            labels=vocab.get_index_to_token_vocabulary("labels")
        )
        self.crf = ConditionalRandomField(
            num_tags=vocab.get_vocab_size("labels"),
            constraints=crf_constraints,
            include_start_end_transitions=True
        )
    self.f1 = SpanBasedF1Measure(vocab,
                                 tag_namespace="labels",
                                 ignore_classes=["news/type", "negation",
                                                 "demonstrative_reference",
                                                 "timer/noun", "timer/attributes"],
                                 label_encoding="BIO")

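# A small illustrative sketch of what the allowed_transitions call above
# produces (this tiny label set is an assumption for demonstration): pairs of
# (from_tag_index, to_tag_index) the CRF may traverse, with indices num_tags
# and num_tags + 1 standing in for the virtual START and END tags.
from allennlp.modules.conditional_random_field import allowed_transitions

labels = {0: "O", 1: "B-timer/noun", 2: "I-timer/noun"}
transitions = allowed_transitions(constraint_type="BIO", labels=labels)
assert (1, 2) in transitions      # B-timer/noun -> I-timer/noun is legal
assert (0, 2) not in transitions  # O -> I-timer/noun is forbidden by BIO
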
def build_decoder(task, d_inp, vocab, embedder, args):
    ''' Build a task specific decoder '''
    rnn = s2s_e.by_name('lstm').from_params(
        Params({'input_size': embedder.get_output_dim(),
                'hidden_size': args.d_hid_dec,
                'num_layers': args.n_layers_dec,
                'bidirectional': False}))
    decoder = SentenceEncoder(vocab, embedder, 0, rnn)
    hid2voc = nn.Linear(args.d_hid_dec, args.max_word_v_size)
    return decoder, hid2voc

def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    self.hidden2tag = torch.nn.Linear(
        in_features=encoder.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    self.accuracy = CategoricalAccuracy()

def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    self.hidden2tag = torch.nn.Linear(
        in_features=encoder.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    self._f1_metric = SpanBasedF1Measure(
        vocab, 'labels')  # SpanBasedF1Measure: evaluation metric for NER

def test_stacked_bidirectional_lstm_can_build_from_params(self):
    params = Params({"type": "stacked_bidirectional_lstm",
                     "input_size": 5,
                     "hidden_size": 9,
                     "num_layers": 3})
    encoder = Seq2SeqEncoder.from_params(params)

    assert encoder.get_input_dim() == 5
    assert encoder.get_output_dim() == 18
    # Call the method; asserting on the bound method itself is always truthy.
    assert encoder.is_bidirectional()

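# A hedged follow-on check that could sit at the end of the test above:
# pushing a dummy batch through the encoder to confirm the asserted dimensions
# at runtime. The shapes are illustrative assumptions, and the expected mask
# dtype (bool vs. long) varies across allennlp versions.
import torch

inputs = torch.randn(2, 7, 5)              # (batch, timesteps, input_size)
mask = torch.ones(2, 7, dtype=torch.bool)  # no padding in this toy batch
outputs = encoder(inputs, mask)
assert outputs.size() == (2, 7, 18)        # 9 per direction, concatenated
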
def __init__(self,
             embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    self.embedder = embedder
    self.encoder = encoder
    self.hidden2labels = torch.nn.Linear(
        in_features=encoder.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    self.accuracy = CategoricalAccuracy()
    self.f1 = SpanBasedF1Measure(vocab, tag_namespace='labels')

def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    contextualizer: Seq2SeqEncoder,
    hparams: Dict,
) -> None:
    super().__init__(vocab)
    self.text_field_embedder = text_field_embedder
    self.contextualizer = contextualizer
    self.bidirectional = contextualizer.is_bidirectional()
    if self.bidirectional:
        self.forward_dim = contextualizer.get_output_dim() // 2
    else:
        self.forward_dim = contextualizer.get_output_dim()

    dropout = hparams["dropout"]
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = lambda x: x

    self.hidden2chord = torch.nn.Sequential(
        torch.nn.Linear(self.forward_dim, hparams["fc_hidden_dim"]),
        torch.nn.ReLU(True),
        torch.nn.Linear(hparams["fc_hidden_dim"], vocab.get_vocab_size()),
    )
    self.perplexity = PerplexityCustom()
    self.accuracy = CategoricalAccuracy()
    self.real_loss = Average()

    self.similarity_matrix = hparams["similarity_matrix"]
    self.training_mode = hparams["training_mode"]
    self.T_initial = hparams["T_initial"]
    self.T = self.T_initial
    self.decay_rate = hparams["decay_rate"]
    self.batches_per_epoch = hparams["batches_per_epoch"]
    self.epoch = 0
    self.batch_counter = 0

def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder) -> None:
    super().__init__(vocab)
    self._embedder = embedder
    self._encoder = encoder
    self._classifier = nn.Linear(
        in_features=encoder.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    self.f1 = SpanBasedF1Measure(vocab, 'labels')

def __init__(
    self,
    word_embeddings: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    vocab: Vocabulary,
    dropout: float = 0.5,
    n_linear_layers=1,
) -> None:
    """
    :param word_embeddings: the embeddings to start with
    :param encoder: the seq2seq transformer of embeddings; can be an LSTM, for example
    :param vocab: dataset input and output vocabulary
    """
    super(BaseTextClassifier, self).__init__(vocab)
    self.word_embeddings = word_embeddings
    self.encoder = encoder

    # Representations: this is the layer just above the final layer and the
    # non-linearity (hidden[-1]). It is used to calculate the FID score and
    # similar metrics, which is why we expose it as the self.representations
    # class attribute.
    self.representations = self.encoder

    if n_linear_layers > 0:
        extra_hiddens = []
        for k in range(n_linear_layers):
            extra_hiddens += [
                nn.Linear(self.encoder.get_output_dim(),
                          self.encoder.get_output_dim()),
                nn.ReLU(True)
            ]
        self.extra_hiddens = nn.Sequential(*extra_hiddens)
    else:
        self.extra_hiddens = None

    self.hidden2label = torch.nn.Linear(
        in_features=encoder.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    # self.accuracy = CategoricalAccuracy()
    self.criterion = CrossEntropyLoss()
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "hinge-loss": Loss(HingeEmbeddingLoss()),
        "huber-loss": Loss(SmoothL1Loss()),
        "cross-entropy-loss": Loss(CrossEntropyLoss()),
        "confidence": Confidence()
    }
    self.dropout = nn.Dropout(dropout)

def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             contextualizer: Seq2SeqEncoder,
             forward_segmental_contextualizer: Seq2SeqEncoder,
             backward_segmental_contextualizer: Seq2SeqEncoder,
             label_feature_dim: int,
             softmax_projection_dim: int,
             label_namespace: str = "labels",
             dropout: float = None,
             num_samples: int = None,
             sparse_embeddings: bool = False,
             bidirectional: bool = True,
             initializer: InitializerApplicator = None) -> None:
    super().__init__(vocab=vocab,
                     text_field_embedder=text_field_embedder,
                     contextualizer=contextualizer,
                     dropout=dropout,
                     num_samples=num_samples,
                     sparse_embeddings=sparse_embeddings,
                     bidirectional=bidirectional,
                     initializer=initializer)
    self._forward_segmental_contextualizer = forward_segmental_contextualizer
    self._backward_segmental_contextualizer = backward_segmental_contextualizer

    if num_samples is not None:
        self._softmax_loss = SampledSoftmaxLoss(num_words=vocab.get_vocab_size(),
                                                embedding_dim=softmax_projection_dim,
                                                num_samples=num_samples,
                                                sparse=sparse_embeddings)
    else:
        self._softmax_loss = _SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                          embedding_dim=softmax_projection_dim)

    self.num_classes = self.vocab.get_vocab_size(label_namespace)
    self.label_feature_embedding = Embedding(self.num_classes, label_feature_dim)

    self._forward_dim = contextualizer.get_output_dim() // 2 + \
        forward_segmental_contextualizer.get_output_dim() // 2 + \
        label_feature_dim
    self.projection_layer = TimeDistributed(
        Linear(self._forward_dim, softmax_projection_dim))

def __init__(self,
             word_embeddings: TextFieldEmbedder,
             sequence_encoder: Seq2SeqEncoder,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    self.sequence_encoder = sequence_encoder
    # Fully connected layer from sequence encoding to tags
    self.fc = torch.nn.Linear(
        in_features=sequence_encoder.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    self.accuracy = CategoricalAccuracy()

def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             vocab: Vocabulary,
             num_categories: int) -> None:
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    self.hidden2tag = torch.nn.Linear(
        in_features=encoder.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    self.accuracy = CategoricalAccuracy()
    self.num_categories = num_categories
    self.fms = [F1Measure(i) for i in range(1, self.num_categories + 1)]

def build_decoder(task, d_inp, vocab, embedder, args):
    """ Build a task specific decoder """
    rnn = s2s_e.by_name("lstm").from_params(
        Params({
            "input_size": embedder.get_output_dim(),
            "hidden_size": args.s2s["d_hid_dec"],
            "num_layers": args.s2s["n_layers_dec"],
            "bidirectional": False,
        }))
    decoder = SentenceEncoder(vocab, embedder, 0, rnn)
    hid2voc = nn.Linear(args.s2s["d_hid_dec"], args.max_word_v_size)
    return decoder, hid2voc

def build_pair_attn(d_in, use_attn, d_hid_attn):
    ''' Build the pair model '''
    # Note: `vocab` and `params` are free variables here, presumably supplied
    # by an enclosing scope (or module globals) in the original source.
    if not use_attn:
        pair_attn = None
    else:
        d_inp_model = 2 * d_in
        modeling_layer = s2s_e.by_name('lstm').from_params(
            Params({'input_size': d_inp_model,
                    'hidden_size': d_hid_attn,
                    'num_layers': 1,
                    'bidirectional': True}))
        pair_attn = AttnPairEncoder(vocab, modeling_layer,
                                    dropout=params["dropout"])
    return pair_attn

def __init__(
    self,
    vocab: Vocabulary,
    embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,  # you pass in the model with layers here: LSTM, etc.
):
    super().__init__(vocab)
    self.embedder = embedder
    self.encoder = encoder
    num_labels = vocab.get_vocab_size("tokens")  # get from the tokens namespace
    self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)

def build_pair_attn(d_in, d_hid_attn):
    """ Build the pair model """
    # Note: `model` and `params` are free variables here, presumably supplied
    # by an enclosing scope in the original source.
    d_inp_model = 2 * d_in
    modeling_layer = s2s_e.by_name("lstm").from_params(
        Params({
            "input_size": d_inp_model,
            "hidden_size": d_hid_attn,
            "num_layers": 1,
            "bidirectional": True,
        }))
    pair_attn = AttnPairEncoder(model.vocab, modeling_layer,
                                dropout=params["dropout"])
    return pair_attn

def build_model(args, vocab, pretrained_embs, tasks):
    '''Build model according to arguments

    args:
        - args (TODO): object with attributes
        - vocab (Vocab):
        - pretrained_embs (TODO): word embeddings to use

    returns
    '''
    d_word, n_layers_highway = args.d_word, args.n_layers_highway

    # Build embedding layers
    if args.glove:
        word_embs = pretrained_embs
        train_embs = bool(args.train_words)
    else:
        log.info("\tLearning embeddings from scratch!")
        word_embs = None
        train_embs = True
    word_embedder = Embedding(vocab.get_vocab_size('tokens'), d_word,
                              weight=word_embs, trainable=train_embs,
                              padding_index=vocab.get_token_index('@@PADDING@@'))
    d_inp_phrase = 0

    # Handle elmo and cove
    token_embedder = {}
    if args.elmo:
        log.info("\tUsing ELMo embeddings!")
        if args.deep_elmo:
            n_reps = 2
            log.info("\tUsing deep ELMo embeddings!")
        else:
            n_reps = 1
        if args.elmo_no_glove:
            log.info("\tNOT using GLoVe embeddings!")
        else:
            token_embedder = {"words": word_embedder}
            log.info("\tUsing GLoVe embeddings!")
            d_inp_phrase += d_word
        elmo = Elmo(options_file=ELMO_OPT_PATH, weight_file=ELMO_WEIGHTS_PATH,
                    num_output_representations=n_reps)
        d_inp_phrase += 1024
    else:
        elmo = None
        token_embedder = {"words": word_embedder}
        d_inp_phrase += d_word
    text_field_embedder = BasicTextFieldEmbedder(token_embedder) \
        if "words" in token_embedder else None
    d_hid_phrase = args.d_hid if args.pair_enc != 'bow' else d_inp_phrase

    if args.cove:
        cove_layer = cove_lstm(n_vocab=vocab.get_vocab_size('tokens'),
                               vectors=word_embedder.weight.data)
        d_inp_phrase += 600
        log.info("\tUsing CoVe embeddings!")
    else:
        cove_layer = None

    # Build encoders
    phrase_layer = s2s_e.by_name('lstm').from_params(
        Params({'input_size': d_inp_phrase,
                'hidden_size': d_hid_phrase,
                'num_layers': args.n_layers_enc,
                'bidirectional': True}))
    if args.pair_enc == 'bow':
        sent_encoder = BoWSentEncoder(vocab, text_field_embedder)  # maybe should take in CoVe/ELMO?
        pair_encoder = None  # model will just run sent_encoder on both inputs
    else:  # output will be 2 x d_hid_phrase (+ deep elmo)
        sent_encoder = HeadlessSentEncoder(vocab, text_field_embedder,
                                           n_layers_highway, phrase_layer,
                                           dropout=args.dropout,
                                           cove_layer=cove_layer, elmo_layer=elmo)
    d_single = 2 * d_hid_phrase + (args.elmo and args.deep_elmo) * 1024
    if args.pair_enc == 'simple':  # output will be 4 x [2 x d_hid_phrase (+ deep elmo)]
        pair_encoder = HeadlessPairEncoder(vocab, text_field_embedder,
                                           n_layers_highway, phrase_layer,
                                           cove_layer=cove_layer, elmo_layer=elmo,
                                           dropout=args.dropout)
        d_pair = d_single
    elif args.pair_enc == 'attn':
        log.info("\tUsing attention!")
        d_inp_model = 4 * d_hid_phrase + (args.elmo and args.deep_elmo) * 1024
        d_hid_model = d_hid_phrase  # make it as large as the original sentence encoding
        modeling_layer = s2s_e.by_name('lstm').from_params(
            Params({'input_size': d_inp_model,
                    'hidden_size': d_hid_model,
                    'num_layers': 1,
                    'bidirectional': True}))
        pair_encoder = HeadlessPairAttnEncoder(vocab, text_field_embedder,
                                               n_layers_highway, phrase_layer,
                                               DotProductSimilarity(),
                                               modeling_layer,
                                               cove_layer=cove_layer,
                                               elmo_layer=elmo,
                                               deep_elmo=args.deep_elmo,
                                               dropout=args.dropout)
        d_pair = 2 * d_hid_phrase
        # output will be 4 x [2 x d_hid_model], where d_hid_model = 2 x d_hid_phrase
        #               = 4 x [2 x 2 x d_hid_phrase]

    # Build model and classifiers
    model = MultiTaskModel(args, sent_encoder, pair_encoder)
    build_classifiers(tasks, model, d_pair, d_single)
    if args.cuda >= 0:
        model = model.cuda()
    return model