def __init__(self, vocab: Vocabulary, question_embedder: TextFieldEmbedder, action_embedding_dim: int, encoder: Seq2SeqEncoder, entity_encoder: Seq2VecEncoder, max_decoding_steps: int, use_neighbor_similarity_for_linking: bool = False, dropout: float = 0.0, num_linking_features: int = 10, rule_namespace: str = 'rule_labels', tables_directory: str = '/wikitables/') -> None: super(WikiTablesSemanticParser, self).__init__(vocab) self._question_embedder = question_embedder self._encoder = encoder self._entity_encoder = TimeDistributed(entity_encoder) self._max_decoding_steps = max_decoding_steps self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._rule_namespace = rule_namespace self._denotation_accuracy = WikiTablesAccuracy(tables_directory) self._action_sequence_accuracy = Average() self._has_logical_form = Average() self._action_padding_index = -1 # the padding value used by IndexField num_actions = vocab.get_vocab_size(self._rule_namespace) self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1) # This is what we pass as input in the first step of decoding, when we don't have a # previous action, or a previous question attention. self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim)) self._first_attended_question = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim())) torch.nn.init.normal_(self._first_action_embedding) torch.nn.init.normal_(self._first_attended_question) check_dimensions_match(entity_encoder.get_output_dim(), question_embedder.get_output_dim(), "entity word average embedding dim", "question embedding dim") self._num_entity_types = 4 # TODO(mattg): get this in a more principled way somehow? self._num_start_types = 5 # TODO(mattg): get this in a more principled way somehow? self._embedding_dim = question_embedder.get_output_dim() self._type_params = torch.nn.Linear(self._num_entity_types, self._embedding_dim) self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim) if num_linking_features > 0: self._linking_params = torch.nn.Linear(num_linking_features, 1) else: self._linking_params = None if self._use_neighbor_similarity_for_linking: self._question_entity_params = torch.nn.Linear(1, 1) self._question_neighbor_params = torch.nn.Linear(1, 1) else: self._question_entity_params = None self._question_neighbor_params = None
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             attend_feedforward: FeedForward,
             similarity_function: SimilarityFunction,
             compare_feedforward: FeedForward,
             aggregate_feedforward: FeedForward,
             premise_encoder: Optional[Seq2SeqEncoder] = None,
             hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DecomposableAttention, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._attend_feedforward = TimeDistributed(attend_feedforward)
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._compare_feedforward = TimeDistributed(compare_feedforward)
    self._aggregate_feedforward = aggregate_feedforward
    self._premise_encoder = premise_encoder
    self._hypothesis_encoder = hypothesis_encoder or premise_encoder

    self._num_labels = vocab.get_vocab_size(namespace="labels")

    check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                           "text field embedding dim", "attend feedforward input dim")
    check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
                           "final output dimension", "number of labels")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()

    initializer(self)
def _read_embeddings_from_hdf5(embeddings_filename: str,
                               embedding_dim: int,
                               vocab: Vocabulary,
                               namespace: str = "tokens") -> torch.FloatTensor:
    """
    Reads from an hdf5 formatted file. The embedding matrix is assumed to be keyed by 'embedding'
    and of size ``(num_tokens, embedding_dim)``.
    """
    with h5py.File(embeddings_filename, 'r') as fin:
        embeddings = fin['embedding'][...]

    if list(embeddings.shape) != [vocab.get_vocab_size(namespace), embedding_dim]:
        raise ConfigurationError(
                "Read shape {0} embeddings from the file, but expected {1}".format(
                        list(embeddings.shape), [vocab.get_vocab_size(namespace), embedding_dim]))

    return torch.FloatTensor(embeddings)
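# Hedged usage sketch (not part of the original code): exercises the reader above end-to-end with
# a toy vocabulary and a temporary HDF5 file. The names "toy_vocab", "toy_matrix" and
# "toy_embeddings.hdf5" are illustrative assumptions, and the allennlp import path is the usual one.
import h5py
import numpy
from allennlp.data import Vocabulary

toy_vocab = Vocabulary()
toy_vocab.add_token_to_namespace("word")
toy_vocab.add_token_to_namespace("word2")

toy_matrix = numpy.random.rand(toy_vocab.get_vocab_size("tokens"), 5).astype("float32")
with h5py.File("toy_embeddings.hdf5", "w") as fout:
    # The reader above expects the dataset to be keyed by 'embedding'.
    fout.create_dataset("embedding", data=toy_matrix)

weight = _read_embeddings_from_hdf5("toy_embeddings.hdf5", embedding_dim=5,
                                    vocab=toy_vocab, namespace="tokens")
assert weight.shape == (toy_vocab.get_vocab_size("tokens"), 5)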
def get_vocab(word2freq, max_v_sizes):
    '''Build vocabulary'''
    vocab = Vocabulary(counter=None, max_vocab_size=max_v_sizes['word'])
    words_by_freq = [(word, freq) for word, freq in word2freq.items()]
    words_by_freq.sort(key=lambda x: x[1], reverse=True)
    for word, _ in words_by_freq[:max_v_sizes['word']]:
        vocab.add_token_to_namespace(word, 'tokens')
    log.info("\tFinished building vocab. Using %d words", vocab.get_vocab_size('tokens'))
    return vocab
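# Hedged usage sketch (illustrative, not from the source): builds a small vocabulary from a
# word-frequency dictionary. The word counts and the cap of 3 words are made up, and it assumes
# the module-level `log` logger used by get_vocab is configured.
word2freq = {"the": 100, "cat": 7, "sat": 5, "mat": 2}
max_v_sizes = {"word": 3}
small_vocab = get_vocab(word2freq, max_v_sizes)
# Only the 3 most frequent words are added, on top of the default padding/OOV tokens.
print(small_vocab.get_vocab_size('tokens'))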
def __init__(self, vocab: Vocabulary, utterance_embedder: TextFieldEmbedder, action_embedding_dim: int, encoder: Seq2SeqEncoder, decoder_beam_search: BeamSearch, max_decoding_steps: int, input_attention: Attention, add_action_bias: bool = True, dropout: float = 0.0, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self._utterance_embedder = utterance_embedder self._encoder = encoder self._max_decoding_steps = max_decoding_steps self._add_action_bias = add_action_bias self._dropout = torch.nn.Dropout(p=dropout) self._exact_match = Average() self._valid_sql_query = Average() self._action_similarity = Average() self._denotation_accuracy = Average() # the padding value used by IndexField self._action_padding_index = -1 num_actions = vocab.get_vocab_size("rule_labels") input_action_dim = action_embedding_dim if self._add_action_bias: input_action_dim += 1 self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=input_action_dim) self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) # This is what we pass as input in the first step of decoding, when we don't have a # previous action, or a previous utterance attention. self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim)) self._first_attended_utterance = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim())) torch.nn.init.normal_(self._first_action_embedding) torch.nn.init.normal_(self._first_attended_utterance) self._beam_search = decoder_beam_search self._decoder_trainer = MaximumMarginalLikelihood(beam_size=1) self._transition_function = BasicTransitionFunction(encoder_output_dim=self._encoder.get_output_dim(), action_embedding_dim=action_embedding_dim, input_attention=input_attention, predict_start_type_separately=False, add_action_bias=self._add_action_bias, dropout=dropout) initializer(self)
def test_read_hdf5_raises_on_invalid_shape(self):
    vocab = Vocabulary()
    vocab.add_token_to_namespace("word")
    embeddings_filename = self.TEST_DIR + "embeddings.hdf5"
    embeddings = numpy.random.rand(vocab.get_vocab_size(), 10)
    with h5py.File(embeddings_filename, 'w') as fout:
        _ = fout.create_dataset(
                'embedding', embeddings.shape, dtype='float32', data=embeddings
        )

    params = Params({
            'pretrained_file': embeddings_filename,
            'embedding_dim': 5,
            })
    with pytest.raises(ConfigurationError):
        _ = Embedding.from_params(vocab, params)
def test_read_hdf5_format_file(self):
    vocab = Vocabulary()
    vocab.add_token_to_namespace("word")
    vocab.add_token_to_namespace("word2")
    embeddings_filename = self.TEST_DIR + "embeddings.hdf5"
    embeddings = numpy.random.rand(vocab.get_vocab_size(), 5)
    with h5py.File(embeddings_filename, 'w') as fout:
        _ = fout.create_dataset(
                'embedding', embeddings.shape, dtype='float32', data=embeddings
        )

    params = Params({
            'pretrained_file': embeddings_filename,
            'embedding_dim': 5,
            })
    embedding_layer = Embedding.from_params(vocab, params)
    assert numpy.allclose(embedding_layer.weight.data.numpy(), embeddings)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'Embedding':
    """
    We need the vocabulary here to know how many items we need to embed, and we look for a
    ``vocab_namespace`` key in the parameter dictionary to know which vocabulary to use. If you
    know beforehand exactly how many embeddings you need, or aren't using a vocabulary mapping
    for the things getting embedded here, then you can pass in the ``num_embeddings`` key
    directly, and the vocabulary will be ignored.
    """
    num_embeddings = params.pop_int('num_embeddings', None)
    vocab_namespace = params.pop("vocab_namespace", "tokens")
    if num_embeddings is None:
        num_embeddings = vocab.get_vocab_size(vocab_namespace)
    embedding_dim = params.pop_int('embedding_dim')
    pretrained_file = params.pop("pretrained_file", None)
    projection_dim = params.pop_int("projection_dim", None)
    trainable = params.pop_bool("trainable", True)
    padding_index = params.pop_int('padding_index', None)
    max_norm = params.pop_float('max_norm', None)
    norm_type = params.pop_float('norm_type', 2.)
    scale_grad_by_freq = params.pop_bool('scale_grad_by_freq', False)
    sparse = params.pop_bool('sparse', False)
    params.assert_empty(cls.__name__)

    if pretrained_file:
        # If we're loading a saved model, we don't want to actually read a pre-trained
        # embedding file - the embeddings will just be in our saved weights, and we might not
        # have the original embedding file anymore, anyway.
        weight = _read_pretrained_embedding_file(pretrained_file,
                                                 embedding_dim,
                                                 vocab,
                                                 vocab_namespace)
    else:
        weight = None

    return cls(num_embeddings=num_embeddings,
               embedding_dim=embedding_dim,
               projection_dim=projection_dim,
               weight=weight,
               padding_index=padding_index,
               trainable=trainable,
               max_norm=max_norm,
               norm_type=norm_type,
               scale_grad_by_freq=scale_grad_by_freq,
               sparse=sparse)
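# Hedged usage sketch (illustrative only): constructing an Embedding from a Params blob, the way
# the tests above do. The namespace name and dimension are assumptions made for the example, and
# the allennlp import paths are the usual ones.
from allennlp.common import Params
from allennlp.data import Vocabulary

vocab = Vocabulary()
vocab.add_token_to_namespace("word", namespace="tokens")

params = Params({
        "embedding_dim": 5,
        "vocab_namespace": "tokens",
        "trainable": True,
})
embedding = Embedding.from_params(vocab, params)
# No pretrained_file was given, so the weight is randomly initialized; its row count comes from
# the vocabulary size of the "tokens" namespace.
assert embedding.weight.shape == (vocab.get_vocab_size("tokens"), 5)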
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, similarity_function: SimilarityFunction, projection_feedforward: FeedForward, inference_encoder: Seq2SeqEncoder, output_feedforward: FeedForward, output_logit: FeedForward, dropout: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self._text_field_embedder = text_field_embedder self._encoder = encoder self._matrix_attention = LegacyMatrixAttention(similarity_function) self._projection_feedforward = projection_feedforward self._inference_encoder = inference_encoder if dropout: self.dropout = torch.nn.Dropout(dropout) self.rnn_input_dropout = InputVariationalDropout(dropout) else: self.dropout = None self.rnn_input_dropout = None self._output_feedforward = output_feedforward self._output_logit = output_logit self._num_labels = vocab.get_vocab_size(namespace="labels") check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(), "encoder output dim", "projection feedforward input") check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(), "proj feedforward output dim", "inference lstm input dim") self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__( self, vocab: Vocabulary, beta: float, gamma: float, text_field_embedder: TextFieldEmbedder, seq2vec_encoder: Seq2VecEncoder, seq2seq_encoder: Seq2SeqEncoder = None, feedforward: Optional[FeedForward] = None, feedforward_hyp_only: Optional[FeedForward] = None, dropout: float = None, num_labels: int = None, label_namespace: str = "labels", evaluation_mode: bool = False, initializer: InitializerApplicator = InitializerApplicator(), **kwargs, ) -> None: super().__init__(vocab, **kwargs) self.evaluation_mode = evaluation_mode self._text_field_embedder = text_field_embedder if seq2seq_encoder: self._seq2seq_encoder = seq2seq_encoder else: self._seq2seq_encoder = None self._seq2vec_encoder = seq2vec_encoder self._feedforward = feedforward self._feedforward_hyp_only = feedforward_hyp_only if feedforward is not None: self._classifier_input_dim = self._feedforward.get_output_dim() else: self._classifier_input_dim = self._seq2vec_encoder.get_output_dim() if feedforward_hyp_only is not None: self._classifier_hyp_only_input_dim = self._feedforward_hyp_only.get_output_dim() else: self._classifier_hyp_only_input_dim = self._seq2vec_encoder.get_output_dim() if dropout: self._dropout = torch.nn.Dropout(dropout) else: self._dropout = None self._label_namespace = label_namespace if num_labels: self._num_labels = num_labels else: self._num_labels = vocab.get_vocab_size(namespace=self._label_namespace) self._classification_layer = torch.nn.Linear(self._classifier_input_dim, self._num_labels) self._classification_layer_hyp_only = torch.nn.Linear(self._classifier_hyp_only_input_dim, self._num_labels) self._beta = beta self._gamma = gamma self._accuracy = CategoricalAccuracy() self._hyp_only_accuracy = CategoricalAccuracy() self._element_cross_ent_loss = torch.nn.CrossEntropyLoss(reduction='none') self._cross_ent_loss = torch.nn.CrossEntropyLoss() initializer(self)
def _read_embeddings_from_text_file(file_uri: str,
                                    embedding_dim: int,
                                    vocab: Vocabulary,
                                    namespace: str = "tokens") -> torch.FloatTensor:
    """
    Read pre-trained word vectors from a text file (possibly compressed, and possibly contained
    inside an archive with multiple files). The text file is assumed to be utf-8 encoded with
    space-separated fields: [word] [dim 1] [dim 2] ...

    Lines that contain more numerical tokens than ``embedding_dim`` raise a warning and are skipped.

    The remainder of the docstring is identical to ``_read_pretrained_embeddings_file``.
    """
    tokens_to_keep = set(vocab.get_index_to_token_vocabulary(namespace).values())
    vocab_size = vocab.get_vocab_size(namespace)
    embeddings = {}

    # First we read the embeddings from the file, only keeping vectors for the words we need.
    logger.info("Reading pretrained embeddings from file")
    with EmbeddingsTextFile(file_uri) as embeddings_file:
        for line in Tqdm.tqdm(embeddings_file):
            token = line.split(' ', 1)[0]
            if token in tokens_to_keep:
                fields = line.rstrip().split(' ')
                if len(fields) - 1 != embedding_dim:
                    # Sometimes there are funny unicode parsing problems that lead to different
                    # fields lengths (e.g., a word with a unicode space character that splits
                    # into more than one column). We skip those lines. Note that if you have
                    # some kind of long header, this could result in all of your lines getting
                    # skipped. It's hard to check for that here; you just have to look in the
                    # embedding_misses_file and at the model summary to make sure things look
                    # like they are supposed to.
                    logger.warning("Found line with wrong number of dimensions (expected: %d; actual: %d): %s",
                                   embedding_dim, len(fields) - 1, line)
                    continue

                vector = numpy.asarray(fields[1:], dtype='float32')
                embeddings[token] = vector

    if not embeddings:
        raise ConfigurationError("No embeddings of correct dimension found; you probably "
                                 "misspecified your embedding_dim parameter, or didn't "
                                 "pre-populate your Vocabulary")

    all_embeddings = numpy.asarray(list(embeddings.values()))
    embeddings_mean = float(numpy.mean(all_embeddings))
    embeddings_std = float(numpy.std(all_embeddings))

    # Now we initialize the weight matrix for an embedding layer, starting with random vectors,
    # then filling in the word vectors we just read.
    logger.info("Initializing pre-trained embedding layer")
    embedding_matrix = torch.FloatTensor(vocab_size, embedding_dim).normal_(embeddings_mean,
                                                                            embeddings_std)
    num_tokens_found = 0
    index_to_token = vocab.get_index_to_token_vocabulary(namespace)
    for i in range(vocab_size):
        token = index_to_token[i]

        # If we don't have a pre-trained vector for this word, we'll just leave this row alone,
        # so the word has a random initialization.
        if token in embeddings:
            embedding_matrix[i] = torch.FloatTensor(embeddings[token])
            num_tokens_found += 1
        else:
            logger.debug("Token %s was not found in the embedding file. Initialising randomly.",
                         token)

    logger.info("Pretrained embeddings were found for %d out of %d tokens",
                num_tokens_found, vocab_size)

    return embedding_matrix
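# Hedged usage sketch (illustrative, not from the source): writes a tiny space-separated
# embeddings file in the format the reader above expects ([word] [dim 1] [dim 2] ...) and loads
# it. The file name "toy_vectors.txt" and the 3-dimensional vectors are made-up assumptions.
from allennlp.data import Vocabulary

vocab = Vocabulary()
vocab.add_token_to_namespace("cat")
vocab.add_token_to_namespace("dog")

with open("toy_vectors.txt", "w", encoding="utf-8") as fout:
    fout.write("cat 0.1 0.2 0.3\n")
    fout.write("dog 0.4 0.5 0.6\n")
    fout.write("unused_word 0.7 0.8 0.9\n")   # ignored: not in the vocabulary

matrix = _read_embeddings_from_text_file("toy_vectors.txt", embedding_dim=3,
                                         vocab=vocab, namespace="tokens")
# Rows for "cat" and "dog" come from the file; padding/OOV rows are sampled from the file's
# mean/std, as in the code above.
assert matrix.shape == (vocab.get_vocab_size("tokens"), 3)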
def __init__( self, vocab: Vocabulary, bert_model: Union[str, BertModel], span_extractor: SpanExtractor, tree_mapper: TreeMapper, domain_utils: DomainUtils, is_weak_supervision: bool, feedforward: FeedForward = None, dropout: float = 0.0, num_labels: int = None, index: str = "bert", label_namespace: str = "labels", trainable: bool = True, initializer: InitializerApplicator = InitializerApplicator(), denotation_based_metric: Metric = None, token_based_metric: Metric = None, **kwargs, ) -> None: super().__init__(vocab, **kwargs) if isinstance(bert_model, str): self.bert_model = PretrainedBertModel.load(bert_model) else: self.bert_model = bert_model for param in self.bert_model.parameters(): param.requires_grad = trainable in_features = self.bert_model.config.hidden_size self._label_namespace = label_namespace self.span_extractor = span_extractor self.feedforward_layer = TimeDistributed(feedforward) if feedforward else None self.num_classes = self.vocab.get_vocab_size("labels") if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = span_extractor.get_output_dim() self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_classes)) if num_labels: out_features = num_labels else: out_features = vocab.get_vocab_size(namespace=self._label_namespace) self._dropout = torch.nn.Dropout(p=dropout) self._tree_mapper = tree_mapper labels = self.vocab.get_index_to_token_vocabulary(self._label_namespace) grammar = Grammar(labels) self._cky = CKY(grammar, tree_mapper, domain_utils) use_lexicon = True if use_lexicon: self.zero_shot_extractor = ZeroShotExtractor(labels, domain_utils) self._sim_weight = torch.nn.Parameter( torch.ones([1], dtype=torch.float32, requires_grad=True)) self._classification_layer = torch.nn.Linear(in_features, out_features) self._accuracy = CategoricalAccuracy() self._accuracy_all_no_span = CategoricalAccuracy() self._fmeasure = F1Measure(positive_label=1) self._denotation_based_metric = denotation_based_metric self._token_based_metric = token_based_metric self._loss = torch.nn.CrossEntropyLoss() self._index = index initializer(self._classification_layer) self._epoch_counter = 0 self._is_weak_supervision = is_weak_supervision if self._is_weak_supervision: self._weak_supervision_acc = WeakSupervisionAccuracy() self._label_preparer = LabelsPreparer(self.vocab.get_index_to_token_vocabulary(self._label_namespace)) self._sets_f1_metric = SetsF1() self._compute_spans_f1 = False
def extend_vocab(
    self,
    extended_vocab: Vocabulary,
    vocab_namespace: str = None,
    extension_pretrained_file: str = None,
    model_path: str = None,
):
    """
    Extends the embedding matrix according to the extended vocabulary.
    If extension_pretrained_file is available, it will be used for initializing the new words'
    embeddings in the extended vocabulary; otherwise we will check if the _pretrained_file
    attribute is already available. If none is available, they will be initialized with xavier
    uniform.

    # Parameters

    extended_vocab : `Vocabulary`
        Vocabulary extended from the original vocabulary used to construct this `Embedding`.
    vocab_namespace : `str`, (optional, default=None)
        In case you know what vocab_namespace should be used for extension, you can pass it.
        If not passed, it will check if the vocab_namespace used at the time of `Embedding`
        construction is available. If so, this namespace will be used; otherwise extend_vocab
        will be a no-op.
    extension_pretrained_file : `str`, (optional, default=None)
        A file containing pretrained embeddings can be specified here. It can be the path to a
        local file or a URL of a (cached) remote file. Check format details in `from_params` of
        the `Embedding` class.
    model_path : `str`, (optional, default=None)
        Path traversing the model attributes up to this embedding module.
        E.g. "_text_field_embedder.token_embedder_tokens". This is only useful to give a
        helpful error message when extend_vocab is implicitly called by fine-tune or any other
        command.
    """
    # Caveat: For allennlp v0.8.1 and below, we weren't storing vocab_namespace as an attribute,
    # knowing which is necessary at time of embedding vocab extension. So old archive models are
    # currently unextendable.

    vocab_namespace = vocab_namespace or self._vocab_namespace
    if not vocab_namespace:
        # It's not safe to default to "tokens" or any other namespace.
        logging.info(
            "Loading a model trained before embedding extension was implemented; "
            "pass an explicit vocab namespace if you want to extend the vocabulary."
        )
        return

    extended_num_embeddings = extended_vocab.get_vocab_size(vocab_namespace)
    if extended_num_embeddings == self.num_embeddings:
        # It's already been extended. No need to initialize / read pretrained file in first place (no-op)
        return

    if extended_num_embeddings < self.num_embeddings:
        raise ConfigurationError(
            f"Size of namespace, {vocab_namespace} for extended_vocab is smaller than "
            f"embedding. You likely passed incorrect vocab or namespace for extension."
        )

    # Case 1: user passed extension_pretrained_file and it's available.
    if extension_pretrained_file and is_url_or_existing_file(extension_pretrained_file):
        # Don't have to do anything here, this is the happy case.
        pass
    # Case 2: user passed extension_pretrained_file and it's not available.
    elif extension_pretrained_file:
        raise ConfigurationError(
            f"You passed pretrained embedding file {extension_pretrained_file} "
            f"for model_path {model_path} but it's not available."
        )
    # Case 3: user didn't pass extension_pretrained_file, but the pretrained_file attribute was
    # saved during training and is available.
    elif is_url_or_existing_file(self._pretrained_file):
        extension_pretrained_file = self._pretrained_file
    # Case 4: no file is available; hope that pretrained embeddings weren't used in the first place and warn.
    else:
        extra_info = (
            f"Originally pretrained_file was at {self._pretrained_file}. "
            if self._pretrained_file
            else ""
        )
        # It's better to warn here and not give an error because there is no way to distinguish
        # between whether the pretrained file wasn't used during training or the user forgot to
        # pass / passed an incorrect mapping. Raising an error would prevent fine-tuning in the
        # former case.
        logging.warning(
            f"Embedding at model_path, {model_path} cannot locate the pretrained_file. "
            f"{extra_info} If you are fine-tuning and want to use pretrained_file for "
            f"embedding extension, please pass the mapping by --embedding-sources argument."
        )

    embedding_dim = self.weight.data.shape[-1]
    if not extension_pretrained_file:
        extra_num_embeddings = extended_num_embeddings - self.num_embeddings
        extra_weight = torch.FloatTensor(extra_num_embeddings, embedding_dim)
        torch.nn.init.xavier_uniform_(extra_weight)
    else:
        # It's easiest to just reload the embeddings for the entire vocab,
        # then only keep the ones we need.
        whole_weight = _read_pretrained_embeddings_file(
            extension_pretrained_file, embedding_dim, extended_vocab, vocab_namespace
        )
        extra_weight = whole_weight[self.num_embeddings :, :]

    device = self.weight.data.device
    extended_weight = torch.cat([self.weight.data, extra_weight.to(device)], dim=0)
    self.weight = torch.nn.Parameter(extended_weight, requires_grad=self.weight.requires_grad)
def __init__(self, vocab: Vocabulary, params: Params, regularizer: Optional[RegularizerApplicator] = None): super(LayerPOSChunkDepparLM, self).__init__(vocab=vocab, regularizer=regularizer) # Base text Field Embedder text_field_embedder_params = params.pop("text_field_embedder") text_field_embedder = BasicTextFieldEmbedder.from_params(vocab=vocab, params=text_field_embedder_params) self._text_field_embedder = text_field_embedder ############ # POS Stuffs ############ pos_params = params.pop("pos") # Encoder encoder_pos_params = pos_params.pop("encoder") encoder_pos = Seq2SeqEncoder.from_params(encoder_pos_params) self._encoder_pos = encoder_pos # Tagger POS - Simple Tagger tagger_pos_params = pos_params.pop("tagger") # Can be updated to the pos model that is created tagger_pos = PosSimpleTagger( vocab=vocab, text_field_embedder=self._text_field_embedder, encoder=self._encoder_pos, label_namespace=tagger_pos_params.pop("label_namespace", "labels"), regularizer=regularizer, ) self._tagger_pos = tagger_pos ############ # Chunk Stuffs ############ chunk_params = params.pop("chunk") # Encoder encoder_chunk_params = chunk_params.pop("encoder") encoder_chunk = Seq2SeqEncoder.from_params(encoder_chunk_params) self._encoder_chunk = encoder_chunk shortcut_text_field_embedder = ShortcutConnectTextFieldEmbedder( base_text_field_embedder=self._text_field_embedder, previous_encoders=[self._encoder_pos] ) self._shortcut_text_field_embedder = shortcut_text_field_embedder # Tagger: Chunk - CRF Tagger tagger_chunk_params = chunk_params.pop("tagger") tagger_chunk = ChunkSimpleTagger( vocab=vocab, text_field_embedder=self._shortcut_text_field_embedder, encoder=self._encoder_chunk, label_namespace=tagger_chunk_params.pop("label_namespace", "labels"), label_encoding=tagger_chunk_params.pop("label_encoding", None), regularizer=regularizer, ) self._tagger_chunk = tagger_chunk ########### # Dependency Parsing Stuffs ########### deppar_params = params.pop("deppar") # Encoder encoder_deppar_params = deppar_params.pop("encoder") encoder_deppar = Seq2SeqEncoder.from_params(encoder_deppar_params) self._encoder_deppar = encoder_deppar shortcut_text_field_embedder_deppar = ShortcutConnectTextFieldEmbedder( base_text_field_embedder=self._text_field_embedder, previous_encoders=[self._encoder_pos, self._encoder_chunk] ) self._shortcut_text_field_embedder_deppar = shortcut_text_field_embedder_deppar # Parser: Dependency Parser - Biaffine Parser parser_deppar_params = deppar_params.pop("parser") embedding_deppar_params = deppar_params.pop("pos_tag_embedding") embedding = Embedding(num_embeddings = vocab.get_vocab_size('tokens'), embedding_dim = embedding_deppar_params.pop_int("embedding_dim"), vocab_namespace = embedding_deppar_params.pop("vocab_namespace")) init_params = parser_deppar_params.pop("initializer", None) initializer = ( InitializerApplicator.from_params(init_params) if init_params is not None else InitializerApplicator() ) tagger_deppar = BiaffineDependencyParser( vocab=vocab, text_field_embedder=self._shortcut_text_field_embedder_deppar, encoder=self._encoder_deppar, tag_representation_dim= parser_deppar_params.pop_int("tag_representation_dim"), arc_representation_dim= parser_deppar_params.pop_int("arc_representation_dim"), pos_tag_embedding = None, use_mst_decoding_for_validation = parser_deppar_params.pop("use_mst_decoding_for_validation"), dropout = parser_deppar_params.pop_float("dropout"), input_dropout = parser_deppar_params.pop_float("input_dropout"), initializer = initializer, regularizer = regularizer) 
self._tagger_deppar = tagger_deppar ########### # LM Stuffs ########### lm_params = params.pop("lm") # Encoder encoder_lm_params = lm_params.pop("encoder") encoder_lm = Seq2SeqEncoder.from_params(encoder_lm_params) self._encoder_lm = encoder_lm test_previous_encoders=[self._encoder_pos, self._encoder_chunk, self._encoder_deppar] shortcut_text_field_embedder_lm = ShortcutConnectTextFieldEmbedder( base_text_field_embedder=self._text_field_embedder, previous_encoders=test_previous_encoders ) self._shortcut_text_field_embedder_lm = shortcut_text_field_embedder_lm # Classifier: LM tagger_lm = LstmSwag( vocab=vocab, text_field_embedder=self._shortcut_text_field_embedder_lm, encoder=self._encoder_lm) self._tagger_lm = tagger_lm logger.info("Multi-Task Learning Model has been instantiated.")
def __init__(self, vocab: Vocabulary, encoder: Seq2SeqEncoder, entity_encoder: Seq2VecEncoder, decoder_beam_search: BeamSearch, question_embedder: TextFieldEmbedder, input_attention: Attention, past_attention: Attention, max_decoding_steps: int, action_embedding_dim: int, gnn: bool = True, graph_loss_lambda: float = 0.5, decoder_use_graph_entities: bool = True, decoder_self_attend: bool = True, gnn_timesteps: int = 2, pruning_gnn_timesteps: int = 2, parse_sql_on_decoding: bool = True, add_action_bias: bool = True, use_neighbor_similarity_for_linking: bool = True, dataset_path: str = 'dataset', training_beam_size: int = None, decoder_num_layers: int = 1, dropout: float = 0.0, rule_namespace: str = 'rule_labels') -> None: super().__init__(vocab, encoder, entity_encoder, question_embedder, gnn_timesteps, dropout, rule_namespace) self._max_decoding_steps = max_decoding_steps self._add_action_bias = add_action_bias self._parse_sql_on_decoding = parse_sql_on_decoding self._self_attend = decoder_self_attend self._decoder_use_graph_entities = decoder_use_graph_entities self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking self._action_padding_index = -1 # the padding value used by IndexField self._exact_match = Average() self._sql_evaluator_match = Average() self._action_similarity = Average() self._beam_hit = Average() self._action_embedding_dim = action_embedding_dim self._graph_loss_lambda = graph_loss_lambda num_actions = vocab.get_vocab_size(self._rule_namespace) if self._add_action_bias: input_action_dim = action_embedding_dim + 1 else: input_action_dim = action_embedding_dim self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=input_action_dim) self._output_action_embedder = Embedding( num_embeddings=num_actions, embedding_dim=action_embedding_dim) encoder_output_dim = encoder.get_output_dim() if gnn: encoder_output_dim += action_embedding_dim self._first_action_embedding = torch.nn.Parameter( torch.FloatTensor(action_embedding_dim)) self._first_attended_utterance = torch.nn.Parameter( torch.FloatTensor(encoder_output_dim)) self._first_attended_output = torch.nn.Parameter( torch.FloatTensor(action_embedding_dim)) torch.nn.init.normal_(self._first_action_embedding) torch.nn.init.normal_(self._first_attended_utterance) torch.nn.init.normal_(self._first_attended_output) self._entity_type_decoder_embedding = Embedding( self._num_entity_types, action_embedding_dim) self._decoder_num_layers = decoder_num_layers self._beam_search = decoder_beam_search self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size) self._graph_pruning = GraphPruning(3, self._embedding_dim, encoder.get_output_dim(), dropout, timesteps=pruning_gnn_timesteps) if decoder_self_attend: self._transition_function = AttendPastSchemaItemsTransitionFunction( encoder_output_dim=encoder_output_dim, action_embedding_dim=action_embedding_dim, input_attention=input_attention, past_attention=past_attention, predict_start_type_separately=False, add_action_bias=self._add_action_bias, dropout=dropout, num_layers=self._decoder_num_layers) else: self._transition_function = LinkingTransitionFunction( encoder_output_dim=encoder_output_dim, action_embedding_dim=action_embedding_dim, input_attention=input_attention, predict_start_type_separately=False, add_action_bias=self._add_action_bias, dropout=dropout, num_layers=self._decoder_num_layers) self._ent2ent_ff = FeedForward(action_embedding_dim, 1, action_embedding_dim, Activation.by_name('relu')()) # TODO: Remove hard-coded dirs 
self._evaluate_func = partial( evaluate, db_dir=os.path.join(dataset_path, 'database'), table=os.path.join(dataset_path, 'tables.json'), check_valid=False)
def __init__( self, vocab: Vocabulary, trigger_feedforward: FeedForward, trigger_candidate_feedforward: FeedForward, mention_feedforward: FeedForward, # Used if entity beam is off. argument_feedforward: FeedForward, context_attention: BilinearMatrixAttention, trigger_attention: Seq2SeqEncoder, span_prop: SpanProp, cls_projection: FeedForward, feature_size: int, trigger_spans_per_word: float, argument_spans_per_word: float, loss_weights, trigger_attention_context: bool, event_args_use_trigger_labels: bool, event_args_use_ner_labels: bool, event_args_label_emb: int, shared_attention_context: bool, label_embedding_method: str, event_args_label_predictor: str, event_args_gold_candidates: bool = False, # If True, use gold argument candidates. context_window: int = 0, softmax_correction: bool = False, initializer: InitializerApplicator = InitializerApplicator(), positive_label_weight: float = 1.0, entity_beam: bool = False, regularizer: Optional[RegularizerApplicator] = None) -> None: super(EventExtractor, self).__init__(vocab, regularizer) self._n_ner_labels = vocab.get_vocab_size("ner_labels") self._n_trigger_labels = vocab.get_vocab_size("trigger_labels") self._n_argument_labels = vocab.get_vocab_size("argument_labels") # Embeddings for trigger labels and ner labels, to be used by argument scorer. # These will be either one-hot encodings or learned embeddings, depending on "kind". self._ner_label_emb = make_embedder(kind=label_embedding_method, num_embeddings=self._n_ner_labels, embedding_dim=event_args_label_emb) self._trigger_label_emb = make_embedder( kind=label_embedding_method, num_embeddings=self._n_trigger_labels, embedding_dim=event_args_label_emb) self._label_embedding_method = label_embedding_method # Weight on trigger labeling and argument labeling. self._loss_weights = loss_weights.as_dict() # Trigger candidate scorer. null_label = vocab.get_token_index("", "trigger_labels") assert null_label == 0 # If not, the dummy class won't correspond to the null label. self._trigger_scorer = torch.nn.Sequential( TimeDistributed(trigger_feedforward), TimeDistributed( torch.nn.Linear(trigger_feedforward.get_output_dim(), self._n_trigger_labels - 1))) self._trigger_attention_context = trigger_attention_context if self._trigger_attention_context: self._trigger_attention = trigger_attention # Make pruners. If `entity_beam` is true, use NER and trigger scorers to construct the beam # and only keep candidates that the model predicts are actual entities or triggers. self._mention_pruner = make_pruner( mention_feedforward, entity_beam=entity_beam, gold_beam=event_args_gold_candidates) self._trigger_pruner = make_pruner(trigger_candidate_feedforward, entity_beam=entity_beam, gold_beam=False) # Argument scorer. self._event_args_use_trigger_labels = event_args_use_trigger_labels # If True, use trigger labels. self._event_args_use_ner_labels = event_args_use_ner_labels # If True, use ner labels to predict args. assert event_args_label_predictor in [ "hard", "softmax", "gold" ] # Method for predicting labels at test time. self._event_args_label_predictor = event_args_label_predictor self._event_args_gold_candidates = event_args_gold_candidates # If set to True, then construct a context vector from a bilinear attention over the trigger # / argument pair embeddings and the text. self._context_window = context_window # If greater than 0, concatenate context as features. 
self._argument_feedforward = argument_feedforward self._argument_scorer = torch.nn.Linear( argument_feedforward.get_output_dim(), self._n_argument_labels) # Distance embeddings. self._num_distance_buckets = 10 # Just use 10 which is the default. self._distance_embedding = Embedding(self._num_distance_buckets, feature_size) # Class token projection. self._cls_projection = cls_projection self._cls_n_triggers = torch.nn.Linear( self._cls_projection.get_output_dim(), 5) self._cls_event_types = torch.nn.Linear( self._cls_projection.get_output_dim(), self._n_trigger_labels - 1) self._trigger_spans_per_word = trigger_spans_per_word self._argument_spans_per_word = argument_spans_per_word # Context attention for event argument scorer. self._shared_attention_context = shared_attention_context if self._shared_attention_context: self._shared_attention_context_module = context_attention # Span propagation object. # TODO(dwadden) initialize with `from_params` instead if this ends up working. self._span_prop = span_prop self._span_prop._trig_arg_embedder = self._compute_trig_arg_embeddings self._span_prop._argument_scorer = self._compute_argument_scores # Softmax correction parameters. self._softmax_correction = softmax_correction self._softmax_log_temp = torch.nn.Parameter( torch.zeros([1, 1, 1, self._n_argument_labels])) self._softmax_log_multiplier = torch.nn.Parameter( torch.zeros([1, 1, 1, self._n_argument_labels])) # TODO(dwadden) Add metrics. self._metrics = EventMetrics() self._argument_stats = ArgumentStats() self._trigger_loss = torch.nn.CrossEntropyLoss(reduction="sum") # TODO(dwadden) add loss weights. self._argument_loss = torch.nn.CrossEntropyLoss(reduction="sum", ignore_index=-1) initializer(self)
def __init__( self, vocab: Vocabulary, question_embedder: TextFieldEmbedder, action_embedding_dim: int, encoder: Seq2SeqEncoder, decoder_beam_search: BeamSearch, max_decoding_steps: int, attention: Attention, mixture_feedforward: FeedForward = None, add_action_bias: bool = True, dropout: float = 0.0, num_linking_features: int = 0, num_entity_bits: int = 0, entity_bits_output: bool = True, use_entities: bool = False, denotation_only: bool = False, # Deprecated parameter to load older models entity_encoder: Seq2VecEncoder = None, entity_similarity_mode: str = "dot_product", rule_namespace: str = "rule_labels", ) -> None: super().__init__(vocab) self._question_embedder = question_embedder self._encoder = encoder self._beam_search = decoder_beam_search self._max_decoding_steps = max_decoding_steps if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._rule_namespace = rule_namespace self._denotation_accuracy = Average() self._action_sequence_accuracy = Average() self._has_logical_form = Average() self._embedding_dim = question_embedder.get_output_dim() self._use_entities = use_entities # Note: there's only one non-trivial entity type in QuaRel for now, so most of the # entity_type stuff is irrelevant self._num_entity_types = 4 # TODO(mattg): get this in a more principled way somehow? self._entity_type_encoder_embedding = Embedding( self._num_entity_types, self._embedding_dim) self._entity_type_decoder_embedding = Embedding( self._num_entity_types, action_embedding_dim) self._entity_similarity_layer = None self._entity_similarity_mode = entity_similarity_mode if self._entity_similarity_mode == "weighted_dot_product": self._entity_similarity_layer = TimeDistributed( torch.nn.Linear(self._embedding_dim, 1, bias=False)) # Center initial values around unweighted dot product self._entity_similarity_layer._module.weight.data += 1 elif self._entity_similarity_mode == "dot_product": pass else: raise ValueError("Invalid entity_similarity_mode: {}".format( self._entity_similarity_mode)) if num_linking_features > 0: self._linking_params = torch.nn.Linear(num_linking_features, 1) else: self._linking_params = None self._decoder_trainer = MaximumMarginalLikelihood() self._encoder_output_dim = self._encoder.get_output_dim() if entity_bits_output: self._encoder_output_dim += num_entity_bits self._entity_bits_output = entity_bits_output self._debug_count = 10 self._num_denotation_cats = 2 # Hardcoded for simplicity self._denotation_only = denotation_only if self._denotation_only: self._denotation_accuracy_cat = CategoricalAccuracy() self._denotation_classifier = torch.nn.Linear( self._encoder_output_dim, self._num_denotation_cats) # Rest of init not needed for denotation only where no decoding to actions needed return self._action_padding_index = -1 # the padding value used by IndexField num_actions = vocab.get_vocab_size(self._rule_namespace) self._num_actions = num_actions self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) # We are tying the action embeddings used for input and output # self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) self._output_action_embedder = self._action_embedder # tied weights self._add_action_bias = add_action_bias if self._add_action_bias: self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1) # This is what we pass as input in the first step of decoding, when we don't have a # previous action, or a previous 
question attention. self._first_action_embedding = torch.nn.Parameter( torch.FloatTensor(action_embedding_dim)) self._first_attended_question = torch.nn.Parameter( torch.FloatTensor(self._encoder_output_dim)) torch.nn.init.normal_(self._first_action_embedding) torch.nn.init.normal_(self._first_attended_question) self._decoder_step = LinkingTransitionFunction( encoder_output_dim=self._encoder_output_dim, action_embedding_dim=action_embedding_dim, input_attention=attention, add_action_bias=self._add_action_bias, mixture_feedforward=mixture_feedforward, dropout=dropout, )
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, question_encoder: Optional[Seq2SeqEncoder] = None, choice_encoder: Optional[Seq2SeqEncoder] = None, initializer: InitializerApplicator = InitializerApplicator(), aggregate_question: Optional[str] = "max", aggregate_choice: Optional[str] = "max", embeddings_dropout_value: Optional[float] = 0.0, share_encoders: Optional[bool] = False, choices_init_from_question_states: Optional[bool] = False, use_choice_sum_instead_of_question: Optional[bool] = False, params=Params) -> None: super(QAMultiChoice_OneVsRest_Choices_v1, self).__init__(vocab) # TO DO: AllenNLP does not support statefull RNNS yet.. init_is_supported = False if not init_is_supported and (choices_init_from_question_states): raise ValueError( "choices_init_from_question_states=True or facts_init_from_question_states=True are not supported yet!" ) else: self._choices_init_from_question_states = choices_init_from_question_states self._use_cuda = (torch.cuda.is_available() and torch.cuda.current_device() >= 0) self._return_question_to_choices_att = False self._use_choice_sum_instead_of_question = use_choice_sum_instead_of_question self._params = params self._text_field_embedder = text_field_embedder if embeddings_dropout_value > 0.0: self._embeddings_dropout = torch.nn.Dropout( p=embeddings_dropout_value) else: self._embeddings_dropout = lambda x: x self._question_encoder = question_encoder # choices encoding self._choice_encoder = choice_encoder self._question_aggregate = aggregate_question self._choice_aggregate = aggregate_choice self._num_labels = vocab.get_vocab_size(namespace="labels") question_output_dim = self._text_field_embedder.get_output_dim() if self._question_encoder is not None: question_output_dim = self._question_encoder.get_output_dim() choice_output_dim = self._text_field_embedder.get_output_dim() if self._choice_encoder is not None: choice_output_dim = self._choice_encoder.get_output_dim() if question_output_dim != choice_output_dim: raise ConfigurationError( "Output dimension of the question_encoder (dim: {}), " "plus choice_encoder (dim: {})" "must match! ".format(question_output_dim, choice_output_dim)) # question to choice attention att_question_to_choice_params = params.get("att_question_to_choice") if "tensor_1_dim" in att_question_to_choice_params: att_question_to_choice_params = update_params( att_question_to_choice_params, { "tensor_1_dim": question_output_dim, "tensor_2_dim": choice_output_dim }) self._matrix_attention_question_to_choice = LegacyMatrixAttention( SimilarityFunction.from_params(att_question_to_choice_params)) self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__( self, embedding_dim: int, num_embeddings: int = None, projection_dim: int = None, weight: torch.FloatTensor = None, padding_index: int = None, trainable: bool = True, max_norm: float = None, norm_type: float = 2.0, scale_grad_by_freq: bool = False, sparse: bool = False, vocab_namespace: str = "tokens", pretrained_file: str = None, vocab: Vocabulary = None, ) -> None: super().__init__() if num_embeddings is None and vocab is None: raise ConfigurationError( "Embedding must be constructed with either num_embeddings or a vocabulary." ) if num_embeddings is None: num_embeddings = vocab.get_vocab_size(vocab_namespace) else: # If num_embeddings is present, set default namespace to None so that extend_vocab # call doesn't misinterpret that some namespace was originally used. vocab_namespace = None self.num_embeddings = num_embeddings self.padding_index = padding_index self.max_norm = max_norm self.norm_type = norm_type self.scale_grad_by_freq = scale_grad_by_freq self.sparse = sparse self._vocab_namespace = vocab_namespace self._pretrained_file = pretrained_file self.output_dim = projection_dim or embedding_dim if weight is not None and pretrained_file: raise ConfigurationError( "Embedding was constructed with both a weight and a pretrained file." ) elif pretrained_file is not None: if vocab is None: raise ConfigurationError( "To construct an Embedding from a pretrained file, you must also pass a vocabulary." ) # If we're loading a saved model, we don't want to actually read a pre-trained # embedding file - the embeddings will just be in our saved weights, and we might not # have the original embedding file anymore, anyway. # TODO: having to pass tokens here is SUPER gross, but otherwise this breaks the # extend_vocab method, which relies on the value of vocab_namespace being None # to infer at what stage the embedding has been constructed. Phew. weight = _read_pretrained_embeddings_file( pretrained_file, embedding_dim, vocab, vocab_namespace or "tokens") self.weight = torch.nn.Parameter(weight, requires_grad=trainable) elif weight is not None: self.weight = torch.nn.Parameter(weight, requires_grad=trainable) else: weight = torch.FloatTensor(num_embeddings, embedding_dim) self.weight = torch.nn.Parameter(weight, requires_grad=trainable) torch.nn.init.xavier_uniform_(self.weight) # Whatever way we have constructed the embedding, it should be consistent with # num_embeddings and embedding_dim. if self.weight.size() != (num_embeddings, embedding_dim): raise ConfigurationError( "A weight matrix was passed with contradictory embedding shapes." ) if self.padding_index is not None: self.weight.data[self.padding_index].fill_(0) if projection_dim: self._projection = torch.nn.Linear(embedding_dim, projection_dim) else: self._projection = None
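# Hedged usage sketch (illustrative only): the constructor above accepts either an explicit
# weight matrix or a vocabulary from which num_embeddings is derived. The toy sizes are made up,
# and the lookup call assumes the standard TokenEmbedder forward (not shown above).
import torch

# Explicit weight: its shape must agree with num_embeddings and embedding_dim.
weight = torch.randn(10, 5)
fixed_embedding = Embedding(embedding_dim=5, num_embeddings=10, weight=weight, trainable=False)

# Index lookup; a projection would be applied if projection_dim had been given (it wasn't here).
token_ids = torch.tensor([[1, 2, 3]])
vectors = fixed_embedding(token_ids)   # expected shape: (1, 3, 5)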
def __init__(self, vocab: Vocabulary, mention_feedforward: FeedForward, relation_feedforward: FeedForward, feature_size: int, spans_per_word: float, span_emb_dim: int, use_biaffine_rel: bool, rel_prop: int = 0, rel_prop_dropout_A: float = 0.0, rel_prop_dropout_f: float = 0.0, initializer: InitializerApplicator = InitializerApplicator(), positive_label_weight: float = 1.0, regularizer: Optional[RegularizerApplicator] = None) -> None: super(RelationExtractor, self).__init__(vocab, regularizer) # Need to hack this for cases where there's no relation data. It breaks Ulme's code. self._n_labels = max(vocab.get_vocab_size("relation_labels"), 1) # Span candidate scorer. # TODO(dwadden) make sure I've got the input dim right on this one. feedforward_scorer = torch.nn.Sequential( TimeDistributed(mention_feedforward), TimeDistributed( torch.nn.Linear(mention_feedforward.get_output_dim(), 1))) self._mention_pruner = Pruner(feedforward_scorer) # Relation scorer. self._use_biaffine_rel = use_biaffine_rel if self._use_biaffine_rel: self._biaffine = torch.nn.Linear(span_emb_dim, span_emb_dim) else: self._relation_feedforward = relation_feedforward self._relation_scorer = torch.nn.Linear( relation_feedforward.get_output_dim(), self._n_labels) self._spans_per_word = spans_per_word # TODO(dwadden) Add code to compute relation F1. # self._candidate_recall = CandidateRecall() self._relation_metrics = RelationMetrics1() class_weights = torch.cat([ torch.tensor([1.0]), positive_label_weight * torch.ones(self._n_labels) ]) self._loss = torch.nn.CrossEntropyLoss(reduction="sum", ignore_index=-1, weight=class_weights) self.rel_prop = rel_prop # Relation Propagation self._A_network = FeedForward(input_dim=self._n_labels, num_layers=1, hidden_dims=span_emb_dim, activations=lambda x: x, dropout=rel_prop_dropout_A) self._f_network = FeedForward(input_dim=2 * span_emb_dim, num_layers=1, hidden_dims=span_emb_dim, activations=torch.nn.Sigmoid(), dropout=rel_prop_dropout_f) initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, att_question_to_choice: SimilarityFunction, question_encoder: Optional[Seq2SeqEncoder] = None, choice_encoder: Optional[Seq2SeqEncoder] = None, initializer: InitializerApplicator = InitializerApplicator(), aggregate_question: Optional[str] = "max", aggregate_choice: Optional[str] = "max", embeddings_dropout_value: Optional[float] = 0.0) -> None: super(QAMultiChoiceMaxAttention, self).__init__(vocab) self._use_cuda = (torch.cuda.is_available() and torch.cuda.current_device() >= 0) self._text_field_embedder = text_field_embedder if embeddings_dropout_value > 0.0: self._embeddings_dropout = torch.nn.Dropout( p=embeddings_dropout_value) else: self._embeddings_dropout = lambda x: x self._question_encoder = question_encoder # choices encoding self._choice_encoder = choice_encoder self._question_aggregate = aggregate_question self._choice_aggregate = aggregate_choice self._num_labels = vocab.get_vocab_size(namespace="labels") question_output_dim = self._text_field_embedder.get_output_dim() if self._question_encoder is not None: question_output_dim = self._question_encoder.get_output_dim() choice_output_dim = self._text_field_embedder.get_output_dim() if self._choice_encoder is not None: choice_output_dim = self._choice_encoder.get_output_dim() if question_output_dim != choice_output_dim: raise ConfigurationError( "Output dimension of the question_encoder (dim: {}) " "and choice_encoder (dim: {})" "must match! ".format(question_output_dim, choice_output_dim)) # Check input tensor dimensions for the question to choices attention (similarity function) if hasattr(att_question_to_choice, "tensor_1_dim"): tensor_1_dim = att_question_to_choice.tensor_1_dim if tensor_1_dim != question_output_dim: raise ConfigurationError( "Output dimension of the question_encoder (dim: {}) " "and tensor_1_dim (dim: {}) of att_question_to_choice" "must match! ".format(question_output_dim, tensor_1_dim)) if hasattr(att_question_to_choice, "tensor_2_dim"): tensor_2_dim = att_question_to_choice.tensor_2_dim if tensor_2_dim != question_output_dim: raise ConfigurationError( "Output dimension of the choice_encoder (dim: {}) " "and tensor_2_dim (dim: {}) of att_question_to_choice" "must match! ".format(choice_output_dim, tensor_2_dim)) self._matrix_attention_question_to_choice = LegacyMatrixAttention( att_question_to_choice) self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__(self, vocab: Vocabulary, encoder: Seq2SeqEncoder, entity_encoder: Seq2VecEncoder, decoder_beam_search: BeamSearch, question_embedder: TextFieldEmbedder, schema_embedder:TextFieldEmbedder, input_attention: Attention, past_attention: Attention, max_decoding_steps: int, action_embedding_dim: int, gnn: bool = True, decoder_use_graph_entities: bool = True, decoder_self_attend: bool = True, gnn_timesteps: int = 2, parse_sql_on_decoding: bool = True, add_action_bias: bool = True, use_neighbor_similarity_for_linking: bool = True, dataset_path: str = 'dataset', training_beam_size: int = None, decoder_num_layers: int = 1, dropout: float = 0.0, rule_namespace: str = 'rule_labels', scoring_dev_params: dict = None, debug_parsing: bool = False) -> None: super().__init__(vocab) self.vocab = vocab self._encoder = encoder self._max_decoding_steps = max_decoding_steps if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._rule_namespace = rule_namespace self._question_embedder = question_embedder self._schema_embedder = schema_embedder self._add_action_bias = add_action_bias self._scoring_dev_params = scoring_dev_params or {} self.parse_sql_on_decoding = parse_sql_on_decoding self._entity_encoder = TimeDistributed(entity_encoder) self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking self._self_attend = decoder_self_attend self._decoder_use_graph_entities = decoder_use_graph_entities self._action_padding_index = -1 # the padding value used by IndexField self._exact_match = Average() self._sql_evaluator_match = Average() self._action_similarity = Average() self._acc_single = Average() self._acc_multi = Average() self._beam_hit = Average() self._action_embedding_dim = action_embedding_dim num_actions = vocab.get_vocab_size(self._rule_namespace) if self._add_action_bias: input_action_dim = action_embedding_dim + 1 else: input_action_dim = action_embedding_dim self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=input_action_dim) self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) encoder_output_dim = encoder.get_output_dim() if gnn: encoder_output_dim += action_embedding_dim self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim)) self._first_attended_utterance = torch.nn.Parameter(torch.FloatTensor(encoder_output_dim)) self._first_attended_output = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim)) torch.nn.init.normal_(self._first_action_embedding) torch.nn.init.normal_(self._first_attended_utterance) torch.nn.init.normal_(self._first_attended_output) self._num_entity_types = 9 self._embedding_dim = question_embedder.get_output_dim() self._entity_type_encoder_embedding = Embedding(self._num_entity_types, self._embedding_dim) self._entity_type_decoder_embedding = Embedding(self._num_entity_types, action_embedding_dim) self._linking_params = torch.nn.Linear(16, 1) torch.nn.init.uniform_(self._linking_params.weight, 0, 1) num_edge_types = 3 self._gnn = GatedGraphConv(self._embedding_dim, gnn_timesteps, num_edge_types=num_edge_types, dropout=dropout) self._decoder_num_layers = decoder_num_layers self._beam_search = decoder_beam_search self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size) if decoder_self_attend: self._transition_function = AttendPastSchemaItemsTransitionFunction(encoder_output_dim=encoder_output_dim, action_embedding_dim=action_embedding_dim, input_attention=input_attention, 
past_attention=past_attention, predict_start_type_separately=False, add_action_bias=self._add_action_bias, dropout=dropout, num_layers=self._decoder_num_layers) else: self._transition_function = LinkingTransitionFunction(encoder_output_dim=encoder_output_dim, action_embedding_dim=action_embedding_dim, input_attention=input_attention, predict_start_type_separately=False, add_action_bias=self._add_action_bias, dropout=dropout, num_layers=self._decoder_num_layers) self._ent2ent_ff = FeedForward(action_embedding_dim, 1, action_embedding_dim, Activation.by_name('relu')()) self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim) # TODO: Remove hard-coded dirs self._evaluate_func = partial(evaluate, db_dir=os.path.join(dataset_path, 'database'), table=os.path.join(dataset_path, 'tables.json'), check_valid=False) self.debug_parsing = debug_parsing
def __init__( self, vocab: Vocabulary, serialization_dir: str, pretrained_model: str, tokenizer_wrapper: HFTokenizerWrapper, num_labels: int, label_namespace: str = "labels", transformer_weights_path: str = None, initializer: InitializerApplicator = InitializerApplicator(), **kwargs, ) -> None: super().__init__(vocab, **kwargs) self._tokenizer_wrapper = tokenizer_wrapper self._label_namespace = label_namespace pre_serialization_dir = os.environ.get("pre_serialization_dir", None) if pre_serialization_dir is not None: tokenizer_wrapper.tokenizer = tokenizer_wrapper.load( pre_serialization_dir) if num_labels: self._num_labels = num_labels else: self._num_labels = vocab.get_vocab_size( namespace=self._label_namespace) self._accuracy = CategoricalAccuracy() self._classifier = AutoModelForSequenceClassification.from_pretrained( pretrained_model, num_labels=self._num_labels, return_dict=True) self._classifier.resize_token_embeddings( len(tokenizer_wrapper.tokenizer)) if transformer_weights_path is not None: with TemporaryDirectory() as tmpdirname: with tarfile.open(transformer_weights_path, mode="r:gz") as input_tar: logger.info("Extracting model...") input_tar.extractall(tmpdirname) model_state = torch.load( os.path.join(tmpdirname, "weights.th"), map_location=util.device_mapping(-1), ) source_prefix = "_transformers_model." target_prefix = "_classifier." + self._classifier.base_model_prefix + "." for target_name, parameter in self.named_parameters(): if not target_name.startswith(target_prefix): continue source_name = source_prefix + target_name[len(target_prefix ):] source_weights = model_state[source_name] parameter.data.copy_(source_weights.data) initializer(self) self._tokenizer_wrapper.tokenizer = self._tokenizer_wrapper.load( serialization_dir, pending=True) self._tokenizer_wrapper.save(serialization_dir) self._classifier.resize_token_embeddings( len(tokenizer_wrapper.tokenizer))
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, word_dim: int, hidden_dim: int, action_dim: int, ratio_dim: int, num_layers: int, recurrent_dropout_probability: float = 0.0, layer_dropout_probability: float = 0.0, same_dropout_mask_per_instance: bool = True, input_dropout: float = 0.0, output_null_nodes: bool = True, max_heads: int = None, max_swaps_per_node: int = 3, fix_unconnected_egraph: bool = True, validate_every_n_instances: int = None, action_embedding: Embedding = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None ) -> None: super(TransitionParser, self).__init__(vocab, regularizer) self._total_validation_instances = 0 self.num_actions = vocab.get_vocab_size('actions') self.text_field_embedder = text_field_embedder self.output_null_nodes = output_null_nodes self.max_heads = max_heads self.max_swaps_per_node = max_swaps_per_node self._fix_unconnected_egraph = fix_unconnected_egraph self.num_validation_instances = validate_every_n_instances self._xud_score = XUDScore(collapse=self.output_null_nodes) self.word_dim = word_dim self.hidden_dim = hidden_dim self.ratio_dim = ratio_dim self.action_dim = action_dim self.action_embedding = action_embedding if action_embedding is None: self.action_embedding = Embedding(num_embeddings=self.num_actions, embedding_dim=action_dim, trainable=False) # syntactic composition self.p_comp = torch.nn.Linear(self.hidden_dim * 5 + self.ratio_dim, self.word_dim) # parser state to hidden self.p_s2h = torch.nn.Linear(self.hidden_dim * 3 + self.ratio_dim, self.hidden_dim) # hidden to action self.p_act = torch.nn.Linear(self.hidden_dim + self.ratio_dim, self.num_actions) self.update_null_node = torch.nn.Linear(self.hidden_dim + self.ratio_dim, self.word_dim) self.pempty_buffer_emb = torch.nn.Parameter(torch.randn(self.hidden_dim)) self.proot_stack_emb = torch.nn.Parameter(torch.randn(self.word_dim)) self.pempty_action_emb = torch.nn.Parameter(torch.randn(self.hidden_dim)) self.pempty_stack_emb = torch.nn.Parameter(torch.randn(self.hidden_dim)) self._input_dropout = Dropout(input_dropout) self.buffer = StackRnn(input_size=self.word_dim, hidden_size=self.hidden_dim, num_layers=num_layers, recurrent_dropout_probability=recurrent_dropout_probability, layer_dropout_probability=layer_dropout_probability, same_dropout_mask_per_instance=same_dropout_mask_per_instance) self.stack = StackRnn(input_size=self.word_dim, hidden_size=self.hidden_dim, num_layers=num_layers, recurrent_dropout_probability=recurrent_dropout_probability, layer_dropout_probability=layer_dropout_probability, same_dropout_mask_per_instance=same_dropout_mask_per_instance) self.action_stack = StackRnn(input_size=self.action_dim, hidden_size=self.hidden_dim, num_layers=num_layers, recurrent_dropout_probability=recurrent_dropout_probability, layer_dropout_probability=layer_dropout_probability, same_dropout_mask_per_instance=same_dropout_mask_per_instance) initializer(self)
def get_pretrained_embedding_layer(embeddings_filename: str, vocab: Vocabulary, namespace: str = "tokens", trainable: bool = True): """ Reads a pre-trained embedding file and generates an Embedding layer that has weights initialized to the pre-trained embeddings. The Embedding layer can either be trainable or not. We use the ``Vocabulary`` to map from the word strings in the embeddings file to the indices that we need, and to know which words from the embeddings file we can safely ignore. Parameters ---------- embeddings_filename : str, required. The path to a file containing pretrained embeddings. The embeddings file is assumed to be gzipped and space delimited, e.g. [word] [dim 1] [dim 2] ... vocab : Vocabulary, required. A Vocabulary object. namespace : str, (optional, default=tokens) The namespace of the vocabulary to find pretrained embeddings for. trainable : bool, (optional, default=True) Whether or not the embedding parameters should be optimized. Returns ------- An Embedding Module initialised with a weight matrix of shape (vocab.get_vocab_size(namespace), pretrained_embedding_dim), where the indices of words appearing in the pretrained embedding file are initialized to the pretrained embedding value. """ words_to_keep = set(vocab.get_index_to_token_vocabulary(namespace).values()) vocab_size = vocab.get_vocab_size(namespace) embeddings = {} embedding_dim = None # First we read the embeddings from the file, only keeping vectors for the words we need. logger.info("Reading embeddings from file") with gzip.open(embeddings_filename, 'rb') as embeddings_file: for line in embeddings_file: fields = line.decode('utf-8').strip().split(' ') if embedding_dim is None: embedding_dim = len(fields) - 1 assert embedding_dim > 1, "Found embedding size of 1; do you have a header?" else: if len(fields) - 1 != embedding_dim: # Sometimes there are funny unicode parsing problems that lead to different # fields lengths (e.g., a word with a unicode space character that splits # into more than one column). We skip those lines. Note that if you have # some kind of long header, this could result in all of your lines getting # skipped. It's hard to check for that here; you just have to look in the # embedding_misses_file and at the model summary to make sure things look # like they are supposed to. continue word = fields[0] if word in words_to_keep: vector = numpy.asarray(fields[1:], dtype='float32') embeddings[word] = vector # Now we initialize the weight matrix for an embedding layer, starting with random vectors, # then filling in the word vectors we just read. logger.info("Initializing pre-trained embedding layer") embedding_matrix = torch.FloatTensor(vocab_size, embedding_dim).normal_(0, 1) for i in range(0, vocab_size): word = vocab.get_token_from_index(i, namespace) # If we don't have a pre-trained vector for this word, we'll just leave this row alone, # so the word has a random initialization. if word in embeddings: embedding_matrix[i] = torch.FloatTensor(embeddings[word]) else: logger.debug("Word %s was not found in the embedding file. Initialising randomly.", word) # The weight matrix is initialized, so we construct and return the actual Embedding. return Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim, padding_index=0, weight=embedding_matrix, trainable=trainable)
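# Hedged usage sketch for get_pretrained_embedding_layer above: write a tiny gzipped,
# space-delimited embeddings file and build an Embedding layer from it. The file path
# and tokens are made up for illustration; assumes an AllenNLP Vocabulary is available.
import gzip
from allennlp.data import Vocabulary

with gzip.open('/tmp/tiny_embeddings.txt.gz', 'wt', encoding='utf-8') as f:
    f.write("cat 0.1 0.2 0.3\n")
    f.write("dog 0.4 0.5 0.6\n")

vocab = Vocabulary()
for token in ["cat", "dog", "fish"]:
    vocab.add_token_to_namespace(token, namespace="tokens")

# "fish" does not appear in the file, so its row keeps its random initialization.
embedding_layer = get_pretrained_embedding_layer('/tmp/tiny_embeddings.txt.gz', vocab)
print(embedding_layer.weight.shape)  # (vocab_size, 3)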
def _read_pretrained_word2vec_format_embedding_file(embeddings_filename: str, # pylint: disable=invalid-name embedding_dim: int, vocab: Vocabulary, namespace: str = "tokens") -> torch.FloatTensor: """ Read from a gzipped-word2vec format file. The embeddings file is assumed to be gzipped and space delimited, e.g. [word] [dim 1] [dim 2] ... The remainder of the docstring is identical to ``_read_pretrained_embedding_file``. """ words_to_keep = set(vocab.get_index_to_token_vocabulary(namespace).values()) vocab_size = vocab.get_vocab_size(namespace) embeddings = {} # First we read the embeddings from the file, only keeping vectors for the words we need. logger.info("Reading embeddings from file") with gzip.open(cached_path(embeddings_filename), 'rb') as embeddings_file: for line in embeddings_file: fields = line.decode('utf-8').strip().split(' ') if len(fields) - 1 != embedding_dim: # Sometimes there are funny unicode parsing problems that lead to different # fields lengths (e.g., a word with a unicode space character that splits # into more than one column). We skip those lines. Note that if you have # some kind of long header, this could result in all of your lines getting # skipped. It's hard to check for that here; you just have to look in the # embedding_misses_file and at the model summary to make sure things look # like they are supposed to. logger.warning("Found line with wrong number of dimensions (expected %d, was %d): %s", embedding_dim, len(fields) - 1, line) continue word = fields[0] if word in words_to_keep: vector = numpy.asarray(fields[1:], dtype='float32') embeddings[word] = vector if not embeddings: raise ConfigurationError("No embeddings of correct dimension found; you probably " "misspecified your embedding_dim parameter, or didn't " "pre-populate your Vocabulary") all_embeddings = numpy.asarray(list(embeddings.values())) embeddings_mean = float(numpy.mean(all_embeddings)) embeddings_std = float(numpy.std(all_embeddings)) # Now we initialize the weight matrix for an embedding layer, starting with random vectors, # then filling in the word vectors we just read. logger.info("Initializing pre-trained embedding layer") embedding_matrix = torch.FloatTensor(vocab_size, embedding_dim).normal_(embeddings_mean, embeddings_std) for i in range(0, vocab_size): word = vocab.get_token_from_index(i, namespace) # If we don't have a pre-trained vector for this word, we'll just leave this row alone, # so the word has a random initialization. if word in embeddings: embedding_matrix[i] = torch.FloatTensor(embeddings[word]) else: logger.debug("Word %s was not found in the embedding file. Initialising randomly.", word) # The weight matrix is initialized, so we construct and return the actual Embedding. return embedding_matrix
def from_params(cls, vocab: Vocabulary, params: Params) -> 'EmbeddingMultilang': # type: ignore """ We need the vocabulary here to know how many items we need to embed, and we look for a ``vocab_namespace`` key in the parameter dictionary to know which vocabulary to use. If you know beforehand exactly how many embeddings you need, or aren't using a vocabulary mapping for the things getting embedded here, then you can pass in the ``num_embeddings`` key directly, and the vocabulary will be ignored. In the configuration file, a file containing pretrained embeddings can be specified using the parameter ``"pretrained_files"``. It can be the path to a local file or an URL of a (cached) remote file. Two formats are supported: * hdf5 file - containing an embedding matrix in the form of a torch.Tensor; * text file - an utf-8 encoded text file with space separated fields:: [word] [dim 1] [dim 2] ... The text file can eventually be compressed with gzip, bz2, lzma or zip. You can even select a single file inside an archive containing multiple files using the URI:: "(archive_uri)#file_path_inside_the_archive" where ``archive_uri`` can be a file system path or a URL. For example:: "(http://nlp.stanford.edu/data/glove.twitter.27B.zip)#glove.twitter.27B.200d.txt" """ # pylint: disable=arguments-differ num_embeddings = params.pop_int('num_embeddings', None) # If num_embeddings is present, set default namespace to None so that extend_vocab # call doesn't misinterpret that some namespace was originally used. vocab_namespace = params.pop("vocab_namespace", None if num_embeddings else "tokens") if num_embeddings is None: num_embeddings = vocab.get_vocab_size(vocab_namespace) embedding_dim = params.pop_int('embedding_dim') pretrained_files = params.pop("pretrained_files", None) projection_dim = params.pop_int("projection_dim", None) trainable = params.pop_bool("trainable", True) padding_index = params.pop_int('padding_index', None) max_norm = params.pop_float('max_norm', None) norm_type = params.pop_float('norm_type', 2.) scale_grad_by_freq = params.pop_bool('scale_grad_by_freq', False) sparse = params.pop_bool('sparse', False) params.assert_empty(cls.__name__) # Could have a multilang_embeddings with language keys and average the returned results? #multilang_embeddings = defaultdict(lambda: {}) # value = np.mean(np.array([original_vector, new_vector]), axis=0) # Create multilang_embeddings and update for each 'embeddings' dict we retrieve. multilang_embeddings = {} if pretrained_files: for lang in pretrained_files.keys(): pretrained_file = pretrained_files[lang] logger.info("Searching embeddings for lang %s with file %s", lang, pretrained_file) embeddings = _read_pretrained_embeddings_file(pretrained_file, embedding_dim, vocab, vocab_namespace) print("found {} embeddings".format(len(embeddings))) # Rather than overwrite existing dictionary values, take the average of matching tokens' vectors. 
for token, vector in embeddings.items(): if token not in multilang_embeddings: multilang_embeddings[token] = vector else: original_vector = multilang_embeddings[token] # take the mean of the original and new vector mean_vector = np.mean(np.array([original_vector, vector]), axis=0) multilang_embeddings[token] = mean_vector #multilang_embeddings.update(embeddings) #multilang_embeddings[lang] = embeddings print("size of multilang embeddings: ", len(multilang_embeddings)) # If we're loading a saved model, we don't want to actually read a pre-trained # embedding file - the embeddings will just be in our saved weights, and we might not # have the original embedding file anymore, anyway. weight = _create_weight_matrix(multilang_embeddings, embedding_dim, vocab, vocab_namespace) print("weight size", weight.size()) # weight size torch.Size([87551, 100]) else: weight = None return cls(num_embeddings=num_embeddings, embedding_dim=embedding_dim, projection_dim=projection_dim, weight=weight, padding_index=padding_index, trainable=trainable, max_norm=max_norm, norm_type=norm_type, scale_grad_by_freq=scale_grad_by_freq, sparse=sparse, vocab_namespace=vocab_namespace)
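# Tiny worked example of the merge rule above: when a token appears in the pretrained
# files of more than one language, its vectors are averaged element-wise.
import numpy as np

original_vector = np.array([1.0, 2.0, 3.0], dtype='float32')
new_vector = np.array([3.0, 0.0, 1.0], dtype='float32')
mean_vector = np.mean(np.array([original_vector, new_vector]), axis=0)
print(mean_vector)  # [2. 1. 2.]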
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, aggregate_feedforward: FeedForward, premise_encoder: Optional[Seq2SeqEncoder] = None, hypothesis_encoder: Optional[Seq2SeqEncoder] = None, initializer: InitializerApplicator = InitializerApplicator(), aggregate_premise: Optional[str] = "max", aggregate_hypothesis: Optional[str] = "max", embeddings_dropout_value: Optional[float] = 0.0, share_encoders: Optional[bool] = False) -> None: super(StackedNNAggregateCustom, self).__init__(vocab) self._text_field_embedder = text_field_embedder if embeddings_dropout_value > 0.0: self._embeddings_dropout = torch.nn.Dropout( p=embeddings_dropout_value) else: self._embeddings_dropout = lambda x: x self._aggregate_feedforward = aggregate_feedforward self._premise_encoder = premise_encoder self._hypothesis_encoder = hypothesis_encoder self._premise_aggregate = aggregate_premise self._hypothesis_aggregate = aggregate_hypothesis self._num_labels = vocab.get_vocab_size(namespace="labels") premise_output_dim = self._text_field_embedder.get_output_dim() if self._premise_encoder is not None: premise_output_dim = self._premise_encoder.get_output_dim() hypothesis_output_dim = self._text_field_embedder.get_output_dim() if self._hypothesis_encoder is not None: hypothesis_output_dim = self._hypothesis_encoder.get_output_dim() if premise_output_dim != hypothesis_output_dim: raise ConfigurationError( "Output dimension of the premise_encoder (dim: {}), " "plus hypothesis_encoder (dim: {})" "must match! ".format(premise_output_dim, hypothesis_output_dim)) if premise_output_dim * 4 != \ aggregate_feedforward.get_input_dim(): raise ConfigurationError( "The output of aggregate_feedforward input dim ({2}) " "should be {3} = 4 x {0} ({1} = premise_output_dim == hypothesis_output_dim)!" .format(premise_output_dim, hypothesis_output_dim, aggregate_feedforward.get_input_dim(), 4 * premise_output_dim)) if aggregate_feedforward.get_output_dim() != self._num_labels: raise ConfigurationError( "Final output dimension (%d) must equal num labels (%d)" % (aggregate_feedforward.get_output_dim(), self._num_labels)) self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__( self, vocabulary: Vocabulary, image_feature_size: int, embedding_size: int, hidden_size: int, attention_projection_size: int, penultimate_feature_size: int, saliency_attention_projection_size: int, max_caption_length: int = 20, beam_size: int = 1, use_cbs: bool = False, min_constraints_to_satisfy: int = 2, ) -> None: super().__init__() self._vocabulary = vocabulary self.image_feature_size = image_feature_size self.embedding_size = embedding_size self.hidden_size = hidden_size self.attention_projection_size = attention_projection_size self.penultimate_feature_size = penultimate_feature_size self.saliency_attention_projection_size = saliency_attention_projection_size self._max_caption_length = max_caption_length self._use_cbs = use_cbs self._min_constraints_to_satisfy = min_constraints_to_satisfy # Short hand variable names for convenience _vocab_size = vocabulary.get_vocab_size() self._pad_index = vocabulary.get_token_index("@@UNKNOWN@@") self._boundary_index = vocabulary.get_token_index("@@BOUNDARY@@") self._is_val = True # Initialize embedding layer with GloVe embeddings and freeze it if the specified size # is 300. CBS cannot be supported for any other embedding size, using CBS is optional # with embedding size 300. So in either cases, embeddig size is the deciding factor. if self.embedding_size == 300: glove_vectors = self._initialize_glove() self._embedding_layer = nn.Embedding.from_pretrained( glove_vectors, freeze=True, padding_idx=self._pad_index) else: self._embedding_layer = nn.Embedding(_vocab_size, embedding_size, padding_idx=self._pad_index) assert not use_cbs, "CBS is not supported without Frozen GloVe embeddings (300d), " f"found embedding size to be {self.embedding_size}." self._updown_saliency_cell = UpDownSaliencyCell( image_feature_size, embedding_size, hidden_size, attention_projection_size, penultimate_feature_size, saliency_attention_projection_size) if self.embedding_size == 300: # Tie the input and output word embeddings when using frozen GloVe embeddings. # In this case, project hidden states to GloVe dimension (with a non-linearity). self._output_projection = nn.Sequential( nn.Linear(hidden_size, self.embedding_size), nn.Tanh()) self._output_layer = nn.Linear(self.embedding_size, _vocab_size, bias=False) self._output_layer.weight = self._embedding_layer.weight # for saliency LSTM self._output_projection_saliency = nn.Sequential( nn.Linear(hidden_size, self.embedding_size), nn.Tanh()) self._output_layer_saliency = nn.Linear(self.embedding_size, _vocab_size, bias=False) self._output_layer_saliency.weight = self._embedding_layer.weight else: # Else don't tie them when learning embeddings during training. # In this case, project hidden states directly to output vocab space. self._output_projection = nn.Identity() # type: ignore self._output_layer = nn.Linear(hidden_size, _vocab_size) self._log_softmax = nn.LogSoftmax(dim=1) self._softmax = nn.Softmax(dim=1) self._softmax_saliency = nn.Softmax(dim=1) # We use beam search to find the most likely caption during inference. BeamSearchClass = ConstrainedBeamSearch if use_cbs else BeamSearch self._beam_search = BeamSearchClass( self._boundary_index, max_steps=max_caption_length, beam_size=beam_size, per_node_beam_size=beam_size // 2, )
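# Standalone sketch (with assumed sizes) of the input/output embedding tying used above
# when frozen 300-d GloVe vectors are in play: hidden states are first projected back to
# the embedding dimension, and the output layer reuses the embedding matrix as its weight.
import torch
from torch import nn

vocab_size, hidden_size, embedding_size = 1000, 512, 300
embedding_layer = nn.Embedding(vocab_size, embedding_size)
output_projection = nn.Sequential(nn.Linear(hidden_size, embedding_size), nn.Tanh())
output_layer = nn.Linear(embedding_size, vocab_size, bias=False)
output_layer.weight = embedding_layer.weight  # tie: logits = proj(h) @ E^T

hidden = torch.randn(4, hidden_size)
logits = output_layer(output_projection(hidden))
print(logits.shape)  # torch.Size([4, 1000])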
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, question_encoder: Optional[Seq2SeqEncoder], choice_encoder: Optional[Seq2SeqEncoder], similarity_function: SimilarityFunction, projection_feedforward: FeedForward, inference_encoder: Seq2SeqEncoder, output_feedforward: FeedForward, output_logit: FeedForward, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, embeddings_dropout_value: Optional[float] = 0.0, encoder_dropout_value: Optional[float] = 0.0, ) -> None: super(QAMultiChoiceESIM, self).__init__(vocab) self._matrix_attention = LegacyMatrixAttention(similarity_function) self._projection_feedforward = projection_feedforward self._inference_encoder = inference_encoder self._output_feedforward = output_feedforward self._output_logit = output_logit check_dimensions_match(choice_encoder.get_output_dim(), question_encoder.get_output_dim(), "choice_encoder output dim", "question_encoder output dim") check_dimensions_match(text_field_embedder.get_output_dim(), question_encoder.get_input_dim(), "text field embedding dim", "encoder input dim") check_dimensions_match(question_encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(), "encoder output dim", "projection feedforward input") check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(), "proj feedforward output dim", "inference lstm input dim") self._use_cuda = (torch.cuda.is_available() and torch.cuda.current_device() >= 0) self._text_field_embedder = text_field_embedder if embeddings_dropout_value > 0.0: self._embeddings_dropout = torch.nn.Dropout( p=embeddings_dropout_value) else: self._embeddings_dropout = lambda x: x if encoder_dropout_value: self.dropout = torch.nn.Dropout(encoder_dropout_value) self.rnn_input_dropout = VariationalDropout(encoder_dropout_value) else: self.dropout = None self.rnn_input_dropout = None self._question_encoder = question_encoder # choices encoding self._choice_encoder = choice_encoder self._num_labels = vocab.get_vocab_size(namespace="labels") question_output_dim = self._text_field_embedder.get_output_dim() if self._question_encoder is not None: question_output_dim = self._question_encoder.get_output_dim() choice_output_dim = self._text_field_embedder.get_output_dim() if self._choice_encoder is not None: choice_output_dim = self._choice_encoder.get_output_dim() self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, context_layer: Seq2SeqEncoder, relex_feedforward: FeedForward, antecedent_feedforward: FeedForward, feature_size: int, max_span_width: int, spans_per_word: float, relex_spans_per_word: float, max_antecedents: int, mention_feedforward: FeedForward, coref_mention_feedforward: FeedForward = None, relex_mention_feedforward: FeedForward = None, symmetric_relations: bool = False, lexical_dropout: float = 0.2, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, loss_coref_weight: float = 1, loss_relex_weight: float = 1, loss_ner_weight: float = 1, preserve_metadata: List = None, relex_namespace: str = 'relation_labels', collect_clusters: bool = False) -> None: # If separate coref mention and relex mention feedforward scorers # are not provided, share the one of NER module if coref_mention_feedforward is None: coref_mention_feedforward = mention_feedforward if relex_mention_feedforward is None: relex_mention_feedforward = mention_feedforward super().__init__(vocab, text_field_embedder, context_layer, coref_mention_feedforward, antecedent_feedforward, feature_size, max_span_width, spans_per_word, max_antecedents, lexical_dropout, initializer, regularizer) self._symmetric_relations = symmetric_relations self._relex_spans_per_word = relex_spans_per_word self._loss_coref_weight = loss_coref_weight self._loss_relex_weight = loss_relex_weight self._loss_ner_weight = loss_ner_weight self._preserve_metadata = preserve_metadata or ['id'] self._relex_namespace = relex_namespace self._collect_clusters = collect_clusters relex_labels = list( vocab.get_token_to_index_vocabulary(self._relex_namespace)) self._relex_mention_recall = RelexMentionRecall() self._relex_precision_recall_fscore = PrecisionRecallFScore( labels=relex_labels) relex_mention_scorer = Sequential( TimeDistributed(relex_mention_feedforward), TimeDistributed( Projection(relex_mention_feedforward.get_output_dim()))) self._relex_mention_pruner = MultiTimeDistributed( Pruner(relex_mention_scorer)) self._ner_scorer = Sequential( TimeDistributed(mention_feedforward), TimeDistributed( Projection(mention_feedforward.get_output_dim(), vocab.get_vocab_size('ner_labels'), with_dummy=True))) self._relex_scorer = Sequential( TimeDistributed(relex_feedforward), TimeDistributed( Projection(relex_feedforward.get_output_dim(), vocab.get_vocab_size(self._relex_namespace), with_dummy=True)))
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, projection_feedforward: FeedForward, key_projection_feedforward: FeedForward, inference_encoder: Seq2SeqEncoder, link_key_encoder: Seq2SeqEncoder, key_compare_feedforward: FeedForward, output_feedforward: FeedForward, output_logit: FeedForward, dropout: float = 0.5, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_map = vocab.get_token_to_index_vocabulary('labels') l_map = [None] * len(self.label_map) for lb, lb_idx in self.label_map.items(): l_map[lb_idx] = lb self.label_map = l_map self._text_field_embedder = text_field_embedder self._word_embedding_dimension = text_field_embedder.get_output_dim() self._sentence_encoder = encoder self._encoded_word_dimension = self._sentence_encoder.get_output_dim() self._matrix_attention = DotProductMatrixAttention() self._projection_feedforward = projection_feedforward self._key_projection_feedforward = key_projection_feedforward self._inference_encoder = inference_encoder self._link_key_encoder = link_key_encoder self._embedded_key_dimension = self._link_key_encoder.get_output_dim() self._key_compare_feedforward = key_compare_feedforward if dropout: self.dropout = torch.nn.Dropout(dropout) self.rnn_input_dropout = InputVariationalDropout(dropout) else: self.dropout = None self.rnn_input_dropout = None self._output_feedforward = output_feedforward self._output_logit = output_logit self._num_labels = vocab.get_vocab_size(namespace="labels") check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(), "encoder output dim", "projection feedforward input") check_dimensions_match(encoder.get_output_dim() * 4, key_projection_feedforward.get_input_dim(), "encoder output dim", "projection feedforward input") check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(), "proj feedforward output dim", "inference lstm input dim") check_dimensions_match(key_projection_feedforward.get_output_dim(), link_key_encoder.get_input_dim(), "key proj feedforward output dim", "link key lstm input dim") check_dimensions_match(key_projection_feedforward.get_output_dim(), link_key_encoder.get_input_dim(), "key proj feedforward output dim", "inference lstm input dim") self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__(self, vocab: Vocabulary, task: str, encoder: Seq2SeqEncoder, prev_task: str, prev_task_embed_dim: int = None, label_smoothing: float = 0.0, dropout: float = 0.0, adaptive: bool = False, features: List[str] = None, metric: str = "acc", loss_weight: float = 1.0, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(TagDecoder, self).__init__(vocab, regularizer) self.task = task self.dropout = torch.nn.Dropout(p=dropout) self.encoder = encoder self.output_dim = encoder.get_output_dim() self.label_smoothing = label_smoothing self.num_classes = self.vocab.get_vocab_size(task) self.adaptive = adaptive self.features = features if features else [] self.metric = metric self.loss_weight = loss_weight # A: add all possible relative encoding to vocabulary if self.vocab.get_token_index('100,root') == 1: for head in self.vocab.get_token_to_index_vocabulary('head_tags').keys(): all_encodings = get_all_relative_encodings(head) self.vocab.add_tokens_to_namespace(tokens=all_encodings, namespace='dep_encoded') # make sure to put end token '100,root' self.vocab.add_token_to_namespace(token='100,root', namespace='dep_encoded') self.prev_task_tag_embedding = None if prev_task_embed_dim is not None and prev_task_embed_dim != 0 and prev_task is not None: if prev_task != 'rependency': self.prev_task_tag_embedding = Embedding(self.vocab.get_vocab_size(prev_task), prev_task_embed_dim) else: self.prev_task_tag_embedding = Embedding(self.vocab.get_vocab_size('dep_encoded'), prev_task_embed_dim) # Choose the metric to use for the evaluation (from the defined # "metric" value of the task). If not specified, default to accuracy. if self.metric == "acc": self.metrics = {"acc": CategoricalAccuracy()} elif self.metric == "span_f1": self.metrics = {"span_f1": SpanBasedF1Measure( self.vocab, tag_namespace=self.task, label_encoding="BIO")} else: logger.warning(f"ERROR. Metric: {self.metric} unrecognized. Using accuracy instead.") self.metrics = {"acc": CategoricalAccuracy()} if self.adaptive: # TODO adaptive_cutoffs = [round(self.num_classes / 15), 3 * round(self.num_classes / 15)] self.task_output = AdaptiveLogSoftmaxWithLoss(self.output_dim, self.num_classes, cutoffs=adaptive_cutoffs, div_value=4.0) else: self.task_output = TimeDistributed(Linear(self.output_dim, self.num_classes)) self.feature_outputs = torch.nn.ModuleDict() self.features_metrics = {} for feature in self.features: self.feature_outputs[feature] = TimeDistributed(Linear(self.output_dim, vocab.get_vocab_size(feature))) self.features_metrics[feature] = { "acc": CategoricalAccuracy(), } initializer(self)
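# Sketch of the adaptive-softmax branch above, using the same cutoff heuristic
# (round(num_classes / 15) and three times that). The sizes are illustrative.
import torch

output_dim, num_classes = 256, 300
adaptive_cutoffs = [round(num_classes / 15), 3 * round(num_classes / 15)]  # [20, 60]
task_output = torch.nn.AdaptiveLogSoftmaxWithLoss(output_dim, num_classes,
                                                  cutoffs=adaptive_cutoffs, div_value=4.0)

hidden = torch.randn(8, output_dim)
targets = torch.randint(0, num_classes, (8,))
out = task_output(hidden, targets)  # returns (per-example target log-probs, mean loss)
print(out.output.shape, out.loss.item())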
def from_params(cls, vocab: Vocabulary, params: Params) -> 'Embedding': # type: ignore """ We need the vocabulary here to know how many items we need to embed, and we look for a ``vocab_namespace`` key in the parameter dictionary to know which vocabulary to use. If you know beforehand exactly how many embeddings you need, or aren't using a vocabulary mapping for the things getting embedded here, then you can pass in the ``num_embeddings`` key directly, and the vocabulary will be ignored. In the configuration file, a file containing pretrained embeddings can be specified using the parameter ``"pretrained_file"``. It can be the path to a local file or an URL of a (cached) remote file. Two formats are supported: * hdf5 file - containing an embedding matrix in the form of a torch.Tensor; * text file - an utf-8 encoded text file with space separated fields:: [word] [dim 1] [dim 2] ... The text file can eventually be compressed with gzip, bz2, lzma or zip. You can even select a single file inside an archive containing multiple files using the URI:: "(archive_uri)#file_path_inside_the_archive" where ``archive_uri`` can be a file system path or a URL. For example:: "(https://nlp.stanford.edu/data/glove.twitter.27B.zip)#glove.twitter.27B.200d.txt" """ # pylint: disable=arguments-differ num_embeddings = params.pop_int('num_embeddings', None) # If num_embeddings is present, set default namespace to None so that extend_vocab # call doesn't misinterpret that some namespace was originally used. vocab_namespace = params.pop("vocab_namespace", None if num_embeddings else "tokens") if num_embeddings is None: num_embeddings = vocab.get_vocab_size(vocab_namespace) embedding_dim = params.pop_int('embedding_dim') pretrained_file = params.pop("pretrained_file", None) projection_dim = params.pop_int("projection_dim", None) trainable = params.pop_bool("trainable", True) padding_index = params.pop_int('padding_index', None) max_norm = params.pop_float('max_norm', None) norm_type = params.pop_float('norm_type', 2.) scale_grad_by_freq = params.pop_bool('scale_grad_by_freq', False) sparse = params.pop_bool('sparse', False) params.assert_empty(cls.__name__) if pretrained_file: # If we're loading a saved model, we don't want to actually read a pre-trained # embedding file - the embeddings will just be in our saved weights, and we might not # have the original embedding file anymore, anyway. weight = _read_pretrained_embeddings_file(pretrained_file, embedding_dim, vocab, vocab_namespace) else: weight = None return cls(num_embeddings=num_embeddings, embedding_dim=embedding_dim, projection_dim=projection_dim, weight=weight, padding_index=padding_index, trainable=trainable, max_norm=max_norm, norm_type=norm_type, scale_grad_by_freq=scale_grad_by_freq, sparse=sparse, vocab_namespace=vocab_namespace)
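# Hedged example of a configuration this from_params would accept: the pretrained file is
# addressed with the "(archive_uri)#path_inside_the_archive" syntax described in the
# docstring, reusing the docstring's own GloVe example. Building the embedder itself
# requires a populated Vocabulary, so that call is left commented out.
from allennlp.common import Params

embedding_params = Params({
    "embedding_dim": 200,
    "vocab_namespace": "tokens",
    "trainable": False,
    "pretrained_file":
        "(https://nlp.stanford.edu/data/glove.twitter.27B.zip)#glove.twitter.27B.200d.txt",
})
# embedding = Embedding.from_params(vocab, embedding_params)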
def _read_embeddings_from_text_file( file_uri: str, embedding_dim: int, vocab: Vocabulary, namespace: str = "tokens") -> torch.FloatTensor: """ Read pre-trained word vectors from an eventually compressed text file, possibly contained inside an archive with multiple files. The text file is assumed to be utf-8 encoded with space-separated fields: [word] [dim 1] [dim 2] ... Lines that contain more numerical tokens than ``embedding_dim`` raise a warning and are skipped. The remainder of the docstring is identical to ``_read_pretrained_embeddings_file``. """ tokens_to_keep = set( vocab.get_index_to_token_vocabulary(namespace).values()) vocab_size = vocab.get_vocab_size(namespace) embeddings = {} # First we read the embeddings from the file, only keeping vectors for the words we need. logger.info("Reading pretrained embeddings from file") with EmbeddingsTextFile(file_uri) as embeddings_file: for line in Tqdm.tqdm(embeddings_file): token = line.split(' ', 1)[0] if token in tokens_to_keep: fields = line.rstrip().split(' ') if len(fields) - 1 != embedding_dim: # Sometimes there are funny unicode parsing problems that lead to different # fields lengths (e.g., a word with a unicode space character that splits # into more than one column). We skip those lines. Note that if you have # some kind of long header, this could result in all of your lines getting # skipped. It's hard to check for that here; you just have to look in the # embedding_misses_file and at the model summary to make sure things look # like they are supposed to. logger.warning( "Found line with wrong number of dimensions (expected: %d; actual: %d): %s", embedding_dim, len(fields) - 1, line) continue vector = numpy.asarray(fields[1:], dtype='float32') embeddings[token] = vector if not embeddings: raise ConfigurationError( "No embeddings of correct dimension found; you probably " "misspecified your embedding_dim parameter, or didn't " "pre-populate your Vocabulary") all_embeddings = numpy.asarray(list(embeddings.values())) embeddings_mean = float(numpy.mean(all_embeddings)) embeddings_std = float(numpy.std(all_embeddings)) # Now we initialize the weight matrix for an embedding layer, starting with random vectors, # then filling in the word vectors we just read. logger.info("Initializing pre-trained embedding layer") embedding_matrix = torch.FloatTensor(vocab_size, embedding_dim).normal_( embeddings_mean, embeddings_std) num_tokens_found = 0 index_to_token = vocab.get_index_to_token_vocabulary(namespace) for i in range(vocab_size): token = index_to_token[i] # If we don't have a pre-trained vector for this word, we'll just leave this row alone, # so the word has a random initialization. if token in embeddings: embedding_matrix[i] = torch.FloatTensor(embeddings[token]) num_tokens_found += 1 else: logger.debug( "Token %s was not found in the embedding file. Initialising randomly.", token) logger.info("Pretrained embeddings were found for %d out of %d tokens", num_tokens_found, vocab_size) return embedding_matrix
def __init__(self, vocab: Vocabulary, question_embedder: TextFieldEmbedder, action_embedding_dim: int, encoder: Seq2SeqEncoder, entity_encoder: Seq2VecEncoder, max_decoding_steps: int, add_action_bias: bool = True, use_neighbor_similarity_for_linking: bool = False, dropout: float = 0.0, num_linking_features: int = 10, rule_namespace: str = 'rule_labels') -> None: super().__init__(vocab) self._question_embedder = question_embedder self._encoder = encoder self._entity_encoder = TimeDistributed(entity_encoder) self._max_decoding_steps = max_decoding_steps self._add_action_bias = add_action_bias self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._rule_namespace = rule_namespace self._denotation_accuracy = Average() self._action_sequence_accuracy = Average() self._has_logical_form = Average() self._action_padding_index = -1 # the padding value used by IndexField num_actions = vocab.get_vocab_size(self._rule_namespace) if self._add_action_bias: self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1) self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) self._output_action_embedder = Embedding( num_embeddings=num_actions, embedding_dim=action_embedding_dim) # This is what we pass as input in the first step of decoding, when we don't have a # previous action, or a previous question attention. self._first_action_embedding = torch.nn.Parameter( torch.FloatTensor(action_embedding_dim)) self._first_attended_question = torch.nn.Parameter( torch.FloatTensor(encoder.get_output_dim())) torch.nn.init.normal_(self._first_action_embedding) torch.nn.init.normal_(self._first_attended_question) check_dimensions_match(entity_encoder.get_output_dim(), question_embedder.get_output_dim(), "entity word average embedding dim", "question embedding dim") self._num_entity_types = 5 # TODO(mattg): get this in a more principled way somehow? self._embedding_dim = question_embedder.get_output_dim() self._entity_type_encoder_embedding = Embedding( self._num_entity_types, self._embedding_dim) self._entity_type_decoder_embedding = Embedding( self._num_entity_types, action_embedding_dim) self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim) if num_linking_features > 0: self._linking_params = torch.nn.Linear(num_linking_features, 1) else: self._linking_params = None if self._use_neighbor_similarity_for_linking: self._question_entity_params = torch.nn.Linear(1, 1) self._question_neighbor_params = torch.nn.Linear(1, 1) else: self._question_entity_params = None self._question_neighbor_params = None
def __init__(self, vocab: Vocabulary, question_embedder: TextFieldEmbedder, action_embedding_dim: int, encoder: Seq2SeqEncoder, decoder_beam_search: BeamSearch, max_decoding_steps: int, attention: Attention, mixture_feedforward: FeedForward = None, add_action_bias: bool = True, dropout: float = 0.0, num_linking_features: int = 0, num_entity_bits: int = 0, entity_bits_output: bool = True, use_entities: bool = False, denotation_only: bool = False, # Deprecated parameter to load older models entity_encoder: Seq2VecEncoder = None, # pylint: disable=unused-argument entity_similarity_mode: str = "dot_product", rule_namespace: str = 'rule_labels') -> None: super(QuarelSemanticParser, self).__init__(vocab) self._question_embedder = question_embedder self._encoder = encoder self._beam_search = decoder_beam_search self._max_decoding_steps = max_decoding_steps if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._rule_namespace = rule_namespace self._denotation_accuracy = Average() self._action_sequence_accuracy = Average() self._has_logical_form = Average() self._embedding_dim = question_embedder.get_output_dim() self._use_entities = use_entities # Note: there's only one non-trivial entity type in QuaRel for now, so most of the # entity_type stuff is irrelevant self._num_entity_types = 4 # TODO(mattg): get this in a more principled way somehow? self._num_start_types = 1 # Hardcoded until we feed lf syntax into the model self._entity_type_encoder_embedding = Embedding(self._num_entity_types, self._embedding_dim) self._entity_type_decoder_embedding = Embedding(self._num_entity_types, action_embedding_dim) self._entity_similarity_layer = None self._entity_similarity_mode = entity_similarity_mode if self._entity_similarity_mode == "weighted_dot_product": self._entity_similarity_layer = \ TimeDistributed(torch.nn.Linear(self._embedding_dim, 1, bias=False)) # Center initial values around unweighted dot product self._entity_similarity_layer._module.weight.data += 1 # pylint: disable=protected-access elif self._entity_similarity_mode == "dot_product": pass else: raise ValueError("Invalid entity_similarity_mode: {}".format(self._entity_similarity_mode)) if num_linking_features > 0: self._linking_params = torch.nn.Linear(num_linking_features, 1) else: self._linking_params = None self._decoder_trainer = MaximumMarginalLikelihood() self._encoder_output_dim = self._encoder.get_output_dim() if entity_bits_output: self._encoder_output_dim += num_entity_bits self._entity_bits_output = entity_bits_output self._debug_count = 10 self._num_denotation_cats = 2 # Hardcoded for simplicity self._denotation_only = denotation_only if self._denotation_only: self._denotation_accuracy_cat = CategoricalAccuracy() self._denotation_classifier = torch.nn.Linear(self._encoder_output_dim, self._num_denotation_cats) # Rest of init not needed for denotation only where no decoding to actions needed return self._action_padding_index = -1 # the padding value used by IndexField num_actions = vocab.get_vocab_size(self._rule_namespace) self._num_actions = num_actions self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) # We are tying the action embeddings used for input and output # self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) self._output_action_embedder = self._action_embedder # tied weights self._add_action_bias = add_action_bias if self._add_action_bias: self._action_biases = 
Embedding(num_embeddings=num_actions, embedding_dim=1) # This is what we pass as input in the first step of decoding, when we don't have a # previous action, or a previous question attention. self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim)) self._first_attended_question = torch.nn.Parameter(torch.FloatTensor(self._encoder_output_dim)) torch.nn.init.normal_(self._first_action_embedding) torch.nn.init.normal_(self._first_attended_question) self._decoder_step = LinkingTransitionFunction(encoder_output_dim=self._encoder_output_dim, action_embedding_dim=action_embedding_dim, input_attention=attention, num_start_types=self._num_start_types, predict_start_type_separately=False, add_action_bias=self._add_action_bias, mixture_feedforward=mixture_feedforward, dropout=dropout)
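# Why the weighted_dot_product layer above shifts its initial weights by +1: scoring an
# entity with w^T (q * e) equals the plain dot product q . e when w is all ones, so
# centering the weights near one makes the layer start out close to the unweighted dot
# product and then learn per-dimension weights. Idealized check with w exactly ones:
import torch

embedding_dim = 8
q = torch.randn(embedding_dim)
e = torch.randn(embedding_dim)
w = torch.ones(embedding_dim)

weighted = torch.dot(w, q * e)
plain = torch.dot(q, e)
print(torch.allclose(weighted, plain))  # True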
def from_vocab_or_file( cls, vocab: Vocabulary, embedding_dim: int, num_embeddings: int = None, vocab_namespace: str = "tokens", pretrained_file: str = None, projection_dim: int = None, trainable: bool = True, padding_index: int = None, max_norm: float = None, norm_type: float = 2.0, scale_grad_by_freq: bool = False, sparse: bool = False, ) -> "Embedding": """ Similar to `__init__`, but does two functions on top of what's there: (1) if `num_embeddings` is not given, it checks the vocabulary for how many embeddings to construct; and (2) if a pretrained file is given, it loads weights from that file (while looking at the given vocabulary) and passes those weights to `__init__`. We need the vocabulary here to know how many items we need to embed, and we look for a `vocab_namespace` key in the parameter dictionary to know which vocabulary to use. If you know beforehand exactly how many embeddings you need, or aren't using a vocabulary mapping for the things getting embedded here, then you can pass in the `num_embeddings` key directly, and the vocabulary will be ignored. A file containing pretrained embeddings can be specified using the parameter `"pretrained_file"`. It can be the path to a local file or an URL of a (cached) remote file. Two formats are supported: * hdf5 file - containing an embedding matrix in the form of a torch.Tensor; * text file - an utf-8 encoded text file with space separated fields:: [word] [dim 1] [dim 2] ... The text file can eventually be compressed with gzip, bz2, lzma or zip. You can even select a single file inside an archive containing multiple files using the URI:: "(archive_uri)#file_path_inside_the_archive" where `archive_uri` can be a file system path or a URL. For example:: "(https://nlp.stanford.edu/data/glove.twitter.27B.zip)#glove.twitter.27B.200d.txt" """ if num_embeddings is None: num_embeddings = vocab.get_vocab_size(vocab_namespace) else: # If num_embeddings is present, set default namespace to None so that extend_vocab # call doesn't misinterpret that some namespace was originally used. vocab_namespace = None if pretrained_file: # If we're loading a saved model, we don't want to actually read a pre-trained # embedding file - the embeddings will just be in our saved weights, and we might not # have the original embedding file anymore, anyway. weight = _read_pretrained_embeddings_file( pretrained_file, embedding_dim, vocab, vocab_namespace ) else: weight = None return cls( num_embeddings=num_embeddings, embedding_dim=embedding_dim, projection_dim=projection_dim, weight=weight, padding_index=padding_index, trainable=trainable, max_norm=max_norm, norm_type=norm_type, scale_grad_by_freq=scale_grad_by_freq, sparse=sparse, vocab_namespace=vocab_namespace, )
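# Hedged usage sketch for from_vocab_or_file above: either let the vocabulary determine
# num_embeddings, or pass num_embeddings explicitly, in which case the namespace is
# cleared so a later extend_vocab call will not try to resize this embedder. Assumes the
# method is exposed as a classmethod on Embedding and that a Vocabulary is available.
from allennlp.data import Vocabulary

vocab = Vocabulary()
vocab.add_token_to_namespace("hello", namespace="tokens")

sized_from_vocab = Embedding.from_vocab_or_file(vocab, embedding_dim=50)
fixed_size = Embedding.from_vocab_or_file(vocab, embedding_dim=50, num_embeddings=5000)
print(sized_from_vocab.weight.shape, fixed_size.weight.shape)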
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, coverage_ff: FeedForward, relation_predictor: FeedForward, scale_relation_loss: float = 1.0, aggregate: str = "max", combination: str = "x,y", answer_choice_combination: Optional[str] = None, coverage_combination: Optional[str] = None, var_dropout: float = 0.0, use_projection: bool = False, ignore_spans: bool = True, ignore_relns: bool = False, ignore_ann: bool = False, span_extractor: Optional[SpanExtractor] = None, reln_ff: Optional[FeedForward] = None, attention: Optional[MatrixAttention] = None, encoder: Optional[Seq2SeqEncoder] = None, initializer: InitializerApplicator = InitializerApplicator() ) -> None: """ :param vocab: AllenNLP Vocabulary :param text_field_embedder: AllenNLP Textfield embedder :param coverage_ff: Feedforward network that computes the "Fact-Relevance" score_f i.e. how well does the fact "cover" the question + answer :param relation_predictor: Feedforward network that predicts the relation label R_j :param scale_relation_loss: Scalar used to scale the relation loss term, \lambda :param aggregate: Pooling function used to aggregate question/fact vector representations in "Relation Prediction Score". Choices: max, avg, last :param combination: Combination string used to combine vector representation \bigotimes :param answer_choice_combination: If set, use this combination string instead of combination for combining the answer-based and choice-based fact representation :param coverage_combination: If set, use this combination string instead of combination for combining the question-choice-based fact rep and fact rep :param var_dropout: Variational dropout probability on the input embeddings :param use_projection: If set to true, learn a projector to map relation representations to a #rel-dimensional vector. Otherwise, the relation predictor should produce embeddings that match the #rels. :param ignore_spans: If set to true, don't use span representation of the answers in the fact_choice_question_rep (default: true) :param ignore_relns: If set to true, don't use the relation labels/scores (no relation representations computed or scored) :param ignore_ann: If set to true, ignore all auxilliary annotation i.e. spans and relations Use the entire fact to compute answer span-based representations. No loss computed against the relation label. Note that latent relation representations will still be computed :param span_extractor: SpanExtractor used to compute answer span representation :param reln_ff: Feedforward used to calculate the relation prediction score :param attention: Attention function used :param encoder: Encoder used to convert seq of word embeddings into contextual (e.g. 
LSTM) representations :param initializer: Initializer used for parameters """ super(SpanRelationPredFactAttModel, self).__init__(vocab) self._text_field_embedder = text_field_embedder self._coverage_ff = coverage_ff if attention: self._attention = attention else: self._attention = DotProductMatrixAttention() if var_dropout > 0.0: self._var_dropout = InputVariationalDropout(var_dropout) else: self._var_dropout = None self._num_relations = vocab.get_vocab_size(namespace="relation_labels") self._ignore_spans = ignore_spans self._aggregate = aggregate self._scale_relation_loss = scale_relation_loss if span_extractor is None and not ignore_spans: raise ConfigurationError( "ignore_spans set to False but no span_extractor provided!") self._span_extractor = span_extractor self._relation_predictor = relation_predictor # simple projector if use_projection: self._relation_projector = torch.nn.Linear( self._relation_predictor.get_output_dim(), self._num_relations) else: self._relation_projector = None self._combination = combination if answer_choice_combination: self._answer_choice_combination = answer_choice_combination else: self._answer_choice_combination = combination if coverage_combination: self._coverage_combination = coverage_combination else: self._coverage_combination = combination self._ignore_ann = ignore_ann self._ignore_relns = ignore_relns if reln_ff is None and not ignore_relns: raise ConfigurationError( "ignore_relns set to False but no reln_ff provided!") self._reln_ff = reln_ff self._encoder = encoder self._aggr_label_accuracy = BooleanAccuracy() self._aggr_choice_accuracy = CategoricalAccuracy() self._relation_loss = torch.nn.BCEWithLogitsLoss() self._choice_loss = torch.nn.CrossEntropyLoss() initializer(self)
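# Small illustration of the "combination" strings taken by the model above (e.g. "x,y"):
# following AllenNLP's combine_tensors convention, "x,y" concatenates the two vectors,
# while entries like "x*y" or "x-y" add element-wise products/differences. This is a
# plain-torch sketch; the mapping to AllenNLP's helper is an assumption, not taken from
# the model code itself.
import torch

def combine(combination: str, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    pieces = {"x": x, "y": y, "x*y": x * y, "x-y": x - y}
    return torch.cat([pieces[p.strip()] for p in combination.split(",")], dim=-1)

x, y = torch.randn(3, 4), torch.randn(3, 4)
print(combine("x,y", x, y).shape)          # torch.Size([3, 8])
print(combine("x,y,x*y,x-y", x, y).shape)  # torch.Size([3, 16]), the "4 x dim" inputs seen elsewhere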
def from_params(cls, vocab: Vocabulary, params: Params) -> 'Embedding': # type: ignore """ We need the vocabulary here to know how many items we need to embed, and we look for a ``vocab_namespace`` key in the parameter dictionary to know which vocabulary to use. If you know beforehand exactly how many embeddings you need, or aren't using a vocabulary mapping for the things getting embedded here, then you can pass in the ``num_embeddings`` key directly, and the vocabulary will be ignored. In the configuration file, a file containing pretrained embeddings can be specified using the parameter ``"pretrained_file"``. It can be the path to a local file or an URL of a (cached) remote file. Two formats are supported: * hdf5 file - containing an embedding matrix in the form of a torch.Tensor; * text file - an utf-8 encoded text file with space separated fields:: [word] [dim 1] [dim 2] ... The text file can eventually be compressed with gzip, bz2, lzma or zip. You can even select a single file inside an archive containing multiple files using the URI:: "(archive_uri)#file_path_inside_the_archive" where ``archive_uri`` can be a file system path or a URL. For example:: "(http://nlp.stanford.edu/data/glove.twitter.27B.zip)#glove.twitter.27B.200d.txt" """ # pylint: disable=arguments-differ num_embeddings = params.pop_int('num_embeddings', None) vocab_namespace = params.pop("vocab_namespace", "tokens") if num_embeddings is None: num_embeddings = vocab.get_vocab_size(vocab_namespace) embedding_dim = params.pop_int('embedding_dim') pretrained_file = params.pop("pretrained_file", None) projection_dim = params.pop_int("projection_dim", None) trainable = params.pop_bool("trainable", True) padding_index = params.pop_int('padding_index', None) max_norm = params.pop_float('max_norm', None) norm_type = params.pop_float('norm_type', 2.) scale_grad_by_freq = params.pop_bool('scale_grad_by_freq', False) sparse = params.pop_bool('sparse', False) params.assert_empty(cls.__name__) if pretrained_file: # If we're loading a saved model, we don't want to actually read a pre-trained # embedding file - the embeddings will just be in our saved weights, and we might not # have the original embedding file anymore, anyway. weight = _read_pretrained_embeddings_file(pretrained_file, embedding_dim, vocab, vocab_namespace) else: weight = None return cls(num_embeddings=num_embeddings, embedding_dim=embedding_dim, projection_dim=projection_dim, weight=weight, padding_index=padding_index, trainable=trainable, max_norm=max_norm, norm_type=norm_type, scale_grad_by_freq=scale_grad_by_freq, sparse=sparse)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, word_dim: int, hidden_dim: int, action_dim: int, ratio_dim: int, num_layers: int, mces_metric: Metric = None, recurrent_dropout_probability: float = 0.0, layer_dropout_probability: float = 0.0, same_dropout_mask_per_instance: bool = True, input_dropout: float = 0.0, lemma_text_field_embedder: TextFieldEmbedder = None, pos_tag_embedding: Embedding = None, action_embedding: Embedding = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None ) -> None: super(TransitionParser, self).__init__(vocab, regularizer) self._primary_labeled_correct = 0 self._primary_unlabeled_correct = 0 self._primary_total_edges_predicted = 0 self._primary_total_edges_actual = 0 self._primary_exact_labeled_correct = 0 self._primary_exact_unlabeled_correct = 0 self._remote_labeled_correct = 0 self._remote_unlabeled_correct = 0 self._remote_total_edges_predicted = 0 self._remote_total_edges_actual = 0 self._remote_exact_labeled_correct = 0 self._remote_exact_unlabeled_correct = 0 self._total_sentences = 0 self.num_actions = vocab.get_vocab_size('actions') self.text_field_embedder = text_field_embedder self.lemma_text_field_embedder = lemma_text_field_embedder self._pos_tag_embedding = pos_tag_embedding self._mces_metric = mces_metric self.word_dim = word_dim self.hidden_dim = hidden_dim self.ratio_dim = ratio_dim self.action_dim = action_dim self.action_embedding = action_embedding if action_embedding is None: self.action_embedding = Embedding(num_embeddings=self.num_actions, embedding_dim=self.action_dim, trainable=False) # syntactic composition self.p_comp = torch.nn.Linear(self.hidden_dim * 5 + self.ratio_dim, self.word_dim) # parser state to hidden self.p_s2h = torch.nn.Linear(self.hidden_dim * 3 + self.ratio_dim, self.hidden_dim) # hidden to action self.p_act = torch.nn.Linear(self.hidden_dim + self.ratio_dim, self.num_actions) self.update_concept_node = torch.nn.Linear(self.hidden_dim + self.ratio_dim, self.word_dim) self.pempty_buffer_emb = torch.nn.Parameter(torch.randn(self.hidden_dim)) self.proot_stack_emb = torch.nn.Parameter(torch.randn(self.word_dim)) self.pempty_action_emb = torch.nn.Parameter(torch.randn(self.hidden_dim)) self.pempty_stack_emb = torch.nn.Parameter(torch.randn(self.hidden_dim)) self._input_dropout = Dropout(input_dropout) self.buffer = StackRnn(input_size=self.word_dim, hidden_size=self.hidden_dim, num_layers=num_layers, recurrent_dropout_probability=recurrent_dropout_probability, layer_dropout_probability=layer_dropout_probability, same_dropout_mask_per_instance=same_dropout_mask_per_instance) self.stack = StackRnn(input_size=self.word_dim, hidden_size=self.hidden_dim, num_layers=num_layers, recurrent_dropout_probability=recurrent_dropout_probability, layer_dropout_probability=layer_dropout_probability, same_dropout_mask_per_instance=same_dropout_mask_per_instance) self.action_stack = StackRnn(input_size=self.action_dim, hidden_size=self.hidden_dim, num_layers=num_layers, recurrent_dropout_probability=recurrent_dropout_probability, layer_dropout_probability=layer_dropout_probability, same_dropout_mask_per_instance=same_dropout_mask_per_instance) initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             aggregate_feedforward: FeedForward,
             span_extractor: Optional[SpanExtractor],
             he_e1wh_projector: FeedForward,
             e1_ca_projector: FeedForward,
             path_projector: FeedForward,
             allchoice_projector: FeedForward,
             question_projector: FeedForward,
             combined_q_projector: FeedForward,
             combined_s_projector: FeedForward,
             joint_encoder: JointEncoder,
             doc_aggregator: AttnPooling,
             choice_aggregator: AttnPooling,
             path_aggregator: FeedForward,
             path_loc_aggregator: str = 'max',
             question_encoder: Optional[Seq2SeqEncoder] = None,
             document_encoder: Optional[Seq2SeqEncoder] = None,
             choice_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             embeddings_dropout_value: Optional[float] = 0.0,
             allchoice_loc: bool = True,
             path_enc: bool = True,
             path_enc_doc_based: bool = True,
             path_enc_loc_based: bool = True,
             combine_scores: str = 'add_cat',
             # share_encoders: Optional[bool] = False
             ) -> None:
    super(QAMultiChoicePDCDPRAM, self).__init__(vocab)

    self._text_field_embedder = text_field_embedder
    if embeddings_dropout_value > 0.0:
        self._embeddings_dropout = torch.nn.Dropout(p=embeddings_dropout_value)
    else:
        self._embeddings_dropout = lambda x: x

    self._question_encoder = question_encoder  # bidirectional RNN
    self._document_encoder = document_encoder
    self._choice_encoder = choice_encoder
    self._span_extractor = span_extractor

    self._allchoice_loc = allchoice_loc
    self._path_enc = path_enc
    self._path_enc_doc_based = path_enc_doc_based
    self._path_enc_loc_based = path_enc_loc_based

    if not self._allchoice_loc and not self._path_enc:
        raise ConfigurationError("At least one of all-choice location scoring or "
                                 "path encoding must be set to True!")
    if not self._path_enc:
        self._path_enc_loc_based = False
        self._path_enc_doc_based = False
    if self._path_enc:
        if not self._path_enc_doc_based and not self._path_enc_loc_based:
            raise ConfigurationError("At least one of the path encoding components has to be True!")

    self._he_e1wh_projector = he_e1wh_projector
    self._e1_ca_projector = e1_ca_projector
    self._path_projector = path_projector
    self._allchoice_projector = allchoice_projector
    self._question_projector = question_projector
    self._combined_q_projector = combined_q_projector
    self._combined_s_projector = combined_s_projector
    self._aggregate_feedforward = aggregate_feedforward
    self._path_loc_aggregator = path_loc_aggregator
    self._choice_aggregator = choice_aggregator
    self._joint_encoder = joint_encoder
    self._doc_aggregator = doc_aggregator
    self._path_aggregator = path_aggregator

    if self._path_loc_aggregator == 'max':
        self._agg_func = masked_max
    elif self._path_loc_aggregator == 'mean':
        self._agg_func = masked_mean
    else:
        raise NotImplementedError

    self._combine_scores = combine_scores
    self._num_labels = vocab.get_vocab_size(namespace="labels")

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.NLLLoss()

    initializer(self)
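# --- Illustrative aggregator sketch (not part of the original source) -----------------
# How the ``path_loc_aggregator`` switch above selects a pooling function. This assumes
# the 0.x-era ``allennlp.nn.util`` signatures, where masks are 0/1 float tensors that
# broadcast against the vector being pooled; shapes and values here are made up.
import torch
from allennlp.nn.util import masked_max, masked_mean

path_scores = torch.randn(2, 4, 8)                           # (batch, num_paths, dim)
path_mask = torch.tensor([[1., 1., 1., 0.],
                          [1., 1., 0., 0.]]).unsqueeze(-1)   # (batch, num_paths, 1), 0 = padding
pooled_max = masked_max(path_scores, path_mask, dim=1)       # what _agg_func does for 'max'
pooled_mean = masked_mean(path_scores, path_mask, dim=1)     # what _agg_func does for 'mean'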
def __init__(self,
             vocab: Vocabulary,
             utterance_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             decoder_beam_search: BeamSearch,
             max_decoding_steps: int,
             input_attention: Attention,
             add_action_bias: bool = True,
             training_beam_size: int = None,
             decoder_num_layers: int = 1,
             dropout: float = 0.0,
             rule_namespace: str = 'rule_labels',
             database_file='/atis/atis.db') -> None:
    # Atis semantic parser init
    super().__init__(vocab)
    self._utterance_embedder = utterance_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace

    self._exact_match = Average()
    self._valid_sql_query = Average()
    self._action_similarity = Average()
    self._denotation_accuracy = Average()

    self._executor = SqlExecutor(database_file)

    self._action_padding_index = -1  # the padding value used by IndexField
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    if self._add_action_bias:
        input_action_dim = action_embedding_dim + 1
    else:
        input_action_dim = action_embedding_dim
    self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)

    self._num_entity_types = 2  # TODO(kevin): get this in a more principled way somehow?
    self._entity_type_decoder_embedding = Embedding(self._num_entity_types, action_embedding_dim)

    self._decoder_num_layers = decoder_num_layers

    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size)
    self._transition_function = LinkingTransitionFunction(encoder_output_dim=self._encoder.get_output_dim(),
                                                          action_embedding_dim=action_embedding_dim,
                                                          input_attention=input_attention,
                                                          predict_start_type_separately=False,
                                                          add_action_bias=self._add_action_bias,
                                                          dropout=dropout,
                                                          num_layers=self._decoder_num_layers)
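# --- Illustrative action-bias sketch (not part of the original source) ----------------
# Why the input action embedder above uses ``action_embedding_dim + 1`` when
# ``add_action_bias`` is on: the extra column can be read as a learned per-action bias
# that is carried along with the embedding and separated out downstream. The numbers
# here are arbitrary and the split below only illustrates the layout.
import torch
from allennlp.modules.token_embedders import Embedding

num_actions, action_embedding_dim = 20, 16
input_action_embedder = Embedding(num_embeddings=num_actions,
                                  embedding_dim=action_embedding_dim + 1)
action_ids = torch.tensor([3, 7])
embedded_actions = input_action_embedder(action_ids)   # (2, action_embedding_dim + 1)
embedding_part = embedded_actions[:, :-1]               # (2, action_embedding_dim)
bias_part = embedded_actions[:, -1:]                    # (2, 1) learned per-action bias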