def __init__(self,
             vocab: Vocabulary,
             embedding_size: int,
             hidden_size: int,
             num_layers: int,
             splits: List[int] = [],
             dropout: float = 0.4,
             dropouth: float = 0.3,
             dropouti: float = 0.65,
             dropoute: float = 0.1,
             wdrop: float = 0.5,
             alpha: float = 2.0,
             beta: float = 1.0,
             tie_weights: bool = False,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super(AwdLstmLanguageModel, self).__init__(vocab)

    # Model architecture
    self.embedding_size = embedding_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.tie_weights = tie_weights
    self.splits = splits
    self.alpha = alpha
    self.beta = beta

    # Dropout
    self.locked_dropout = LockedDropout()
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.dropout = dropout

    # Initialize empty state dict
    self._state: Optional[Dict[str, Any]] = None

    # Tokens are manually embedded instead of using a TokenEmbedder to make using
    # embedding_dropout easier.
    self.embedder = torch.nn.Embedding(vocab.get_vocab_size(namespace='tokens'),
                                       embedding_size)

    rnns: List[torch.nn.Module] = []
    for i in range(num_layers):
        if i == 0:
            input_size = embedding_size
        else:
            input_size = hidden_size
        if (i == num_layers - 1) and tie_weights:
            output_size = embedding_size
        else:
            output_size = hidden_size
        rnns.append(torch.nn.LSTM(input_size, output_size, batch_first=True))
    rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in rnns]
    self.rnns = torch.nn.ModuleList(rnns)

    self.decoder = torch.nn.Linear(output_size, vocab.get_vocab_size(namespace='tokens'))

    # Optionally tie weights
    if tie_weights:
        # pylint: disable=protected-access
        self.decoder.weight = self.embedder.weight

    initializer(self)

    self._unk_index = vocab.get_token_index(DEFAULT_OOV_TOKEN)
    self._unk_penalty = math.log(vocab.get_vocab_size('tokens_unk'))

    self.ppl = Ppl()
    self.upp = Ppl()
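
# --- Illustrative sketch (not part of the original module) -------------------
# A minimal, dependency-free restatement of how the per-layer LSTM sizes are
# chosen above: the first layer consumes embeddings, intermediate layers are
# `hidden_size` wide, and when `tie_weights` is enabled the last layer shrinks
# back to `embedding_size` so the decoder can share the embedding matrix.
# The function name and the example values below are illustrative only.
def awd_lstm_layer_sizes(embedding_size, hidden_size, num_layers, tie_weights):
    sizes = []
    for i in range(num_layers):
        input_size = embedding_size if i == 0 else hidden_size
        last_layer = i == num_layers - 1
        output_size = embedding_size if (last_layer and tie_weights) else hidden_size
        sizes.append((input_size, output_size))
    return sizes

# awd_lstm_layer_sizes(400, 1150, 3, tie_weights=True)
# -> [(400, 1150), (1150, 1150), (1150, 400)]
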
def __init__(self,
             vocab: Vocabulary,
             token_embedder: TextFieldEmbedder,
             entity_embedder: TextFieldEmbedder,
             relation_embedder: TextFieldEmbedder,
             knowledge_graph_path: str,
             use_shortlist: bool,
             hidden_size: int,
             num_layers: int,
             cutoff: int = 30,
             tie_weights: bool = False,
             dropout: float = 0.4,
             dropouth: float = 0.3,
             dropouti: float = 0.65,
             dropoute: float = 0.1,
             wdrop: float = 0.5,
             alpha: float = 2.0,
             beta: float = 1.0,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super(KglmDisc, self).__init__(vocab)

    # We extract the `Embedding` layers from the `TokenEmbedders` to apply dropout later on.
    # pylint: disable=protected-access
    self._token_embedder = token_embedder._token_embedders['tokens']
    self._entity_embedder = entity_embedder._token_embedders['entity_ids']
    self._relation_embedder = relation_embedder._token_embedders['relations']
    self._recent_entities = RecentEntities(cutoff=cutoff)
    self._knowledge_graph_lookup = KnowledgeGraphLookup(knowledge_graph_path, vocab=vocab)
    self._use_shortlist = use_shortlist
    self._hidden_size = hidden_size
    self._num_layers = num_layers
    self._cutoff = cutoff
    self._tie_weights = tie_weights

    # Dropout
    self._locked_dropout = LockedDropout()
    self._dropout = dropout
    self._dropouth = dropouth
    self._dropouti = dropouti
    self._dropoute = dropoute
    self._wdrop = wdrop

    # Regularization strength
    self._alpha = alpha
    self._beta = beta

    # RNN Encoders.
    entity_embedding_dim = entity_embedder.get_output_dim()
    token_embedding_dim = token_embedder.get_output_dim()
    self.entity_embedding_dim = entity_embedding_dim
    self.token_embedding_dim = token_embedding_dim

    rnns: List[torch.nn.Module] = []
    for i in range(num_layers):
        if i == 0:
            input_size = token_embedding_dim
        else:
            input_size = hidden_size
        if i == num_layers - 1:
            output_size = token_embedding_dim + 2 * entity_embedding_dim
        else:
            output_size = hidden_size
        rnns.append(torch.nn.LSTM(input_size, output_size, batch_first=True))
    rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in rnns]
    self.rnns = torch.nn.ModuleList(rnns)

    # Various linear transformations.
    self._fc_mention_type = torch.nn.Linear(
        in_features=token_embedding_dim,
        out_features=4)

    if not use_shortlist:
        self._fc_new_entity = torch.nn.Linear(
            in_features=entity_embedding_dim,
            out_features=vocab.get_vocab_size('entity_ids'))

        if tie_weights:
            self._fc_new_entity.weight = self._entity_embedder.weight

    self._state: Optional[Dict[str, Any]] = None

    # Metrics
    self._unk_index = vocab.get_token_index(DEFAULT_OOV_TOKEN)
    self._unk_penalty = math.log(vocab.get_vocab_size('tokens_unk'))
    self._avg_mention_type_loss = Average()
    self._avg_new_entity_loss = Average()
    self._avg_knowledge_graph_entity_loss = Average()
    self._new_mention_f1 = F1Measure(positive_label=1)
    self._kg_mention_f1 = F1Measure(positive_label=2)
    self._new_entity_accuracy = CategoricalAccuracy()
    self._new_entity_accuracy20 = CategoricalAccuracy(top_k=20)
    self._parent_ppl = Ppl()
    self._relation_ppl = Ppl()

    initializer(self)
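
# --- Illustrative sketch (not part of the original module) -------------------
# The last LSTM layer above is sized `token_embedding_dim + 2 * entity_embedding_dim`,
# which suggests its output is split downstream into one token-sized piece and two
# entity-sized pieces. This is a minimal, self-contained sketch of such a split; the
# function name and the exact downstream use are assumptions, not the module's actual
# forward pass.
import torch


def split_encoded_states(hidden: torch.Tensor, token_dim: int, entity_dim: int):
    """Split a (batch, seq_len, token_dim + 2 * entity_dim) tensor into three parts."""
    return hidden.split([token_dim, entity_dim, entity_dim], dim=-1)

# Example: a hidden state of width 400 + 2 * 256 splits into parts of width 400, 256, 256.
# tokens, part_a, part_b = split_encoded_states(torch.zeros(1, 10, 912), 400, 256)
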
def __init__(self,
             vocab: Vocabulary,
             token_embedder: TextFieldEmbedder,
             entity_embedder: TextFieldEmbedder,
             alias_encoder: Seq2SeqEncoder,
             hidden_size: int,
             num_layers: int,
             dropout: float = 0.4,
             dropouth: float = 0.3,
             dropouti: float = 0.65,
             dropoute: float = 0.1,
             wdrop: float = 0.5,
             alpha: float = 2.0,
             beta: float = 1.0,
             tie_weights: bool = False,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super(AliasCopynet, self).__init__(vocab)

    # Model architecture - Note: we need to extract the `Embedding` layers from the
    # `TokenEmbedders` to apply dropout later on.
    # pylint: disable=protected-access
    self._token_embedder = token_embedder._token_embedders['tokens']
    self._entity_embedder = entity_embedder._token_embedders['entity_ids']
    self._alias_encoder = alias_encoder
    self._hidden_size = hidden_size
    self._num_layers = num_layers
    self._tie_weights = tie_weights

    # Dropout
    self._locked_dropout = LockedDropout()
    self._dropout = dropout
    self._dropouth = dropouth
    self._dropouti = dropouti
    self._dropoute = dropoute
    self._wdrop = wdrop

    # Regularization strength
    self._alpha = alpha
    self._beta = beta

    # RNN Encoders. TODO: Experiment with separate encoder for aliases.
    entity_embedding_dim = entity_embedder.get_output_dim()
    token_embedding_dim = token_embedder.get_output_dim()
    assert entity_embedding_dim == token_embedding_dim
    embedding_dim = token_embedding_dim

    rnns: List[torch.nn.Module] = []
    for i in range(num_layers):
        if i == 0:
            input_size = token_embedding_dim
        else:
            input_size = hidden_size
        if (i == num_layers - 1) and tie_weights:
            output_size = token_embedding_dim
        else:
            output_size = hidden_size
        rnns.append(torch.nn.LSTM(input_size, output_size, batch_first=True))
    rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in rnns]
    self.rnns = torch.nn.ModuleList(rnns)

    # Various linear transformations.
    self._fc_mention = torch.nn.Linear(
        in_features=embedding_dim,
        out_features=2)
    self._fc_entity = torch.nn.Linear(
        in_features=embedding_dim,
        out_features=embedding_dim)
    self._fc_condense = torch.nn.Linear(
        in_features=2 * embedding_dim,
        out_features=embedding_dim)
    self._fc_generate = torch.nn.Linear(
        in_features=embedding_dim,
        out_features=vocab.get_vocab_size('tokens'))
    self._fc_copy = torch.nn.Linear(
        in_features=embedding_dim,
        out_features=embedding_dim)

    if tie_weights:
        self._fc_generate.weight = self._token_embedder.weight

    self._state: Optional[Dict[str, Any]] = None

    # Metrics
    # self._avg_mention_loss = Average()
    # self._avg_entity_loss = Average()
    # self._avg_vocab_loss = Average()
    self._unk_index = vocab.get_token_index(DEFAULT_OOV_TOKEN)
    self._unk_penalty = math.log(vocab.get_vocab_size('tokens_unk'))
    self._ppl = Ppl()
    self._upp = Ppl()
    self._kg_ppl = Ppl()  # Knowledge-graph ppl
    self._bg_ppl = Ppl()  # Background ppl

    initializer(self)
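
# --- Illustrative sketch (not part of the original module) -------------------
# How a stack of weight-dropped, batch-first LSTMs like `self.rnns` above is
# typically driven: each layer's output feeds the next, with dropout applied
# between layers but not after the last one. This is a generic sketch of the
# pattern, not the module's actual forward pass; plain functional dropout stands
# in for `LockedDropout`, and all names here are placeholders.
import torch


def run_rnn_stack(rnns, embedded: torch.Tensor, dropout: float = 0.3) -> torch.Tensor:
    output = embedded                                  # (batch, seq_len, input_size)
    for i, rnn in enumerate(rnns):
        output, _ = rnn(output)                        # discard (h, c) for brevity
        if i < len(rnns) - 1:
            output = torch.nn.functional.dropout(output, p=dropout)
    return output
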