def __init__(
    self,
    embeddings: List[TokenEmbeddings],
    hidden_size=128,
    reproject_words: bool = True,
    reproject_words_dimension: int = None,
    dropout: float = 0.5,
    word_dropout: float = 0.0,
    locked_dropout: float = 0.0,
):
    """The constructor takes a list of embeddings to be combined.
    Implementation largely adapted from the DocumentRNNEmbeddings class.
    :param embeddings: a list of token embeddings
    :param hidden_size: the output dimension of the CNN text encoder
    :param reproject_words: boolean value, indicating whether to reproject the token embeddings in a separate
    linear layer before feeding them into the cnn or not
    :param reproject_words_dimension: output dimension of reprojecting token embeddings. If None the same output
    dimension as before will be taken.
    :param dropout: the dropout value to be used
    :param word_dropout: the word dropout value to be used, if 0.0 word dropout is not used
    :param locked_dropout: the locked dropout value to be used, if 0.0 locked dropout is not used
    """
    super().__init__()

    self.embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embeddings)

    self.reproject_words = reproject_words

    self.length_of_all_token_embeddings: int = self.embeddings.embedding_length

    self.static_embeddings = False

    self.__embedding_length: int = hidden_size

    self.embeddings_dimension: int = self.length_of_all_token_embeddings
    if self.reproject_words and reproject_words_dimension is not None:
        self.embeddings_dimension = reproject_words_dimension

    self.word_reprojection_map = torch.nn.Linear(
        self.length_of_all_token_embeddings, self.embeddings_dimension
    )

    self.cnn = CNN_Text(
        input_dim=self.embeddings_dimension,
        output_dim=hidden_size,
        kernel_sizes=[1],
    )

    self.name = "document_" + self.cnn._get_name()

    # dropouts
    if locked_dropout > 0.0:
        self.dropout: torch.nn.Module = LockedDropout(locked_dropout)
    else:
        self.dropout = torch.nn.Dropout(dropout)

    self.use_word_dropout: bool = word_dropout > 0.0
    if self.use_word_dropout:
        self.word_dropout = WordDropout(word_dropout)

    torch.nn.init.xavier_uniform_(self.word_reprojection_map.weight)

    self.to(flair.device)
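
# A minimal usage sketch (illustrative only, not part of this module). The enclosing
# class name is not visible here, so "DocumentCNNTextEmbeddings" below is purely a
# placeholder; WordEmbeddings and Sentence come from flair:
#
#     from flair.data import Sentence
#     from flair.embeddings import WordEmbeddings
#
#     glove = WordEmbeddings("glove")
#     document_embeddings = DocumentCNNTextEmbeddings(  # placeholder class name
#         embeddings=[glove],
#         hidden_size=128,
#         reproject_words=True,
#         reproject_words_dimension=256,
#     )
#     sentence = Sentence("The grass is green .")
#     document_embeddings.embed(sentence)  # resulting document vector has length hidden_size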
def __init__(
    self,
    embeddings: List[TokenEmbeddings],
    hidden_size=128,
    rnn_layers=1,
    reproject_words: bool = True,
    reproject_words_dimension: int = None,
    bidirectional: bool = False,
    dropout: float = 0.5,
    word_dropout: float = 0.0,
    locked_dropout: float = 0.0,
    rnn_type="GRU",
    fine_tune: bool = True,
):
    """The constructor takes a list of embeddings to be combined.
    :param embeddings: a list of token embeddings
    :param hidden_size: the number of hidden states in the rnn
    :param rnn_layers: the number of layers for the rnn
    :param reproject_words: boolean value, indicating whether to reproject the token embeddings in a separate
    linear layer before putting them into the rnn or not
    :param reproject_words_dimension: output dimension of reprojecting token embeddings. If None the same output
    dimension as before will be taken.
    :param bidirectional: boolean value, indicating whether to use a bidirectional rnn or not
    :param dropout: the dropout value to be used
    :param word_dropout: the word dropout value to be used, if 0.0 word dropout is not used
    :param locked_dropout: the locked dropout value to be used, if 0.0 locked dropout is not used
    :param rnn_type: 'GRU' or 'LSTM'
    :param fine_tune: if True, the token embeddings are fine-tuned during training; if False they are kept static
    """
    super().__init__()

    self.embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embeddings)

    self.rnn_type = rnn_type

    self.reproject_words = reproject_words
    self.bidirectional = bidirectional

    self.length_of_all_token_embeddings: int = self.embeddings.embedding_length

    self.static_embeddings = False if fine_tune else True

    self.__embedding_length: int = hidden_size
    if self.bidirectional:
        self.__embedding_length *= 4

    self.embeddings_dimension: int = self.length_of_all_token_embeddings
    if self.reproject_words and reproject_words_dimension is not None:
        self.embeddings_dimension = reproject_words_dimension

    self.word_reprojection_map = torch.nn.Linear(
        self.length_of_all_token_embeddings, self.embeddings_dimension
    )

    # bidirectional RNN on top of embedding layer
    if rnn_type == "LSTM":
        self.rnn = torch.nn.LSTM(
            self.embeddings_dimension,
            hidden_size,
            num_layers=rnn_layers,
            bidirectional=self.bidirectional,
            batch_first=True,
        )
    else:
        self.rnn = torch.nn.GRU(
            self.embeddings_dimension,
            hidden_size,
            num_layers=rnn_layers,
            bidirectional=self.bidirectional,
            batch_first=True,
        )

    self.name = "document_" + self.rnn._get_name()

    # dropouts
    self.dropout = torch.nn.Dropout(dropout) if dropout > 0.0 else None
    self.locked_dropout = (
        LockedDropout(locked_dropout) if locked_dropout > 0.0 else None
    )
    self.word_dropout = WordDropout(word_dropout) if word_dropout > 0.0 else None

    torch.nn.init.xavier_uniform_(self.word_reprojection_map.weight)

    self.to(flair.device)

    self.eval()
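
# A minimal usage sketch (illustrative only, not part of this module). The constructor
# mirrors flair's DocumentRNNEmbeddings API, so the example assumes that class name;
# adjust if the enclosing class is named differently:
#
#     from flair.data import Sentence
#     from flair.embeddings import WordEmbeddings
#
#     glove = WordEmbeddings("glove")
#     document_embeddings = DocumentRNNEmbeddings(
#         embeddings=[glove],
#         hidden_size=256,
#         rnn_type="GRU",
#         bidirectional=True,   # embedding_length becomes 4 * hidden_size
#         fine_tune=True,
#     )
#     sentence = Sentence("The grass is green .")
#     document_embeddings.embed(sentence)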
def __init__(
    self,
    embeddings: List[TokenEmbeddings],
    hidden_size=128,
    rnn_layers=1,
    reproject_words: bool = True,
    reproject_words_dimension: int = None,
    bidirectional: bool = True,
    dropout: float = 0.5,
    word_dropout: float = 0.0,
    locked_dropout: float = 0.0,
    fine_tune: bool = True,
    attention_size=100,
):
    """The constructor takes a list of embeddings to be combined.
    :param embeddings: a list of token embeddings
    :param hidden_size: the number of hidden states in the rnn
    :param rnn_layers: the number of layers for the rnn
    :param reproject_words: boolean value, indicating whether to reproject the token embeddings in a separate
    linear layer before putting them into the rnn or not
    :param reproject_words_dimension: output dimension of reprojecting token embeddings. If None the same output
    dimension as before will be taken.
    :param bidirectional: boolean value, indicating whether to use a bidirectional rnn or not
    :param dropout: the dropout value to be used
    :param word_dropout: the word dropout value to be used, if 0.0 word dropout is not used
    :param locked_dropout: the locked dropout value to be used, if 0.0 locked dropout is not used
    :param fine_tune: if True, the token embeddings are fine-tuned during training; if False they are kept static
    :param attention_size: dimension of the hidden word-attention layer
    """
    super().__init__()

    self.embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embeddings)

    self.reproject_words = reproject_words
    self.bidirectional = bidirectional

    self.length_of_all_token_embeddings: int = self.embeddings.embedding_length

    self.static_embeddings = False if fine_tune else True

    self.__embedding_length: int = hidden_size
    if self.bidirectional:
        self.__embedding_length *= 2

    self.embeddings_dimension: int = self.length_of_all_token_embeddings
    if self.reproject_words and reproject_words_dimension is not None:
        self.embeddings_dimension = reproject_words_dimension

    self.word_reprojection_map = torch.nn.Linear(
        self.length_of_all_token_embeddings, self.embeddings_dimension
    )

    self.attention_size = attention_size

    # Word level encoder
    self.rnn = torch.nn.GRU(
        self.embeddings_dimension,
        hidden_size,
        num_layers=rnn_layers,
        bidirectional=self.bidirectional,
        batch_first=True,
    )

    # One-layer MLP to get hidden representation of word annotation
    if self.bidirectional:
        self.word_attention = torch.nn.Linear(2 * hidden_size, self.attention_size)
    else:
        self.word_attention = torch.nn.Linear(hidden_size, self.attention_size)

    # Word level context vector to measure importance of word: forward method does dot-product for us
    # --> output = input.matmul(weight.t())
    self.word_context_vector = torch.nn.Linear(self.attention_size, 1, bias=False)

    self.name = "document_gru"

    # dropouts
    self.dropout = torch.nn.Dropout(dropout) if dropout > 0.0 else None
    self.locked_dropout = (
        LockedDropout(locked_dropout) if locked_dropout > 0.0 else None
    )
    self.word_dropout = WordDropout(word_dropout) if word_dropout > 0.0 else None

    torch.nn.init.xavier_uniform_(self.word_reprojection_map.weight)

    self.to(flair.device)

    self.eval()
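
# A minimal usage sketch (illustrative only, not part of this module). The enclosing
# class implements a word-attention (HAN-style) GRU document encoder; its actual name
# is not visible here, so "DocumentAttentionEmbeddings" is purely a placeholder:
#
#     from flair.embeddings import WordEmbeddings
#
#     document_embeddings = DocumentAttentionEmbeddings(  # placeholder class name
#         embeddings=[WordEmbeddings("glove")],
#         hidden_size=128,
#         bidirectional=True,    # embedding_length becomes 2 * hidden_size
#         attention_size=100,
#     )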
def __init__(
    self,
    embeddings: List[TokenEmbeddings],
    kernels=((100, 3), (100, 4), (100, 5)),
    reproject_words: bool = True,
    reproject_words_dimension: int = None,
    dropout: float = 0.5,
    word_dropout: float = 0.0,
    locked_dropout: float = 0.0,
    fine_tune: bool = True,
):
    """The constructor takes a list of embeddings to be combined.
    :param embeddings: a list of token embeddings
    :param kernels: list of (number of kernels, kernel size)
    :param reproject_words: boolean value, indicating whether to reproject the token embeddings in a separate
    linear layer before feeding them into the cnn or not
    :param reproject_words_dimension: output dimension of reprojecting token embeddings. If None the same output
    dimension as before will be taken.
    :param dropout: the dropout value to be used
    :param word_dropout: the word dropout value to be used, if 0.0 word dropout is not used
    :param locked_dropout: the locked dropout value to be used, if 0.0 locked dropout is not used
    :param fine_tune: if True, the token embeddings are fine-tuned during training; if False they are kept static
    """
    super().__init__()

    self.embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embeddings)
    self.length_of_all_token_embeddings: int = self.embeddings.embedding_length

    self.kernels = kernels
    self.reproject_words = reproject_words

    self.static_embeddings = False if fine_tune else True

    self.embeddings_dimension: int = self.length_of_all_token_embeddings
    if self.reproject_words and reproject_words_dimension is not None:
        self.embeddings_dimension = reproject_words_dimension

    self.word_reprojection_map = torch.nn.Linear(
        self.length_of_all_token_embeddings, self.embeddings_dimension
    )

    # CNN
    self.__embedding_length: int = sum([kernel_num for kernel_num, kernel_size in self.kernels])
    self.convs = torch.nn.ModuleList(
        [
            torch.nn.Conv1d(self.embeddings_dimension, kernel_num, kernel_size)
            for kernel_num, kernel_size in self.kernels
        ]
    )
    self.pool = torch.nn.AdaptiveMaxPool1d(1)

    self.name = "document_cnn"

    # dropouts
    self.dropout = torch.nn.Dropout(dropout) if dropout > 0.0 else None
    self.locked_dropout = (
        LockedDropout(locked_dropout) if locked_dropout > 0.0 else None
    )
    self.word_dropout = WordDropout(word_dropout) if word_dropout > 0.0 else None

    torch.nn.init.xavier_uniform_(self.word_reprojection_map.weight)

    self.to(flair.device)

    self.eval()
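
# A minimal usage sketch (illustrative only, not part of this module). The constructor
# mirrors flair's DocumentCNNEmbeddings API, so the example assumes that class name:
#
#     from flair.embeddings import WordEmbeddings
#
#     document_embeddings = DocumentCNNEmbeddings(
#         embeddings=[WordEmbeddings("glove")],
#         kernels=((100, 3), (100, 4), (100, 5)),   # embedding_length = 100 + 100 + 100
#         reproject_words_dimension=256,
#     )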
def __init__(
    self,
    hidden_size: int,
    embeddings: TokenEmbeddings,
    tag_dictionary: Dictionary,
    tag_type: str,
    # Canasai's addition:
    column_format: Dict[int, str],
    use_crf: bool = True,
    use_rnn: bool = True,
    rnn_layers: int = 1,
    # Canasai's addition:
    bidirectional: bool = True,
    dropout: float = 0.0,
    word_dropout: float = 0.0,
    locked_dropout: float = 0.5,
    train_initial_hidden_state: bool = False,
    rnn_type: str = "LSTM",
    pickle_module: str = "pickle",
    use_attn: bool = False,
    attn_type: str = "self",
    scaling: str = "no",
    num_heads: int = 8,
    pooling_operation="none",
    use_sent_query: bool = False,
):
    """
    Initializes a SequenceTagger
    :param hidden_size: number of hidden states in RNN
    :param embeddings: word embeddings used in tagger
    :param tag_dictionary: dictionary of tags you want to predict
    :param tag_type: string identifier for tag type
    :param column_format: column format of the corpus, mapping column index to annotation type (Canasai's addition)
    :param use_crf: if True use CRF decoder, else project directly to tag space
    :param use_rnn: if True use RNN layer, otherwise use word embeddings directly
    :param rnn_layers: number of RNN layers
    :param bidirectional: if True use a bidirectional RNN (Canasai's addition)
    :param dropout: dropout probability
    :param word_dropout: word dropout probability
    :param locked_dropout: locked dropout probability
    :param train_initial_hidden_state: if True, trains initial hidden state of RNN
    :param rnn_type: 'LSTM' or 'GRU'
    :param use_attn: if True add an attention layer on top of the encoder output
    :param attn_type: attention type, either 'self' (multi-headed self-attention) or 'soft' (soft attention)
    :param num_heads: number of heads for multi-headed self-attention
    """

    super(SequenceTagger, self).__init__()

    self.use_rnn = use_rnn
    self.hidden_size = hidden_size

    # Canasai's addition:
    num_directions = 2 if bidirectional else 1
    assert hidden_size % num_directions == 0
    hidden_size = hidden_size // num_directions

    self.use_crf: bool = use_crf
    self.rnn_layers: int = rnn_layers

    self.trained_epochs: int = 0

    self.embeddings = embeddings

    # set the dictionaries
    self.tag_dictionary: Dictionary = tag_dictionary
    self.tag_type: str = tag_type
    self.tagset_size: int = len(tag_dictionary)

    # Canasai's addition
    self.column_format: Dict = column_format

    # initialize the network architecture
    self.nlayers: int = rnn_layers
    self.hidden_word = None

    # dropouts
    self.use_dropout: float = dropout
    self.use_word_dropout: float = word_dropout
    self.use_locked_dropout: float = locked_dropout

    self.pickle_module = pickle_module

    # if dropout > 0.0:
    #     self.dropout = torch.nn.Dropout(dropout)
    self.dropout = torch.nn.Dropout(dropout) if dropout > 0.0 else None

    # if word_dropout > 0.0:
    #     self.word_dropout = flair.nn.WordDropout(word_dropout)
    self.word_dropout = WordDropout(word_dropout) if word_dropout > 0.0 else None

    # if locked_dropout > 0.0:
    #     self.locked_dropout = flair.nn.LockedDropout(locked_dropout)
    self.locked_dropout = (
        LockedDropout(locked_dropout) if locked_dropout > 0.0 else None
    )

    rnn_input_dim: int = self.embeddings.embedding_length

    self.relearn_embeddings: bool = True

    if self.relearn_embeddings:
        self.embedding2nn = torch.nn.Linear(rnn_input_dim, rnn_input_dim)

    self.train_initial_hidden_state = train_initial_hidden_state
    # Canasai's comment out: self.bidirectional = True
    self.bidirectional = bidirectional
    self.rnn_type = rnn_type

    # bidirectional LSTM on top of embedding layer
    if self.use_rnn:
        num_directions = 2 if self.bidirectional else 1

        if self.rnn_type in ["LSTM", "GRU"]:

            self.rnn = getattr(torch.nn, self.rnn_type)(
                rnn_input_dim,
                hidden_size,
                num_layers=self.nlayers,
                dropout=0.0 if self.nlayers == 1 else 0.5,
                bidirectional=self.bidirectional,
                batch_first=True,
            )
            # Create initial hidden state and initialize it
            if self.train_initial_hidden_state:
                self.hs_initializer = torch.nn.init.xavier_normal_

                self.lstm_init_h = Parameter(
                    torch.randn(self.nlayers * num_directions, self.hidden_size),
                    # torch.randn(self.nlayers * num_directions, hidden_size),
                    requires_grad=True,
                )

                self.lstm_init_c = Parameter(
                    torch.randn(self.nlayers * num_directions, self.hidden_size),
                    # torch.randn(self.nlayers * num_directions, hidden_size),
                    requires_grad=True,
                )

                # TODO: Decide how to initialize the hidden state variables
                # self.hs_initializer(self.lstm_init_h)
                # self.hs_initializer(self.lstm_init_c)

        # final linear map to tag space
        self.linear = torch.nn.Linear(
            hidden_size * num_directions, len(tag_dictionary)
        )
    else:
        self.linear = torch.nn.Linear(
            self.embeddings.embedding_length, len(tag_dictionary)
        )

    # Canasai's addition:
    self.use_attn: bool = use_attn
    self.attn_type: str = attn_type
    self.scaling: str = scaling
    self.pooling_operation = pooling_operation
    self.use_sent_query = use_sent_query
    self.encoder_final = None
    self.memory_bank = None
    self.self_attn = None
    self.soft_attn = None

    if self.use_attn:
        if self.attn_type == "self":
            self.self_attn = MultiHeadedAttention(
                num_heads, num_directions * hidden_size, scaling=self.scaling
            )
        elif self.attn_type == "soft":
            self.soft_attn = SoftAttention(num_directions * hidden_size)
        else:
            raise NotImplementedError

    if self.use_crf:
        self.transitions = torch.nn.Parameter(
            torch.randn(self.tagset_size, self.tagset_size)
        )

        self.transitions.detach()[
            self.tag_dictionary.get_idx_for_item(START_TAG), :
        ] = -10000

        self.transitions.detach()[
            :, self.tag_dictionary.get_idx_for_item(STOP_TAG)
        ] = -10000

    self.to(flair.device)
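
# A minimal usage sketch (illustrative only, not part of this module). Corpus loading is
# assumed; with this variant, column_format must be passed explicitly in addition to the
# usual SequenceTagger arguments:
#
#     from flair.embeddings import WordEmbeddings
#
#     columns = {0: "text", 1: "ner"}
#     # corpus: a flair column-format corpus loaded with the column mapping above
#     tag_dictionary = corpus.make_tag_dictionary(tag_type="ner")
#
#     tagger = SequenceTagger(
#         hidden_size=256,
#         embeddings=WordEmbeddings("glove"),
#         tag_dictionary=tag_dictionary,
#         tag_type="ner",
#         column_format=columns,
#         use_crf=True,
#         use_attn=True,      # adds multi-headed self-attention on the encoder output
#         attn_type="self",
#         num_heads=8,
#     )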