def __init__(
    self,
    embeddings: List[TokenEmbeddings],
    fine_tune_mode: str = "linear",
    pooling: str = "mean",
):
    """The constructor takes a list of embeddings to be combined.
    :param embeddings: a list of token embeddings
    :param fine_tune_mode: if set to "linear" a trainable layer is added, if set to
    "nonlinear", a nonlinearity is added as well. Set this to make the pooling trainable.
    :param pooling: a string which can be any value from ['mean', 'max', 'min']
    """
    super().__init__()

    self.embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embeddings)
    self.__embedding_length = self.embeddings.embedding_length

    # optional fine-tuning on top of embedding layer
    self.fine_tune_mode = fine_tune_mode
    if self.fine_tune_mode in ["nonlinear", "linear"]:
        self.embedding_flex = torch.nn.Linear(
            self.embedding_length, self.embedding_length, bias=False
        )
        self.embedding_flex.weight.data.copy_(torch.eye(self.embedding_length))

    if self.fine_tune_mode in ["nonlinear"]:
        # ReLU takes no dimension argument
        self.embedding_flex_nonlinear = torch.nn.ReLU()
        self.embedding_flex_nonlinear_map = torch.nn.Linear(
            self.embedding_length, self.embedding_length
        )

    self.to(flair.device)

    self.pooling = pooling
    if self.pooling == "mean":
        self.pool_op = torch.mean
    elif pooling == "max":
        self.pool_op = torch.max
    elif pooling == "min":
        self.pool_op = torch.min
    else:
        raise ValueError(f"Pooling operation for {self.pooling!r} is not defined")

    self.name: str = f"document_{self.pooling}"
def __init__(
    self,
    embeddings: Union[TokenEmbeddings, List[TokenEmbeddings]],
    fine_tune_mode: str = "none",
    pooling: str = "mean",
):
    """The constructor takes a list of embeddings to be combined.
    :param embeddings: a list of token embeddings
    :param fine_tune_mode: if set to "linear" a trainable layer is added, if set to
    "nonlinear", a nonlinearity is added as well. Set this to make the pooling trainable.
    :param pooling: a string which can be any value from ['mean', 'max', 'min']
    """
    super().__init__()

    if isinstance(embeddings, TokenEmbeddings):
        embeddings = [embeddings]

    self.embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embeddings)
    self.__embedding_length = self.embeddings.embedding_length

    # optional fine-tuning on top of embedding layer
    self.fine_tune_mode = fine_tune_mode
    if self.fine_tune_mode in ["nonlinear", "linear"]:
        self.embedding_flex = torch.nn.Linear(
            self.embedding_length, self.embedding_length, bias=False
        )
        self.embedding_flex.weight.data.copy_(torch.eye(self.embedding_length))

    if self.fine_tune_mode in ["nonlinear"]:
        self.embedding_flex_nonlinear = torch.nn.ReLU()
        self.embedding_flex_nonlinear_map = torch.nn.Linear(
            self.embedding_length, self.embedding_length
        )

    self.to(flair.device)

    if pooling not in ["min", "max", "mean"]:
        raise ValueError(f"Pooling operation for {pooling!r} is not defined")

    self.pooling = pooling
    self.name: str = f"document_{self.pooling}"
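# Usage sketch (not part of the original constructor). Assuming this constructor
# belongs to flair's DocumentPoolEmbeddings, a document vector is obtained by pooling
# the token embeddings of a sentence; the class and import names below follow flair's
# public API and are assumptions, not taken from this file:
#
#   from flair.data import Sentence
#   from flair.embeddings import WordEmbeddings, DocumentPoolEmbeddings
#
#   glove = WordEmbeddings("glove")
#   document_embeddings = DocumentPoolEmbeddings([glove], pooling="mean")
#
#   sentence = Sentence("The grass is green .")
#   document_embeddings.embed(sentence)
#   print(sentence.get_embedding().size())  # one vector per sentence, size = embedding_length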
def __init__(
    self,
    embeddings: List[TokenEmbeddings],
    hidden_size=128,
    rnn_layers=1,
    reproject_words: bool = True,
    reproject_words_dimension: int = None,
    bidirectional: bool = False,
    dropout: float = 0.5,
    word_dropout: float = 0.0,
    locked_dropout: float = 0.0,
    rnn_type="GRU",
    fine_tune: bool = True,
):
    """The constructor takes a list of embeddings to be combined.
    :param embeddings: a list of token embeddings
    :param hidden_size: the number of hidden states in the rnn
    :param rnn_layers: the number of layers for the rnn
    :param reproject_words: boolean value, indicating whether to reproject the token
    embeddings in a separate linear layer before putting them into the rnn or not
    :param reproject_words_dimension: output dimension of reprojecting token embeddings. If None
    the same output dimension as before will be taken.
    :param bidirectional: boolean value, indicating whether to use a bidirectional rnn or not
    :param dropout: the dropout value to be used
    :param word_dropout: the word dropout value to be used, if 0.0 word dropout is not used
    :param locked_dropout: the locked dropout value to be used, if 0.0 locked dropout is not used
    :param rnn_type: 'GRU' or 'LSTM'
    :param fine_tune: if False, the stacked token embeddings are marked as static
    """
    super().__init__()

    self.embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embeddings)

    self.rnn_type = rnn_type
    self.reproject_words = reproject_words
    self.bidirectional = bidirectional

    self.length_of_all_token_embeddings: int = self.embeddings.embedding_length

    self.static_embeddings = not fine_tune

    self.__embedding_length: int = hidden_size
    if self.bidirectional:
        self.__embedding_length *= 4

    self.embeddings_dimension: int = self.length_of_all_token_embeddings
    if self.reproject_words and reproject_words_dimension is not None:
        self.embeddings_dimension = reproject_words_dimension

    self.word_reprojection_map = torch.nn.Linear(
        self.length_of_all_token_embeddings, self.embeddings_dimension
    )

    # bidirectional RNN on top of embedding layer
    if rnn_type == "LSTM":
        self.rnn = torch.nn.LSTM(
            self.embeddings_dimension,
            hidden_size,
            num_layers=rnn_layers,
            bidirectional=self.bidirectional,
            batch_first=True,
        )
    else:
        self.rnn = torch.nn.GRU(
            self.embeddings_dimension,
            hidden_size,
            num_layers=rnn_layers,
            bidirectional=self.bidirectional,
            batch_first=True,
        )

    self.name = "document_" + self.rnn._get_name()

    # dropouts
    self.dropout = torch.nn.Dropout(dropout) if dropout > 0.0 else None
    self.locked_dropout = (
        LockedDropout(locked_dropout) if locked_dropout > 0.0 else None
    )
    self.word_dropout = WordDropout(word_dropout) if word_dropout > 0.0 else None

    torch.nn.init.xavier_uniform_(self.word_reprojection_map.weight)

    self.to(flair.device)

    self.eval()
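# Usage sketch (assumption: this constructor belongs to flair's DocumentRNNEmbeddings).
# The RNN runs over the token embeddings and its final output states form the document
# embedding, so embedding_length equals hidden_size (times a factor for bidirectional
# RNNs, as set above). Import paths below follow flair's public API and are assumptions:
#
#   from flair.data import Sentence
#   from flair.embeddings import WordEmbeddings, DocumentRNNEmbeddings
#
#   glove = WordEmbeddings("glove")
#   document_embeddings = DocumentRNNEmbeddings(
#       [glove], hidden_size=128, rnn_type="GRU", bidirectional=False
#   )
#
#   sentence = Sentence("The grass is green .")
#   document_embeddings.embed(sentence)
#   print(sentence.get_embedding().size())  # torch.Size([128]) for the settings above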
def __init__(
    self,
    embeddings: List[TokenEmbeddings],
    kernels=((100, 3), (100, 4), (100, 5)),
    reproject_words: bool = True,
    reproject_words_dimension: int = None,
    dropout: float = 0.5,
    word_dropout: float = 0.0,
    locked_dropout: float = 0.0,
    fine_tune: bool = True,
):
    """The constructor takes a list of embeddings to be combined.
    :param embeddings: a list of token embeddings
    :param kernels: list of (number of kernels, kernel size)
    :param reproject_words: boolean value, indicating whether to reproject the token
    embeddings in a separate linear layer before putting them into the cnn or not
    :param reproject_words_dimension: output dimension of reprojecting token embeddings. If None
    the same output dimension as before will be taken.
    :param dropout: the dropout value to be used
    :param word_dropout: the word dropout value to be used, if 0.0 word dropout is not used
    :param locked_dropout: the locked dropout value to be used, if 0.0 locked dropout is not used
    :param fine_tune: if False, the stacked token embeddings are marked as static
    """
    super().__init__()

    self.embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embeddings)
    self.length_of_all_token_embeddings: int = self.embeddings.embedding_length

    self.kernels = kernels
    self.reproject_words = reproject_words

    self.static_embeddings = not fine_tune

    self.embeddings_dimension: int = self.length_of_all_token_embeddings
    if self.reproject_words and reproject_words_dimension is not None:
        self.embeddings_dimension = reproject_words_dimension

    self.word_reprojection_map = torch.nn.Linear(
        self.length_of_all_token_embeddings, self.embeddings_dimension
    )

    # CNN
    self.__embedding_length: int = sum(
        [kernel_num for kernel_num, kernel_size in self.kernels]
    )
    self.convs = torch.nn.ModuleList(
        [
            torch.nn.Conv1d(self.embeddings_dimension, kernel_num, kernel_size)
            for kernel_num, kernel_size in self.kernels
        ]
    )
    self.pool = torch.nn.AdaptiveMaxPool1d(1)

    self.name = "document_cnn"

    # dropouts
    self.dropout = torch.nn.Dropout(dropout) if dropout > 0.0 else None
    self.locked_dropout = (
        LockedDropout(locked_dropout) if locked_dropout > 0.0 else None
    )
    self.word_dropout = WordDropout(word_dropout) if word_dropout > 0.0 else None

    torch.nn.init.xavier_uniform_(self.word_reprojection_map.weight)

    self.to(flair.device)

    self.eval()
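# Usage sketch (assumption: this constructor belongs to flair's DocumentCNNEmbeddings,
# available in more recent flair releases). Each (kernel_num, kernel_size) pair adds a
# Conv1d whose max-pooled feature maps are concatenated, so embedding_length is the sum
# of all kernel_num values (300 for the default kernels). Names below are assumptions:
#
#   from flair.data import Sentence
#   from flair.embeddings import WordEmbeddings, DocumentCNNEmbeddings
#
#   glove = WordEmbeddings("glove")
#   document_embeddings = DocumentCNNEmbeddings(
#       [glove], kernels=((100, 3), (100, 4), (100, 5))
#   )
#
#   sentence = Sentence("The grass is green .")
#   document_embeddings.embed(sentence)
#   print(sentence.get_embedding().size())  # torch.Size([300]) for the default kernels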
def __init__(
    self,
    embeddings: List[TokenEmbeddings],
    hidden_size=128,
    rnn_layers=1,
    reproject_words: bool = True,
    reproject_words_dimension: int = None,
    bidirectional: bool = True,
    dropout: float = 0.5,
    word_dropout: float = 0.0,
    locked_dropout: float = 0.0,
    fine_tune: bool = True,
    attention_size=100,
):
    """The constructor takes a list of embeddings to be combined.
    :param embeddings: a list of token embeddings
    :param hidden_size: the number of hidden states in the rnn
    :param rnn_layers: the number of layers for the rnn
    :param reproject_words: boolean value, indicating whether to reproject the token
    embeddings in a separate linear layer before putting them into the rnn or not
    :param reproject_words_dimension: output dimension of reprojecting token embeddings. If None
    the same output dimension as before will be taken.
    :param bidirectional: boolean value, indicating whether to use a bidirectional rnn or not
    :param dropout: the dropout value to be used
    :param word_dropout: the word dropout value to be used, if 0.0 word dropout is not used
    :param locked_dropout: the locked dropout value to be used, if 0.0 locked dropout is not used
    :param fine_tune: if False, the stacked token embeddings are marked as static
    :param attention_size: the size of the attention layer that scores the rnn outputs
    """
    super().__init__()

    self.embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embeddings)

    self.reproject_words = reproject_words
    self.bidirectional = bidirectional

    self.length_of_all_token_embeddings: int = self.embeddings.embedding_length

    self.static_embeddings = not fine_tune

    self.__embedding_length: int = hidden_size
    if self.bidirectional:
        self.__embedding_length *= 2

    self.embeddings_dimension: int = self.length_of_all_token_embeddings
    if self.reproject_words and reproject_words_dimension is not None:
        self.embeddings_dimension = reproject_words_dimension

    self.word_reprojection_map = torch.nn.Linear(
        self.length_of_all_token_embeddings, self.embeddings_dimension
    )

    self.attention_size = attention_size

    # Word-level encoder
    self.rnn = torch.nn.GRU(
        self.embeddings_dimension,
        hidden_size,
        num_layers=rnn_layers,
        bidirectional=self.bidirectional,
        batch_first=True,
    )

    # One-layer MLP to get hidden representation of word annotation
    if self.bidirectional:
        self.word_attention = torch.nn.Linear(2 * hidden_size, self.attention_size)
    else:
        self.word_attention = torch.nn.Linear(hidden_size, self.attention_size)

    # Word-level context vector to measure the importance of each word;
    # the forward method does the dot product for us --> output = input.matmul(weight.t())
    self.word_context_vector = torch.nn.Linear(self.attention_size, 1, bias=False)

    self.name = "document_gru"

    # dropouts
    self.dropout = torch.nn.Dropout(dropout) if dropout > 0.0 else None
    self.locked_dropout = (
        LockedDropout(locked_dropout) if locked_dropout > 0.0 else None
    )
    self.word_dropout = WordDropout(word_dropout) if word_dropout > 0.0 else None

    torch.nn.init.xavier_uniform_(self.word_reprojection_map.weight)

    self.to(flair.device)

    self.eval()
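# Sketch of the word-level attention pooling these modules imply (an assumption about
# the forward pass, not code from this file): the GRU outputs are projected by
# word_attention, passed through a tanh, scored by word_context_vector,
# softmax-normalised over time steps, and used as weights for a sum over the sequence:
#
#   # rnn_out: [batch, seq_len, 2 * hidden_size] for the bidirectional GRU above
#   u = torch.tanh(self.word_attention(rnn_out))          # [batch, seq_len, attention_size]
#   scores = self.word_context_vector(u).squeeze(-1)      # [batch, seq_len]
#   alpha = torch.softmax(scores, dim=-1)                 # attention weight per token
#   document_embedding = (alpha.unsqueeze(-1) * rnn_out).sum(dim=1)  # [batch, 2 * hidden_size]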