def __init__(self, config, dataset):
    super().__init__(config)
    input_size = dataset.get_input_vocab_size()
    output_size = len(dataset.vocabs.label)
    input_seqlen = dataset.get_max_seqlen()
    self.embedding_dropout = nn.Dropout(self.config.dropout)
    self.embedding = nn.Embedding(input_size, self.config.embedding_size)
    nn.init.xavier_uniform_(self.embedding.weight)
    self.cnn = Conv1DEncoder(self.config.embedding_size,
                             self.config.cnn_output_size,
                             input_seqlen, self.config.conv_layers)
    self.mlp = MLP(
        input_size=2 * self.config.cnn_output_size,
        layers=self.config.mlp_layers,
        nonlinearity=self.config.mlp_nonlinearity,
        output_size=output_size,
    )
    self.criterion = nn.CrossEntropyLoss()
def __init__(self, config, dataset):
    super().__init__(config)
    self.dataset = dataset
    model_name = getattr(self.config, 'bert_model',
                         'bert-base-multilingual-cased')
    self.dropout = nn.Dropout(self.config.dropout)
    self.bert = BERTEmbedder(model_name, self.config.layer,
                             use_cache=self.config.use_cache)
    if 'large' in model_name:
        hidden = 1024
    else:
        hidden = 768
    self.mlp = MLP(
        input_size=2 * hidden,
        layers=self.config.mlp_layers,
        nonlinearity=self.config.mlp_nonlinearity,
        output_size=2,
    )
    self.criterion = nn.CrossEntropyLoss()
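# Hypothetical illustration (not the project's forward pass) of why the MLP
# above takes 2 * hidden input features: assuming the model encodes two
# inputs with BERT and concatenates their pooled vectors before the binary
# classifier. The helper name and argument shapes are assumptions.
def classify_pair(mlp, left_vec, right_vec):
    # left_vec, right_vec: (batch, hidden) pooled BERT representations
    pair = torch.cat([left_vec, right_vec], dim=-1)  # (batch, 2 * hidden)
    return mlp(pair)                                  # (batch, 2) class scores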
def __init__(self, config, dataset):
    super().__init__(config)
    input_size = len(dataset.vocabs.input)
    output_size = len(dataset.vocabs.label)
    self.lstm = LSTMEncoder(
        input_size, output_size,
        lstm_hidden_size=self.config.hidden_size,
        lstm_num_layers=self.config.num_layers,
        lstm_dropout=self.config.dropout,
        embedding_size=self.config.embedding_size,
        embedding_dropout=self.config.dropout,
    )
    hidden = self.config.hidden_size
    self.mlp = MLP(
        input_size=hidden,
        layers=self.config.mlp_layers,
        nonlinearity=self.config.mlp_nonlinearity,
        output_size=output_size,
    )
    self.criterion = nn.CrossEntropyLoss()
def __init__(self, config, dataset):
    super().__init__(config)
    self.dataset = dataset
    self.output_size = len(dataset.vocabs.pos)
    model_name = getattr(self.config, 'bert_model',
                         'bert-base-multilingual-cased')
    self.bert = BertModel.from_pretrained(model_name)
    self.bert_layer = self.config.bert_layer
    bert_size = 768 if 'base' in model_name else 1024
    n_layer = 12 if 'base' in model_name else 24
    if self.bert_layer == 'weighted_sum':
        # learned mixing weights over the BERT layers
        self.bert_weights = nn.Parameter(
            torch.ones(n_layer, dtype=torch.float))
    if hasattr(self.config, 'lstm_size'):
        self.lstm = nn.LSTM(bert_size, self.config.lstm_size,
                            batch_first=True,
                            dropout=self.config.dropout,
                            num_layers=self.config.lstm_num_layers,
                            bidirectional=True)
        hidden_size = self.config.lstm_size * 2
    else:
        self.lstm = None
        hidden_size = bert_size
    # project the encoder output (BiLSTM if present, otherwise raw BERT)
    # onto the POS tag set
    self.output_proj = MLP(
        input_size=hidden_size,
        layers=self.config.mlp_layers,
        nonlinearity=self.config.mlp_nonlinearity,
        output_size=self.output_size,
    )
    # ignore the <pad> tag when computing the loss
    self.criterion = nn.CrossEntropyLoss(
        ignore_index=self.dataset.vocabs.pos['<pad>'])
    # BERT stays frozen; only the layers above it are trained
    for param in self.bert.parameters():
        param.requires_grad = False
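# The sketch below is not part of the original model: it illustrates, under
# assumed tensor shapes, how the 'weighted_sum' setting could combine the
# per-layer hidden states returned by BertModel using the learned
# self.bert_weights parameter. The helper name and its arguments are
# hypothetical.
def weighted_layer_sum(hidden_states, layer_weights):
    # hidden_states: sequence of per-layer tensors, each (batch, seq_len, bert_size)
    # layer_weights: nn.Parameter of shape (n_layer,)
    stacked = torch.stack(tuple(hidden_states), dim=0)   # (n_layer, batch, seq_len, bert_size)
    mix = torch.softmax(layer_weights, dim=0)             # normalize the learned weights
    return (mix.view(-1, 1, 1, 1) * stacked).sum(dim=0)   # (batch, seq_len, bert_size)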