def __init__(self, input_size, label_num, dropout, hidden_size=300, eval_transitions=False):
    super().__init__()
    self.label_num = label_num
    self.mlp_rel_h = MLP(input_size, hidden_size, dropout, activation=nn.ReLU)
    self.mlp_rel_d = MLP(input_size, hidden_size, dropout, activation=nn.ReLU)
    self.rel_atten = Bilinear(hidden_size, hidden_size, label_num, bias_x=True, bias_y=True, expand=True)
    self.rel_crf = CRF(label_num)
    self.eval_transitions = eval_transitions
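# A minimal sketch, assuming a call such as MLP(input_size, hidden_size, dropout,
# activation=nn.ReLU) builds a single linear projection followed by the activation
# and dropout (the repo's MLP may differ; this stand-in is only illustrative).
import torch
from torch import nn

class SimpleMLP(nn.Module):
    def __init__(self, input_size, hidden_size, dropout, activation=nn.ReLU):
        super().__init__()
        self.linear = nn.Linear(input_size, hidden_size)
        self.activation = activation()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # x: (batch, seq_len, input_size) -> (batch, seq_len, hidden_size)
        return self.dropout(self.activation(self.linear(x)))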
def __init__(self, input_size, label_num, dropout, arc_hidden_size, rel_hidden_size, **kwargs):
    super(Graph, self).__init__(input_size, label_num, dropout)
    arc_dropout = kwargs.pop('arc_dropout', dropout)
    rel_dropout = kwargs.pop('rel_dropout', dropout)
    activation = kwargs.pop('activation', {})
    self.mlp_arc_h = MLP(input_size, arc_hidden_size, arc_dropout, **activation)
    self.mlp_arc_d = MLP(input_size, arc_hidden_size, arc_dropout, **activation)
    self.mlp_rel_h = MLP(input_size, rel_hidden_size, rel_dropout, **activation)
    self.mlp_rel_d = MLP(input_size, rel_hidden_size, rel_dropout, **activation)
    self.arc_atten = Bilinear(arc_hidden_size, arc_hidden_size, 1, bias_x=True, bias_y=False, expand=True)
    self.rel_atten = Bilinear(rel_hidden_size, rel_hidden_size, label_num, bias_x=True, bias_y=True, expand=True)
def __init__(self, input_size, label_num, dropout: float = 0.2, hidden_size=None, **kwargs):
    super().__init__(input_size, label_num, dropout)
    activation = kwargs.pop('activation', {'LeakyReLU': {}})
    self.mlp_rel_h = MLP(input_size, hidden_size, dropout=dropout, **activation)
    self.mlp_rel_d = MLP(input_size, hidden_size, dropout=dropout, **activation)
    # bias_x/bias_y/expand are not accepted by torch.nn.Bilinear, and torch.nn has no CRF,
    # so these are the repo-local Bilinear and CRF modules used by the other heads
    self.biaffine = Bilinear(hidden_size, hidden_size, label_num, bias_x=True, bias_y=True, expand=True)
    self.crf = CRF(label_num)
def __init__(self, input_size, label_num, dropout, arc_hidden_size=500, rel_hidden_size=100,
             loss_interpolation=0.4, loss_func=dep_loss):
    super().__init__()
    self.label_num = label_num
    self.loss_interpolation = loss_interpolation
    self.mlp_arc_h = MLP(input_size, arc_hidden_size, dropout, activation=nn.ReLU)
    self.mlp_arc_d = MLP(input_size, arc_hidden_size, dropout, activation=nn.ReLU)
    self.mlp_rel_h = MLP(input_size, rel_hidden_size, dropout, activation=nn.ReLU)
    self.mlp_rel_d = MLP(input_size, rel_hidden_size, dropout, activation=nn.ReLU)
    self.arc_atten = Bilinear(arc_hidden_size, arc_hidden_size, 1, bias_x=True, bias_y=False, expand=True)
    self.rel_atten = Bilinear(rel_hidden_size, rel_hidden_size, label_num, bias_x=True, bias_y=True, expand=True)
    self.loss_func = loss_func
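# A hedged, self-contained sketch of the biaffine scoring this head is assumed to
# perform in forward(): plain einsum with explicit bias columns. The repo's Bilinear
# module (bias_x/bias_y/expand) is not used here, since its exact call signature is
# an assumption; shapes and sizes below are illustrative.
import torch

def biaffine(x, y, weight):
    # x: (B, N, I), y: (B, N, J), weight: (O, I, J) -> scores: (B, O, N, N)
    return torch.einsum('bxi,oij,byj->boxy', x, y, weight)

def add_bias(t):
    # append a column of ones so the bilinear form also learns linear terms
    return torch.cat([t, t.new_ones(*t.shape[:-1], 1)], dim=-1)

B, N, H_arc, H_rel, L = 2, 5, 500, 100, 40
arc_d, arc_h = torch.randn(B, N, H_arc), torch.randn(B, N, H_arc)  # mlp_arc_d / mlp_arc_h outputs
rel_d, rel_h = torch.randn(B, N, H_rel), torch.randn(B, N, H_rel)  # mlp_rel_d / mlp_rel_h outputs

# arc scorer: bias_x=True, bias_y=False -> bias column only on the dependent side
W_arc = torch.randn(1, H_arc + 1, H_arc)
s_arc = biaffine(add_bias(arc_d), arc_h, W_arc).squeeze(1)                     # (B, N, N)

# relation scorer: bias_x=True, bias_y=True -> bias columns on both sides, one slice per label
W_rel = torch.randn(L, H_rel + 1, H_rel + 1)
s_rel = biaffine(add_bias(rel_d), add_bias(rel_h), W_rel).permute(0, 2, 3, 1)  # (B, N, N, L)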
def __init__(self, input_size, label_num, dropout, hidden_size=300):
    super().__init__()
    self.label_num = label_num
    self.mlp_rel_h = MLP([input_size, hidden_size], output_dropout=dropout, output_activation=nn.ReLU)
    self.mlp_rel_d = MLP([input_size, hidden_size], output_dropout=dropout, output_activation=nn.ReLU)
    self.rel_atten = Bilinear(hidden_size, hidden_size, label_num, bias_x=True, bias_y=True, expand=True)
    self.crf = CRF(label_num)
# relies on the repo-local MLP, CRF (pytorch-crf-style interface) and TokenClassifierResult
class MLPClassfier(nn.Module):
    def __init__(self, input_size, hidden_sizes, num_labels, dropout,
                 use_cls=False, use_sep=False, use_crf=False, crf_reduction='sum'):
        super().__init__()
        self.use_cls = use_cls
        self.use_sep = use_sep
        self.linear = MLP([input_size, *hidden_sizes, num_labels], dropout=dropout)
        if use_crf:
            self.crf = CRF(num_labels, batch_first=True)
            self.crf_reduction = crf_reduction
        else:
            self.crf = None

    def forward(self, input, attention_mask=None, word_index=None,
                word_attention_mask=None, labels=None, is_processed=False) -> TokenClassifierResult:
        if not is_processed:
            if not self.use_cls:
                # strip the leading [CLS] position
                input = input[:, 1:, :]
            if not self.use_sep:
                # strip the trailing [SEP] position
                input = input[:, :-1, :]
            if word_attention_mask is None:
                assert word_index is None
                bias = int(not self.use_cls) + int(not self.use_sep)
                word_attention_mask = attention_mask[:, bias:] == 1
            if word_index is not None:
                # gather the first-subword representation for each word
                input = torch.gather(
                    input, dim=1,
                    index=word_index.unsqueeze(-1).expand(-1, -1, input.size(-1)))
        logits = self.linear(input)
        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            # Only keep active parts of the loss
            if word_attention_mask is not None and self.crf is not None:
                logits = F.log_softmax(logits, dim=-1)
                loss = -self.crf(logits, labels, word_attention_mask, reduction=self.crf_reduction)
            elif word_attention_mask is not None:
                active_loss = word_attention_mask.view(-1)
                active_logits = logits.view(-1, logits.size(-1))[active_loss]
                active_labels = labels.view(-1)[active_loss]
                loss = loss_fct(active_logits, active_labels)
            else:
                loss = loss_fct(logits.view(-1, logits.size(-1)), labels.view(-1))
        decoded = None
        if not self.training and self.crf is not None:
            decoded = self.crf.decode(emissions=logits, mask=word_attention_mask)
            if self.use_cls:
                decoded = [sent[1:] for sent in decoded]
                if labels is not None:
                    labels = labels[:, 1:]
            if self.use_sep:
                decoded = [sent[:-1] for sent in decoded]
                if labels is not None:
                    labels = labels[:, :-1]
        return TokenClassifierResult(loss=loss, logits=logits, decoded=decoded, labels=labels)
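# A hedged usage sketch for the two loss paths above, assuming the CRF class is
# pytorch-crf compatible (CRF(num_tags, batch_first=True) returning the sequence
# log-likelihood, plus .decode()); tensor sizes are illustrative.
import torch
import torch.nn.functional as F
from torchcrf import CRF

B, N, L = 2, 6, 5
logits = torch.randn(B, N, L)                         # word-level emission scores
labels = torch.randint(0, L, (B, N))
mask = torch.ones(B, N, dtype=torch.bool)
mask[1, 4:] = False                                   # second sentence is shorter

# CRF branch: negative log-likelihood for training, Viterbi decoding for prediction
crf = CRF(L, batch_first=True)
emissions = F.log_softmax(logits, dim=-1)
loss = -crf(emissions, labels, mask=mask, reduction='sum')
decoded = crf.decode(emissions, mask=mask)            # list of best tag sequences

# non-CRF branch: keep only the "active" (unmasked) positions before cross-entropy
active = mask.view(-1)
ce_loss = F.cross_entropy(logits.view(-1, L)[active], labels.view(-1)[active])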
def __init__(self, input_size, label_num, dropout, lstm_hidden_size=600, lstm_num_layers=3,
             bin_hidden_size=150, arc_hidden_size=600, rel_hidden_size=600,
             loss_interpolation=0.4, inference='mfvi', max_iter=3):
    super().__init__()
    self.label_num = label_num
    self.loss_interpolation = loss_interpolation
    if lstm_num_layers > 0:
        self.lstm = LSTM(input_size=input_size, hidden_size=lstm_hidden_size,
                         num_layers=lstm_num_layers, bidirectional=True, dropout=dropout)
        self.lstm_dropout = SharedDropout(p=dropout)
        hidden_size = lstm_hidden_size * 2
    else:
        self.lstm = None
        hidden_size = input_size
    self.mlp_bin_d = MLP([hidden_size, bin_hidden_size], output_dropout=dropout)
    self.mlp_bin_h = MLP([hidden_size, bin_hidden_size], output_dropout=dropout)
    self.mlp_bin_g = MLP([hidden_size, bin_hidden_size], output_dropout=dropout)
    self.mlp_arc_h = MLP([hidden_size, arc_hidden_size], output_dropout=dropout)
    self.mlp_arc_d = MLP([hidden_size, arc_hidden_size], output_dropout=dropout)
    self.mlp_rel_h = MLP([hidden_size, rel_hidden_size], output_dropout=dropout)
    self.mlp_rel_d = MLP([hidden_size, rel_hidden_size], output_dropout=dropout)
    self.sib_attn = Triaffine(bin_hidden_size, bias_x=True, bias_y=True)
    self.cop_attn = Triaffine(bin_hidden_size, bias_x=True, bias_y=True)
    self.grd_attn = Triaffine(bin_hidden_size, bias_x=True, bias_y=True)
    self.arc_atten = Bilinear(arc_hidden_size, arc_hidden_size, 1, bias_x=True, bias_y=True, expand=True)
    self.rel_atten = Bilinear(rel_hidden_size, rel_hidden_size, label_num, bias_x=True, bias_y=True, expand=True)
    self.vi = (MFVISemanticDependency if inference == 'mfvi' else LBPSemanticDependency)(max_iter)
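# A hedged sketch of the triaffine scoring behind sib_attn / cop_attn / grd_attn:
# a plain einsum over three sets of token representations, not the repo's Triaffine
# module, and with bias handling omitted for brevity; sizes are illustrative.
import torch

def triaffine(x, y, z, weight):
    # x: (B, N, I), y: (B, N, J), z: (B, N, K), weight: (I, J, K)
    # s[b, i, j, k] = sum over (a, c, d) of x[b,i,a] * weight[a,c,d] * y[b,j,c] * z[b,k,d]
    return torch.einsum('bia,acd,bjc,bkd->bijk', x, weight, y, z)

B, N, H = 2, 5, 150
dep = torch.randn(B, N, H)    # mlp_bin_d output
head = torch.randn(B, N, H)   # mlp_bin_h output
extra = torch.randn(B, N, H)  # mlp_bin_g output (grandparent / sibling / co-parent role)
W = torch.randn(H, H, H)
s_second_order = triaffine(dep, head, extra, W)   # (B, N, N, N) scores over token triples
# In this head, such second-order scores are what the MFVI / LBP module (self.vi)
# combines with the first-order arc scores over max_iter inference iterations.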