def __init__(self, word_dim, num_words, char_dim, num_chars, pos_dim, num_pos, rnn_mode, hidden_size,
             num_layers, num_labels, arc_space, type_space, embedd_word=None, embedd_char=None,
             embedd_pos=None, p_in=0.33, p_out=0.33, p_rnn=(0.33, 0.33), pos=True, activation='elu'):
    """Build the embeddings, recurrent encoder and arc/type scorers.

    Args:
        word_dim: embedding size of the word embedding table.
        num_words: number of rows in the word embedding table.
        char_dim: embedding size of the character embedding table; also
            passed to ``CharCNN`` as its 2nd and 3rd positional arguments.
        num_chars: number of rows in the character embedding table.
        pos_dim: embedding size of the POS embedding table.
        num_pos: number of rows in the POS embedding table.
        rnn_mode: one of ``'RNN'``, ``'LSTM'``, ``'FastLSTM'``, ``'GRU'``.
        hidden_size: hidden size handed to the recurrent encoder.
        num_layers: number of recurrent layers.
        num_labels: number of labels handed to the ``BiLinear`` scorer.
        arc_space: output size of the arc projections (``arc_h``/``arc_c``).
        type_space: output size of the type projections (``type_h``/``type_c``).
        embedd_word: optional initial weight for the word embedding.
        embedd_char: optional initial weight for the character embedding.
        embedd_pos: optional initial weight for the POS embedding.
        p_in: drop probability of ``dropout_in``.
        p_out: drop probability of ``dropout_out``.
        p_rnn: value forwarded as ``dropout`` to the recurrent encoder.
        pos: when falsy, no POS embedding is created (``pos_embed`` is
            ``None``) and ``pos_dim`` is not added to the encoder input size.
        activation: ``'elu'`` or ``'tanh'``.

    Raises:
        ValueError: if ``rnn_mode`` or ``activation`` is not supported.
    """
    super(DeepBiAffine, self).__init__()

    # NOTE: sub-modules are created in a fixed order; keep it stable so that
    # parameter / state_dict ordering does not change.
    # All three embedding tables use index 1 as the padding index.
    self.word_embed = nn.Embedding(num_words, word_dim, _weight=embedd_word, padding_idx=1)
    self.pos_embed = nn.Embedding(num_pos, pos_dim, _weight=embedd_pos, padding_idx=1) if pos else None
    self.char_embed = nn.Embedding(num_chars, char_dim, _weight=embedd_char, padding_idx=1)
    # NOTE(review): the leading 2 is presumably the number of CNN layers -- confirm against CharCNN.
    self.char_cnn = CharCNN(2, char_dim, char_dim, hidden_channels=char_dim * 4, activation=activation)

    self.dropout_in = nn.Dropout2d(p=p_in)
    self.dropout_out = nn.Dropout2d(p=p_out)
    self.num_labels = num_labels

    if rnn_mode == 'RNN':
        RNN = VarRNN
    elif rnn_mode == 'LSTM':
        RNN = VarLSTM
    elif rnn_mode == 'FastLSTM':
        RNN = VarFastLSTM
    elif rnn_mode == 'GRU':
        RNN = VarGRU
    else:
        raise ValueError('Unknown RNN mode: %s' % rnn_mode)

    # Encoder input size: word + char features, plus POS when enabled.
    dim_enc = word_dim + char_dim
    if pos:
        dim_enc += pos_dim

    self.rnn = RNN(dim_enc, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True,
                   dropout=p_rnn)

    # The encoder is bidirectional, hence twice the hidden size.
    out_dim = hidden_size * 2
    self.arc_h = nn.Linear(out_dim, arc_space)
    self.arc_c = nn.Linear(out_dim, arc_space)
    self.biaffine = BiAffine(arc_space, arc_space)

    self.type_h = nn.Linear(out_dim, type_space)
    self.type_c = nn.Linear(out_dim, type_space)
    self.bilinear = BiLinear(type_space, type_space, self.num_labels)

    # Explicit check instead of `assert`: asserts are removed under `python -O`,
    # which would let an unsupported name silently select Tanh.
    if activation == 'elu':
        self.activation = nn.ELU(inplace=True)
    elif activation == 'tanh':
        self.activation = nn.Tanh()
    else:
        raise ValueError('Unknown activation: %s' % activation)

    # Unreduced loss: one value per element instead of a mean/sum.
    self.criterion = nn.CrossEntropyLoss(reduction='none')
    self.reset_parameters(embedd_word, embedd_char, embedd_pos)
def __init__(self, word_dim, num_words, char_dim, num_chars, pos_dim, num_pos, rnn_mode, hidden_size,
             encoder_layers, decoder_layers, num_labels, arc_space, type_space, embedd_word=None,
             embedd_char=None, embedd_pos=None, p_in=0.33, p_out=0.33, p_rnn=(0.33, 0.33), pos=True,
             prior_order='inside_out', grandPar=False, sibling=False, activation='elu'):
    """Build the embeddings, recurrent encoder/decoder and arc/type scorers.

    Args:
        word_dim: embedding size of the word embedding table.
        num_words: number of rows in the word embedding table.
        char_dim: embedding size of the character embedding table; also
            passed to ``CharCNN`` as its 2nd and 3rd positional arguments.
        num_chars: number of rows in the character embedding table.
        pos_dim: embedding size of the POS embedding table.
        num_pos: number of rows in the POS embedding table.
        rnn_mode: one of ``'RNN'``, ``'LSTM'``, ``'FastLSTM'``, ``'GRU'``;
            the same recurrent class is used for encoder and decoder.
        hidden_size: hidden size of both the encoder and the decoder.
        encoder_layers: number of recurrent layers in the encoder.
        decoder_layers: number of recurrent layers in the decoder.
        num_labels: number of labels handed to the ``BiLinear`` scorer.
        arc_space: output size of the arc projections (``arc_h``/``arc_c``).
        type_space: output size of the type projections (``type_h``/``type_c``).
        embedd_word: optional initial weight for the word embedding.
        embedd_char: optional initial weight for the character embedding.
        embedd_pos: optional initial weight for the POS embedding.
        p_in: drop probability of ``dropout_in``.
        p_out: drop probability of ``dropout_out``.
        p_rnn: value forwarded as ``dropout`` to encoder and decoder.
        pos: when falsy, no POS embedding is created (``pos_embed`` is
            ``None``) and ``pos_dim`` is not added to the encoder input size.
        prior_order: ``'deep_first'`` / ``'shallow_first'`` (both map to
            ``PriorOrder.DEPTH``), ``'inside_out'`` or ``'left2right'``.
        grandPar: stored on the instance as ``self.grandPar``.
        sibling: stored on the instance as ``self.sibling``.
        activation: ``'elu'`` or ``'tanh'``.

    Raises:
        ValueError: if ``prior_order``, ``rnn_mode`` or ``activation`` is
            not supported.
    """
    super(StackPtrNet, self).__init__()

    # NOTE: sub-modules are created in a fixed order; keep it stable so that
    # parameter / state_dict ordering does not change.
    # All three embedding tables use index 1 as the padding index.
    self.word_embed = nn.Embedding(num_words, word_dim, _weight=embedd_word, padding_idx=1)
    self.pos_embed = nn.Embedding(num_pos, pos_dim, _weight=embedd_pos, padding_idx=1) if pos else None
    self.char_embed = nn.Embedding(num_chars, char_dim, _weight=embedd_char, padding_idx=1)
    # NOTE(review): the leading 2 is presumably the number of CNN layers -- confirm against CharCNN.
    self.char_cnn = CharCNN(2, char_dim, char_dim, hidden_channels=char_dim * 4, activation=activation)

    self.dropout_in = nn.Dropout2d(p=p_in)
    self.dropout_out = nn.Dropout2d(p=p_out)
    self.num_labels = num_labels

    if prior_order in ['deep_first', 'shallow_first']:
        self.prior_order = PriorOrder.DEPTH
    elif prior_order == 'inside_out':
        self.prior_order = PriorOrder.INSIDE_OUT
    elif prior_order == 'left2right':
        # Spelling follows the enum member name as referenced by the existing code.
        self.prior_order = PriorOrder.LEFT2RIGTH
    else:
        raise ValueError('Unknown prior order: %s' % prior_order)

    self.grandPar = grandPar
    self.sibling = sibling

    # Encoder and decoder always share the same recurrent implementation.
    if rnn_mode == 'RNN':
        rnn_cls = VarRNN
    elif rnn_mode == 'LSTM':
        rnn_cls = VarLSTM
    elif rnn_mode == 'FastLSTM':
        rnn_cls = VarFastLSTM
    elif rnn_mode == 'GRU':
        rnn_cls = VarGRU
    else:
        raise ValueError('Unknown RNN mode: %s' % rnn_mode)

    # Encoder input size: word + char features, plus POS when enabled.
    dim_enc = word_dim + char_dim
    if pos:
        dim_enc += pos_dim

    self.encoder_layers = encoder_layers
    self.encoder = rnn_cls(dim_enc, hidden_size, num_layers=encoder_layers, batch_first=True,
                           bidirectional=True, dropout=p_rnn)

    # Projects the bidirectional encoder width (2 * hidden_size) down to the
    # decoder input size.
    dim_dec = hidden_size // 2
    self.src_dense = nn.Linear(2 * hidden_size, dim_dec)
    self.decoder_layers = decoder_layers
    self.decoder = rnn_cls(dim_dec, hidden_size, num_layers=decoder_layers, batch_first=True,
                           bidirectional=False, dropout=p_rnn)

    # Maps a 2 * hidden_size vector to hidden_size.
    # NOTE(review): presumably converts encoder final states into decoder initial states -- confirm.
    self.hx_dense = nn.Linear(2 * hidden_size, hidden_size)

    self.arc_h = nn.Linear(hidden_size, arc_space)  # arc dense for decoder
    self.arc_c = nn.Linear(hidden_size * 2, arc_space)  # arc dense for encoder
    self.biaffine = BiAffine(arc_space, arc_space)

    self.type_h = nn.Linear(hidden_size, type_space)  # type dense for decoder
    self.type_c = nn.Linear(hidden_size * 2, type_space)  # type dense for encoder
    self.bilinear = BiLinear(type_space, type_space, self.num_labels)

    # Explicit check instead of `assert`: asserts are removed under `python -O`,
    # which would let an unsupported name silently select Tanh.
    if activation == 'elu':
        self.activation = nn.ELU(inplace=True)
    elif activation == 'tanh':
        self.activation = nn.Tanh()
    else:
        raise ValueError('Unknown activation: %s' % activation)

    # Unreduced loss: one value per element instead of a mean/sum.
    self.criterion = nn.CrossEntropyLoss(reduction='none')
    self.reset_parameters(embedd_word, embedd_char, embedd_pos)