def __init__(self, src_vocab, rel_vocab, input_size, hidden_size):
    super(Encoder, self).__init__()

    self.src_vocab = src_vocab

    # Use PAD
    self.embeddings = Embeddings(num_embeddings=src_vocab.max_n_words,
                                 embedding_dim=input_size,
                                 dropout=0.0,
                                 padding_idx=src_vocab.PAD,
                                 add_position_embedding=False)

    self.rel_vocab = rel_vocab
    self.rel_embedding = Embeddings(num_embeddings=rel_vocab.max_n_rels,
                                    embedding_dim=input_size,
                                    dropout=0.0,
                                    add_position_embedding=False,
                                    padding_idx=rel_vocab.PAD)

    self.dt_tree = DTTreeGRU(2 * input_size, hidden_size)
    self.td_tree = TDTreeGRU(2 * input_size, hidden_size)

    self.transform = nn.Linear(in_features=2 * hidden_size,
                               out_features=hidden_size,
                               bias=True)

    self.gru = RNN(type="gru",
                   batch_first=True,
                   input_size=hidden_size,
                   hidden_size=hidden_size,
                   bidirectional=True)
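# A minimal shape sketch (an assumption, not this project's actual forward pass) of how
# the sizes declared above fit together: word and relation embeddings are concatenated to
# 2 * input_size before the DT/TD tree GRUs, and the two tree outputs are concatenated to
# 2 * hidden_size before self.transform. All tensor names below are illustrative.
import torch

def _tree_encoder_shape_sketch(batch=2, seq_len=5, input_size=256, hidden_size=512):
    word_emb = torch.randn(batch, seq_len, input_size)   # stand-in for self.embeddings output
    rel_emb = torch.randn(batch, seq_len, input_size)    # stand-in for self.rel_embedding output
    tree_input = torch.cat([word_emb, rel_emb], dim=-1)  # -> 2 * input_size, fed to the tree GRUs
    dt_out = torch.randn(batch, seq_len, hidden_size)    # stand-in for self.dt_tree output
    td_out = torch.randn(batch, seq_len, hidden_size)    # stand-in for self.td_tree output
    fused = torch.cat([dt_out, td_out], dim=-1)          # -> 2 * hidden_size, input to self.transform
    return tree_input.shape, fused.shape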
def load_extention_vocab(self):
    vocabs = self.config.extention_vocabs_path
    if vocabs is None:
        raise ValueError('Extension vocabs path is None, cannot load from NoneType')

    self.src_fusion_list = self.config.src_fusion_list
    self.tgt_fusion_list = self.config.tgt_fusion_list
    self.extention_embeddings_size = self.config.extention_embeddings_size
    self.extention_vocabs = {}
    self.extention_embeddings = {}

    for name, path in vocabs.items():
        vocab = Vocabulary(self.config.src_vocab_type, path)
        self.extention_vocabs[name] = vocab

        embedding = Embeddings(
            num_embeddings=vocab.max_n_words - 4,
            embedding_dim=self.extention_embeddings_size[name],
            dropout=self.config.dropout_emb,
            add_position_embedding=self.config.add_position_emb)

        if name in self.config.extention_embeddings_path:
            state_dict = torch.load(self.config.extention_embeddings_path[name])
            # if name not in state_dict:
            #     print("Warning: {0} has no content saved!".format(name))
            # else:
            print("Loading {0}".format(name))
            embedding.embeddings.load_state_dict(state_dict)
        else:
            print("Warning: {0} has no content saved!".format(name))

        if name in self.src_fusion_list:
            print('begin load {}'.format(name))
            self.transfer_embeddings(vocab, self.src_vocab, embedding)
            self.src_extention_size += self.extention_embeddings_size[name]
        elif name in self.tgt_fusion_list:
            print('begin load {}'.format(name))
            self.transfer_embeddings(vocab, self.tgt_vocab, embedding)
            if name in self.tgt_fusion_list:
                self.tgt_extention_size += self.extention_embeddings_size[name]

        self.extention_embeddings[name] = embedding.cuda(self.device)

    # set embedding function for encoder and decoder
    self.model.set_embeddings_fuc(self.fusion_src_embeddings,
                                  self.fusion_tgt_embeddings,
                                  self.src_extention_size,
                                  self.tgt_extention_size)
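# A hedged sketch of what a fusion function compatible with set_embeddings_fuc could look
# like: the base word embeddings concatenated with each extension embedding along the
# feature dimension, giving emb_dim + src_extention_size features per token. The function
# name, signature, and the id lookup scheme are assumptions, not this project's code.
import torch

def fusion_src_embeddings_sketch(base_emb, ext_ids_by_name, extention_embeddings, src_fusion_list):
    # base_emb: (batch, seq_len, emb_dim) produced by the encoder's own Embeddings
    # ext_ids_by_name: dict mapping extension vocab name -> (batch, seq_len) id tensor
    parts = [base_emb]
    for name in src_fusion_list:
        parts.append(extention_embeddings[name](ext_ids_by_name[name]))
    return torch.cat(parts, dim=-1)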
def __init__(self, n_words, input_size, hidden_size,
             bridge_type="mlp", dropout_rate=0.0):
    super(Decoder, self).__init__()

    self.bridge_type = bridge_type
    self.hidden_size = hidden_size
    self.context_size = hidden_size * 2

    self.embedding = Embeddings(num_embeddings=n_words,
                                embedding_dim=input_size,
                                dropout=0.0,
                                add_position_embedding=False)

    self.cgru_cell = CGRUCell(input_size=input_size, hidden_size=hidden_size)

    self.linear_input = nn.Linear(in_features=input_size, out_features=input_size)
    self.linear_hidden = nn.Linear(in_features=hidden_size, out_features=input_size)
    self.linear_ctx = nn.Linear(in_features=hidden_size * 2, out_features=input_size)

    self.dropout = nn.Dropout(dropout_rate)

    self._reset_parameters()
    self._build_bridge()
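# _build_bridge is not shown in this excerpt. Below is a minimal sketch of the common
# "mlp" bridge (a linear + tanh mapping from the bidirectional encoder context, size
# 2 * hidden_size, to the decoder's initial hidden state). This is an assumption about
# its behavior, not this project's implementation.
import torch
import torch.nn as nn

class MLPBridgeSketch(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.linear_bridge = nn.Linear(2 * hidden_size, hidden_size)

    def forward(self, ctx_mean):
        # ctx_mean: (batch, 2 * hidden_size), e.g. mean-pooled encoder outputs
        return torch.tanh(self.linear_bridge(ctx_mean))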
def __init__(self, n_tgt_vocab, n_layers=6, n_head=8,
             d_word_vec=512, d_model=512, d_inner_hid=1024, dropout=0.1):
    super(Decoder, self).__init__()

    self.n_head = n_head
    self.num_layers = n_layers
    self.d_model = d_model

    self.embeddings = Embeddings(n_tgt_vocab, d_word_vec,
                                 dropout=dropout,
                                 add_position_embedding=True)

    self.block_stack = nn.ModuleList([
        DecoderBlock(d_model=d_model, d_inner_hid=d_inner_hid,
                     n_head=n_head, dropout=dropout)
        for _ in range(n_layers)
    ])

    self.out_layer_norm = nn.LayerNorm(d_model)
def __init__(self, parser_config, n_words, input_size, hidden_size):
    super(Encoder, self).__init__()

    # Use PAD
    self.embedding = Embeddings(num_embeddings=n_words,
                                embedding_dim=input_size,
                                dropout=0.0,
                                add_position_embedding=False)

    self.transformer_emb = nn.Linear(parser_config.word_dims, input_size, bias=False)

    parser_dim = 2 * parser_config.lstm_hiddens
    transformer_lstm = []
    for layer in range(parser_config.lstm_layers):
        transformer_lstm.append(nn.Linear(parser_dim, input_size, bias=False))
    self.transformer_lstm = nn.ModuleList(transformer_lstm)

    parser_mlp_dim = parser_config.mlp_arc_size + parser_config.mlp_rel_size
    self.transformer_dep = nn.Linear(parser_mlp_dim, input_size, bias=False)
    self.transformer_head = nn.Linear(parser_mlp_dim, input_size, bias=False)

    self.parser_lstm_layers = parser_config.lstm_layers
    self.synscale = ScalarMix(mixture_size=3 + parser_config.lstm_layers)

    self.gru = RNN(type="gru",
                   batch_first=True,
                   input_size=2 * input_size,
                   hidden_size=hidden_size,
                   bidirectional=True)
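# ScalarMix above blends 3 + lstm_layers projected parser representations (embedding,
# LSTM layers, dep MLP, head MLP) into a single vector. A minimal sketch of the usual
# scalar-mix computation (softmax-normalized learned weights plus a global scale) is
# given below as an assumption about the helper's behavior, not its actual code.
import torch
import torch.nn as nn

class ScalarMixSketch(nn.Module):
    def __init__(self, mixture_size):
        super().__init__()
        self.weights = nn.Parameter(torch.zeros(mixture_size))  # one learned weight per layer
        self.gamma = nn.Parameter(torch.ones(1))                 # global scaling factor

    def forward(self, tensors):
        # tensors: list of (batch, seq_len, dim) layer outputs, len == mixture_size
        norm_weights = torch.softmax(self.weights, dim=0)
        mixed = sum(w * t for w, t in zip(norm_weights, tensors))
        return self.gamma * mixed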
def __init__(self, src_vocab, n_layers=6, n_head=8,
             d_word_vec=512, d_model=512, d_inner_hid=1024, dropout=0.1):
    super().__init__()

    self.src_vocab = src_vocab
    self.d_word_vec = d_word_vec
    self.d_model = d_model
    self.num_layers = n_layers

    self.embeddings = Embeddings(num_embeddings=src_vocab.max_n_words,
                                 embedding_dim=d_word_vec,
                                 dropout=dropout,
                                 padding_idx=src_vocab.PAD,
                                 add_position_embedding=True)

    self.block_stack = nn.ModuleList([
        EncoderBlock(d_model=d_model, d_inner_hid=d_inner_hid,
                     n_head=n_head, dropout=dropout)
        for _ in range(n_layers)
    ])

    self.layer_norm = nn.LayerNorm(d_model)

    self.extension_fuc = None
def __init__(self, tgt_vocab, ext_tgt_emb, n_layers=6, n_head=8,
             d_word_vec=512, d_model=512, d_inner_hid=1024, dropout=0.1):
    super(Decoder, self).__init__()

    self.extword_embed = nn.Embedding(tgt_vocab.extvocab_size, d_word_vec, padding_idx=0)
    self.extword_embed.weight.data.copy_(torch.from_numpy(ext_tgt_emb))
    self.extword_embed.weight.requires_grad = False

    self.n_head = n_head
    self.num_layers = n_layers
    self.d_model = d_model

    self.embeddings = Embeddings(tgt_vocab.vocab_size, d_word_vec,
                                 dropout=dropout,
                                 add_position_embedding=True)

    self.block_stack = nn.ModuleList([
        DecoderBlock(d_model=d_model, d_inner_hid=d_inner_hid,
                     n_head=n_head, dropout=dropout)
        for _ in range(n_layers)
    ])

    self.out_layer_norm = LayerNorm(d_model)
def __init__(self, src_vocab, rel_vocab, n_layers=6, n_head=8,
             d_word_vec=512, d_model=512, d_inner_hid=1024, dropout=0.1):
    super().__init__()

    self.src_vocab = src_vocab
    self.d_word_vec = d_word_vec
    self.d_model = d_model
    self.num_layers = n_layers

    self.embeddings = Embeddings(num_embeddings=src_vocab.max_n_words,
                                 embedding_dim=d_word_vec,
                                 dropout=dropout,
                                 padding_idx=src_vocab.PAD,
                                 add_position_embedding=True)

    self.rel_vocab = rel_vocab
    self.rel_embedding = Embeddings(num_embeddings=rel_vocab.max_n_rels,
                                    embedding_dim=d_word_vec,
                                    dropout=0.0,
                                    add_position_embedding=False,
                                    padding_idx=rel_vocab.PAD)

    self.dt_tree = DTTreeGRU(2 * d_word_vec, d_model)
    self.td_tree = TDTreeGRU(2 * d_word_vec, d_model)

    self.transform = nn.Linear(in_features=2 * d_model,
                               out_features=d_model,
                               bias=True)

    self.block_stack = nn.ModuleList([
        EncoderBlock(d_model=d_model, d_inner_hid=d_inner_hid,
                     n_head=n_head, dropout=dropout)
        for _ in range(n_layers)
    ])

    self.layer_norm = nn.LayerNorm(d_model)

    self.extension_fuc = None
def __init__(self, parser_config, n_src_vocab, n_layers=6, n_head=8,
             d_word_vec=512, d_model=512, d_inner_hid=1024, dropout=0.1):
    super().__init__()

    self.num_layers = n_layers

    self.embeddings = Embeddings(num_embeddings=n_src_vocab,
                                 embedding_dim=d_word_vec,
                                 dropout=dropout,
                                 add_position_embedding=True)

    self.transformer_emb = nn.Linear(parser_config.word_dims, d_word_vec, bias=False)

    parser_dim = 2 * parser_config.lstm_hiddens
    transformer_lstm = []
    for layer in range(parser_config.lstm_layers):
        transformer_lstm.append(nn.Linear(parser_dim, d_word_vec, bias=False))
    self.transformer_lstm = nn.ModuleList(transformer_lstm)

    parser_mlp_dim = parser_config.mlp_arc_size + parser_config.mlp_rel_size
    self.transformer_dep = nn.Linear(parser_mlp_dim, d_word_vec, bias=False)
    self.transformer_head = nn.Linear(parser_mlp_dim, d_word_vec, bias=False)

    self.parser_lstm_layers = parser_config.lstm_layers
    self.synscale = ScalarMix(mixture_size=3 + parser_config.lstm_layers)

    self.transformer = nn.Linear(d_word_vec + d_word_vec, d_model, bias=False)

    self.block_stack = nn.ModuleList([
        EncoderBlock(d_model=d_model, d_inner_hid=d_inner_hid,
                     n_head=n_head, dropout=dropout)
        for _ in range(n_layers)
    ])

    self.layer_norm = nn.LayerNorm(d_model)
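# A hedged sketch (an assumption, not this project's forward pass) of the syntax-fusion
# step implied by the layers above: each parser state is projected to d_word_vec, the
# projected layers are blended by self.synscale, and the result is concatenated with the
# word embedding before self.transformer maps the pair down to d_model.
import torch

def syntax_fuse_sketch(word_emb, parser_layers, projections, synscale, transformer):
    # word_emb: (batch, seq_len, d_word_vec)
    # parser_layers: list of parser tensors; projections: matching list of nn.Linear layers
    projected = [proj(layer) for proj, layer in zip(projections, parser_layers)]
    syn = synscale(projected)                                 # ScalarMix over projected layers
    return transformer(torch.cat([word_emb, syn], dim=-1))    # -> (batch, seq_len, d_model)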
def __init__(self, n_words, input_size, hidden_size):
    super(Encoder, self).__init__()

    # Use PAD
    self.embedding = Embeddings(num_embeddings=n_words,
                                embedding_dim=input_size,
                                dropout=0.0,
                                add_position_embedding=False)

    self.gru = RNN(type="gru",
                   batch_first=True,
                   input_size=input_size,
                   hidden_size=hidden_size,
                   bidirectional=True)
def __init__(self, src_vocab, input_size, hidden_size):
    super(Encoder, self).__init__()

    self.src_vocab = src_vocab

    # Use PAD
    self.embeddings = Embeddings(num_embeddings=src_vocab.max_n_words,
                                 embedding_dim=input_size,
                                 dropout=0.0,
                                 add_position_embedding=False)

    self.gru = RNN(type="gru",
                   batch_first=True,
                   input_size=input_size,
                   hidden_size=hidden_size,
                   bidirectional=True)

    self.extention_fuc = None
def __init__(self, src_vocab, ext_src_emb, syn_hidden, n_layers=6, n_head=8,
             d_word_vec=512, d_model=512, d_inner_hid=1024, dropout=0.1):
    super().__init__()

    self.extword_embed = nn.Embedding(src_vocab.extvocab_size, d_word_vec, padding_idx=0)
    self.extword_embed.weight.data.copy_(torch.from_numpy(ext_src_emb))
    self.extword_embed.weight.requires_grad = False

    self.syn_linear = nn.Linear(in_features=syn_hidden,
                                out_features=d_word_vec,
                                bias=True)

    # self.emb_linear = nn.Linear(100, d_word_vec, True)
    # nn.init.xavier_uniform_(self.emb_linear.weight)

    self.num_layers = n_layers

    self.embeddings = Embeddings(num_embeddings=src_vocab.vocab_size,
                                 embedding_dim=d_word_vec,
                                 dropout=dropout,
                                 add_position_embedding=True)

    self.block_stack = nn.ModuleList([
        EncoderBlock(d_model=d_model, d_inner_hid=d_inner_hid,
                     n_head=n_head, dropout=dropout)
        for _ in range(n_layers)
    ])

    self.layer_norm = LayerNorm(d_model)
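# The frozen extword_embed table typically supplements the trainable embeddings. A common
# pattern (assumed here, not confirmed by this excerpt) is to sum the two lookups for each
# source token, so rare words still receive a pretrained vector while frequent words keep
# a trainable one. The function name and id arguments are illustrative.
import torch

def combine_word_embeddings_sketch(embeddings, extword_embed, word_ids, extword_ids):
    # word_ids / extword_ids: (batch, seq_len) indices into the trainable and the
    # frozen pretrained embedding tables respectively
    return embeddings(word_ids) + extword_embed(extword_ids)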