# Variant keyed by a string ('src' / 'tgt' / 'structure'):
def build_embeddings(opt, word_dict, for_encoder='src'):
    """
    Build an Embeddings instance.

    Args:
        opt: the options for the current environment.
        word_dict (Vocab): the word dictionary.
        for_encoder (str): which embeddings to build: 'src', 'tgt',
            or 'structure'.
    """
    if for_encoder == 'src':
        embedding_dim = opt.src_word_vec_size  # 512 by default
    elif for_encoder == 'tgt':
        embedding_dim = opt.tgt_word_vec_size
    elif for_encoder == 'structure':
        embedding_dim = 64

    word_padding_idx = word_dict.stoi[Constants.PAD_WORD]
    num_word_embeddings = len(word_dict)

    if for_encoder in ('src', 'tgt'):
        return Embeddings(word_vec_size=embedding_dim,
                          position_encoding=opt.position_encoding,
                          dropout=opt.dropout,
                          word_padding_idx=word_padding_idx,
                          word_vocab_size=num_word_embeddings,
                          sparse=opt.optim == "sparseadam")
    elif for_encoder == 'structure':
        # Structure embeddings are small (64-dim) and use no position encoding.
        return Embeddings(word_vec_size=embedding_dim,
                          position_encoding=False,
                          dropout=opt.dropout,
                          word_padding_idx=word_padding_idx,
                          word_vocab_size=num_word_embeddings,
                          sparse=opt.optim == "sparseadam")
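# A minimal usage sketch for the variant above. Hedged: the `opt` fields and
# the toy vocab are hypothetical stand-ins for the real training options and
# Vocab instances; only build_embeddings itself comes from this code.
if __name__ == "__main__":
    from types import SimpleNamespace

    class _ToyVocab:
        """Tiny Vocab-like object exposing the `stoi` mapping and length
        that build_embeddings expects."""
        def __init__(self, tokens):
            self.stoi = {tok: i for i, tok in enumerate(tokens)}

        def __len__(self):
            return len(self.stoi)

    opt = SimpleNamespace(src_word_vec_size=512, tgt_word_vec_size=512,
                          position_encoding=True, dropout=0.1, optim="adam")
    vocab = _ToyVocab(["<s>", Constants.PAD_WORD, "hello", "world"])

    src_emb = build_embeddings(opt, vocab, for_encoder='src')           # 512-dim
    struct_emb = build_embeddings(opt, vocab, for_encoder='structure')  # 64-dim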
# Variant with a boolean for_encoder flag (encoder vs. decoder only):
def build_embeddings(opt, word_dict, for_encoder=True):
    """
    Build an Embeddings instance.

    Args:
        opt: the options for the current environment.
        word_dict (Vocab): the word dictionary.
        for_encoder (bool): build Embeddings for the encoder (True)
            or the decoder (False)?
    """
    if for_encoder:
        # Word embedding size, 512 by default.
        embedding_dim = opt.src_word_vec_size
    else:
        embedding_dim = opt.tgt_word_vec_size

    # Index of the padding token (1 here).
    word_padding_idx = word_dict.stoi[Constants.PAD_WORD]
    # Vocabulary size.
    num_word_embeddings = len(word_dict)

    return Embeddings(word_vec_size=embedding_dim,
                      dropout=opt.dropout,
                      word_padding_idx=word_padding_idx,
                      word_vocab_size=num_word_embeddings,
                      sparse=opt.optim == "sparseadam")
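# Why `sparse=opt.optim == "sparseadam"` in both variants: torch.optim.SparseAdam
# only accepts sparse gradients, and nn.Embedding(..., sparse=True) is what
# produces them. A standalone sketch of that pairing in plain PyTorch (the
# sizes and padding_idx=1 below are illustrative, not taken from this project):
import torch
import torch.nn as nn

emb = nn.Embedding(num_embeddings=10000, embedding_dim=512,
                   padding_idx=1, sparse=True)      # emits sparse gradients
optimizer = torch.optim.SparseAdam(emb.parameters(), lr=1e-3)

tokens = torch.randint(0, 10000, (8, 20))           # (batch, seq_len) word ids
loss = emb(tokens).sum()
loss.backward()                                     # emb.weight.grad is a sparse tensor
optimizer.step()                                    # dense Adam would reject this gradient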