def build_embeddings(opt, word_dict, for_encoder='src'):
    """
    Build an Embeddings instance.

    Args:
        opt: the option in current environment.
        word_dict(Vocab): words dictionary.
        for_encoder(str): which embeddings to build — 'src', 'tgt', or
            'structure'.

    Returns:
        Embeddings: embeddings configured for the requested side.

    Raises:
        ValueError: if ``for_encoder`` is not one of the accepted values.
    """
    if for_encoder == 'src':
        embedding_dim = opt.src_word_vec_size  # 512 by default
    elif for_encoder == 'tgt':
        embedding_dim = opt.tgt_word_vec_size
    elif for_encoder == 'structure':
        # Structure embeddings use a fixed, smaller dimension.
        embedding_dim = 64
    else:
        # Previously an unknown value fell through and raised a confusing
        # NameError (embedding_dim unbound); fail loudly instead.
        raise ValueError(
            "for_encoder must be 'src', 'tgt' or 'structure', got %r"
            % (for_encoder,))

    # Index of the padding token and total vocabulary size.
    word_padding_idx = word_dict.stoi[Constants.PAD_WORD]
    num_word_embeddings = len(word_dict)

    # Only src/tgt embeddings use positional encoding; structure does not.
    position_encoding = (opt.position_encoding
                         if for_encoder in ('src', 'tgt') else False)

    return Embeddings(word_vec_size=embedding_dim,
                      position_encoding=position_encoding,
                      dropout=opt.dropout,
                      word_padding_idx=word_padding_idx,
                      word_vocab_size=num_word_embeddings,
                      sparse=opt.optim == "sparseadam")
# Example #2 (0 votes)
def build_embeddings(opt, word_dict, for_encoder=True):
  """
  Build an Embeddings instance.

  Args:
      opt: the option in current environment.
      word_dict(Vocab): words dictionary.
      for_encoder(bool): build Embeddings for encoder (True) or
          decoder (False)?

  Returns:
      Embeddings: word embeddings for the chosen side.
  """
  if for_encoder:
    # Encoder side uses the source word-vector size (512 by default).
    embedding_dim = opt.src_word_vec_size
  else:
    # Decoder side uses the target word-vector size.
    embedding_dim = opt.tgt_word_vec_size

  # Index of the padding token (typically 1) in the vocabulary.
  word_padding_idx = word_dict.stoi[Constants.PAD_WORD]
  # Vocabulary size.
  num_word_embeddings = len(word_dict)

  return Embeddings(word_vec_size=embedding_dim,
                    dropout=opt.dropout,
                    word_padding_idx=word_padding_idx,
                    word_vocab_size=num_word_embeddings,
                    sparse=opt.optim == "sparseadam")