def __init__(self, vocab_size, embedding_size, init_scale=0.1):
    # vocab_size: size of the SkipGram model's vocabulary
    # embedding_size: dimensionality of the word vectors
    # init_scale: nominal initialization range; generally, a small
    # initialization range helps training (note that the code below
    # actually samples from [-0.5/embedding_size, 0.5/embedding_size])
    super(SkipGram, self).__init__()
    self.vocab_size = vocab_size
    self.embedding_size = embedding_size

    # Use the Embedding layer from paddle.fluid.dygraph to build a word
    # embedding parameter:
    # shape: [self.vocab_size, self.embedding_size]
    # dtype: float32
    # name: embedding_para
    # init: uniform samples from [-0.5/embedding_size, 0.5/embedding_size]
    self.embedding = Embedding(
        size=[self.vocab_size, self.embedding_size],
        dtype='float32',
        param_attr=fluid.ParamAttr(
            name='embedding_para',
            initializer=fluid.initializer.UniformInitializer(
                low=-0.5 / embedding_size, high=0.5 / embedding_size)))

    # Build a second word embedding parameter the same way:
    # shape: [self.vocab_size, self.embedding_size]
    # dtype: float32
    # name: embedding_out_para
    # init: uniform samples from [-0.5/embedding_size, 0.5/embedding_size]
    # Because its parameter name differs from the one above,
    # embedding_out_para has the same shape as embedding_para but does NOT
    # share weights with it.
    self.embedding_out = Embedding(
        size=[self.vocab_size, self.embedding_size],
        dtype='float32',
        param_attr=fluid.ParamAttr(
            name='embedding_out_para',
            initializer=fluid.initializer.UniformInitializer(
                low=-0.5 / embedding_size, high=0.5 / embedding_size)))
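# A minimal, hedged sketch of how two independent tables like the ones above
# score a (center, target) pair in skip-gram. This is not code from the
# original; the legacy Paddle 1.x fluid dygraph API and the toy sizes/ids
# are assumptions.
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Embedding

with fluid.dygraph.guard():
    emb_in = Embedding(size=[100, 8], dtype='float32')   # like embedding_para
    emb_out = Embedding(size=[100, 8], dtype='float32')  # like embedding_out_para
    center = fluid.dygraph.to_variable(np.array([3], dtype='int64'))
    target = fluid.dygraph.to_variable(np.array([7], dtype='int64'))
    # Dot product of the two lookups is the (pre-sigmoid) pair score.
    logit = fluid.layers.reduce_sum(emb_in(center) * emb_out(target), dim=-1)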
def _init_embedding(self):
    # The embedding of all words:
    '''self.word_emb = Embedding(
        size=(self.vocab_size, self.embedding_size),
        dtype='float32',
        param_attr=fluid.ParamAttr(
            name='word_emb_para',
            initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale / self.embedding_size,
                high=self.init_scale / self.embedding_size)))'''

    # The embedding of all subwords. The extra row at index
    # self.subvocab_size is a padding embedding: it is looked up for
    # padding labels and never receives gradient updates.
    self.subword_emb = Embedding(
        size=(self.subvocab_size + 1, self.embedding_size),
        dtype='float32',
        padding_idx=self.subvocab_size,
        param_attr=fluid.ParamAttr(
            name='subword_emb_para',
            initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale / self.embedding_size,
                high=self.init_scale / self.embedding_size),
            trainable=True))

    # The embedding of all words. Unlike word_emb, this is a hidden
    # parameter that will not serve as the final embedding.
    self.word_emb_hidden = Embedding(
        size=(self.vocab_size, self.embedding_size),
        dtype='float32',
        param_attr=fluid.ParamAttr(
            name='hidden_emb_para',
            initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale / self.embedding_size,
                high=self.init_scale / self.embedding_size),
            trainable=True))
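# A self-contained, hedged sketch of the padding_idx behavior relied on
# above: ids equal to the extra index look up an all-zero row and receive
# no gradient. The toy sizes/ids are assumptions; Paddle 1.x dygraph API
# assumed.
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Embedding

with fluid.dygraph.guard():
    subvocab_size, emb_size = 50, 8
    sub_emb = Embedding(size=(subvocab_size + 1, emb_size),
                        padding_idx=subvocab_size)
    # A subword sequence padded up to length 4 with the padding index:
    ids = fluid.dygraph.to_variable(
        np.array([4, 9, subvocab_size, subvocab_size], dtype='int64'))
    vecs = sub_emb(ids)  # [4, 8]; rows 2 and 3 are all zeros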
def __init__(self,
             src_vocab_size,
             src_emb_dim,
             src_max_len,
             dropout_rate,
             is_sparse=False,
             word_emb_param_name=None,
             pos_enc_param_name=None):
    super(PrepareEncoderDecoderLayer, self).__init__()
    self._src_max_len = src_max_len
    self._src_emb_dim = src_emb_dim
    self._src_vocab_size = src_vocab_size
    self._dropout_rate = dropout_rate
    self._input_emb = Embedding(
        size=[src_vocab_size, src_emb_dim],
        is_sparse=is_sparse,
        padding_idx=0,
        param_attr=fluid.ParamAttr(
            name=word_emb_param_name,
            initializer=fluid.initializer.Normal(0., src_emb_dim**-0.5)))
    if pos_enc_param_name == pos_enc_param_names[0]:
        pos_inp = pos_inp1
    else:
        pos_inp = pos_inp2
    self._pos_emb = Embedding(
        size=[self._src_max_len, src_emb_dim],
        is_sparse=is_sparse,
        param_attr=fluid.ParamAttr(
            name=pos_enc_param_name,
            initializer=fluid.initializer.NumpyArrayInitializer(pos_inp),
            trainable=False))
def __init__(self,
             name_scope,
             src_vocab_size,
             src_emb_dim,
             src_max_len,
             dropout_rate,
             word_emb_param_name=None,
             pos_enc_param_name=None):
    super(PrepareEncoderDecoderLayer, self).__init__(name_scope)
    self._src_max_len = src_max_len
    self._src_emb_dim = src_emb_dim
    self._src_vocab_size = src_vocab_size
    self._dropout_rate = dropout_rate
    self._input_emb = Embedding(
        name_scope=self.full_name(),
        size=[src_vocab_size, src_emb_dim],
        padding_idx=0,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Normal(0., src_emb_dim**-0.5)))
    pos_inp = self.position_encoding_init(self._src_max_len + 1,
                                          self._src_emb_dim)
    # The position table has src_max_len + 1 rows (positions
    # 0..src_max_len), so the embedding shape must match it or
    # NumpyArrayInitializer fails on the shape mismatch.
    self._pos_emb = Embedding(
        name_scope=self.full_name(),
        size=[self._src_max_len + 1, src_emb_dim],
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.NumpyArrayInitializer(pos_inp),
            trainable=False))
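# The layer above delegates to self.position_encoding_init. A hedged,
# standalone sketch of the usual sinusoidal table such a helper returns
# (the formula follows Vaswani et al., 2017; the original implementation
# may differ, and an even embedding dimension is assumed):
import numpy as np

def position_encoding_init(n_position, d_pos_vec):
    pos = np.arange(n_position)[:, None].astype('float64')       # [n, 1]
    div = np.power(10000.0, np.arange(0, d_pos_vec, 2) / d_pos_vec)
    table = np.zeros((n_position, d_pos_vec))
    table[:, 0::2] = np.sin(pos / div)  # even columns
    table[:, 1::2] = np.cos(pos / div)  # odd columns
    return table.astype('float32')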
def __init__(self, use_poster, use_mov_title, use_mov_cat, use_age_job):
    super(MovModel, self).__init__()
    # Store the passed-in boolean flags on the model.
    self.use_mov_poster = use_poster
    self.use_mov_title = use_mov_title
    self.use_usr_age_job = use_age_job
    self.use_mov_cat = use_mov_cat

    # Load the dataset info and build the training/validation iterators.
    Dataset = MovieLen(self.use_mov_poster)
    self.Dataset = Dataset
    self.trainset = self.Dataset.train_dataset
    self.valset = self.Dataset.valid_dataset
    self.train_loader = self.Dataset.load_data(dataset=self.trainset,
                                               mode='train')
    self.valid_loader = self.Dataset.load_data(dataset=self.valset,
                                               mode='valid')

    """ define network layers for embedding movie info """
    # Embed the movie ID, followed by a Linear layer.
    MOV_DICT_SIZE = Dataset.max_mov_id + 1
    self.mov_emb = Embedding([MOV_DICT_SIZE, 32])
    self.mov_fc = Linear(32, 32)
    # Embed the movie categories.
    CATEGORY_DICT_SIZE = len(Dataset.movie_cat) + 1
    self.mov_cat_emb = Embedding([CATEGORY_DICT_SIZE, 32], is_sparse=False)
    self.mov_cat_fc = Linear(32, 32)
    # Embed the movie title.
    MOV_TITLE_DICT_SIZE = len(Dataset.movie_title) + 1
    self.mov_title_emb = Embedding([MOV_TITLE_DICT_SIZE, 32],
                                   is_sparse=False)
    self.mov_title_conv = Conv2D(1, 1, filter_size=(3, 1), stride=(2, 1),
                                 padding=0, act='relu')
    self.mov_title_conv2 = Conv2D(1, 1, filter_size=(3, 1), stride=1,
                                  padding=0, act='relu')
    # A Linear layer that fuses the movie features.
    self.mov_concat_embed = Linear(96, 200, act='tanh')
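# A hedged, standalone sketch of how title embeddings are typically fed to
# a Conv2D pair like the one above: insert a channel axis so the
# [batch, seq, 32] lookup becomes a single-channel 4-D tensor. The sizes
# and ids are assumptions, not values from the original.
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Embedding, Conv2D

with fluid.dygraph.guard():
    title_emb = Embedding([1000, 32])
    conv = Conv2D(1, 1, filter_size=(3, 1), stride=(2, 1), padding=0,
                  act='relu')
    ids = fluid.dygraph.to_variable(
        np.random.randint(0, 1000, (2, 15)).astype('int64'))
    x = title_emb(ids)                            # [2, 15, 32]
    x = fluid.layers.reshape(x, [-1, 1, 15, 32])  # add channel dim
    y = conv(x)                                   # [2, 1, 7, 32]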
def __init__(self, vocab_size, emb_dim, bos_idx=0):
    super(Embedder, self).__init__()
    self.word_embedder = Embedding(
        size=[vocab_size, emb_dim],
        padding_idx=bos_idx,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Normal(0., emb_dim**-0.5)))
def __init__(self, trg_vocab_size, max_length, n_layer, n_head, d_key,
             d_value, d_model, d_inner_hid, prepostprocess_dropout,
             attention_dropout, relu_dropout, preprocess_cmd,
             postprocess_cmd, share_input_output_embed, word_embedder):
    super(WrapDecoder, self).__init__()
    self.emb_dropout = prepostprocess_dropout
    self.emb_dim = d_model
    self.word_embedder = word_embedder
    self.pos_encoder = Embedding(
        size=[max_length, self.emb_dim],
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.NumpyArrayInitializer(
                position_encoding_init(max_length, self.emb_dim)),
            trainable=False))
    self.decoder = Decoder(n_layer, n_head, d_key, d_value, d_model,
                           d_inner_hid, prepostprocess_dropout,
                           attention_dropout, relu_dropout, preprocess_cmd,
                           postprocess_cmd)
    if share_input_output_embed:
        # Tie the output projection to the input embedding weight.
        self.linear = lambda x: layers.matmul(
            x=x,
            y=self.word_embedder.word_embedder.weight,
            transpose_y=True)
    else:
        self.linear = Linear(input_dim=d_model,
                             output_dim=trg_vocab_size,
                             bias_attr=False)
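# A hedged, standalone sketch of the weight-tying branch above: when input
# and output embeddings are shared, vocabulary logits come from a matmul
# with the transposed embedding weight instead of a separate Linear layer.
# The toy sizes/ids are assumptions; Paddle 1.x dygraph API assumed.
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Embedding

with fluid.dygraph.guard():
    vocab, d_model = 100, 16
    word_emb = Embedding(size=[vocab, d_model])
    ids = fluid.dygraph.to_variable(np.array([[1, 2, 3]], dtype='int64'))
    dec_out = word_emb(ids)  # stand-in for decoder output, [1, 3, 16]
    logits = fluid.layers.matmul(dec_out, word_emb.weight, transpose_y=True)
    # logits: [1, 3, 100], one score per target-vocabulary word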
def ops(self):
    """ operation """
    emb = Embedding(size=[self.dict_size, self.emb_dim],
                    is_sparse=True,
                    padding_idx=self.padding_idx,
                    param_attr=attr.ParamAttr(
                        name=self.name,
                        initializer=fluid.initializer.Xavier()))
    return emb
def embedding_layer(self, input, zero_pad=True, scale=True):
    """ embedding layer """
    emb = Embedding(
        size=[self.vocab_size, self.emb_size],
        padding_idx=(0 if zero_pad else None),
        param_attr=fluid.ParamAttr(
            name="word_embedding",
            initializer=fluid.initializer.Xavier()))
    emb = emb(input)
    if scale:
        emb = emb * (self.emb_size**0.5)
    return emb
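# A hedged numeric illustration of the scale branch above: multiplying the
# lookup by sqrt(emb_size) is the usual Transformer convention to keep
# token embeddings comparable in magnitude to the positional encodings.
# The toy sizes/ids are assumptions; Paddle 1.x dygraph API assumed.
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Embedding

with fluid.dygraph.guard():
    emb_size = 64
    emb = Embedding(size=[100, emb_size], padding_idx=0)
    ids = fluid.dygraph.to_variable(np.array([[5, 0, 7]], dtype='int64'))
    out = emb(ids) * (emb_size**0.5)  # every vector scaled by 8.0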
def __init__(self,
             name_scope,
             hidden_dim,
             num_token_embeddings,
             num_pos_embeddings,
             num_type_embeddings,
             num_turn_embeddings,
             padding_idx=None,
             dropout=0.1,
             pos_trainable=False):
    super().__init__(name_scope)
    self.token_embedding = Embedding(
        name_scope=self.full_name(),
        size=[num_token_embeddings, hidden_dim])
    self.pos_embedding = Embedding(
        name_scope=self.full_name(),
        size=[num_pos_embeddings, hidden_dim],
        param_attr=fluid.ParamAttr(trainable=pos_trainable))
    self.type_embedding = Embedding(
        name_scope=self.full_name(),
        size=[num_type_embeddings, hidden_dim])
    self.turn_embedding = Embedding(
        name_scope=self.full_name(),
        size=[num_turn_embeddings, hidden_dim])
    self.dropout = dropout
def ops(self):
    """ operation """
    # TODO(huihuangzheng): The original code set is_sparse=True, but that
    # causes a crash in dy2stat, so use False here. Set it back to True
    # after the bug is fixed.
    emb = Embedding(size=[self.dict_size, self.emb_dim],
                    is_sparse=False,
                    padding_idx=self.padding_idx,
                    param_attr=attr.ParamAttr(
                        name=self.name,
                        initializer=fluid.initializer.Xavier()))
    return emb
def __init__(self,
             vocab_size,
             embed_dim,
             hidden_size,
             num_layers,
             dropout_prob=0.,
             init_scale=0.1):
    super(Encoder, self).__init__()
    self.embedder = Embedding(
        size=[vocab_size, embed_dim],
        param_attr=ParamAttr(initializer=UniformInitializer(
            low=-init_scale, high=init_scale)))
    self.stack_lstm = RNN(EncoderCell(num_layers, embed_dim, hidden_size,
                                      dropout_prob, init_scale),
                          is_reverse=False,
                          time_major=False)
def __init__(self, src_vocab_size, max_length, n_layer, n_head, d_key,
             d_value, d_model, d_inner_hid, prepostprocess_dropout,
             attention_dropout, relu_dropout, preprocess_cmd,
             postprocess_cmd, word_embedder):
    super(WrapEncoder, self).__init__()
    self.emb_dropout = prepostprocess_dropout
    self.emb_dim = d_model
    self.word_embedder = word_embedder
    self.pos_encoder = Embedding(
        size=[max_length, self.emb_dim],
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.NumpyArrayInitializer(
                position_encoding_init(max_length, self.emb_dim)),
            trainable=False))
    self.encoder = Encoder(n_layer, n_head, d_key, d_value, d_model,
                           d_inner_hid, prepostprocess_dropout,
                           attention_dropout, relu_dropout, preprocess_cmd,
                           postprocess_cmd)
def model_init(self,
               vocab_size,
               embed_dim,
               hidden_size,
               bos_id=0,
               eos_id=1,
               beam_size=4,
               max_step_num=20):
    embedder = Embedding(size=[vocab_size, embed_dim])
    output_layer = Linear(hidden_size, vocab_size)
    cell = BasicLSTMCell(embed_dim, hidden_size)
    decoder = BeamSearchDecoder(cell,
                                start_token=bos_id,
                                end_token=eos_id,
                                beam_size=beam_size,
                                embedding_fn=embedder,
                                output_fn=output_layer)
    self.beam_search_decoder = DynamicDecode(decoder,
                                             max_step_num=max_step_num,
                                             is_test=True)
def __init__(self,
             vocab_size,
             embed_dim,
             hidden_size,
             num_layers,
             dropout_prob=0.,
             init_scale=0.1):
    super(Decoder, self).__init__()
    self.embedder = Embedding(
        size=[vocab_size, embed_dim],
        param_attr=ParamAttr(initializer=UniformInitializer(
            low=-init_scale, high=init_scale)))
    self.lstm_attention = RNN(DecoderCell(num_layers, embed_dim,
                                          hidden_size, dropout_prob,
                                          init_scale),
                              is_reverse=False,
                              time_major=False)
    self.output_layer = Linear(
        hidden_size,
        vocab_size,
        param_attr=ParamAttr(initializer=UniformInitializer(
            low=-init_scale, high=init_scale)),
        bias_attr=False)
def embeder_init(self, size):
    Layer.__init__(self)
    self.embedder = Embedding(size)
def __init__(self, config, return_pooled_out=True, use_fp16=False):
    super(BertEncoder, self).__init__()
    self.config = config
    self._emb_size = config['hidden_size']
    self._n_layer = config['num_hidden_layers']
    self._n_head = config['num_attention_heads']
    self._voc_size = config['vocab_size']
    self._max_position_seq_len = config['max_position_embeddings']
    self._sent_types = config['type_vocab_size']
    self._hidden_act = config['hidden_act']
    self._prepostprocess_dropout = config['hidden_dropout_prob']
    self._attention_dropout = config['attention_probs_dropout_prob']
    self.return_pooled_out = return_pooled_out

    self._word_emb_name = "word_embedding"
    self._pos_emb_name = "pos_embedding"
    self._sent_emb_name = "sent_embedding"
    self._dtype = "float16" if use_fp16 else "float32"

    self._param_initializer = fluid.initializer.TruncatedNormal(
        scale=config['initializer_range'])

    self._src_emb = Embedding(
        size=[self._voc_size, self._emb_size],
        param_attr=fluid.ParamAttr(
            name=self._word_emb_name, initializer=self._param_initializer),
        dtype=self._dtype)
    self._pos_emb = Embedding(
        size=[self._max_position_seq_len, self._emb_size],
        param_attr=fluid.ParamAttr(
            name=self._pos_emb_name, initializer=self._param_initializer),
        dtype=self._dtype)
    self._sent_emb = Embedding(
        size=[self._sent_types, self._emb_size],
        param_attr=fluid.ParamAttr(
            name=self._sent_emb_name, initializer=self._param_initializer),
        dtype=self._dtype)

    self.pooled_fc = Linear(
        input_dim=self._emb_size,
        output_dim=self._emb_size,
        param_attr=fluid.ParamAttr(
            name="pooled_fc.w_0", initializer=self._param_initializer),
        bias_attr="pooled_fc.b_0",
        act="tanh")

    self.pre_process_layer = PrePostProcessLayer(
        "nd", self._emb_size, self._prepostprocess_dropout, None)

    self._encoder = TransformerEncoder(
        n_layer=self._n_layer,
        n_head=self._n_head,
        d_key=self._emb_size // self._n_head,
        d_value=self._emb_size // self._n_head,
        d_model=self._emb_size,
        d_inner_hid=self._emb_size * 4,
        prepostprocess_dropout=self._prepostprocess_dropout,
        attention_dropout=self._attention_dropout,
        relu_dropout=0,
        preprocess_cmd="",
        postprocess_cmd="dan",
        ffn_fc1_act=self._hidden_act)
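# A hedged sketch of how the three tables above are usually combined (the
# original forward() is not shown; this mirrors standard BERT, and the toy
# sizes/ids are assumptions): word, position, and sentence lookups are
# summed element-wise before the pre-process layer and the encoder.
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Embedding

with fluid.dygraph.guard():
    voc, max_pos, sent_types, d = 100, 32, 2, 16
    word_emb = Embedding(size=[voc, d])
    pos_emb = Embedding(size=[max_pos, d])
    sent_emb = Embedding(size=[sent_types, d])
    src = fluid.dygraph.to_variable(np.array([[5, 6, 7]], dtype='int64'))
    pos = fluid.dygraph.to_variable(np.array([[0, 1, 2]], dtype='int64'))
    sent = fluid.dygraph.to_variable(np.array([[0, 0, 1]], dtype='int64'))
    emb_out = word_emb(src) + pos_emb(pos) + sent_emb(sent)  # [1, 3, 16]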
def __init__(self, args, length=None):
    super(LexNet, self).__init__()
    """
    Define the lexical analysis network structure.
    word: stores the input of the model
    for_infer: a boolean value indicating whether the created model is for
        training or for prediction
    return: the prediction (in both cases)
    """
    self.word_emb_dim = args.word_emb_dim
    self.vocab_size = args.vocab_size
    self.num_labels = args.num_labels
    self.grnn_hidden_dim = args.grnn_hidden_dim
    self.emb_lr = args.emb_learning_rate if 'emb_learning_rate' in dir(
        args) else 1.0
    self.crf_lr = args.crf_learning_rate if 'crf_learning_rate' in dir(
        args) else 1.0
    self.bigru_num = args.bigru_num
    self.init_bound = 0.1

    self.word_embedding = Embedding(
        size=[self.vocab_size, self.word_emb_dim],
        dtype='float32',
        param_attr=fluid.ParamAttr(
            learning_rate=self.emb_lr,
            name="word_emb",
            initializer=fluid.initializer.Uniform(
                low=-self.init_bound, high=self.init_bound)))

    h_0 = np.zeros((args.batch_size, self.grnn_hidden_dim), dtype="float32")
    h_0 = to_variable(h_0)

    self.bigru_units = []
    for i in range(self.bigru_num):
        if i == 0:
            self.bigru_units.append(
                self.add_sublayer(
                    "bigru_units%d" % i,
                    BiGRU(self.grnn_hidden_dim,
                          self.grnn_hidden_dim,
                          self.init_bound,
                          h_0=h_0)))
        else:
            self.bigru_units.append(
                self.add_sublayer(
                    "bigru_units%d" % i,
                    BiGRU(self.grnn_hidden_dim * 2,
                          self.grnn_hidden_dim,
                          self.init_bound,
                          h_0=h_0)))

    self.fc = Linear(
        input_dim=self.grnn_hidden_dim * 2,
        output_dim=self.num_labels,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(
                low=-self.init_bound, high=self.init_bound),
            regularizer=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=1e-4)))

    self.linear_chain_crf = LinearChainCRF(
        param_attr=fluid.ParamAttr(
            name='linear_chain_crfw', learning_rate=self.crf_lr),
        size=self.num_labels)

    self.crf_decoding = CRFDecoding(
        param_attr=fluid.ParamAttr(
            name='crfw', learning_rate=self.crf_lr),
        size=self.num_labels)
    # Share the transition weights between CRF training and decoding.
    self.crf_decoding.weight = self.linear_chain_crf.weight
def __init__(self,
             emb_size=128,
             hidden_size=768,
             n_layer=12,
             voc_size=30522,
             max_position_seq_len=512,
             sent_types=2,
             return_pooled_out=True,
             initializer_range=1.0,
             conv_type="conv_bn",
             search_layer=False,
             use_fp16=False,
             use_fixed_gumbel=False,
             gumbel_alphas=None):
    super(BertModelLayer, self).__init__()
    self._emb_size = emb_size
    self._hidden_size = hidden_size
    self._n_layer = n_layer
    self._voc_size = voc_size
    self._max_position_seq_len = max_position_seq_len
    self._sent_types = sent_types
    self.return_pooled_out = return_pooled_out
    self.use_fixed_gumbel = use_fixed_gumbel

    self._word_emb_name = "s_word_embedding"
    self._pos_emb_name = "s_pos_embedding"
    self._sent_emb_name = "s_sent_embedding"
    self._dtype = "float16" if use_fp16 else "float32"
    self._conv_type = conv_type
    self._search_layer = search_layer
    self._param_initializer = fluid.initializer.TruncatedNormal(
        scale=initializer_range)

    self._src_emb = Embedding(
        size=[self._voc_size, self._emb_size],
        param_attr=fluid.ParamAttr(
            name=self._word_emb_name, initializer=self._param_initializer),
        dtype=self._dtype)
    self._pos_emb = Embedding(
        size=[self._max_position_seq_len, self._emb_size],
        param_attr=fluid.ParamAttr(
            name=self._pos_emb_name, initializer=self._param_initializer),
        dtype=self._dtype)
    self._sent_emb = Embedding(
        size=[self._sent_types, self._emb_size],
        param_attr=fluid.ParamAttr(
            name=self._sent_emb_name, initializer=self._param_initializer),
        dtype=self._dtype)

    # Embedding factorization: project emb_size up to hidden_size.
    self._emb_fac = Linear(
        input_dim=self._emb_size,
        output_dim=self._hidden_size,
        param_attr=fluid.ParamAttr(name="s_emb_factorization"))

    self._encoder = EncoderLayer(
        n_layer=self._n_layer,
        hidden_size=self._hidden_size,
        search_layer=self._search_layer,
        use_fixed_gumbel=self.use_fixed_gumbel,
        gumbel_alphas=gumbel_alphas)
def run_main(self, np_arr, place):
    with guard(place):
        embedding = Embedding(size=[10, 10])
        var = to_variable(np_arr)
        self.assertTrue(np.array_equal(np_arr, var.numpy()))
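# A hedged extension of the test above: a lookup through the same layer
# returns one 10-dim row per id (the shapes follow the Embedding contract;
# the concrete ids are illustrative).
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Embedding, guard, to_variable

with guard(fluid.CPUPlace()):
    embedding = Embedding(size=[10, 10])
    ids = to_variable(np.array([0, 3, 9], dtype='int64'))
    out = embedding(ids)
    assert out.shape == [3, 10]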
def __init__(self, name_scope, use_poster, use_mov_title, use_mov_cat,
             use_age_job):
    super(Model, self).__init__(name_scope)
    name = self.full_name()
    # Store the passed-in name info and boolean flags on the model.
    self.use_mov_poster = use_poster
    self.use_mov_title = use_mov_title
    self.use_usr_age_job = use_age_job
    self.use_mov_cat = use_mov_cat

    # Load the dataset info and build the training/validation iterators.
    Dataset = MovieLen(self.use_mov_poster)
    self.Dataset = Dataset
    self.trainset = self.Dataset.train_dataset
    self.valset = self.Dataset.valid_dataset
    self.train_loader = self.Dataset.load_data(dataset=self.trainset,
                                               mode='train')
    self.valid_loader = self.Dataset.load_data(dataset=self.valset,
                                               mode='valid')

    """ define network layers for embedding usr info """
    USR_ID_NUM = Dataset.max_usr_id + 1
    # Embed the user ID, followed by an FC layer.
    self.usr_emb = Embedding([USR_ID_NUM, 32], is_sparse=False)
    self.usr_fc = Linear(32, 32)
    # Embed the user gender, followed by an FC layer.
    USR_GENDER_DICT_SIZE = 2
    self.usr_gender_emb = Embedding([USR_GENDER_DICT_SIZE, 16])
    self.usr_gender_fc = Linear(16, 16)
    # Embed the user age, followed by an FC layer.
    USR_AGE_DICT_SIZE = Dataset.max_usr_age + 1
    self.usr_age_emb = Embedding([USR_AGE_DICT_SIZE, 16])
    self.usr_age_fc = Linear(16, 16)
    # Embed the user job, followed by an FC layer.
    USR_JOB_DICT_SIZE = Dataset.max_usr_job + 1
    self.usr_job_emb = Embedding([USR_JOB_DICT_SIZE, 16])
    self.usr_job_fc = Linear(16, 16)
    # An FC layer that fuses the user features.
    self.usr_combined = Linear(80, 200, act='tanh')

    """ define network layers for embedding mov info """
    # Embed the movie ID, followed by an FC layer.
    MOV_DICT_SIZE = Dataset.max_mov_id + 1
    self.mov_emb = Embedding([MOV_DICT_SIZE, 32])
    self.mov_fc = Linear(32, 32)
    # Embed the movie categories.
    CATEGORY_DICT_SIZE = len(Dataset.movie_cat) + 1
    self.mov_cat_emb = Embedding([CATEGORY_DICT_SIZE, 32], is_sparse=False)
    self.mov_cat_fc = Linear(32, 32)
    # Embed the movie title.
    MOV_TITLE_DICT_SIZE = len(Dataset.movie_title) + 1
    self.mov_title_emb = Embedding([MOV_TITLE_DICT_SIZE, 32],
                                   is_sparse=False)
    self.mov_title_conv = Conv2D(1, 1, filter_size=(3, 1), stride=(2, 1),
                                 padding=0, act='relu')
    self.mov_title_conv2 = Conv2D(1, 1, filter_size=(3, 1), stride=1,
                                  padding=0, act='relu')
    # An FC layer that fuses the movie features.
    self.mov_concat_embed = Linear(96, 200, act='tanh')