def __init__(self, name_scope, d_key, d_value, d_model, n_head=1,
             dropout_rate=0., cache=None, gather_idx=None, static_kv=False):
    """Set up the projection layers of a multi-head attention sub-layer.

    Builds bias-free FC projections for queries, keys and values (each
    sized ``per-head dim * n_head``) plus the final projection back to
    ``d_model``.

    NOTE(review): ``cache``, ``gather_idx`` and ``static_kv`` are accepted
    but not stored here — presumably consumed by the forward pass; confirm.
    """
    super(MultiHeadAttentionLayer, self).__init__(name_scope)
    self._n_head = n_head
    self._d_key = d_key
    self._d_value = d_value
    self._d_model = d_model
    self._dropout_rate = dropout_rate
    # Q/K/V projections; num_flatten_dims=2 keeps (batch, seq) leading dims.
    self._q_fc = FC(name_scope=self.full_name(), size=d_key * n_head,
                    bias_attr=False, num_flatten_dims=2)
    self._k_fc = FC(name_scope=self.full_name(), size=d_key * n_head,
                    bias_attr=False, num_flatten_dims=2)
    self._v_fc = FC(name_scope=self.full_name(), size=d_value * n_head,
                    bias_attr=False, num_flatten_dims=2)
    # Output projection that maps the concatenated heads back to d_model.
    self._proj_fc = FC(name_scope=self.full_name(), size=self._d_model,
                       bias_attr=False, num_flatten_dims=2)
def __init__(self, name_scope, d_inner_hid, d_hid, dropout_rate):
    """Position-wise feed-forward sub-layer: expand with ReLU, project back.

    ``num_flatten_dims=2`` keeps the (batch, seq) leading dims intact.
    """
    super(PositionwiseFeedForwardLayer, self).__init__(name_scope)
    # Expansion to the inner hidden size with ReLU activation.
    self._i2h = FC(name_scope=self.full_name(), size=d_inner_hid,
                   num_flatten_dims=2, act="relu")
    # Projection back down to the model hidden size (no activation).
    self._h2o = FC(name_scope=self.full_name(), size=d_hid,
                   num_flatten_dims=2)
    self._dropout_rate = dropout_rate
def __init__(self, name_scope, hidden_dim, inner_dim, dropout):
    """Feed-forward block: hidden_dim -> inner_dim (GELU) -> hidden_dim.

    Args:
        name_scope: name scope used for parameter naming.
        hidden_dim: input/output feature size.
        inner_dim: expanded inner feature size.
        dropout: dropout probability (presumably applied in forward — confirm).
    """
    super().__init__(name_scope)
    self.hidden_dim = hidden_dim
    self.inner_dim = inner_dim
    # Expansion with GELU; num_flatten_dims=2 keeps (batch, seq) dims.
    self.linear_hidden = FC(name_scope=self.full_name(), size=inner_dim,
                            num_flatten_dims=2, act="gelu")
    # Projection back to the model width (no activation).
    self.linear_out = FC(name_scope=self.full_name(), size=hidden_dim,
                         num_flatten_dims=2)
    self.dropout = dropout
    # Fixed: dropped the trailing bare ``return`` — a no-op in __init__.
def __init__(self, name_scope, hidden_dim, num_heads, dropout):
    """Multi-head attention block with a fused QKV projection.

    Args:
        name_scope: name scope used for parameter naming.
        hidden_dim: model width; must be divisible by ``num_heads``.
        num_heads: number of attention heads.
        dropout: dropout probability (presumably applied in forward — confirm).

    Raises:
        ValueError: if ``hidden_dim`` is not divisible by ``num_heads``.
    """
    # Fixed: explicit raise instead of ``assert`` — asserts are stripped
    # when Python runs with -O, silently skipping the validation.
    if hidden_dim % num_heads != 0:
        raise ValueError(
            f"hidden_dim ({hidden_dim}) must be divisible by "
            f"num_heads ({num_heads})")
    super().__init__(name_scope)
    self.hidden_dim = hidden_dim
    self.num_heads = num_heads
    self.head_dim = hidden_dim // num_heads
    self.scale = self.head_dim**-0.5  # 1/sqrt(d_head) attention scaling
    # Single projection producing Q, K and V concatenated (3 * hidden_dim).
    self.linear_qkv = FC(name_scope=self.full_name(), size=hidden_dim * 3,
                         num_flatten_dims=2)
    self.linear_out = FC(name_scope=self.full_name(), size=hidden_dim,
                         num_flatten_dims=2)
    self.dropout = dropout
    # Fixed: dropped the trailing bare ``return`` — a no-op in __init__.
def __init__(self, name_scope, trg_vocab_size, max_length, n_layer, n_head,
             d_key, d_value, d_model, d_inner_hid, prepostprocess_dropout,
             attention_dropout, relu_dropout, preprocess_cmd,
             postprocess_cmd, weight_sharing, caches=None, gather_idx=None):
    """
    The wrapper assembles together all needed layers for the decoder.
    (Fixed: the original docstring said "encoder" — copy/paste slip in a
    decoder wrapper.)
    """
    super(WrapDecoderLayer, self).__init__(name_scope)
    # Target-side embedding + positional-encoding preparation.
    # NOTE: relies on module-level ``word_emb_param_names`` /
    # ``pos_enc_param_names``; index 1 is presumably the target side — confirm.
    self._prepare_decoder_layer = PrepareEncoderDecoderLayer(
        self.full_name(), trg_vocab_size, d_model, max_length,
        prepostprocess_dropout,
        word_emb_param_name=word_emb_param_names[1],
        pos_enc_param_name=pos_enc_param_names[1])
    self._decoder_layer = DecoderLayer(
        self.full_name(), n_layer, n_head, d_key, d_value, d_model,
        d_inner_hid, prepostprocess_dropout, attention_dropout,
        relu_dropout, preprocess_cmd, postprocess_cmd,
        caches=caches, gather_idx=gather_idx)
    self._weight_sharing = weight_sharing
    # Without weight sharing, a dedicated bias-free projection to the target
    # vocabulary is created (with sharing, presumably the embedding matrix
    # is reused in forward — confirm).
    if not weight_sharing:
        self._fc = FC(self.full_name(), size=trg_vocab_size, bias_attr=False)
def __init__(self, name_scope, hparams, generator, dtype="float32"):
    """Build the model from ``hparams``: embedder, transformer stack,
    optional latent/discriminator/predictor heads, and the Adam optimizer.

    Args:
        name_scope: name scope used for parameter naming.
        hparams: hyper-parameter namespace; values are copied onto the
            instance below (read-only here).
        generator: generation/decoding helper, stored for later use.
        dtype: parameter dtype tag (default "float32").
    """
    super().__init__(name_scope, hparams)
    self.generator = generator
    # --- copy hyper-parameters onto the instance -------------------------
    self.num_token_embeddings = hparams.num_token_embeddings
    self.num_pos_embeddings = hparams.num_pos_embeddings
    self.num_type_embeddings = hparams.num_type_embeddings
    self.num_turn_embeddings = hparams.num_turn_embeddings
    self.num_latent = hparams.num_latent
    self.tau = hparams.tau
    self.with_bow = hparams.with_bow
    self.hidden_dim = hparams.hidden_dim
    self.num_heads = hparams.num_heads
    self.num_layers = hparams.num_layers
    self.padding_idx = hparams.padding_idx
    self.dropout = hparams.dropout
    self.embed_dropout = hparams.embed_dropout
    self.attn_dropout = hparams.attn_dropout
    self.ff_dropout = hparams.ff_dropout
    self.use_discriminator = hparams.use_discriminator
    self.weight_sharing = hparams.weight_sharing
    self.pos_trainable = hparams.pos_trainable
    self.two_layer_predictor = hparams.two_layer_predictor
    self.bidirectional_context = hparams.bidirectional_context
    self.label_smooth = hparams.label_smooth
    self.initializer_range = hparams.initializer_range
    # Token/position/type/turn embedding lookup with embedding dropout.
    self.embedder = Embedder(self.full_name(),
                             self.hidden_dim,
                             self.num_token_embeddings,
                             self.num_pos_embeddings,
                             self.num_type_embeddings,
                             self.num_turn_embeddings,
                             padding_idx=self.padding_idx,
                             dropout=self.embed_dropout,
                             pos_trainable=self.pos_trainable)
    # LayerNorm over axes >= 2; L2Decay(0.0) on scale/bias exempts them
    # from the optimizer's global weight-decay regularizer set up below.
    self.embed_layer_norm = LayerNorm(self.full_name(),
                                      begin_norm_axis=2,
                                      epsilon=1e-12,
                                      param_attr=fluid.ParamAttr(
                                          regularizer=fluid.regularizer.L2Decay(0.0)),
                                      bias_attr=fluid.ParamAttr(
                                          regularizer=fluid.regularizer.L2Decay(0.0)))
    # Transformer stack; each block is also registered via add_sublayer so
    # its parameters are tracked by the dygraph Layer machinery.
    self.layers = []
    for i in range(hparams.num_layers):
        layer = TransformerBlock(self.full_name(),
                                 self.hidden_dim,
                                 self.num_heads,
                                 self.dropout,
                                 self.attn_dropout,
                                 self.ff_dropout)
        self.layers.append(layer)
        self.add_sublayer(f"layer_{i}", layer)
    if self.num_latent > 0:
        # Posterior network over the discrete latent variable.
        self.post_network = FC(name_scope=self.full_name() + ".post_network",
                               size=self.num_latent,
                               bias_attr=False)
        if self.use_discriminator:
            self.dis_ratio = hparams.dis_ratio
            # Binary discriminator head (sigmoid output).
            self.discriminator = FC(name_scope=self.full_name() + ".discriminator",
                                    size=1,
                                    act="sigmoid")
    if self.two_layer_predictor:
        # Extra hidden layer before the vocabulary projection.
        self.pre_predictor = FC(name_scope=self.full_name() + ".pre_predictor",
                                size=self.hidden_dim,
                                num_flatten_dims=2,
                                act="gelu")
        if self.num_latent > 0 and self.with_bow:
            self.pre_bow_predictor = FC(name_scope=self.full_name() + ".pre_bow_predictor",
                                        size=self.hidden_dim,
                                        act="gelu")
    if not self.weight_sharing:
        # Dedicated output projection to the vocabulary (with sharing, the
        # token embedding is presumably reused — confirm in forward).
        self.predictor = FC(name_scope=self.full_name() + ".predictor",
                            size=self.num_token_embeddings,
                            num_flatten_dims=2,
                            bias_attr=False)
    if self.num_latent > 0 and self.with_bow:
        # Bag-of-words auxiliary prediction head.
        self.bow_predictor = FC(name_scope=self.full_name() + ".bow_predictor",
                                size=self.num_token_embeddings,
                                bias_attr=False)
    # --- optimization setup ----------------------------------------------
    self.max_grad_norm = hparams.max_grad_norm
    if self.max_grad_norm is not None:
        self.grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(hparams.max_grad_norm)
    else:
        self.grad_clip = None
    self.weight_decay = hparams.weight_decay
    self.optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=hparams.lr,
        regularization=fluid.regularizer.L2Decay(self.weight_decay))
    self._dtype = dtype

    # DataDistributed
    # Hooks around backward(), left unset until distributed training wires
    # them in.
    self.before_backward_fn = None
    self.after_backward_fn = None
    return
def __init__(self, name_scope):
    """Linear regressor: one fully-connected layer mapping the input
    features to a single scalar prediction."""
    super(Regressor, self).__init__(name_scope)
    scope = self.full_name()
    # Single FC layer, output dimension 1, act=None (pure linear output).
    self.fc = FC(scope, size=1, act=None)
def __init__(self, name_scope, use_poster, use_mov_title, use_mov_cat, use_age_job):
    """Build the recommendation model: loads the MovieLens dataset and
    creates embedding/FC towers for user features and movie features.

    Args:
        name_scope: name scope used for parameter naming.
        use_poster: whether to use movie poster features.
        use_mov_title: whether to use movie title features.
        use_mov_cat: whether to use movie category features.
        use_age_job: whether to use user age/job features.
    """
    super(Model, self).__init__(name_scope)
    name = self.full_name()
    # Store the passed-in boolean feature switches on the model.
    self.use_mov_poster = use_poster
    self.use_mov_title = use_mov_title
    self.use_usr_age_job = use_age_job
    self.use_mov_cat = use_mov_cat
    # Load the dataset and build the train/valid data iterators.
    # NOTE(review): constructing the model performs dataset I/O here.
    Dataset = MovieLen(self.use_mov_poster)
    self.Dataset = Dataset
    self.trainset = self.Dataset.train_dataset
    self.valset = self.Dataset.valid_dataset
    self.train_loader = self.Dataset.load_data(dataset=self.trainset, mode='train')
    self.valid_loader = self.Dataset.load_data(dataset=self.valset, mode='valid')
    """ define network layer for embedding usr info """
    USR_ID_NUM = Dataset.max_usr_id + 1
    # Embed the user ID, followed by an FC layer.
    self.usr_emb = Embedding(name, [USR_ID_NUM, 32], is_sparse=False)
    self.usr_fc = FC(name, size=32)
    # Embed the user gender, followed by an FC layer.
    USR_GENDER_DICT_SIZE = 2
    self.usr_gender_emb = Embedding(name, [USR_GENDER_DICT_SIZE, 16])
    self.usr_gender_fc = FC(name, 16)
    # Embed the user age, followed by an FC layer.
    USR_AGE_DICT_SIZE = Dataset.max_usr_age + 1
    self.usr_age_emb = Embedding(name, [USR_AGE_DICT_SIZE, 16])
    self.usr_age_fc = FC(name, 16)
    # Embed the user job, followed by an FC layer.
    USR_JOB_DICT_SIZE = Dataset.max_usr_job + 1
    self.usr_job_emb = Embedding(name, [USR_JOB_DICT_SIZE, 16])
    self.usr_job_fc = FC(name, 16)
    # FC layer that fuses all user features into one vector.
    self.usr_combined = FC(name, 200, act='tanh')
    # NOTE(review): the section marker below repeats "usr info" but this
    # section builds the MOVIE-side layers — copy/paste slip in the
    # original bare-string marker (left unchanged: it is a string literal).
    """ define network layer for embedding usr info """
    # Embed the movie ID, followed by an FC layer.
    MOV_DICT_SIZE = Dataset.max_mov_id + 1
    self.mov_emb = Embedding(name, [MOV_DICT_SIZE, 32])
    self.mov_fc = FC(name, 32)
    # Embed the movie categories.
    CATEGORY_DICT_SIZE = len(Dataset.movie_cat) + 1
    self.mov_cat_emb = Embedding(name, [CATEGORY_DICT_SIZE, 32], is_sparse=False)
    self.mov_cat_fc = FC(name, 32)
    # Embed the movie title, then two conv layers over the title sequence.
    MOV_TITLE_DICT_SIZE = len(Dataset.movie_title) + 1
    self.mov_title_emb = Embedding(name, [MOV_TITLE_DICT_SIZE, 32], is_sparse=False)
    self.mov_title_conv = Conv2D(name, 1, filter_size=(3, 1), stride=(2, 1), padding=0, act='relu')
    self.mov_title_conv2 = Conv2D(name, 1, filter_size=(3, 1), stride=1, padding=0, act='relu')
    # FC layer that fuses all movie features into one vector.
    self.mov_concat_embed = FC(name, size=200, act='tanh')