Example #1
    def __init__(self,
                 name_scope,
                 d_key,
                 d_value,
                 d_model,
                 n_head=1,
                 dropout_rate=0.,
                 cache=None,
                 gather_idx=None,
                 static_kv=False):
        super(MultiHeadAttentionLayer, self).__init__(name_scope)
        self._n_head = n_head
        self._d_key = d_key
        self._d_value = d_value
        self._d_model = d_model
        self._dropout_rate = dropout_rate
        # Bias-free projections for queries, keys and values; num_flatten_dims=2
        # treats the first two dims ([batch, seq_len]) as batch dims and projects
        # only the last (feature) dimension.
        self._q_fc = FC(name_scope=self.full_name(),
                        size=d_key * n_head,
                        bias_attr=False,
                        num_flatten_dims=2)
        self._k_fc = FC(name_scope=self.full_name(),
                        size=d_key * n_head,
                        bias_attr=False,
                        num_flatten_dims=2)
        self._v_fc = FC(name_scope=self.full_name(),
                        size=d_value * n_head,
                        bias_attr=False,
                        num_flatten_dims=2)
        # Output projection back to the model dimension.
        self._proj_fc = FC(name_scope=self.full_name(),
                           size=self._d_model,
                           bias_attr=False,
                           num_flatten_dims=2)
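The constructor above only declares the projection layers. For orientation, a forward pass typically wires them into scaled dot-product attention roughly as follows. This is a hedged sketch, not part of the original example: the forward method, the attn_bias argument and the [batch, seq_len, d_model] input shapes are assumptions, and it assumes `from paddle.fluid import layers` at module level.

    # Hypothetical forward pass for MultiHeadAttentionLayer (sketch only).
    def forward(self, queries, keys, values, attn_bias=None):
        q = self._q_fc(queries)  # [B, L, n_head * d_key]
        k = self._k_fc(keys)
        v = self._v_fc(values)
        # Split the projected features into heads: [B, n_head, L, d_key].
        q = layers.transpose(layers.reshape(q, shape=[0, 0, self._n_head, self._d_key]),
                             perm=[0, 2, 1, 3])
        k = layers.transpose(layers.reshape(k, shape=[0, 0, self._n_head, self._d_key]),
                             perm=[0, 2, 1, 3])
        v = layers.transpose(layers.reshape(v, shape=[0, 0, self._n_head, self._d_value]),
                             perm=[0, 2, 1, 3])
        # Scaled dot-product attention with an optional additive mask.
        product = layers.matmul(q, k, transpose_y=True, alpha=self._d_key**-0.5)
        if attn_bias is not None:
            product += attn_bias
        weights = layers.softmax(product)
        if self._dropout_rate:
            weights = layers.dropout(weights, dropout_prob=self._dropout_rate)
        out = layers.matmul(weights, v)
        # Merge the heads back and project to d_model.
        out = layers.reshape(layers.transpose(out, perm=[0, 2, 1, 3]),
                             shape=[0, 0, self._n_head * self._d_value])
        return self._proj_fc(out)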
Example #2
    def __init__(self, name_scope, d_inner_hid, d_hid, dropout_rate):
        super(PositionwiseFeedForwardLayer, self).__init__(name_scope)
        # Expand to the inner dimension with ReLU, then project back to d_hid.
        self._i2h = FC(name_scope=self.full_name(),
                       size=d_inner_hid,
                       num_flatten_dims=2,
                       act="relu")
        self._h2o = FC(name_scope=self.full_name(),
                       size=d_hid,
                       num_flatten_dims=2)
        self._dropout_rate = dropout_rate
Example #3
    def __init__(self, name_scope, hidden_dim, inner_dim, dropout):
        super().__init__(name_scope)

        self.hidden_dim = hidden_dim
        self.inner_dim = inner_dim
        self.linear_hidden = FC(name_scope=self.full_name(),
                                size=inner_dim,
                                num_flatten_dims=2,
                                act="gelu")
        self.linear_out = FC(name_scope=self.full_name(),
                             size=hidden_dim,
                             num_flatten_dims=2)
        self.dropout = dropout
        return
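Examples #2 and #3 declare the same position-wise feed-forward block, differing only in the activation (relu vs. gelu) and naming. A hedged sketch of the usual forward pass for Example #2 follows; the forward method itself is an assumption, and it assumes `from paddle.fluid import layers` at module level.

    # Hypothetical forward pass for PositionwiseFeedForwardLayer (sketch only).
    def forward(self, x):
        hidden = self._i2h(x)  # [B, L, d_inner_hid]; ReLU applied inside the FC
        if self._dropout_rate:
            hidden = layers.dropout(hidden, dropout_prob=self._dropout_rate)
        return self._h2o(hidden)  # back to [B, L, d_hid]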
Example #4
    def __init__(self, name_scope, hidden_dim, num_heads, dropout):
        assert hidden_dim % num_heads == 0
        super().__init__(name_scope)

        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        self.head_dim = hidden_dim // num_heads
        self.scale = self.head_dim**-0.5  # 1/sqrt(head_dim) dot-product scaling
        # One fused projection produces Q, K and V in a single matmul.
        self.linear_qkv = FC(name_scope=self.full_name(),
                             size=hidden_dim * 3,
                             num_flatten_dims=2)
        self.linear_out = FC(name_scope=self.full_name(),
                             size=hidden_dim,
                             num_flatten_dims=2)
        self.dropout = dropout
        return
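Unlike Example #1, this layer computes Q, K and V with one fused projection. Below is a hedged sketch of how such a fused output is usually split per head; the forward method and the mask argument are assumptions rather than part of the source, and it assumes `from paddle.fluid import layers` at module level.

    # Hypothetical forward pass splitting the fused QKV projection (sketch only).
    def forward(self, x, mask=None):
        qkv = self.linear_qkv(x)  # [B, L, 3 * hidden_dim]
        q, k, v = layers.split(qkv, num_or_sections=3, dim=-1)

        def split_heads(t):
            # [B, L, hidden_dim] -> [B, num_heads, L, head_dim]
            t = layers.reshape(t, shape=[0, 0, self.num_heads, self.head_dim])
            return layers.transpose(t, perm=[0, 2, 1, 3])

        q, k, v = map(split_heads, (q, k, v))
        scores = layers.matmul(q, k, transpose_y=True) * self.scale
        if mask is not None:
            scores += mask
        weights = layers.softmax(scores)
        if self.dropout:
            weights = layers.dropout(weights, dropout_prob=self.dropout)
        out = layers.matmul(weights, v)
        out = layers.reshape(layers.transpose(out, perm=[0, 2, 1, 3]),
                             shape=[0, 0, self.hidden_dim])
        return self.linear_out(out)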
Example #5
    def __init__(self,
                 name_scope,
                 trg_vocab_size,
                 max_length,
                 n_layer,
                 n_head,
                 d_key,
                 d_value,
                 d_model,
                 d_inner_hid,
                 prepostprocess_dropout,
                 attention_dropout,
                 relu_dropout,
                 preprocess_cmd,
                 postprocess_cmd,
                 weight_sharing,
                 caches=None,
                 gather_idx=None):
        """
        The wrapper assembles together all needed layers for the encoder.
        """
        super(WrapDecoderLayer, self).__init__(name_scope)

        self._prepare_decoder_layer = PrepareEncoderDecoderLayer(
            self.full_name(),
            trg_vocab_size,
            d_model,
            max_length,
            prepostprocess_dropout,
            word_emb_param_name=word_emb_param_names[1],
            pos_enc_param_name=pos_enc_param_names[1])
        self._decoder_layer = DecoderLayer(
            self.full_name(),
            n_layer,
            n_head,
            d_key,
            d_value,
            d_model,
            d_inner_hid,
            prepostprocess_dropout,
            attention_dropout,
            relu_dropout,
            preprocess_cmd,
            postprocess_cmd,
            caches=caches,
            gather_idx=gather_idx)
        self._weight_sharing = weight_sharing
        if not weight_sharing:
            # Without embedding/output weight sharing, a separate bias-free FC
            # maps decoder states to vocabulary logits.
            self._fc = FC(self.full_name(),
                          size=trg_vocab_size,
                          bias_attr=False)
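When weight_sharing is enabled, no output FC is created; the standard alternative is to reuse the target word embedding matrix as the output projection. A hedged sketch follows: the compute_logits helper and its word_emb argument are assumptions (only the weight_sharing branch is implied by the constructor), and it assumes `from paddle.fluid import layers` at module level.

    # Hypothetical logit computation under weight sharing (sketch only).
    def compute_logits(self, dec_output, word_emb):
        # dec_output: [B, L, d_model]; word_emb: [trg_vocab_size, d_model]
        if self._weight_sharing:
            return layers.matmul(dec_output, word_emb, transpose_y=True)
        return self._fc(dec_output)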
Example #6
    def __init__(self, name_scope, hparams, generator, dtype="float32"):
        super().__init__(name_scope, hparams)
        self.generator = generator
        self.num_token_embeddings = hparams.num_token_embeddings
        self.num_pos_embeddings = hparams.num_pos_embeddings
        self.num_type_embeddings = hparams.num_type_embeddings
        self.num_turn_embeddings = hparams.num_turn_embeddings
        self.num_latent = hparams.num_latent
        self.tau = hparams.tau
        self.with_bow = hparams.with_bow
        self.hidden_dim = hparams.hidden_dim
        self.num_heads = hparams.num_heads
        self.num_layers = hparams.num_layers
        self.padding_idx = hparams.padding_idx
        self.dropout = hparams.dropout
        self.embed_dropout = hparams.embed_dropout
        self.attn_dropout = hparams.attn_dropout
        self.ff_dropout = hparams.ff_dropout
        self.use_discriminator = hparams.use_discriminator
        self.weight_sharing = hparams.weight_sharing
        self.pos_trainable = hparams.pos_trainable
        self.two_layer_predictor = hparams.two_layer_predictor
        self.bidirectional_context = hparams.bidirectional_context
        self.label_smooth = hparams.label_smooth
        self.initializer_range = hparams.initializer_range

        self.embedder = Embedder(self.full_name(),
                                 self.hidden_dim,
                                 self.num_token_embeddings,
                                 self.num_pos_embeddings,
                                 self.num_type_embeddings,
                                 self.num_turn_embeddings,
                                 padding_idx=self.padding_idx,
                                 dropout=self.embed_dropout,
                                 pos_trainable=self.pos_trainable)
        self.embed_layer_norm = LayerNorm(self.full_name(),
                                          begin_norm_axis=2,
                                          epsilon=1e-12,
                                          param_attr=fluid.ParamAttr(
                                              regularizer=fluid.regularizer.L2Decay(0.0)),
                                          bias_attr=fluid.ParamAttr(
                                              regularizer=fluid.regularizer.L2Decay(0.0)))

        self.layers = []
        for i in range(hparams.num_layers):
            layer = TransformerBlock(self.full_name(),
                                     self.hidden_dim,
                                     self.num_heads,
                                     self.dropout,
                                     self.attn_dropout,
                                     self.ff_dropout)
            self.layers.append(layer)
            self.add_sublayer(f"layer_{i}", layer)

        if self.num_latent > 0:
            # Posterior network producing logits over the discrete latent variables.
            self.post_network = FC(name_scope=self.full_name() + ".post_network",
                                   size=self.num_latent,
                                   bias_attr=False)

            if self.use_discriminator:
                self.dis_ratio = hparams.dis_ratio
                self.discriminator = FC(name_scope=self.full_name() + ".discriminator",
                                        size=1,
                                        act="sigmoid")

        if self.two_layer_predictor:
            self.pre_predictor = FC(name_scope=self.full_name() + ".pre_predictor",
                                    size=self.hidden_dim,
                                    num_flatten_dims=2,
                                    act="gelu")
            if self.num_latent > 0 and self.with_bow:
                self.pre_bow_predictor = FC(name_scope=self.full_name() + ".pre_bow_predictor",
                                            size=self.hidden_dim,
                                            act="gelu")
        if not self.weight_sharing:
            self.predictor = FC(name_scope=self.full_name() + ".predictor",
                                size=self.num_token_embeddings,
                                num_flatten_dims=2,
                                bias_attr=False)
        if self.num_latent > 0 and self.with_bow:
            self.bow_predictor = FC(name_scope=self.full_name() + ".bow_predictor",
                                    size=self.num_token_embeddings,
                                    bias_attr=False)

        self.max_grad_norm = hparams.max_grad_norm
        if self.max_grad_norm is not None:
            self.grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(hparams.max_grad_norm)
        else:
            self.grad_clip = None
        self.weight_decay = hparams.weight_decay
        self.optimizer = fluid.optimizer.AdamOptimizer(
            learning_rate=hparams.lr,
            regularization=fluid.regularizer.L2Decay(self.weight_decay))

        self._dtype = dtype

        # Hooks for data-parallel (distributed) training.
        self.before_backward_fn = None
        self.after_backward_fn = None
        return
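The constructor stores an optimizer and an optional global-norm gradient clip (grad_clip stays None when max_grad_norm is unset). Below is a hedged sketch of how a Paddle 1.x dygraph training step typically uses them; the _optimize helper and its loss argument are assumptions, not part of the source.

    # Hypothetical training step (sketch only, Paddle 1.x dygraph API).
    def _optimize(self, loss):
        loss.backward()
        # Passing grad_clip=None simply disables clipping.
        self.optimizer.minimize(loss, grad_clip=self.grad_clip)
        self.clear_gradients()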
Example #7
    def __init__(self, name_scope):
        super(Regressor, self).__init__(name_scope)
        name_scope = self.full_name()
        # One fully connected layer with output size 1 and act=None,
        # i.e. no activation function.
        self.fc = FC(name_scope, size=1, act=None)
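A hedged usage sketch for this regressor in dygraph mode; the batch size and the 13-feature input width are illustrative assumptions, not fixed by the example itself.

# Hypothetical usage (sketch only): one forward pass in dygraph mode.
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    model = Regressor("regressor")
    x = fluid.dygraph.to_variable(np.random.rand(8, 13).astype("float32"))
    y = model.fc(x)  # predictions of shape [8, 1]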
Example #8
    def __init__(self, name_scope, use_poster, use_mov_title, use_mov_cat,
                 use_age_job):
        super(Model, self).__init__(name_scope)
        name = self.full_name()

        # Store the passed-in name info and boolean flags on the model instance
        self.use_mov_poster = use_poster
        self.use_mov_title = use_mov_title
        self.use_usr_age_job = use_age_job
        self.use_mov_cat = use_mov_cat

        # Read dataset metadata and build data loaders for the training and validation sets
        Dataset = MovieLen(self.use_mov_poster)
        self.Dataset = Dataset
        self.trainset = self.Dataset.train_dataset
        self.valset = self.Dataset.valid_dataset
        self.train_loader = self.Dataset.load_data(dataset=self.trainset,
                                                   mode='train')
        self.valid_loader = self.Dataset.load_data(dataset=self.valset,
                                                   mode='valid')
        """ define network layer for embedding usr info """
        USR_ID_NUM = Dataset.max_usr_id + 1
        # Embed the user ID, followed by an FC layer
        self.usr_emb = Embedding(name, [USR_ID_NUM, 32], is_sparse=False)
        self.usr_fc = FC(name, size=32)

        # Embed the user gender, followed by an FC layer
        USR_GENDER_DICT_SIZE = 2
        self.usr_gender_emb = Embedding(name, [USR_GENDER_DICT_SIZE, 16])
        self.usr_gender_fc = FC(name, 16)

        # Embed the user age, followed by an FC layer
        USR_AGE_DICT_SIZE = Dataset.max_usr_age + 1
        self.usr_age_emb = Embedding(name, [USR_AGE_DICT_SIZE, 16])
        self.usr_age_fc = FC(name, 16)

        # Embed the user occupation, followed by an FC layer
        USR_JOB_DICT_SIZE = Dataset.max_usr_job + 1
        self.usr_job_emb = Embedding(name, [USR_JOB_DICT_SIZE, 16])
        self.usr_job_fc = FC(name, 16)

        # An FC layer that fuses all the user features
        self.usr_combined = FC(name, 200, act='tanh')
        """ define network layer for embedding mov info """
        # Embed the movie ID, followed by an FC layer
        MOV_DICT_SIZE = Dataset.max_mov_id + 1
        self.mov_emb = Embedding(name, [MOV_DICT_SIZE, 32])
        self.mov_fc = FC(name, 32)

        # Embed the movie categories
        CATEGORY_DICT_SIZE = len(Dataset.movie_cat) + 1
        self.mov_cat_emb = Embedding(name, [CATEGORY_DICT_SIZE, 32],
                                     is_sparse=False)
        self.mov_cat_fc = FC(name, 32)

        # Embed the movie title
        MOV_TITLE_DICT_SIZE = len(Dataset.movie_title) + 1
        self.mov_title_emb = Embedding(name, [MOV_TITLE_DICT_SIZE, 32],
                                       is_sparse=False)
        self.mov_title_conv = Conv2D(name,
                                     1,
                                     filter_size=(3, 1),
                                     stride=(2, 1),
                                     padding=0,
                                     act='relu')
        self.mov_title_conv2 = Conv2D(name,
                                      1,
                                      filter_size=(3, 1),
                                      stride=1,
                                      padding=0,
                                      act='relu')

        # An FC layer that fuses all the movie features
        self.mov_concat_embed = FC(name, size=200, act='tanh')
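The two tanh FC layers above suggest the usual two-tower recommender pattern: embed each field, concatenate, and fuse per tower. Below is a hedged sketch of the user tower; the helper name, its inputs and the concat wiring are assumptions based only on the layers declared above, and it assumes `from paddle.fluid import layers` at module level.

    # Hypothetical user-feature fusion (sketch only).
    def get_usr_feat(self, usr_id, usr_gender, usr_age, usr_job):
        feats = [self.usr_fc(self.usr_emb(usr_id)),
                 self.usr_gender_fc(self.usr_gender_emb(usr_gender)),
                 self.usr_age_fc(self.usr_age_emb(usr_age)),
                 self.usr_job_fc(self.usr_job_emb(usr_job))]
        # Concatenate the per-field vectors and fuse with the 200-dim tanh FC.
        return self.usr_combined(layers.concat(feats, axis=1))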