Example #1
def create_vars(self):
    # Shared GPT-2 transformer body.
    self.transformer = GPT2SharedWeights(self.hparams,
                                         name='shared_transformer')
    # Dense head projecting the transformer output to num_labels logits.
    self.e2c = tf.keras.layers.Dense(units=self.hparams.num_labels,
                                     kernel_initializer=get_initializer(
                                         self.hparams.initializer_range),
                                     name='e2c')
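
All four examples rely on a get_initializer helper that is not shown here. A minimal sketch of what it typically looks like in GPT-2/BERT-style TensorFlow code (a truncated-normal initializer whose stddev is the configured range; the exact body in this codebase may differ):

import tensorflow as tf

def get_initializer(initializer_range=0.02):
    # Truncated-normal kernel initializer; stddev follows the configured range.
    return tf.keras.initializers.TruncatedNormal(stddev=initializer_range)
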
Example #2
def create_vars(self, **kwargs):
    # Shared BERT transformer body; extra construction kwargs are passed through.
    self.transformer = BertSharedWeights(self.hparams,
                                         name='transformer',
                                         **kwargs)
    # Dense head projecting the transformer output to num_labels logits.
    self.e2c = tf.keras.layers.Dense(units=self.hparams.num_labels,
                                     kernel_initializer=get_initializer(
                                         self.hparams.initializer_range),
                                     name='e2c')
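
Neither example shows the surrounding model class, so how the transformer and the e2c head are actually wired together is left implicit. Below is a minimal sketch of the usual pattern, assuming the transformer returns per-token hidden states and that simple mean pooling feeds the classification head; the class name, pooling choice, and constructor signature are assumptions for illustration only:

import tensorflow as tf

class SequenceClassifier(tf.keras.Model):
    # Hypothetical wrapper: `transformer` is any Keras layer returning
    # [batch, seq_len, hidden] (e.g. GPT2SharedWeights / BertSharedWeights).
    def __init__(self, transformer, num_labels, initializer_range=0.02, **kwargs):
        super().__init__(**kwargs)
        self.transformer = transformer
        self.e2c = tf.keras.layers.Dense(
            units=num_labels,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=initializer_range),
            name='e2c')

    def call(self, inputs, training=False):
        hidden_states = self.transformer(inputs, training=training)
        pooled = tf.reduce_mean(hidden_states, axis=1)   # mean pooling (assumption)
        return self.e2c(pooled)                          # [batch, num_labels] logits
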
Example #3
def build(self, input_shape):
    # [nx, nf] projection matrix, initialized with the configured truncated-normal range.
    self.weight = self.add_weight("weight",
                                  shape=[self.nx, self.nf],
                                  initializer=get_initializer(
                                      self.initializer_range),
                                  regularizer=self.regularizer)
    # [1, nf] bias, broadcast over the rows of the projected output.
    self.bias = self.add_weight("bias",
                                shape=[1, self.nf],
                                initializer=tf.zeros_initializer(),
                                regularizer=self.regularizer)
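
Example #3 only shows build; the weights it creates look like those of a GPT-2-style 1D projection (output = input @ weight + bias). A minimal sketch of the enclosing layer under that assumption, with the class name and call body illustrative rather than taken from the original:

import tensorflow as tf

class Projection(tf.keras.layers.Layer):
    # Hypothetical layer: projects the last dimension from nx to nf.
    def __init__(self, nf, nx, initializer_range=0.02, regularizer=None, **kwargs):
        super().__init__(**kwargs)
        self.nf = nf
        self.nx = nx
        self.initializer_range = initializer_range
        self.regularizer = regularizer

    def build(self, input_shape):
        self.weight = self.add_weight(
            "weight",
            shape=[self.nx, self.nf],
            initializer=tf.keras.initializers.TruncatedNormal(
                stddev=self.initializer_range),
            regularizer=self.regularizer)
        self.bias = self.add_weight(
            "bias",
            shape=[1, self.nf],
            initializer=tf.zeros_initializer(),
            regularizer=self.regularizer)

    def call(self, x):
        # Flatten leading dims, apply the affine projection, restore the shape.
        shape = tf.shape(x)
        x = tf.reshape(x, [-1, self.nx])
        x = tf.matmul(x, self.weight) + self.bias
        return tf.reshape(x, tf.concat([shape[:-1], [self.nf]], axis=0))
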
Example #4
def create_vars(self, hparams):
    # Token embedding table (wte).
    self.wte = SharedEmbeddings(
        self.vocab_size,
        hparams.hidden_size,
        initializer_range=hparams.initializer_range,
        regularizer=self.regularizer,
        name='wte')
    # Learned position embeddings (wpe).
    self.wpe = tf.keras.layers.Embedding(
        hparams.n_positions,
        hparams.embedding_dim,
        embeddings_initializer=get_initializer(hparams.initializer_range),
        embeddings_regularizer=self.regularizer,
        name='wpe')
    self.drop = tf.keras.layers.Dropout(hparams.embd_pdrop)
    # A single Block instance is reused at every depth, so all transformer
    # layers share the same weights.
    attention_block = Block(hparams.n_ctx,
                            hparams,
                            regularizer=self.regularizer,
                            scale=True,
                            name='h')
    self.h = [attention_block for _ in range(hparams.depth)]
    # Final layer normalization applied after the stack of blocks.
    self.ln_f = tf.keras.layers.LayerNormalization(
        epsilon=hparams.layer_norm_epsilon, name='ln_f')
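
The list comprehension in Example #4 reuses one Block instance, so every entry of self.h points at the same layer and the whole stack shares one set of parameters (presumably the point of the "SharedWeights" classes) rather than building depth independent layers. A tiny self-contained demonstration of the effect, with tf.keras.layers.Dense standing in for Block:

import tensorflow as tf

# Reusing one layer instance across a list ties the weights: the block's
# variables exist once, no matter how many times it appears in `h`.
block = tf.keras.layers.Dense(4, name='h')
h = [block for _ in range(3)]

x = tf.zeros([2, 4])
for layer in h:
    x = layer(x)

print(len(block.trainable_variables))  # 2: a single kernel and bias in total
print(h[0] is h[2])                    # True: same object, shared weights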