def create_vars(self):
    # GPT-2 backbone with weights shared across layers, plus a linear head
    # projecting hidden states to class logits.
    self.transformer = GPT2SharedWeights(self.hparams, name='shared_transformer')
    self.e2c = tf.keras.layers.Dense(
        units=self.hparams.num_labels,
        kernel_initializer=get_initializer(self.hparams.initializer_range),
        name='e2c')
def create_vars(self, **kwargs):
    # BERT variant of the same structure; extra kwargs are forwarded to the backbone.
    self.transformer = BertSharedWeights(self.hparams, name='transformer', **kwargs)
    self.e2c = tf.keras.layers.Dense(
        units=self.hparams.num_labels,
        kernel_initializer=get_initializer(self.hparams.initializer_range),
        name='e2c')
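# A minimal sketch (an assumption, not the repo's actual code) of how the variables
# above could be wired in the corresponding call(): run the shared-weight backbone,
# take a summary vector of its output, and project it to label logits with e2c.
# The pooling choice (last-token vs. a pooled [CLS] output) is hypothetical.
def call(self, input_ids, training=False):
    hidden_states = self.transformer(input_ids, training=training)
    pooled = hidden_states[:, -1, :]   # hypothetical: last-token pooling
    return self.e2c(pooled)            # [batch, num_labels] logits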
def build(self, input_shape):
    # Weight matrix [nx, nf] and bias [1, nf] for the GPT-2-style 1D convolution
    # (effectively a linear projection over the last axis).
    self.weight = self.add_weight(
        "weight",
        shape=[self.nx, self.nf],
        initializer=get_initializer(self.initializer_range),
        regularizer=self.regularizer)
    self.bias = self.add_weight(
        "bias",
        shape=[1, self.nf],
        initializer=tf.zeros_initializer(),
        regularizer=self.regularizer)
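# The matching call() is not shown above; a minimal sketch of how the weight and
# bias built here would typically be applied (y = x @ weight + bias, with the
# leading dimensions flattened and restored). The reshape bookkeeping is an
# assumption, not necessarily the repo's implementation.
def call(self, x):
    orig_shape = tf.shape(x)
    x = tf.reshape(x, [-1, self.nx])              # flatten to [batch * seq, nx]
    x = tf.matmul(x, self.weight) + self.bias     # affine projection to nf units
    return tf.reshape(x, tf.concat([orig_shape[:-1], [self.nf]], axis=0))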
def create_vars(self, hparams):
    # Token and position embeddings.
    self.wte = SharedEmbeddings(
        self.vocab_size, hparams.hidden_size,
        initializer_range=hparams.initializer_range,
        regularizer=self.regularizer,
        name='wte')
    self.wpe = tf.keras.layers.Embedding(
        hparams.n_positions, hparams.embedding_dim,
        embeddings_initializer=get_initializer(hparams.initializer_range),
        embeddings_regularizer=self.regularizer,
        name='wpe')
    self.drop = tf.keras.layers.Dropout(hparams.embd_pdrop)
    # A single Block instance is reused at every depth, so all layers share the
    # same attention/MLP weights.
    attention_block = Block(hparams.n_ctx, hparams,
                            regularizer=self.regularizer,
                            scale=True, name='h')
    self.h = [attention_block for _ in range(hparams.depth)]
    self.ln_f = tf.keras.layers.LayerNormalization(
        epsilon=hparams.layer_norm_epsilon, name='ln_f')
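# Sketch (an assumption, not the repo's code) of how a forward pass would use these
# variables: sum token and position embeddings, apply dropout, run the shared Block
# repeatedly, then apply the final layer norm. The Block call signature is assumed
# to take only the hidden states and a training flag.
def call(self, input_ids, training=False):
    # Positions 0..seq_len-1, broadcast over the batch.
    position_ids = tf.range(tf.shape(input_ids)[-1])[tf.newaxis, :]
    hidden_states = self.wte(input_ids) + self.wpe(position_ids)
    hidden_states = self.drop(hidden_states, training=training)
    for block in self.h:   # the same Block instance, applied depth times
        hidden_states = block(hidden_states, training=training)
    return self.ln_f(hidden_states)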