def __init__(self, config, **kwargs): super().__init__(config, **kwargs) tf.logging.info("Initialize TransformerModel...") model_config = config['model']['net']['structure'] self.is_infer = config['model']['is_infer'] if self.is_infer: self.length_penalty = model_config['length_penalty'] self.dropout_rate = model_config['dropout_rate'] self.num_layers = model_config['num_layers'] self.l2_reg_lambda = model_config['l2_reg_lambda'] self.max_enc_len = model_config['max_enc_len'] self.max_dec_len = model_config['max_dec_len'] self.share_embedding = model_config['share_embedding'] self.padding_token = utils.PAD_IDX self.beam_size = model_config['beam_size'] self.mask_layer = tf.keras.layers.Lambda(lambda inputs: tf.cast( tf.not_equal(inputs, self.padding_token), tf.int32)) self.embed_d = tf.keras.layers.Dropout(self.dropout_rate) self.pos_embed = layers.PositionEmbedding(self.max_enc_len, self.embedding_size) self.encoder = layers.TransformerEncoder(config) self.decoder = layers.TransformerDecoder(config, self.embed, self.decode_vocab_size) logging.info("decode_vocab_size: {}".format(self.decode_vocab_size)) logging.info("Initialize TransformerModel done.")
def __init__(self, config, **kwargs):
  super(TransformerModel, self).__init__(config, **kwargs)
  tf.logging.info("Initialize TransformerModel...")

  self.vocab_size = config['data']['vocab_size']
  self.num_classes = config['data']['task']['classes']['num_classes']

  model_config = config['model']['net']['structure']
  self.dropout_rate = model_config['dropout_rate']
  self.embedding_size = model_config['embedding_size']
  self.num_layers = model_config['num_layers']
  self.l2_reg_lambda = model_config['l2_reg_lambda']
  self.max_len = model_config['max_len']
  self.transformer_dropout = model_config['transformer_dropout']
  self.residual_conn = model_config['residual_conn']
  self.head_num = model_config['head_num']
  self.hidden_dim = model_config['hidden_dim']
  self.padding_token = utils.PAD_IDX

  # Padding mask: 1 for real tokens, 0 for padding positions.
  self.mask_layer = tf.keras.layers.Lambda(lambda inputs: tf.cast(
      tf.not_equal(inputs, self.padding_token), tf.int32))

  # Token and position embeddings, followed by dropout.
  self.embed = tf.keras.layers.Embedding(
      self.vocab_size,
      self.embedding_size,
      embeddings_initializer=self.embed_initializer)
  self.pos_embed = layers.PositionEmbedding(self.max_len, self.embedding_size)
  self.embed_d = tf.keras.layers.Dropout(self.dropout_rate)

  # Stack of transformer encoder blocks.
  self.transformers = [
      layers.TransformerEncoder(
          self.head_num,
          self.hidden_dim,
          self.embedding_size,
          dropout_rate=self.transformer_dropout,
          residual_conn=self.residual_conn) for _ in range(self.num_layers)
  ]

  # Pool over the time dimension and project to class logits.
  self.pool = tf.keras.layers.GlobalMaxPooling1D()
  self.final_dense = tf.keras.layers.Dense(
      self.num_classes,
      activation=tf.keras.activations.linear,
      name="final_dense")
  tf.logging.info("Initialize TransformerModel done.")
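# A minimal config sketch (assumed layout; the values are illustrative only)
# covering the keys the classification __init__ above reads.
_example_cls_config = {
    'data': {
        'vocab_size': 30000,
        'task': {
            'classes': {'num_classes': 2},
        },
    },
    'model': {
        'net': {
            'structure': {
                'dropout_rate': 0.1,
                'embedding_size': 256,
                'num_layers': 4,
                'l2_reg_lambda': 0.0,
                'max_len': 128,
                'transformer_dropout': 0.1,
                'residual_conn': True,
                'head_num': 8,
                'hidden_dim': 512,
            },
        },
    },
}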
def __init__(self, config, **kwargs): super().__init__(config, **kwargs) tf.logging.info("Initialize TransformerModel...") model_config = config['model']['net']['structure'] self.is_infer = config['model']['is_infer'] self.dropout_rate = model_config['dropout_rate'] self.num_layers = model_config['num_layers'] self.max_enc_len = model_config['max_enc_len'] self.max_dec_len = model_config['max_dec_len'] self.share_embedding = model_config.get('share_embedding', True) self.use_const = model_config.get('use_const', True) self.embed_d = tf.keras.layers.Dropout(self.dropout_rate) self.pos_embed = layers.PositionEmbedding(self.max_enc_len, self.embedding_size, self.use_const, "enc_pos") self.encoder = layers.TransformerEncoder(config) self.decoder = layers.TransformerDecoder(config, (self.embed, self.pos_embed), self.decode_vocab_size) logging.info("decode_vocab_size: {}".format(self.decode_vocab_size)) logging.info("Initialize TransformerModel done.")