def __init__(self, params, is_train, mode=None):
    self.is_train = is_train
    self.params = params
    if mode is not None:
        self.mode = mode
    elif self.is_train:
        self.mode = ModeKeys.TRAIN
    else:
        self.mode = ModeKeys.PREDICT

    if params.shared_embedding_softmax_weights:
        print("sharing embedding!!!")
        self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
            params.vocab_size, params.hidden_size)
        self.encoder_embedding_layer = self.embedding_softmax_layer
        self.decoder_embedding_layer = self.embedding_softmax_layer
        self.decoder_softmax_layer = self.embedding_softmax_layer
    else:
        print("not sharing embedding!!!")
        self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
            params.source_vocab_size, params.hidden_size, "source_embedding")
        self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
            params.target_vocab_size, params.hidden_size, "target_embedding")
        self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
            params.target_vocab_size, params.hidden_size, 'soft_max')

    # done
    self.encoder_stack = EncoderDecoder.EncoderStack(params, is_train, self.mode)
    self.decoder_stack = EncoderDecoder.DecoderStack(params, is_train, self.mode)
    self._initializer = tf.variance_scaling_initializer(
        self.params.initializer_gain, mode="fan_avg", distribution="uniform")
def __init__(self, params, train): """Initialize layers to build Transformer model. Args: params: hyperparameter object defining layer sizes, dropout values, etc. train: boolean indicating whether the model is in training mode. Used to determine if dropout layers should be added. """ self.train = train self.params = params self.source_embedding_layer = embedding_layer.EmbeddingSharedWeights( params.vocab_size_src, params.hidden_size) self.target_embedding_layer = embedding_layer.EmbeddingSharedWeights( params.vocab_size_tar, params.hidden_size) self.encoder_stack = EncoderStack(params, train) self.decoder_stack = DecoderStack(params, train)
def __init__(self, params, is_train, mode=None, scope=None):
    """Initialize layers to build Transformer model.

    Args:
      params: hyperparameter object defining layer sizes, dropout values, etc.
      is_train: boolean indicating whether the model is in training mode. Used
        to determine if dropout layers should be added.
      mode: optional ModeKeys value; inferred from is_train when not given.
      scope: optional variable scope name for the embedding weights.
    """
    self.dropout_rate = tf.placeholder_with_default(0.0, shape=[], name="dropout_rate")
    self.is_train = is_train
    self.params = params
    self.name_scope = scope
    # Route all dropout hyperparameters through the placeholder: it defaults
    # to 0.0 at inference and is fed a non-zero rate (e.g. 0.1) during training.
    self.params.layer_postprocess_dropout = self.dropout_rate
    self.params.attention_dropout = self.dropout_rate
    self.params.relu_dropout = self.dropout_rate

    if mode is not None:
        self.mode = mode
    elif self.is_train:
        self.mode = ModeKeys.TRAIN
    else:
        self.mode = ModeKeys.PREDICT

    self.initializer = tf.variance_scaling_initializer(
        self.params.initializer_gain, mode="fan_avg", distribution="uniform")

    # done
    self.encoder_stack = EncoderStack(params, is_train, self.mode)
    self.decoder_stack = DecoderStack(params, is_train, self.mode)

    with tf.variable_scope(self.name_scope):
        if params.shared_embedding_softmax_weights:
            self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
                params.vocab_size, params.hidden_size)
            self.encoder_embedding_layer = self.embedding_softmax_layer
            self.decoder_embedding_layer = self.embedding_softmax_layer
            self.decoder_softmax_layer = self.embedding_softmax_layer
        else:
            self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
                params.source_vocab_size, params.hidden_size, "source_embedding")
            self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
                params.target_vocab_size, params.hidden_size, "target_embedding")
            self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
                params.target_vocab_size, params.hidden_size, 'soft_max')
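# A minimal, standalone sketch (not taken from the snippet above) of the
# placeholder_with_default pattern it relies on: the dropout rate defaults to
# 0.0 at inference and is only overridden when explicitly fed during training.
# The tensor names and sizes below are illustrative, not from the model code.
import tensorflow as tf

dropout_rate = tf.placeholder_with_default(0.0, shape=[], name="dropout_rate")
x = tf.ones([4, 8])
dropped = tf.nn.dropout(x, keep_prob=1.0 - dropout_rate)

with tf.Session() as sess:
    print(sess.run(dropped))                                 # no feed: rate 0.0, output equals x
    print(sess.run(dropped, feed_dict={dropout_rate: 0.1}))  # training: ~10% of units dropped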
def __init__(self, params, train): """Initialize layers to build Transformer model. Args: params: hyperparameter object defining layer sizes, dropout values, etc. train: boolean indicating whether the model is in training mode. Used to determine if dropout layers should be added. """ self.train = train self.params = params self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights( params["vocab_size"], params["hidden_size"], method="matmul" if params["tpu"] else "gather") self.embedding_en_softmax_layer = embedding_layer.EmbeddingSharedWeights( params['vocab_size_en'], params["hidden_size"], method="matmul" if params["tpu"] else "gather") self.encoder_stack = EncoderStack(params, train) self.decoder_stack = DecoderStack(params, train)
def __init__(self, params, train, **kwargs):
    super(Transformer, self).__init__(**kwargs)
    self.train = train
    self.param = params

    with self.name_scope():
        self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
            params.vocab_size, params.hidden_size)
        self.encoder_stack = EncoderStack(params, train)
        self.decoder_stack = DecoderStack(params, train)
        # NOTE: Gluon's nn.Dropout expects the fraction of units to drop, so
        # passing 1 - layer_postprocess_dropout assumes params stores a keep
        # probability rather than a drop rate.
        self.dropout_input = nn.Dropout(
            1 - self.param.layer_postprocess_dropout)
        self.dropout_output = nn.Dropout(
            1 - self.param.layer_postprocess_dropout)
def init_embed(self, name_scope):
    with tf.variable_scope(name_scope, initializer=self._initializer,
                           reuse=tf.AUTO_REUSE):
        if self.params.shared_embedding_softmax_weights:
            print("sharing embedding!!!")
            self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
                self.params.vocab_size, self.params.hidden_size)
            self.encoder_embedding_layer = self.embedding_softmax_layer
            self.decoder_embedding_layer = self.embedding_softmax_layer
            self.decoder_softmax_layer = self.embedding_softmax_layer
        else:
            print("not sharing embedding!!!")
            self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
                self.params.source_vocab_size, self.params.hidden_size,
                "source_embedding")
            self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
                self.params.target_vocab_size, self.params.hidden_size,
                "target_embedding")
            self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
                self.params.target_vocab_size, self.params.hidden_size,
                'soft_max')
def __init__(self, params, train): """Initialize layers to build Transformer model. Args: params: hyperparameter object defining layer sizes, dropout values, etc. train: boolean indicating whether the model is in training mode. Used to determine if dropout layers should be added. """ self.train = train self.params = params # SSY 1 transformer/model/embedding_layer.py transform each input word into emb vector # SSY vocab_size 33708 and hidden_size 512 come from transformer/model/model_params.py # input to this emb layer is of [batch size , seq length] # output is [batch size , seq length , emb dim] self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights( params.vocab_size, params.hidden_size) # SSY 2 see below only matmul and Dense self.encoder_stack = EncoderStack(params, train) # SSY 3 see below self.decoder_stack = DecoderStack(params, train)
def __init__(self, params, is_train, mode=None):
    self.is_train = is_train
    self.params = params
    if mode is not None:
        self.mode = mode
    elif self.is_train:
        self.mode = ModeKeys.TRAIN
    else:
        self.mode = ModeKeys.PREDICT

    # with tf.device('/cpu:0'):
    #     self.dropout_pl = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_pl")
    #     self.params.layer_postprocess_dropout = self.dropout_pl
    #     self.params.attention_dropout = self.dropout_pl
    #     self.relu_dropout = self.dropout_pl

    if params.shared_embedding_softmax_weights:
        print("sharing embedding!!!")
        self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
            params.vocab_size, params.hidden_size)
        self.encoder_embedding_layer = self.embedding_softmax_layer
        self.decoder_embedding_layer = self.embedding_softmax_layer
        self.decoder_softmax_layer = self.embedding_softmax_layer
    else:
        print("not sharing embedding!!!")
        self.encoder_embedding_layer = embedding_layer.EmbeddingWeights(
            params.source_vocab_size, params.hidden_size, "source_embedding")
        self.decoder_embedding_layer = embedding_layer.EmbeddingWeights(
            params.target_vocab_size, params.hidden_size, "target_embedding")
        self.decoder_softmax_layer = embedding_layer.EmbeddingWeights(
            params.target_vocab_size, params.hidden_size, 'soft_max')

    # done
    self.encoder_stack = EncoderDecoder.EncoderStack(params, is_train, self.mode)
    self.decoder_stack = EncoderDecoder.DecoderStack(params, is_train, self.mode)
    self._initializer = tf.variance_scaling_initializer(
        self.params.initializer_gain, mode="fan_avg", distribution="uniform")