def conv_layer(inputs, kernel_shape, biases_shape, stride=1, padding='SAME',
               activation='relu', norm=None, dropout=False, dropout_rate=None,
               regularizer=None, initializer=None, dimensions=2,
               is_training=True):
    if initializer is not None:
        initializer_obj = get_initializer(initializer)
        weights = tf.get_variable('weights',
                                  initializer=initializer_obj(kernel_shape),
                                  regularizer=regularizer)
    else:
        if activation == 'relu':
            initializer = get_initializer('he_uniform')
        elif activation == 'sigmoid' or activation == 'tanh':
            initializer = get_initializer('glorot_uniform')
        # if initializer is None, TensorFlow seems to use
        # a glorot uniform initializer
        weights = tf.get_variable('weights', kernel_shape,
                                  regularizer=regularizer,
                                  initializer=initializer)
    logger.debug(' conv_weights: {0}'.format(weights))

    biases = tf.get_variable('biases', biases_shape,
                             initializer=tf.constant_initializer(0.01))
    logger.debug(' conv_biases: {0}'.format(biases))

    if dimensions == 1:
        return conv_1d(inputs, weights, biases, stride=stride,
                       padding=padding, activation=activation, norm=norm,
                       dropout=dropout, dropout_rate=dropout_rate,
                       is_training=is_training)
    elif dimensions == 2:
        return conv_2d(inputs, weights, biases, stride=stride,
                       padding=padding, activation=activation, norm=norm,
                       dropout=dropout, dropout_rate=dropout_rate,
                       is_training=is_training)
    else:
        raise ValueError(
            'Unsupported number of dimensions: {}'.format(dimensions))
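

# Usage sketch (illustrative only, not part of the module): how conv_layer is
# typically wired up in TF 1.x graph mode. The placeholder shape and the
# 3x3 kernel with 32 filters are assumptions chosen for this example.
def _example_conv_layer_usage():
    images = tf.compat.v1.placeholder(tf.float32, [None, 28, 28, 1])
    with tf.compat.v1.variable_scope('conv_1'):
        # kernel_shape is [height, width, in_channels, out_channels]
        return conv_layer(images,
                          kernel_shape=[3, 3, 1, 32],
                          biases_shape=[32],
                          stride=1,
                          padding='SAME',
                          activation='relu',
                          dimensions=2)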


def fc_layer(inputs, in_count, out_count, activation='relu', norm=None,
             is_training=True, weights=None, biases=None, dropout=False,
             dropout_rate=None, initializer=None, regularizer=None):
    if weights is None:
        if initializer is not None:
            initializer_obj = get_initializer(initializer)
            weights = tf.compat.v1.get_variable(
                'weights',
                initializer=initializer_obj([in_count, out_count]),
                regularizer=regularizer)
        else:
            if activation == 'relu':
                initializer = get_initializer('he_uniform')
            elif activation == 'sigmoid' or activation == 'tanh':
                initializer = get_initializer('glorot_uniform')
            # if initializer is None, TensorFlow seems to use
            # a glorot uniform initializer
            weights = tf.compat.v1.get_variable('weights',
                                                [in_count, out_count],
                                                regularizer=regularizer,
                                                initializer=initializer)
    logger.debug(' fc_weights: {}'.format(weights))

    if biases is None:
        biases = tf.compat.v1.get_variable(
            'biases', [out_count],
            initializer=tf.constant_initializer(0.01))
    logger.debug(' fc_biases: {}'.format(biases))

    hidden = tf.matmul(inputs, weights) + biases

    if norm is not None:
        if norm == 'batch':
            hidden = tf.contrib.layers.batch_norm(hidden,
                                                  is_training=is_training)
        elif norm == 'layer':
            hidden = tf.contrib.layers.layer_norm(hidden)

    if activation:
        hidden = getattr(tf.nn, activation)(hidden)

    if dropout and dropout_rate is not None:
        hidden = tf.layers.dropout(hidden, rate=dropout_rate,
                                   training=is_training)
        logger.debug(' fc_dropout: {}'.format(hidden))

    return hidden
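

# Usage sketch (illustrative only): a fully connected layer with dropout,
# assuming a rank-2 input with 256 features. in_count must match the last
# dimension of inputs, since the weights are created as [in_count, out_count].
def _example_fc_layer_usage():
    features = tf.compat.v1.placeholder(tf.float32, [None, 256])
    with tf.compat.v1.variable_scope('fc_1'):
        return fc_layer(features,
                        in_count=256,
                        out_count=128,
                        activation='relu',
                        dropout=True,
                        dropout_rate=0.5,
                        is_training=True)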


def embedding_matrix(vocab, embedding_size, representation='dense',
                     embeddings_trainable=True, pretrained_embeddings=None,
                     force_embedding_size=False, initializer=None,
                     regularizer=None):
    vocab_size = len(vocab)
    if representation == 'dense':
        if pretrained_embeddings is not None and pretrained_embeddings is not False:
            embeddings_matrix = load_pretrained_embeddings(
                pretrained_embeddings, vocab)
            if embeddings_matrix.shape[-1] != embedding_size:
                raise ValueError(
                    'The size of the pretrained embeddings is {}, '
                    'but the specified embedding_size is {}. '
                    'Please change the embedding_size accordingly.'.format(
                        embeddings_matrix.shape[-1], embedding_size))
            initializer_obj = tf.constant(embeddings_matrix, dtype=tf.float32)
        else:
            if vocab_size < embedding_size and not force_embedding_size:
                logger.info(
                    ' embedding_size ({}) is greater than vocab_size ({}). '
                    'Setting embedding size to be equal to vocab_size.'.format(
                        embedding_size, vocab_size))
                embedding_size = vocab_size

            if initializer is not None:
                initializer_obj_ref = get_initializer(initializer)
            else:
                initializer_obj_ref = get_initializer({
                    'type': 'uniform',
                    'minval': -1.0,
                    'maxval': 1.0
                })
            initializer_obj = initializer_obj_ref([vocab_size, embedding_size])

        embeddings = tf.compat.v1.get_variable('embeddings',
                                               initializer=initializer_obj,
                                               trainable=embeddings_trainable,
                                               regularizer=regularizer)
    elif representation == 'sparse':
        embedding_size = vocab_size
        embeddings = tf.compat.v1.get_variable(
            'embeddings',
            initializer=get_initializer('identity')(
                [vocab_size, embedding_size]),
            trainable=False)
    else:
        raise ValueError('Embedding representation {} not supported.'.format(
            representation))

    return embeddings, embedding_size
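

# Usage sketch (illustrative only): dense embeddings for a toy vocabulary.
# Note that the returned embedding size can differ from the requested one:
# with 4 tokens and embedding_size=8, the size is clamped down to 4 unless
# force_embedding_size=True.
def _example_embedding_matrix_usage():
    vocab = ['<PAD>', '<UNK>', 'hello', 'world']
    with tf.compat.v1.variable_scope('embed'):
        embeddings, effective_size = embedding_matrix(
            vocab, embedding_size=8, representation='dense')
    token_ids = tf.compat.v1.placeholder(tf.int32, [None, None])
    return tf.nn.embedding_lookup(embeddings, token_ids), effective_size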


def _get_predictions(self, hidden, hidden_size, regularizer=None):
    if not self.regularize:
        regularizer = None

    with tf.variable_scope('predictions_{}'.format(self.name)):
        initializer_obj = get_initializer(self.initializer)
        weights = tf.get_variable(
            'weights',
            initializer=initializer_obj([hidden_size, self.num_classes]),
            regularizer=regularizer)
        logging.debug(' class_weights: {0}'.format(weights))

        biases = tf.get_variable('biases', [self.num_classes])
        logging.debug(' class_biases: {0}'.format(biases))

        logits = tf.matmul(hidden, weights) + biases
        logging.debug(' logits: {0}'.format(logits))

        probabilities = tf.nn.softmax(
            logits, name='probabilities_{}'.format(self.name))
        predictions = tf.argmax(
            logits, -1, name='predictions_{}'.format(self.name))

        with tf.device('/cpu:0'):
            top_k_predictions = tf.nn.top_k(
                logits, k=self.top_k, sorted=True,
                name='top_k_predictions_{}'.format(self.name))

    return (predictions, top_k_predictions, probabilities, logits,
            weights, biases)


def vector_predictions(self, hidden, hidden_size, regularizer=None):
    with tf.variable_scope('predictions_{}'.format(self.name)):
        initializer_obj = get_initializer(self.initializer)
        weights = tf.compat.v1.get_variable(
            'weights',
            initializer=initializer_obj([hidden_size, self.vector_size]),
            regularizer=regularizer)
        logger.debug(' projection_weights: {0}'.format(weights))

        biases = tf.compat.v1.get_variable('biases', [self.vector_size])
        logger.debug(' projection_biases: {0}'.format(biases))

        logits = tf.matmul(hidden, weights) + biases
        logger.debug(' logits: {0}'.format(logits))

        if self.softmax:
            predictions = tf.nn.softmax(logits)
        else:
            predictions = logits

    return logits, self.vector_size, predictions


def _get_predictions(self, hidden, hidden_size, regularizer=None):
    if not self.regularize:
        regularizer = None

    with tf.variable_scope('predictions_{}'.format(self.name)):
        initializer_obj = get_initializer(self.initializer)
        weights = tf.get_variable(
            'weights',
            initializer=initializer_obj([hidden_size, 1]),
            regularizer=regularizer)
        logger.debug(' regression_weights: {0}'.format(weights))

        biases = tf.get_variable('biases', [1])
        logger.debug(' regression_biases: {0}'.format(biases))

        logits = tf.reshape(tf.matmul(hidden, weights) + biases, [-1])
        logger.debug(' logits: {0}'.format(logits))

        probabilities = tf.nn.sigmoid(
            logits, name='probabilities_{}'.format(self.name))
        predictions = tf.greater_equal(
            probabilities, self.threshold,
            name='predictions_{}'.format(self.name))

    return predictions, probabilities, logits


def _get_predictions(self, hidden, hidden_size, regularizer=None):
    if not self.regularize:
        regularizer = None

    with tf.variable_scope('predictions_{}'.format(self.name)):
        initializer_obj = get_initializer(self.initializer)
        weights = tf.get_variable(
            'weights',
            initializer=initializer_obj([hidden_size, 1]),
            regularizer=regularizer)
        logger.debug(' regression_weights: {0}'.format(weights))

        biases = tf.get_variable('biases', [1])
        logger.debug(' regression_biases: {0}'.format(biases))

        predictions = tf.reshape(tf.matmul(hidden, weights) + biases, [-1])
        logger.debug(' predictions: {0}'.format(predictions))

        if self.clip is not None:
            if isinstance(self.clip, (list, tuple)) and len(self.clip) == 2:
                predictions = tf.clip_by_value(predictions,
                                               self.clip[0],
                                               self.clip[1])
                logger.debug(
                    ' clipped_predictions: {0}'.format(predictions))
            else:
                raise ValueError(
                    'The clip parameter of {} is {}. '
                    'It must be a list or a tuple of length 2.'.format(
                        self.name, self.clip))

    return predictions


def _get_predictions(self, hidden, hidden_size, regularizer=None):
    if not self.regularize:
        regularizer = None

    with tf.variable_scope('predictions_{}'.format(self.name)):
        initializer_obj = get_initializer(self.initializer)
        weights = tf.get_variable(
            'weights',
            initializer=initializer_obj([hidden_size, 1]),
            regularizer=regularizer)
        logging.debug(' regression_weights: {0}'.format(weights))

        biases = tf.get_variable('biases', [1])
        logging.debug(' regression_biases: {0}'.format(biases))

        predictions = tf.reshape(tf.matmul(hidden, weights) + biases, [-1])
        logging.debug(' predictions: {0}'.format(predictions))

    return predictions


def __call__(self, output_feature, targets, hidden, hidden_size, regularizer,
             is_timeseries=False):
    logging.info(' hidden shape: {0}'.format(hidden.shape))
    if len(hidden.shape) != 3:
        raise ValueError(
            'Decoder inputs rank is {}, but should be 3 '
            '[batch x sequence x hidden] '
            'when using a tagger sequential decoder. '
            'Consider setting reduce_output to null / None if a sequential '
            'encoder / combiner is used.'.format(len(hidden.shape)))

    if is_timeseries:
        output_feature['num_classes'] = 1

    if not self.regularize:
        regularizer = None

    sequence_length = tf.shape(hidden)[1]

    if self.attention:
        hidden, hidden_size = feed_forward_memory_attention(
            hidden, hidden, hidden_size)
    targets_sequence_length = sequence_length_2D(targets)

    initializer_obj = get_initializer(self.initializer)
    class_weights = tf.get_variable(
        'weights',
        initializer=initializer_obj(
            [hidden_size, output_feature['num_classes']]),
        regularizer=regularizer)
    logging.debug(' weights: {0}'.format(class_weights))

    class_biases = tf.get_variable('biases',
                                   [output_feature['num_classes']])
    logging.debug(' biases: {0}'.format(class_biases))

    hidden_reshape = tf.reshape(hidden, [-1, hidden_size])
    logits_to_reshape = tf.matmul(hidden_reshape,
                                  class_weights) + class_biases
    logits = tf.reshape(
        logits_to_reshape,
        [-1, sequence_length, output_feature['num_classes']])
    logging.debug(' logits: {0}'.format(logits))

    if is_timeseries:
        probabilities_sequence = tf.zeros_like(logits)
        predictions_sequence = tf.reshape(logits, [-1, sequence_length])
    else:
        probabilities_sequence = tf.nn.softmax(
            logits,
            name='probabilities_{}'.format(output_feature['name']))
        predictions_sequence = tf.argmax(
            logits, -1,
            name='predictions_{}'.format(output_feature['name']),
            output_type=tf.int32)

    predictions_sequence_length = sequence_length_3D(hidden)

    return predictions_sequence, probabilities_sequence, \
           predictions_sequence_length, \
           probabilities_sequence, targets_sequence_length, \
           logits, hidden, class_weights, class_biases
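

# Standalone restatement (for clarity, with hypothetical variable names) of
# the per-timestep projection the tagger above performs: one dense layer is
# applied to every timestep by flattening the batch and time dimensions,
# doing a single matmul, and restoring the sequence shape.
def _example_per_timestep_projection(hidden, hidden_size, num_classes):
    sequence_length = tf.shape(hidden)[1]
    weights = tf.compat.v1.get_variable('tag_weights',
                                        [hidden_size, num_classes])
    biases = tf.compat.v1.get_variable('tag_biases', [num_classes])
    flat = tf.reshape(hidden, [-1, hidden_size])  # [batch * time, hidden]
    logits = tf.matmul(flat, weights) + biases    # [batch * time, classes]
    # back to [batch, time, classes]
    return tf.reshape(logits, [-1, sequence_length, num_classes])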


def recurrent_decoder(encoder_outputs, targets, max_sequence_length,
                      vocab_size, cell_type='rnn', state_size=256,
                      embedding_size=50, num_layers=1,
                      attention_mechanism=None, beam_width=1, projection=True,
                      tied_target_embeddings=True, embeddings=None,
                      initializer=None, regularizer=None, is_timeseries=False):
    with tf.variable_scope('rnn_decoder', reuse=tf.AUTO_REUSE,
                           regularizer=regularizer):

        # ================ Setup ================
        if beam_width > 1 and is_timeseries:
            raise ValueError('Invalid beam_width: {}'.format(beam_width))
        GO_SYMBOL = vocab_size
        END_SYMBOL = 0
        batch_size = tf.shape(encoder_outputs)[0]

        # ================ Projection ================
        # Project the encoder outputs to the size of the decoder state
        encoder_outputs_size = encoder_outputs.shape[-1]
        if projection and encoder_outputs_size != state_size:
            with tf.variable_scope('projection'):
                encoder_output_rank = len(encoder_outputs.shape)
                if encoder_output_rank > 2:
                    sequence_length = tf.shape(encoder_outputs)[1]
                    encoder_outputs = tf.reshape(encoder_outputs,
                                                 [-1, encoder_outputs_size])
                    encoder_outputs = fc_layer(encoder_outputs,
                                               encoder_outputs.shape[-1],
                                               state_size,
                                               activation=None,
                                               initializer=initializer)
                    encoder_outputs = tf.reshape(
                        encoder_outputs,
                        [-1, sequence_length, state_size])
                else:
                    encoder_outputs = fc_layer(encoder_outputs,
                                               encoder_outputs.shape[-1],
                                               state_size,
                                               activation=None,
                                               initializer=initializer)

        # ================ Targets sequence ================
        # Calculate the length of inputs and the batch size
        with tf.variable_scope('sequence'):
            targets_sequence_length = sequence_length_2D(targets)
            start_tokens = tf.tile([GO_SYMBOL], [batch_size])
            end_tokens = tf.tile([END_SYMBOL], [batch_size])
            if is_timeseries:
                start_tokens = tf.cast(start_tokens, tf.float32)
                end_tokens = tf.cast(end_tokens, tf.float32)
            targets_with_go_and_eos = tf.concat([
                tf.expand_dims(start_tokens, 1),
                targets,
                tf.expand_dims(end_tokens, 1)
            ], 1)
            logging.debug(
                ' targets_with_go: {0}'.format(targets_with_go_and_eos))
            targets_sequence_length_with_eos = targets_sequence_length + 1
            # the EOS symbol is 0 so it's not increasing the real length
            # of the sequence

        # ================ Embeddings ================
        if is_timeseries:
            targets_embedded = tf.expand_dims(targets_with_go_and_eos, -1)
            targets_embeddings = None
        else:
            with tf.variable_scope('embedding'):
                if embeddings is not None:
                    embedding_size = embeddings.shape.as_list()[-1]
                    if tied_target_embeddings:
                        state_size = embedding_size
                elif tied_target_embeddings:
                    embedding_size = state_size

                if embeddings is not None:
                    embedding_go = tf.get_variable(
                        'embedding_GO',
                        initializer=tf.random_uniform([1, embedding_size],
                                                      -1.0, 1.0))
                    targets_embeddings = tf.concat([embeddings, embedding_go],
                                                   axis=0)
                else:
                    initializer_obj = get_initializer(initializer)
                    targets_embeddings = tf.get_variable(
                        'embeddings',
                        initializer=initializer_obj(
                            [vocab_size + 1, embedding_size]),
                        regularizer=regularizer)
                logging.debug(
                    ' targets_embeddings: {0}'.format(targets_embeddings))

                targets_embedded = tf.nn.embedding_lookup(
                    targets_embeddings,
                    targets_with_go_and_eos,
                    name='decoder_input_embeddings')
        logging.debug(' targets_embedded: {0}'.format(targets_embedded))

        # ================ Class prediction ================
        if tied_target_embeddings:
            class_weights = tf.transpose(targets_embeddings)
        else:
            initializer_obj = get_initializer(initializer)
            class_weights = tf.get_variable(
                'class_weights',
                initializer=initializer_obj([state_size, vocab_size + 1]),
                regularizer=regularizer)
        logging.debug(' class_weights: {0}'.format(class_weights))
        class_biases = tf.get_variable('class_biases', [vocab_size + 1])
        logging.debug(' class_biases: {0}'.format(class_biases))
        projection_layer = Projection(class_weights, class_biases)

        # ================ RNN ================
        initial_state = encoder_outputs
        with tf.variable_scope('rnn_cells') as vs:
            # Cell
            cell_fun = get_cell_fun(cell_type)

            if num_layers == 1:
                cell = cell_fun(state_size)
                if cell_type.startswith('lstm'):
                    initial_state = LSTMStateTuple(c=initial_state,
                                                   h=initial_state)
            elif num_layers > 1:
                cell = MultiRNNCell(
                    [cell_fun(state_size) for _ in range(num_layers)],
                    state_is_tuple=True)
                if cell_type.startswith('lstm'):
                    initial_state = LSTMStateTuple(c=initial_state,
                                                   h=initial_state)
                initial_state = tuple([initial_state] * num_layers)
            else:
                raise ValueError(
                    'num_layers in recurrent decoder: {}. '
                    'The number of layers in a recurrent decoder '
                    'cannot be <= 0.'.format(num_layers))

            # Attention
            if attention_mechanism is not None:
                if attention_mechanism == 'bahdanau':
                    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                        num_units=state_size,
                        memory=encoder_outputs,
                        memory_sequence_length=sequence_length_3D(
                            encoder_outputs))
                elif attention_mechanism == 'luong':
                    attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                        num_units=state_size,
                        memory=encoder_outputs,
                        memory_sequence_length=sequence_length_3D(
                            encoder_outputs))
                else:
                    raise ValueError(
                        'Attention mechanism {} not supported'.format(
                            attention_mechanism))
                cell = tf.contrib.seq2seq.AttentionWrapper(
                    cell,
                    attention_mechanism,
                    attention_layer_size=state_size)
                initial_state = cell.zero_state(dtype=tf.float32,
                                                batch_size=batch_size)
                initial_state = initial_state.clone(
                    cell_state=reduce_sequence(encoder_outputs, 'last'))

            for v in tf.global_variables():
                if v.name.startswith(vs.name):
                    logging.debug(' {}: {}'.format(v.name, v))

        # ================ Decoding ================
        def decode(initial_state, cell, helper, beam_width=1,
                   projection_layer=None):
            # The decoder itself
            if beam_width > 1:
                # Tile inputs for beam search decoder
                beam_initial_state = tf.contrib.seq2seq.tile_batch(
                    initial_state, beam_width)
                decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell=cell,
                    embedding=targets_embeddings,
                    start_tokens=start_tokens,
                    end_token=END_SYMBOL,
                    initial_state=beam_initial_state,
                    beam_width=beam_width,
                    output_layer=projection_layer)
            else:
                decoder = BasicDecoder(cell=cell,
                                       helper=helper,
                                       initial_state=initial_state,
                                       output_layer=projection_layer)

            # The decoding operation
            outputs = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder,
                output_time_major=False,
                impute_finished=False if beam_width > 1 else True,
                maximum_iterations=max_sequence_length)

            return outputs

        # ================ Decoding helpers ================
        if is_timeseries:
            train_helper = TimeseriesTrainingHelper(
                inputs=targets_embedded,
                sequence_length=targets_sequence_length_with_eos)
            final_outputs_pred, final_state_pred, final_sequence_lengths_pred = decode(
                initial_state,
                cell,
                train_helper,
                projection_layer=projection_layer)
            eval_logits = final_outputs_pred.rnn_output
            train_logits = final_outputs_pred.projection_input
            predictions_sequence = tf.reshape(eval_logits, [batch_size, -1])
            # scores are not meaningful for timeseries predictions; use zeros
            # (mirroring the tagger's zeros_like convention) so the value is
            # defined for the final debug logging and the return statement
            predictions_sequence_scores = tf.zeros_like(predictions_sequence)
            predictions_sequence_length_with_eos = final_sequence_lengths_pred
        else:
            train_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=targets_embedded,
                sequence_length=targets_sequence_length_with_eos)
            final_outputs_train, final_state_train, final_sequence_lengths_train = decode(
                initial_state,
                cell,
                train_helper,
                projection_layer=projection_layer)
            eval_logits = final_outputs_train.rnn_output
            train_logits = final_outputs_train.projection_input
            # train_predictions = final_outputs_train.sample_id

            pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding=targets_embeddings,
                start_tokens=start_tokens,
                end_token=END_SYMBOL)
            final_outputs_pred, final_state_pred, final_sequence_lengths_pred = decode(
                initial_state,
                cell,
                pred_helper,
                beam_width,
                projection_layer=projection_layer)

            if beam_width > 1:
                predictions_sequence = final_outputs_pred.beam_search_decoder_output.predicted_ids[:, :, 0]
                # final_outputs_pred.predicted_ids[:, :, 0] would work too,
                # but it contains -1s for padding
                predictions_sequence_scores = final_outputs_pred.beam_search_decoder_output.scores[:, :, 0]
                predictions_sequence_length_with_eos = final_sequence_lengths_pred[:, 0]
            else:
                predictions_sequence = final_outputs_pred.sample_id
                predictions_sequence_scores = final_outputs_pred.rnn_output
                predictions_sequence_length_with_eos = final_sequence_lengths_pred

    logging.debug(' train_logits: {0}'.format(train_logits))
    logging.debug(' eval_logits: {0}'.format(eval_logits))
    logging.debug(' predictions_sequence: {0}'.format(predictions_sequence))
    logging.debug(' predictions_sequence_scores: {0}'.format(
        predictions_sequence_scores))

    return predictions_sequence, predictions_sequence_scores, \
           predictions_sequence_length_with_eos, \
           targets_sequence_length_with_eos, eval_logits, train_logits, \
           class_weights, class_biases
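

# Usage sketch (illustrative only): a greedy seq2seq decoder over 256-wide
# encoder states. The shapes, vocabulary size, and attention choice are
# assumptions for this example; beam_width > 1 would switch to beam search.
def _example_recurrent_decoder_usage():
    encoder_outputs = tf.compat.v1.placeholder(tf.float32, [None, 20, 256])
    targets = tf.compat.v1.placeholder(tf.int32, [None, 15])
    return recurrent_decoder(encoder_outputs,
                             targets,
                             max_sequence_length=15,
                             vocab_size=1000,
                             cell_type='rnn',
                             state_size=256,
                             attention_mechanism='bahdanau',
                             beam_width=1)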