def double_predict_classification(self, response, confound_input, x_input):
  """Predicts a categorical outcome twice.

  First, predict the outcome from the confound. Second, concat the text input
  and the residuals from step (1), and use the resulting vector to generate
  better predictions of the outcome.

  Args:
    response: dict, an element of config.variable_spec. This is the response
      variable we are predicting.
    confound_input: tensor [batch, num confounds], the confounds, all stacked
      up into vectors.
    x_input: tensor [batch, hidden], an encoded version of the text input.

  Returns:
    confound_preds: tensor [batch, num classes], the predictions from the
      confound.
    confound_loss: tensor [batch], the cross-entropy loss between confound
      predictions and targets.
    final_preds: tensor [batch, num classes], the final predictions from
      confound residuals + text input.
    final_loss: tensor [batch, num classes], the cross-entropy loss between
      the final preds and targets.
  """
  with tf.variable_scope('control_pred'):
    confound_preds, confound_loss = tf_utils.classifier(
        inputs=confound_input,
        labels=self.iter[response['name']],
        layers=self.params['classification_layers_1'],
        num_classes=self.dataset.num_levels(response['name']),
        hidden=self.params['classification_hidden_1'],
        dropout=self.dropout,
        sparse_labels=True)

  if self.params['ablate_confounds']:
    confound_preds = tf.zeros_like(confound_preds)

  final_input = tf.concat([x_input, confound_preds], axis=1)

  with tf.variable_scope('final_pred'):
    final_preds, final_loss = tf_utils.classifier(
        bias=False,
        inputs=final_input,
        labels=self.iter[response['name']],
        layers=self.params['classification_layers_2'],
        num_classes=self.dataset.num_levels(response['name']),
        hidden=self.params['classification_hidden_2'],
        dropout=self.dropout,
        sparse_labels=True)

  return confound_preds, confound_loss, final_preds, final_loss
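# A hedged usage sketch (not from the source): how double_predict_classification
# might be invoked for one categorical response variable. The names `model`,
# `response_var`, `confound_vector`, and `encoded_text` are hypothetical
# stand-ins for whatever the surrounding graph-construction code provides.
confound_preds, confound_loss, final_preds, final_loss = (
    model.double_predict_classification(
        response=response_var,           # an element of config.variable_spec
        confound_input=confound_vector,  # [batch, num confounds]
        x_input=encoded_text))           # [batch, hidden]
# Assumption: the two stages are trained jointly, so their losses are summed.
total_loss = confound_loss + final_loss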
def __init__(self, config, params, dataset, iterators):
  """Constructs the graph and training/summary ops."""
  self.iter = iterators
  self.config = config
  self.params = params
  self.dataset = dataset
  self.filter_sizes = [int(x) for x in self.params['filter_size'].split(',')]

  tf_graph = tf.get_default_graph()

  self.learning_rate = tf.constant(params['learning_rate'])
  self.dropout = tf.placeholder(tf.float32, name='dropout')
  self.global_step = tf.Variable(0, trainable=False)

  source_name = dataset.input_varname()
  self.input_text, self.input_ids, self.input_lens = self.iter[source_name]

  # Use a cnn to encode the source.
  conv, src_encoded = self.cnn_encoder()

  # Now build all the prediction heads (one per non-input variable).
  self.step_output = defaultdict(dict)
  for variable in self.config.data_spec[1:]:
    if variable['skip']:
      continue

    with tf.variable_scope(variable['name']):
      if variable['control']:
        prediction_input = self.reverse(src_encoded)
      else:
        prediction_input = tf.identity(src_encoded)

      # Each prediction head is a single fully-connected layer without
      # activation functions or bias. This makes it a simple linear projection
      # into the output space.
      if variable['type'] == utils.CATEGORICAL:
        preds, mean_loss = tf_utils.classifier(
            inputs=prediction_input,
            labels=self.iter[variable['name']],
            layers=1,
            num_classes=self.dataset.num_levels(variable['name']),
            dropout=self.dropout,
            sparse_labels=True,
            bias=False)
      elif variable['type'] == utils.CONTINUOUS:
        preds, mean_loss = tf_utils.regressor(
            inputs=prediction_input,
            labels=self.iter[variable['name']],
            layers=1,
            dropout=self.dropout,
            bias=False)
      else:
        raise Exception('ERROR: unknown type %s for variable %s' %
                        (variable['type'], variable['name']))

      prediction_head_weights = tf_graph.get_tensor_by_name(
          '%s/prediction_head/weights:0' % variable['name'])

      mean_loss = variable['weight'] * mean_loss

      # The user is allowed to specify a "rho" term, which is a dampening
      # factor on the adversarial signal. This helps the model achieve a
      # balance between the losses of the prediction head and encoder.
      if variable['control']:
        mean_loss = self.params['rho'] * mean_loss

      tf.summary.scalar('%s_loss' % variable['name'], mean_loss)

      # Save everything you need for inference: the input, loss, the
      # convolutional feature maps, the output projection weights, and the
      # model's predictions.
      self.step_output[variable['name']]['input'] = self.iter[
          variable['name']]
      self.step_output[variable['name']]['loss'] = mean_loss
      self.step_output[variable['name']]['conv'] = conv
      self.step_output[variable['name']]['weights'] = prediction_head_weights
      self.step_output[variable['name']]['pred'] = preds

  # Optimization and summary writing.
  self.loss = tf.reduce_sum([x['loss'] for x in self.step_output.values()])
  tf.summary.scalar('global_loss', self.loss)
  self.train_step = tf.contrib.layers.optimize_loss(
      loss=self.loss,
      global_step=self.global_step,
      learning_rate=self.learning_rate,
      clip_gradients=self.params['gradient_clip'],
      optimizer='Adam',
      summaries=['gradient_norm'])

  # Savers, summaries, etc.
  self.trainable_variable_names = [v.name for v in tf.trainable_variables()]
  self.summaries = tf.summary.merge_all()
  self.saver = tf.train.Saver(tf.global_variables())
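# `self.reverse` (used above whenever a variable is marked as a control /
# confound) is not defined in this snippet. Below is a minimal sketch, assuming
# it is the standard gradient-reversal trick: identity on the forward pass,
# negated gradient on the backward pass, which pushes the encoder to discard
# information about the confound. This illustrates the technique and is not
# necessarily the author's exact code.
def reverse(x):
  """Gradient-reversal sketch: returns x, but backpropagates -1 * gradient."""
  # Forward value: -x + 2x == x. Only the -x term receives gradient.
  return -x + tf.stop_gradient(2 * x)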
def __init__(self, config, params, dataset, iterators):
  self.iter = iterators
  self.config = config
  self.params = params
  self.dataset = dataset

  self.learning_rate = tf.constant(params['learning_rate'])
  self.global_step = tf.Variable(0, trainable=False)

  source_name = dataset.input_varname()
  self.input_text, input_ids, input_lens = self.iter[source_name]

  # Transform the input text into a big bag-of-words vector.
  with tf.variable_scope('input'):
    input_vector = tf.map_fn(
        lambda seq: self._to_dense_vector(seq, self.dataset.vocab_size),
        self.iter[dataset.input_varname()][1])
    input_encoded = tf_utils.fc_tube(
        inputs=tf.cast(input_vector, tf.float32),
        num_outputs=self.params['encoder_layers'],
        layers=self.params['encoder_layers'])

  # TODO: this is hacky -- pull the first encoder layer's weights and biases
  # back out of the graph by name.
  cur_graph = tf.get_default_graph()
  self.feature_weights = cur_graph.get_tensor_by_name(
      'input/layer_0/weights:0')
  self.feature_intercept = cur_graph.get_tensor_by_name(
      'input/layer_0/biases:0')

  # Now build all the prediction heads.
  self.step_output = defaultdict(dict)
  for variable in self.config.data_spec[1:]:
    if variable['skip']:
      continue

    with tf.variable_scope(variable['name'] + '_prediction_head'):
      if variable['control']:
        prediction_input = self.reverse(input_encoded)
      else:
        prediction_input = tf.identity(input_encoded)

      if variable['type'] == 'categorical':
        preds, mean_loss = tf_utils.classifier(
            inputs=prediction_input,
            labels=self.iter[variable['name']],
            layers=self.params['classifier_layers'],
            num_classes=self.dataset.num_classes(variable['name']),
            hidden=self.params['classifier_units'],
            dropout=0.0,
            sparse_labels=True)
      elif variable['type'] == 'continuous':
        preds, mean_loss = tf_utils.regressor(
            inputs=prediction_input,
            labels=self.iter[variable['name']],
            layers=self.params['regressor_layers'],
            hidden=self.params['regressor_units'],
            dropout=0.0)
      else:
        raise Exception('ERROR: unknown type %s for variable %s' %
                        (variable['type'], variable['name']))

      mean_loss = tf.scalar_mul(variable['weight'], mean_loss)
      tf.summary.scalar('%s_loss' % variable['name'], mean_loss)

      self.step_output[variable['name']]['loss'] = mean_loss
      self.step_output[variable['name']]['pred'] = preds

  # Regularize if need be.
  if self.params['lambda'] > 0:
    if self.params['regularizor'] == 'l2':
      reg = tf.contrib.layers.l2_regularizer(self.params['lambda'])
    else:
      reg = tf.contrib.layers.l1_regularizer(self.params['lambda'])
    reg_weights = (
        tf.trainable_variables()
        if self.params['reg_type'] == 'all' else [self.feature_weights])
    reg_term = tf.contrib.layers.apply_regularization(reg, reg_weights)
  else:
    reg_term = 0
  tf.summary.scalar('regularization_loss', reg_term)

  # Now optimize.
  self.loss = tf.reduce_sum([x['loss'] for x in self.step_output.values()])
  self.loss += reg_term
  tf.summary.scalar('global_loss', self.loss)
  self.train_step = tf.contrib.layers.optimize_loss(
      loss=self.loss,
      global_step=self.global_step,
      learning_rate=self.learning_rate,
      clip_gradients=self.params['gradient_clip'],
      optimizer='Adam',
      summaries=['gradient_norm'])

  # Savers, summaries, etc.
  self.trainable_variable_names = [v.name for v in tf.trainable_variables()]
  self.summaries = tf.summary.merge_all()
  self.saver = tf.train.Saver(tf.global_variables())
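# `self._to_dense_vector` is not shown in this snippet. A minimal sketch of
# what it presumably computes, written here as a standalone function: scatter
# a 1-D tensor of token ids into a [vocab_size] count vector so the fc_tube
# encoder sees a bag-of-words representation. Handling of a reserved padding
# id is omitted and would be an additional assumption.
def _to_dense_vector_sketch(seq, vocab_size):
  """Turns a 1-D int tensor of token ids into a [vocab_size] count vector."""
  return tf.bincount(seq, minlength=vocab_size, maxlength=vocab_size)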
def __init__(self, config, params, dataset, iterators):
  self.iter = iterators
  self.config = config
  self.params = params
  self.dataset = dataset

  self.learning_rate = tf.constant(params['learning_rate'])
  self.dropout = tf.placeholder(tf.float32, name='dropout')
  self.global_step = tf.Variable(0, trainable=False)

  source_name = dataset.input_varname()
  self.input_text, input_ids, input_lens = self.iter[source_name]

  # Use an RNN with attention over its outputs to encode the source.
  with tf.variable_scope('encoder'):
    rnn_outputs, source_embeddings = tf_utils.rnn_encode(
        source=input_ids,
        source_len=input_lens,
        vocab_size=self.dataset.vocab_size,
        embedding_size=self.params['embedding_size'],
        layers=self.params['encoder_layers'],
        units=self.params['encoder_units'],
        dropout=self.dropout,
        glove_matrix=tf_utils.get_glove(dataset)
        if self.params['use_glove'] else None)

  with tf.variable_scope('attention'):
    self.attn_scores, attn_context = tf_utils.attention(
        states=rnn_outputs,
        seq_lens=input_lens,
        layers=self.params['attn_layers'],
        units=self.params['attn_units'],
        dropout=self.dropout)

  # Now build all the prediction heads.
  self.step_output = defaultdict(dict)
  for variable in self.config.data_spec[1:]:
    if variable['skip']:
      continue

    with tf.variable_scope(variable['name'] + '_prediction_head'):
      if variable['control']:
        prediction_input = self.reverse(attn_context)
      else:
        prediction_input = tf.identity(attn_context)

      if variable['type'] == 'categorical':
        preds, mean_loss = tf_utils.classifier(
            inputs=prediction_input,
            labels=self.iter[variable['name']],
            layers=self.params['classifier_layers'],
            num_classes=self.dataset.num_classes(variable['name']),
            hidden=self.params['classifier_units'],
            dropout=self.dropout,
            sparse_labels=True)
      elif variable['type'] == 'continuous':
        preds, mean_loss = tf_utils.regressor(
            inputs=prediction_input,
            labels=self.iter[variable['name']],
            layers=self.params['regressor_layers'],
            hidden=self.params['regressor_units'],
            dropout=self.dropout)
      else:
        raise Exception('ERROR: unknown type %s for variable %s' %
                        (variable['type'], variable['name']))

      mean_loss = tf.scalar_mul(variable['weight'], mean_loss)
      tf.summary.scalar('%s_loss' % variable['name'], mean_loss)

      self.step_output[variable['name']]['loss'] = mean_loss
      self.step_output[variable['name']]['pred'] = preds

  # Now optimize.
  self.loss = tf.reduce_sum([x['loss'] for x in self.step_output.values()])
  tf.summary.scalar('global_loss', self.loss)
  self.train_step = tf.contrib.layers.optimize_loss(
      loss=self.loss,
      global_step=self.global_step,
      learning_rate=self.learning_rate,
      clip_gradients=self.params['gradient_clip'],
      optimizer='Adam',
      summaries=['gradient_norm'])

  # Savers, summaries, etc.
  self.trainable_variable_names = [v.name for v in tf.trainable_variables()]
  self.summaries = tf.summary.merge_all()
  self.saver = tf.train.Saver(tf.global_variables())
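# `tf_utils.attention` is not shown. A minimal sketch of the additive attention
# pooling it presumably performs (the layers/units/dropout arguments are
# collapsed into a single hidden layer here): score each RNN state with a small
# MLP, mask out padded positions, softmax, and return the scores plus the
# weighted sum of states as a fixed-size context vector. This is an assumed
# implementation, not the author's exact code.
def attention_sketch(states, seq_lens, units):
  """states: [batch, time, hidden] -> (weights [batch, time], context [batch, hidden])."""
  hidden = tf.layers.dense(states, units, activation=tf.nn.tanh)
  scores = tf.squeeze(tf.layers.dense(hidden, 1), axis=-1)
  mask = tf.sequence_mask(seq_lens, maxlen=tf.shape(states)[1], dtype=tf.float32)
  scores += (1.0 - mask) * -1e9  # drive padded positions to ~zero attention weight
  weights = tf.nn.softmax(scores)
  context = tf.reduce_sum(states * tf.expand_dims(weights, -1), axis=1)
  return weights, context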
def __init__(self, config, params, dataset, iterators):
  """Constructs the graph and training/summary ops."""
  self.iter = iterators
  self.config = config
  self.params = params
  self.dataset = dataset

  self.learning_rate = tf.constant(params['learning_rate'])
  self.dropout = tf.placeholder(tf.float32, name='dropout')
  self.global_step = tf.Variable(0, trainable=False)

  source_name = dataset.input_varname()
  self.input_text, _, _ = self.iter[source_name]

  # Transform the input text into a big bag of words vector.
  with tf.variable_scope('input'):
    input_vector = tf.map_fn(
        lambda seq: tf_utils.sparse_to_dense_vector(  # pylint: disable=g-long-lambda
            seq, self.dataset.vocab_size),
        self.iter[dataset.input_varname()][1])
    input_encoded = tf_utils.fc_tube(
        inputs=tf.cast(input_vector, tf.float32),
        num_outputs=self.params['encoder_layers'],
        layers=self.params['encoder_layers'])

  # Pull out the vector of weights which dots the input vector.
  # TODO(rpryzant) -- there must be a more elegant way to do this in TF?
  cur_graph = tf.get_default_graph()
  self.feature_weights = cur_graph.get_tensor_by_name(
      'input/layer_0/weights:0')
  self.feature_intercept = cur_graph.get_tensor_by_name(
      'input/layer_0/biases:0')

  # Now build all the prediction heads, one for each non-input variable.
  self.step_output = defaultdict(dict)
  for variable in self.config.data_spec[1:]:
    if variable['skip']:
      continue

    with tf.variable_scope(variable['name'] + '_prediction_head'):
      if variable['control']:
        prediction_input = self.reverse(input_encoded)
      else:
        prediction_input = tf.identity(input_encoded)

      if variable['type'] == utils.CATEGORICAL:
        preds, mean_loss = tf_utils.classifier(
            inputs=prediction_input,
            labels=self.iter[variable['name']],
            layers=self.params['classifier_layers'],
            num_classes=self.dataset.num_levels(variable['name']),
            hidden=self.params['classifier_units'],
            dropout=self.dropout,
            sparse_labels=True)
      elif variable['type'] == utils.CONTINUOUS:
        preds, mean_loss = tf_utils.regressor(
            inputs=prediction_input,
            labels=self.iter[variable['name']],
            layers=self.params['regressor_layers'],
            hidden=self.params['regressor_units'],
            dropout=self.dropout)
      else:
        raise Exception('ERROR: unknown type %s for variable %s' %
                        (variable['type'], variable['name']))

      mean_loss = tf.scalar_mul(variable['weight'], mean_loss)
      tf.summary.scalar('%s_loss' % variable['name'], mean_loss)

      self.step_output[variable['name']]['input'] = self.iter[
          variable['name']]
      self.step_output[variable['name']]['loss'] = mean_loss
      self.step_output[variable['name']]['pred'] = preds

  # Regularize the parameters.
  if self.params['lambda'] > 0:
    if self.params['regularizer'] == 'l2':
      regularizer = tf.contrib.layers.l2_regularizer(self.params['lambda'])
    else:
      regularizer = tf.contrib.layers.l1_regularizer(self.params['lambda'])
    if self.params['reg_type'] == 'all':
      regularization_weights = tf.trainable_variables()
    else:
      regularization_weights = [self.feature_weights]
    regularization_term = tf.contrib.layers.apply_regularization(
        regularizer, regularization_weights)
  else:
    regularization_term = 0
  tf.summary.scalar('regularization_loss', regularization_term)

  # Optimization ops.
  self.loss = tf.reduce_sum([x['loss'] for x in self.step_output.values()])
  self.loss += regularization_term
  tf.summary.scalar('global_loss', self.loss)
  self.train_step = tf.contrib.layers.optimize_loss(
      loss=self.loss,
      global_step=self.global_step,
      learning_rate=self.learning_rate,
      clip_gradients=self.params['gradient_clip'],
      optimizer='Adam',
      summaries=['gradient_norm'])

  # Savers, summaries, etc.
  self.trainable_variable_names = [v.name for v in tf.trainable_variables()]
  self.summaries = tf.summary.merge_all()
  self.saver = tf.train.Saver(tf.global_variables())
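# A hedged sketch (not from the source) of how the exposed feature weights might
# be inspected after training: fetch `feature_weights` in a session and rank
# vocabulary entries by the magnitude of their bag-of-words weights. The names
# `sess`, `model`, and `vocab` (a list mapping token id -> token string), and
# the assumed [vocab_size, num_units] weight shape, are all hypothetical.
import numpy as np

weights = sess.run(model.feature_weights)   # assumed shape [vocab_size, num_units]
importance = np.abs(weights).sum(axis=1)    # one aggregate score per vocabulary entry
top_tokens = [vocab[i] for i in np.argsort(-importance)[:25]]
print(top_tokens)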