def test_simple_batch_all_triplet_loss():
    """Check batch-all triplet mining on a degenerate single-class batch.

    All samples are drawn with ``num_classes = 1``, so they share one label.
    The loss and the fraction of positive triplets are both expected to be 0,
    for squared and non-squared distances alike.
    """
    n_samples, n_features = 10, 6
    n_classes = 1
    margin = 0.2
    expected_loss = 0.0

    embeddings = np.random.rand(n_samples, n_features).astype(np.float32)
    labels = np.random.randint(0, n_classes, size=n_samples).astype(np.float32)

    for squared in (True, False):
        # Build the loss tensors, then evaluate them in a fresh session.
        loss_tf, fraction = batch_all_triplet_loss(
            labels, embeddings, margin, squared=squared)
        with tf.Session() as sess:
            loss_tf_val, fraction_val = sess.run([loss_tf, fraction])

        assert np.allclose(expected_loss, loss_tf_val)
        assert np.allclose(fraction_val, 0.0)
def model_fn(features, labels, params, mode):
    """Build the triplet-loss model for ``tf.estimator``.

    Steps: reshape the input images, compute embeddings, define the triplet
    loss, define the Adam training op and return the spec for ``mode``.

    Args:
        features: batch of images; reshaped to
            ``[-1, image_size, image_size, image_channel]``.
        labels: labels handed to the selected triplet-loss function.
        params: dict; the keys ``image_size``, ``image_channel``,
            ``triplet_strategy``, ``margin`` and ``learning_rate`` are read
            here, and the whole dict is forwarded to ``modify_structure``.
        mode: one of ``tf.estimator.ModeKeys.{TRAIN, EVAL, PREDICT}``.

    Returns:
        model_spec: ``tf.estimator.EstimatorSpec`` for TRAIN, EVAL or PREDICT
        (``None`` is returned implicitly for any other mode).

    Raises:
        ValueError: if ``params["triplet_strategy"]`` is not one of
            ``batch_all``, ``batch_hard`` or ``batch_semi_hard``.
    """
    image_shape = [params["image_size"], params["image_size"], params["image_channel"]]
    images = tf.reshape(features, [-1] + image_shape)
    if not images.shape[1:] == image_shape:
        tf.logging.error("Image shape do not equal to the config setting")

    embeddings = modify_structure(images, params)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {"embeddings": embeddings}
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    strategy = params["triplet_strategy"]
    with tf.name_scope("Triplet_Loss"):
        if strategy == "batch_all":
            triplet_loss, fraction_positive_triplets = batch_all_triplet_loss(
                labels, embeddings, params["margin"])
        elif strategy == "batch_hard":
            triplet_loss = batch_hard_triplet_loss(labels, embeddings, params["margin"])
        elif strategy == "batch_semi_hard":
            triplet_loss = batch_semi_hard_triplet_loss(labels, embeddings, params["margin"])
        else:
            raise ValueError("Triplet strategy not recognized: {}".format(strategy))

    # Only the batch_all strategy produces `fraction_positive_triplets`, so
    # the summary and the evaluation metric are registered for that strategy
    # alone; other strategies evaluate with an empty metric dict instead of
    # failing on an undefined name.
    eval_metric_ops = {}
    if strategy == "batch_all":
        tf.summary.scalar("fraction_positive_triplets", fraction_positive_triplets)
        with tf.variable_scope("metrics"):
            # tf.metrics.mean yields the (value, update_op) pair the estimator expects.
            eval_metric_ops["fraction_positive_triplets"] = tf.metrics.mean(
                fraction_positive_triplets)

    tf.summary.image("train_image", images, max_outputs=10)

    # Define training step that minimizes the loss with the Adam optimizer
    optimizer = tf.train.AdamOptimizer(params["learning_rate"])
    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss=triplet_loss, global_step=global_step)

    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode, loss=triplet_loss, train_op=train_op)
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=triplet_loss, eval_metric_ops=eval_metric_ops)
def test_batch_all_triplet_loss():
    """Compare the batch-all triplet loss with a brute-force NumPy reference.

    The reference enumerates every (anchor, positive, negative) index triple,
    keeps those with distinct indices, matching anchor/positive labels and
    differing anchor/negative labels, and averages the hinge loss over the
    triplets whose loss is strictly positive.
    """
    n_samples, n_features, n_classes = 10, 6, 5
    margin = 0.2

    embeddings = np.random.rand(n_samples, n_features).astype(np.float32)
    labels = np.random.randint(0, n_classes, size=n_samples).astype(np.float32)

    for squared in (True, False):
        distances = pairwise_distance_np(embeddings, squared=squared)

        loss_np = 0.0
        num_positives = 0.0
        num_valid = 0.0
        for anchor in range(n_samples):
            for pos in range(n_samples):
                same_class = labels[anchor] == labels[pos]
                if anchor == pos or not same_class:
                    continue
                for neg in range(n_samples):
                    other_class = labels[anchor] != labels[neg]
                    if neg in (anchor, pos) or not other_class:
                        continue
                    num_valid += 1.0
                    hinge = np.maximum(
                        0.0, distances[anchor][pos] - distances[anchor][neg] + margin)
                    loss_np += hinge
                    num_positives += (hinge > 0)
        loss_np /= num_positives

        # Compute the loss in TF.
        result = batch_all_triplet_loss(labels, embeddings, margin, squared=squared)
        loss_tf = result['triplet_loss']
        fraction = result['fraction_positive_triplets']
        with tf.Session() as sess:
            loss_tf_val, fraction_val = sess.run([loss_tf, fraction])

        assert np.allclose(loss_np, loss_tf_val)
        assert np.allclose(num_positives / num_valid, fraction_val)
def test_simple_batch_all_triplet_loss():
    """Check the PyTorch batch-all triplet loss on a single-class batch.

    With ``num_classes = 1`` every sample carries the same label, and the
    loss is expected to be 0 for both squared and non-squared distances.
    """
    n_samples, n_features = 10, 6
    n_classes = 1
    margin = 0.2
    expected_loss = 0.0

    embeddings_np = np.random.rand(n_samples, n_features)
    labels_np = np.random.randint(0, n_classes, size=n_samples)
    labels = torch.as_tensor(labels_np)
    embeddings = torch.as_tensor(embeddings_np)

    for squared in (True, False):
        loss_pt_val = batch_all_triplet_loss(labels, embeddings, margin, squared=squared)
        assert np.allclose(expected_loss, loss_pt_val)
def test_batch_all_triplet_loss():
    """Compare the PyTorch batch-all triplet loss with a NumPy reference.

    The reference sums the hinge loss over every index triple with distinct
    indices, equal anchor/positive labels and different anchor/negative
    labels, then divides by the number of triplets with positive loss.
    """
    n_samples, n_features, n_classes = 10, 6, 5
    margin = 0.2

    embeddings = np.random.rand(n_samples, n_features)
    labels = np.random.randint(0, n_classes, size=n_samples)

    for squared in (True, False):
        distances = pairwise_distance_np(embeddings, squared=squared)

        loss_np = 0.0
        num_positives = 0
        for anchor in range(n_samples):
            for pos in range(n_samples):
                same_class = labels[anchor] == labels[pos]
                if anchor == pos or not same_class:
                    continue
                for neg in range(n_samples):
                    other_class = labels[anchor] != labels[neg]
                    if neg in (anchor, pos) or not other_class:
                        continue
                    hinge = np.maximum(
                        0.0, distances[anchor][pos] - distances[anchor][neg] + margin)
                    loss_np += hinge
                    num_positives += (hinge > 0)
        loss_np /= num_positives

        loss_pt_val = batch_all_triplet_loss(
            torch.as_tensor(labels), torch.as_tensor(embeddings), margin, squared=squared)
        assert np.allclose(loss_np, loss_pt_val)
def test_encoder(self):
    """Smoke-test the encoder with the batch-all triplet loss.

    Builds the graph on placeholders, feeds a batch of three samples and
    prints the embeddings, the triplet loss and the summed regularization
    loss. Nothing is asserted.
    """
    tf.set_random_seed(2008)
    params = {
        'embedding_size': 3,
        'input_feature_dim': 3,
        'l2_regularization_weight': 0.0001,
        'margin': 0.2,
        'squared': True,
    }

    # Placeholder layout: (batch size, time steps, feature dim).
    inputs = tf.placeholder(shape=(None, 2, 1), dtype=tf.float32)
    labels = tf.placeholder(shape=(None, ), dtype=tf.int64)

    embeddings = encoder(inputs, params, is_training=True)
    loss_triplet, _ = batch_all_triplet_loss(
        labels, embeddings, margin=params['margin'], squared=params['squared'])
    regularization_terms = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss_reg = tf.add_n(regularization_terms)
    init_op = tf.global_variables_initializer()

    feed = {
        inputs: np.asarray([[[1], [2]], [[3], [4]], [[5], [6]]], dtype='float32'),
        labels: np.asarray([1, 2, 2], dtype='int64'),
    }
    with tf.Session() as sess:
        sess.run(init_op)
        fetched = sess.run([embeddings, loss_triplet, loss_reg], feed_dict=feed)

    # Print embeddings, triplet loss and regularization loss, in that order.
    for value in fetched:
        print(value)
def inception_v3_model_fn(features, labels, mode, params):
    """Model function for tf.estimator using a TF-Hub Inception v3 module.

    The hub module output is passed through a dense layer of
    ``params.embedding_size`` units to produce the embeddings, which are
    trained with a triplet loss.

    Args:
        features: input batch of images; the static shape after the batch
            dimension must be ``[params.image_size, params.image_size, 3]``.
        labels: labels of the images (cast to int64 before the loss).
        mode: one of tf.estimator.ModeKeys.{TRAIN, EVAL, PREDICT}.
        params: hyperparameters; reads ``image_size``, ``embedding_size``,
            ``triplet_strategy``, ``margin``, ``squared`` and
            ``learning_rate`` (and is forwarded to ``adjust_image``).

    Returns:
        model_spec: tf.estimator.EstimatorSpec object.

    Raises:
        ValueError: if ``params.triplet_strategy`` is neither ``batch_all``
            nor ``batch_hard``.
    """
    images = features
    assert images.shape[1:] == [params.image_size, params.image_size, 3], "{}".format(images.shape)

    # MODEL: Inception v3 feature-vector module for transfer learning.
    module = hub.Module(
        "https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1")

    # Adjust images to the module input shape before running the module.
    input_layer = adjust_image(images, params)
    out = module(input_layer)

    # Compute embeddings with the model.
    with tf.variable_scope('model'):
        embeddings = tf.layers.dense(inputs=out, units=params.embedding_size)
    embedding_mean_norm = tf.reduce_mean(tf.norm(embeddings, axis=1))
    tf.summary.scalar("embedding_mean_norm", embedding_mean_norm)

    # PREDICT
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'embeddings': embeddings}
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    labels = tf.cast(labels, tf.int64)

    # Define the triplet loss.
    if params.triplet_strategy == "batch_all":
        loss, fraction = batch_all_triplet_loss(labels, embeddings,
                                                margin=params.margin,
                                                squared=params.squared)
    elif params.triplet_strategy == "batch_hard":
        loss = batch_hard_triplet_loss(labels, embeddings,
                                       margin=params.margin,
                                       squared=params.squared)
    else:
        raise ValueError("Triplet strategy not recognized: {}".format(
            params.triplet_strategy))

    # EVALUATE
    # Metrics for evaluation: tf.metrics.mean averages over the whole dataset.
    with tf.variable_scope("metrics"):
        # The dict must use a single name (`eval_metric_ops`) in every place
        # it is created, extended and returned.
        eval_metric_ops = {
            "embedding_mean_norm": tf.metrics.mean(embedding_mean_norm)
        }
        if params.triplet_strategy == "batch_all":
            eval_metric_ops['fraction_positive_triplets'] = tf.metrics.mean(
                fraction)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    # Summaries for training.
    tf.summary.scalar('loss', loss)
    if params.triplet_strategy == "batch_all":
        tf.summary.scalar('fraction_positive_triplets', fraction)
    tf.summary.image('train_image', images, max_outputs=1)

    # TRAINING ROUTINE: minimize the loss with the Adam optimizer.
    optimizer = tf.train.AdamOptimizer(params.learning_rate)
    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss, global_step=global_step)

    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def model_fn(features, mode, params):
    """Model function matching sentence embeddings to tag embeddings.

    Args:
        features: dict with the keys ``"tags"``, ``"labels"`` and ``"text"``.
        mode: one of tf.estimator.ModeKeys.{TRAIN, EVAL, PREDICT}; also
            forwarded to ``get_tag_embedding`` and ``get_txt_embedding``.
        params: not read by this function.

    Returns:
        tf.estimator.EstimatorSpec for the requested mode.
    """
    vocab_size = 400010
    embedding_size = 100
    # Debug marker printed when the graph is built.
    print("22222222222" * 8)

    word_embedding = tf.get_variable(
        name="embeddings",
        dtype=tf.float32,
        shape=[vocab_size, embedding_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    y_tower = features["tags"]
    y_tower = tf.reshape(y_tower, [-1, 12, tag_max])
    labels_lists = features["labels"]
    x_tower = features["text"]

    selected_tags, tag_logit, labels = get_tag_embedding(
        labels_lists, y_tower, word_embedding, mode)
    sentence_logit = get_txt_embedding(x_tower, word_embedding, mode)
    sentence_logit = tf.nn.l2_normalize(sentence_logit, dim=1)
    # Both shapes are reported; the format string needs one placeholder per argument.
    print("tag_logit shape {} labels {}".format(tag_logit.shape, labels.shape))
    tag_logit = tf.nn.l2_normalize(tag_logit, dim=1)
    print("tag_logit2 shape {}".format(tag_logit.shape))

    embedding_mean_norm = tf.reduce_mean(tf.norm(sentence_logit, axis=1))
    tf.summary.scalar("embedding_mean_norm", embedding_mean_norm)

    labels = tf.cast(labels, tf.int64)

    # The strategy is fixed here. Only the batch_all branch defines `cosine`,
    # `neg`, `pairwise_dist`, `neg_matrix` and `triplet_loss1_matix`, which
    # the PREDICT and EVAL paths below read.
    triplet_strategy = "batch_all"

    # Define triplet loss
    if triplet_strategy == "batch_all":
        (loss, fraction, num_positive_triplets, cosine, neg, pairwise_dist,
         neg_matrix, triplet_loss1_matix) = batch_all_triplet_loss(
             labels, sentence_logit, tag_logit, margin=0.05, squared=False)
        tf.summary.scalar('loss1', num_positive_triplets)
        tf.summary.scalar('fraction_positive_triplets', fraction)
    else:
        # triplet_strategy == "batch_hard"
        loss = batch_hard_triplet_loss(
            labels, sentence_logit, tag_logit, margin=0.05, squared=False)

    if mode == tf.estimator.ModeKeys.PREDICT:
        print("111111111111111111111111111111111" * 8)
        predictions = {
            'sentence_logit': sentence_logit,
            "loss": loss,
            "selected_tags": selected_tags,
            "tag_logit": tag_logit,
            "labels222": labels,
            "cosine": cosine,
            "neg": neg,
            "pairwise_dist": pairwise_dist,
            "neg_matrix": neg_matrix,
            "triplet_loss1_matix": triplet_loss1_matix,
        }
        export_outputs = {
            'prediction': tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(
            mode, predictions=predictions, export_outputs=export_outputs)

    # -----------------------------------------------------------
    # METRICS AND SUMMARIES
    # Metrics for evaluation using tf.metrics (average over whole dataset)
    # TODO: some other metrics like rank-1 accuracy?
    eval_metric_ops = {
        "cosine": tf.metrics.mean(cosine),
        "neg": tf.metrics.mean(neg),
    }

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    # Summaries for training
    tf.summary.scalar('loss', loss)

    # Define training step that minimizes the loss with the Adam optimizer
    # (learning rate fixed at 0.1).
    optimizer = tf.train.AdamOptimizer(0.1)
    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss, global_step=global_step)

    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def resnet_v2_model_fn(features, labels, mode, params):
    """Model function for tf.estimator built on ``build_resnet_v2_model``.

    Args:
        features: (Tensor) input batch of images.
        labels: (Tensor) labels of the input images.
        mode: (tf.estimator.ModeKeys) one of {TRAIN, EVAL, PREDICT}.
        params: (Params) hyperparameters.

    Returns:
        model_spec: (tf.estimator.EstimatorSpec)

    Raises:
        ValueError: if ``params.triplet_strategy`` is neither ``batch_all``
            nor ``batch_hard``.
    """
    training = mode == tf.estimator.ModeKeys.TRAIN
    strategy = params.triplet_strategy
    uses_batch_all = strategy == "batch_all"

    # Bring the images to [batch, size, size, 3] and check the static shape.
    expected_shape = [params.image_size, params.image_size, 3]
    images = tf.reshape(features, [-1] + expected_shape)
    assert images.shape[1:] == expected_shape, "{}".format(images.shape)

    # Layers of the model.
    with tf.variable_scope('model'):
        embeddings = build_resnet_v2_model(training, images, params)
    embedding_mean_norm = tf.reduce_mean(tf.norm(embeddings, axis=1))
    tf.compat.v1.summary.scalar("embedding_mean_norm", embedding_mean_norm)

    # [PREDICT MODE] Return the embeddings.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions={'embeddings': embeddings})

    labels = tf.cast(labels, tf.int64)

    # Triplet loss for the configured mining strategy.
    if uses_batch_all:
        loss, fraction = batch_all_triplet_loss(
            labels, embeddings, margin=params.margin, squared=params.squared)
    elif strategy == "batch_hard":
        loss = batch_hard_triplet_loss(
            labels, embeddings, margin=params.margin, squared=params.squared)
    else:
        raise ValueError("Triplet strategy not recognized: {}".format(strategy))

    # Evaluation metrics via tf.metrics (averaged over the whole dataset).
    with tf.variable_scope("metrics"):
        eval_metric_ops = {}
        eval_metric_ops["embedding_mean_norm"] = tf.compat.v1.metrics.mean(
            embedding_mean_norm)
        if uses_batch_all:
            eval_metric_ops['fraction_positive_triplets'] = (
                tf.compat.v1.metrics.mean(fraction))

    # [EVAL MODE] Return the evaluation metrics.
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=eval_metric_ops)

    # [TRAIN MODE] Summaries, then the training op.
    tf.compat.v1.summary.scalar('loss', loss)
    if uses_batch_all:
        tf.compat.v1.summary.scalar('fraction_positive_triplets', fraction)
    tf.compat.v1.summary.image('train_image', images, max_outputs=1)

    optimizer = tf.compat.v1.train.AdamOptimizer(params.learning_rate)
    global_step = tf.compat.v1.train.get_global_step()
    if not params.use_batch_norm:
        train_op = optimizer.minimize(loss, global_step=global_step)
    else:
        # Run the UPDATE_OPS collection (batch-norm moving statistics)
        # together with every training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)

    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def model_fn(features, labels, mode, params):
    """Model function for tf.estimator

    Args:
        features: input batch of images
        labels: labels of the images
        mode: can be one of tf.estimator.ModeKeys.{TRAIN, EVAL, PREDICT}
        params: contains hyperparameters of the model (ex: `params.learning_rate`)

    Returns:
        model_spec: tf.estimator.EstimatorSpec object

    Raises:
        ValueError: if `params.model_type` or `params.triplet_strategy` is
            not one of the values handled below.
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    images = features
    images = tf.reshape(images, [-1, params.image_size, params.image_size, params.image_channels])
    assert images.shape[1:] == [params.image_size, params.image_size, params.image_channels], "{}".format(images.shape)

    # -----------------------------------------------------------
    # MODEL: define the layers of the model
    # Compute the embeddings with the model
    if params.model_type == 'basic':
        embeddings = build_model(is_training, images, params)
    elif params.model_type == 'inception_resnet_v2':
        embeddings = build_inception_resnet_model(is_training, images, params)
    else:
        # Report the configured value; there is no local `model_type` name.
        raise ValueError("Invalid model type: {0}".format(params.model_type))

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'embeddings': embeddings}
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    labels = tf.cast(labels, tf.int64)

    # Define triplet loss
    if params.triplet_strategy == "batch_all":
        loss, fraction = batch_all_triplet_loss(labels, embeddings, margin=params.margin,
                                                squared=params.squared)
    elif params.triplet_strategy == "batch_hard":
        loss = batch_hard_triplet_loss(labels, embeddings, margin=params.margin,
                                       squared=params.squared)
    elif params.triplet_strategy == "tf_batch_semi_hard":
        # NOTE(review): axis=0 normalizes along the first axis. If embeddings
        # are laid out (batch, dim), per-embedding normalization would be
        # axis=1 - confirm the intent before changing.
        normalized_embeddings = tf.nn.l2_normalize(embeddings, axis=0)
        loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(
            labels, normalized_embeddings, margin=params.margin)
    else:
        raise ValueError("Triplet strategy not recognized: {}".format(params.triplet_strategy))

    # -----------------------------------------------------------
    # METRICS AND SUMMARIES
    # Batch metrics
    with tf.variable_scope("batch_metrics"):
        tf.summary.scalar('batch_loss', loss)
        if params.triplet_strategy == "batch_all":
            tf.summary.scalar('fraction_positive_triplets', fraction)

    # Metrics for evaluation using tf.metrics (average over whole dataset)
    with tf.variable_scope("metrics"):
        mean_loss, mean_loss_op = tf.metrics.mean(loss)
        tf.summary.scalar('loss', mean_loss)

        if mode == tf.estimator.ModeKeys.EVAL:
            embedding_mean_norm = tf.reduce_mean(tf.norm(embeddings, axis=1))
            eval_metric_ops = {
                "embedding_mean_norm": tf.metrics.mean(embedding_mean_norm),
                "mean_loss": (mean_loss, mean_loss_op)
            }
            if params.triplet_strategy == "batch_all":
                eval_metric_ops['fraction_positive_triplets'] = tf.metrics.mean(fraction)
            # Accuracy metrics are only added for this model/dataset pair.
            if params.model_type == 'inception_resnet_v2' and params.dataset == 'imagenetvid':
                accuracy = calculate_accuracy(embeddings, labels)
                eval_metric_ops["accuracy_mean"] = tf.metrics.mean(accuracy)
                easier_accuracy = calculate_easier_accuracy(embeddings, labels)
                eval_metric_ops["easier_accuracy_mean"] = tf.metrics.mean(easier_accuracy)
            return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=eval_metric_ops)

    # Summaries for training
    tf.summary.image('train_image', images, max_outputs=1)

    # Define training step that minimizes the loss with the Adam optimizer
    optimizer = tf.train.AdamOptimizer(params.learning_rate)
    global_step = tf.train.get_global_step()
    if params.use_batch_norm:
        # Add a dependency to update the moving mean and variance for batch normalization
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.minimize(loss, global_step=global_step)
    else:
        train_op = optimizer.minimize(loss, global_step=global_step)

    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def model_fn(params, mode):
    """Build the hashing-model graph with a triplet loss.

    Args:
        params: hyperparameters; reads ``triplet_strategy``, ``margin``,
            ``squared`` and ``learning_rate``.
        mode: ``'train'`` or ``'test'``.

    Returns:
        For ``'train'``: ``(x, labels, sess, train_op, accuracy, loss,
        merged, train_writer, gst)`` with the session already initialized.
        For ``'test'``: ``(x, embeddings, labels, accuracy)``.
        Any other mode falls through and returns ``None``.

    Raises:
        ValueError: if ``params.triplet_strategy`` is neither ``batch_all``
            nor ``batch_hard``.
    """
    def _self_masked_sq_dist(tensor):
        # Squared pairwise distances with 10 added on the diagonal.
        diagonal_offset = 10 * tf.eye(tf.shape(x)[0], dtype=tf.float32)
        return tf.add(diagonal_offset, dist(tensor, squared=True))

    # -----------------------------------------------------------
    # MODEL: inputs and embeddings come from the hashing model.
    with tf.variable_scope('model'):
        x, embeddings = hashing_model()
    embedding_mean_norm = tf.reduce_mean(tf.norm(embeddings, axis=1))
    tf.summary.scalar("embedding_mean_norm", embedding_mean_norm)

    labels = tf.placeholder(tf.int64, [None, 1], 'labels')
    tf.summary.histogram('Distances', dist(embeddings))

    # Triplet loss for the configured strategy.
    strategy = params.triplet_strategy
    uses_batch_all = strategy == "batch_all"
    if uses_batch_all:
        loss, fraction = batch_all_triplet_loss(
            labels, embeddings, margin=params.margin, squared=params.squared)
    elif strategy == "batch_hard":
        loss = batch_hard_triplet_loss(
            labels, embeddings, margin=params.margin, squared=params.squared)
    else:
        raise ValueError("Triplet strategy not recognized: {}".format(strategy))

    # -----------------------------------------------------------
    # METRICS AND SUMMARIES
    # Accuracy: fraction of positions where the argmin of the input
    # distances equals the argmin of the embedding distances.
    og_emb_dist = _self_masked_sq_dist(x)
    tr_emb_dist = _self_masked_sq_dist(embeddings)
    og_emb_nn = tf.argmin(og_emb_dist, axis=0)
    tr_emb_nn = tf.argmin(tr_emb_dist, axis=0)
    matches = tf.cast(tf.equal(og_emb_nn, tr_emb_nn), dtype=tf.float32)
    accuracy = tf.reduce_mean(matches)
    tf.summary.scalar('accuracy', accuracy)

    # Summaries for training
    tf.summary.scalar('loss', loss)
    if uses_batch_all:
        tf.summary.scalar('fraction_positive_triplets', fraction)

    # Training step: plain gradient descent on the loss, tracked by a newly
    # created global step.
    optimizer = tf.train.GradientDescentOptimizer(params.learning_rate)
    gst = tf.train.create_global_step()
    train_op = optimizer.minimize(loss, global_step=gst)

    if mode == 'test':
        return x, embeddings, labels, accuracy
    if mode == 'train':
        init = tf.global_variables_initializer()
        sess = tf.Session()
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.getcwd() + '/train_writer', sess.graph)
        sess.run(init)
        return x, labels, sess, train_op, accuracy, loss, merged, train_writer, gst
def model_fn(features, labels, mode, params):
    """Model function for tf.estimator with configurable model and optimizer.

    Args:
        features: input batch of images
        labels: labels of the images
        mode: can be one of tf.estimator.ModeKeys.{TRAIN, EVAL, PREDICT}
        params: contains hyperparameters of the model (ex: `params.learning_rate`)

    Returns:
        model_spec: tf.estimator.EstimatorSpec object

    Raises:
        ValueError: if `params.triplet_strategy` or `params.optimizer` is
            not recognized.
    """
    training = mode == tf.estimator.ModeKeys.TRAIN

    side = params.image_size
    depth = 3 if params.rgb else 1
    expected_shape = [side, side, depth]
    images = tf.reshape(features, [-1] + expected_shape)
    assert images.shape[1:] == expected_shape, "{}".format(images.shape)

    # MODEL: the builder is looked up by name on the `models` module.
    # -------------------------------------------------------
    tf.logging.info("Current model: {}".format(params.model))
    with tf.variable_scope('model'):
        build_embeddings = getattr(models, params.model)
        embeddings = build_embeddings(training, images, params, mode)
    embedding_mean_norm = tf.reduce_mean(tf.norm(embeddings, axis=1))

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions={'embeddings': embeddings})

    labels = tf.cast(labels, tf.int64)

    # Log when the maximum loss is minimized instead of the mean.
    if params.minimize_max:
        tf.logging.info(
            "Minimizing the maximum individual loss at each batch.")

    # Triplet loss; strategy-specific scalars are collected in
    # `strategy_scalars` so metrics and summaries can be emitted uniformly.
    strategy = params.triplet_strategy
    strategy_scalars = []
    if strategy == "batch_all":
        loss_dict = batch_all_triplet_loss(labels, embeddings, params)
    elif strategy == "batch_hard":
        if params.minimize_max:
            tf.logging.info(
                "Batch hard strategy applied while minimizing maximum loss.\n"
                "Setting the margin higher than zero may be useless for "
                "triplet selection and optimization."
            )
        loss_dict = batch_hard_triplet_loss(labels, embeddings, params)
        strategy_scalars.append(
            ("hardest_positive_dist", loss_dict['hardest_positive_dist']))
        strategy_scalars.append(
            ("hardest_negative_dist", loss_dict['hardest_negative_dist']))
    elif strategy == "batch_hard_pos":
        loss_dict = batch_hard_pos_triplet_loss(labels, embeddings, params)
        strategy_scalars.append(
            ("hardest_positive_dist", loss_dict['hardest_positive_dist']))
    else:
        raise ValueError("Triplet strategy not recognized: {}".format(strategy))

    # Values every strategy provides.
    loss = loss_dict['triplet_loss']
    fraction_positive_triplets = loss_dict['fraction_positive_triplets']
    max_batch_triplet_loss = loss_dict['max_batch_triplet_loss']
    min_batch_triplet_loss = loss_dict['min_batch_triplet_loss']
    rank1_acc = loss_dict['rank1_accuracy']

    common_scalars = [
        ("embedding_mean_norm", embedding_mean_norm),
        ("fraction_positive_triplets", fraction_positive_triplets),
        ("max_batch_triplet_loss", max_batch_triplet_loss),
        ("min_batch_triplet_loss", min_batch_triplet_loss),
        ("rank1_accuracy", rank1_acc),
    ]

    # METRICS AND SUMMARIES
    # -----------------------------------------------------------
    with tf.variable_scope("metrics"):
        eval_metric_ops = {}
        for metric_name, tensor in common_scalars + strategy_scalars:
            eval_metric_ops[metric_name] = tf.metrics.mean(tensor)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=eval_metric_ops)

    # Summaries for training
    for summary_name, tensor in [("loss", loss)] + common_scalars + strategy_scalars:
        tf.summary.scalar(summary_name, tensor)
    tf.summary.image('train_image', images, max_outputs=8)

    # Optimizer chosen by name from the configuration.
    optimizers = {
        'adam': tf.train.AdamOptimizer,
        'adagrad': tf.train.AdagradOptimizer,
        'adadelta': tf.train.AdadeltaOptimizer,
        'rmsprop': tf.train.RMSPropOptimizer,
        'gradient_descent': tf.train.GradientDescentOptimizer
    }
    known_optimizers = list(optimizers.keys())
    if params.optimizer not in known_optimizers:
        raise ValueError(
            "Optimizer not recognized: {}\nShould be in the list {}".format(
                params.optimizer, known_optimizers))
    optimizer = optimizers[params.optimizer](params.learning_rate)
    tf.logging.info("Current optimizer: {}".format(params.optimizer))

    # Training step that minimizes the loss with the chosen optimizer.
    global_step = tf.train.get_global_step()
    if not params.use_batch_norm:
        train_op = optimizer.minimize(loss, global_step=global_step)
    else:
        # Run the UPDATE_OPS collection (batch-norm moving statistics)
        # together with every training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)

    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def build_model(features,
                labels=None,
                cf=None,
                is_training=True,
                num_examples=None,
                global_step=None,
                class_cnt=None,
                anchor_indices=None,
                positive_indices=None):
    """Build the slim model and, when training, its metric-learning loss.

    Args:
        features: input batch of images, passed to ``build_slim_model``.
        labels: labels of the images; cast to int64 when training.
        cf: configuration object; reads ``use_all_loss``, ``margin``,
            ``triplet_strategy``, ``use_focal_loss``, ``focal_loss_sigma``,
            ``squared``, ``use_crossentropy`` and ``weight_decay``.
        is_training: when False only the embeddings are returned.
        num_examples: forwarded to ``train_op_fun``.
        global_step: forwarded to ``train_op_fun``.
        class_cnt: number of classes; used for the one-hot cross-entropy
            labels and forwarded to ``build_slim_model``.
        anchor_indices: indices gathered as anchors; needed by the
            pair-based losses (``contrastive``, ``npairs``).
        positive_indices: indices gathered as positives; same use.

    Returns:
        ``embeddings`` when ``is_training`` is False, otherwise the tuple
        ``(loss, end_points, train_op, embeddings)``.

    Raises:
        NotImplementedError: for the ``npairs_multilabel`` strategy, which
            defines no loss.
        ValueError: if ``cf.triplet_strategy`` is not recognized.
    """
    images = features
    embeddings, end_points = build_slim_model(is_training, images, cf, class_cnt)

    if not is_training:
        return embeddings

    labels = tf.cast(labels, tf.int64)
    has_pairs = anchor_indices is not None and positive_indices is not None

    if cf.use_all_loss:
        # Sum of lifted-struct, semi-hard triplet and (with pairs) n-pairs losses.
        loss = tf.contrib.losses.metric_learning.lifted_struct_loss(
            labels, end_points['lifted_struct'], margin=cf.margin)

        embeddings = tf.nn.l2_normalize(embeddings, axis=1)
        loss += tf.contrib.losses.metric_learning.triplet_semihard_loss(
            labels, embeddings, margin=cf.margin)

        if has_pairs:
            anchor_embeddings = tf.gather_nd(end_points['npairs'], anchor_indices)
            anchor_labels = tf.gather_nd(labels, anchor_indices)
            positive_embeddings = tf.gather_nd(end_points['npairs'], positive_indices)
            loss += tf.contrib.losses.metric_learning.npairs_loss(
                anchor_labels, anchor_embeddings, positive_embeddings,
                reg_lambda=0.)
    else:
        if has_pairs:
            anchor_embeddings = tf.gather_nd(embeddings, anchor_indices)
            anchor_labels = tf.gather_nd(labels, anchor_indices)
            positive_embeddings = tf.gather_nd(embeddings, positive_indices)
            print(anchor_embeddings, anchor_labels)

        # Define triplet loss
        if cf.triplet_strategy == "batch_all":
            if cf.use_focal_loss:
                loss = all_triplet_focal_loss(labels, embeddings,
                                              margin=cf.margin,
                                              squared=cf.squared,
                                              sigma=cf.focal_loss_sigma)
            else:
                loss = batch_all_triplet_loss(labels, embeddings,
                                              margin=cf.margin,
                                              squared=cf.squared)
        elif cf.triplet_strategy == "batch_hard":
            if cf.use_focal_loss:
                loss = hard_triplet_focal_loss(labels, embeddings,
                                               margin=cf.margin,
                                               squared=cf.squared,
                                               sigma=cf.focal_loss_sigma)
            else:
                loss = batch_hard_triplet_loss(labels, embeddings,
                                               margin=cf.margin,
                                               squared=cf.squared)
        elif cf.triplet_strategy == "semihard":
            embeddings = tf.nn.l2_normalize(embeddings, axis=1)
            if cf.use_focal_loss:
                loss = semihard_triplet_focal_loss(labels, embeddings,
                                                   margin=cf.margin,
                                                   sigma=cf.focal_loss_sigma)
            else:
                loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(
                    labels, embeddings, margin=cf.margin)
        elif cf.triplet_strategy == "cluster":
            embeddings = tf.nn.l2_normalize(embeddings, axis=1)
            loss = tf.contrib.losses.metric_learning.cluster_loss(
                labels, embeddings, 1.0)
        elif cf.triplet_strategy == "contrastive":
            assert anchor_indices is not None and positive_indices is not None
            anchor_embeddings = tf.nn.l2_normalize(anchor_embeddings, axis=1)
            positive_embeddings = tf.nn.l2_normalize(positive_embeddings, axis=1)
            loss = tf.contrib.losses.metric_learning.contrastive_loss(
                anchor_labels, anchor_embeddings, positive_embeddings,
                margin=cf.margin)
        elif cf.triplet_strategy == "lifted_struct":
            loss = tf.contrib.losses.metric_learning.lifted_struct_loss(
                labels, embeddings, margin=cf.margin)
        elif cf.triplet_strategy == "npairs":
            assert anchor_indices is not None and positive_indices is not None
            loss = tf.contrib.losses.metric_learning.npairs_loss(
                anchor_labels, anchor_embeddings, positive_embeddings,
                reg_lambda=0.)
        elif cf.triplet_strategy == "npairs_multilabel":
            # No loss is defined for this strategy; fail here with a clear
            # message rather than later on an unbound `loss`.
            raise NotImplementedError(
                "Triplet strategy not implemented: {}".format(
                    cf.triplet_strategy))
        else:
            raise ValueError("Triplet strategy not recognized: {}".format(
                cf.triplet_strategy))

    trainable_vars = tf.trainable_variables()

    if cf.use_crossentropy:
        loss += tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=tf.one_hot(labels, class_cnt),
                logits=end_points['Logits2']))

    if cf.weight_decay is not None:
        # L2 weight decay over every trainable variable whose name does not
        # contain 'bias'.
        loss += tf.add_n(
            [tf.nn.l2_loss(v) for v in trainable_vars
             if 'bias' not in v.name]) * cf.weight_decay

    train_op = train_op_fun(loss, global_step, num_examples, cf)
    return loss, end_points, train_op, embeddings
def model_fn(features, labels, mode, params):
    """Model function for tf.estimator.

    Encodes the inputs into embeddings with the encoder selected by
    `params['encoder']`. In PREDICT mode only the embeddings are returned. In
    EVAL mode the triplet loss and the evaluation metrics are returned. In
    TRAIN mode a weighted sum of the triplet loss, a cross-entropy loss and an
    L2 weight penalty is minimized with Adam, using an exponentially decayed
    learning rate.

    Args:
        features: input batch
        labels: labels of the inputs
        mode: can be one of tf.estimator.ModeKeys.{TRAIN, EVAL, PREDICT}
        params: hyperparameters of the model, read by key
            (ex: `params['learning_rate']`)

    Returns:
        model_spec: tf.estimator.EstimatorSpec object

    Raises:
        ValueError: if `params['triplet_strategy']` is neither "batch_all"
            nor "batch_hard" (the strategy is not inspected in PREDICT mode)
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    inputs = features
    encoder = _get_encoder(params['encoder'])
    embeddings = encoder(
        inputs,
        params=params,
        is_training=is_training,
    )
    embedding_mean_norm = tf.reduce_mean(tf.norm(embeddings, axis=1))
    tf.summary.scalar("embedding_mean_norm", embedding_mean_norm)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'embeddings': embeddings}
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    labels = tf.cast(labels, tf.int64)

    # Define triplet loss
    if params['triplet_strategy'] == "batch_all":
        loss_triplet, fraction = batch_all_triplet_loss(
            labels,
            embeddings,
            margin=params['margin'],
            squared=params['squared'])
    elif params['triplet_strategy'] == "batch_hard":
        loss_triplet = batch_hard_triplet_loss(labels,
                                               embeddings,
                                               margin=params['margin'],
                                               squared=params['squared'])
    else:
        # Key access, like every other hyperparameter read in this function:
        # attribute access on a mapping would raise AttributeError and hide
        # the intended ValueError.
        raise ValueError("Triplet strategy not recognized: {}".format(
            params['triplet_strategy']))

    # -----------------------------------------------------------
    # METRICS AND SUMMARIES
    # Metrics for evaluation using tf.metrics (average over whole dataset)
    # TODO: some other metrics like rank-1 accuracy?
    with tf.variable_scope("metrics"):
        eval_metric_ops = {
            "embedding_mean_norm": tf.metrics.mean(embedding_mean_norm)
        }

        if params['triplet_strategy'] == "batch_all":
            eval_metric_ops['fraction_positive_triplets'] = tf.metrics.mean(
                fraction)

    if mode == tf.estimator.ModeKeys.EVAL:
        # Evaluation reports the unweighted triplet loss only.
        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss_triplet,
                                          eval_metric_ops=eval_metric_ops)

    # Build loss
    # NOTE(review): if all three weights below are <= 0, `loss` stays the
    # Python int 0 when it reaches optimizer.minimize() - confirm that such a
    # configuration is never used.
    loss = 0

    # Apply triplet loss
    triplet_loss_weight = params['triplet_loss_weight']
    if triplet_loss_weight > 0:
        if params['triplet_strategy'] == "batch_all":
            tf.summary.scalar('fraction_positive_triplets', fraction)
        tf.summary.scalar('loss_triplet', loss_triplet)
        loss += triplet_loss_weight * loss_triplet

    # Apply cross entropy loss
    cross_entropy_loss_weight = params['cross_entropy_loss_weight']
    if cross_entropy_loss_weight > 0:
        loss_cross_entropy = cross_entropy_loss(
            labels=labels,
            embeddings=embeddings,
            num_classes=params['num_classes'])
        tf.summary.scalar('loss_cross_entropy', loss_cross_entropy)
        loss += cross_entropy_loss_weight * loss_cross_entropy

    # Finally, apply weight regularization (sum of squares over every
    # trainable variable)
    l2_regularization_weight = params['l2_regularization_weight']
    if l2_regularization_weight > 0:
        loss_reg = l2_regularization_weight * tf.add_n(
            [tf.reduce_sum(tf.square(w)) for w in tf.trainable_variables()])
        tf.summary.scalar('loss_reg', loss_reg)
        loss += loss_reg

    # Define training step that minimizes the loss with the Adam optimizer
    global_step = tf.train.get_global_step()
    lr_decay_rate = params['learning_rate_decay_rate']
    lr_decay_steps = params['learning_rate_decay_steps']
    lr_start = params['learning_rate']
    learning_rate = tf.train.exponential_decay(learning_rate=lr_start,
                                               global_step=global_step,
                                               decay_rate=lr_decay_rate,
                                               decay_steps=lr_decay_steps)
    tf.summary.scalar('learning_rate', learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate)

    # Add a dependency to update the moving mean and variance for batch normalization
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.minimize(loss, global_step=global_step)

    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def model_fn(features, labels, mode, params):
    """Build the tf.estimator model specification for the image embedding model.

    Args:
        features: batch of input images
        labels: class labels of those images
        mode: one of tf.estimator.ModeKeys.{TRAIN, EVAL, PREDICT}
        params: hyperparameters of the model, read by attribute
            (ex: `params.learning_rate`)

    Returns:
        model_spec: tf.estimator.EstimatorSpec object
    """
    images = features
    assert images.shape[1:] == [params.image_size, params.image_size,
                                1], "{}".format(images.shape)

    # -----------------------------------------------------------
    # MODEL: the embeddings are produced by the layers built in `build_model`
    training = mode == tf.estimator.ModeKeys.TRAIN
    with tf.variable_scope('model'):
        embeddings = build_model(training, images, params)

    # Prediction only needs the embeddings; no loss is built.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions={'embeddings': embeddings})

    labels = tf.cast(labels, tf.int64)

    # Triplet loss, according to the configured mining strategy
    strategy = params.triplet_strategy
    uses_batch_all = strategy == "batch_all"
    if uses_batch_all:
        loss, fraction = batch_all_triplet_loss(labels,
                                                embeddings,
                                                margin=params.margin,
                                                squared=params.squared)
    elif strategy == "batch_hard":
        loss = batch_hard_triplet_loss(labels,
                                       embeddings,
                                       margin=params.margin,
                                       squared=params.squared)
    else:
        raise ValueError("Triplet strategy not recognized: {}".format(
            params.triplet_strategy))

    # -----------------------------------------------------------
    # METRICS AND SUMMARIES
    # Evaluation metrics are tf.metrics (averaged over the whole dataset)
    # TODO: some other metrics like rank-1 accuracy?
    with tf.variable_scope("metrics"):
        eval_metric_ops = {}
        if uses_batch_all:
            eval_metric_ops['fraction_positive_triplets'] = tf.metrics.mean(
                fraction)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    # Training summaries
    tf.summary.scalar('loss', loss)
    if uses_batch_all:
        tf.summary.scalar('fraction_positive_triplets', fraction)
    tf.summary.image('train_image', images, max_outputs=1)

    # Training step: minimize the loss with the Adam optimizer
    optimizer = tf.train.AdamOptimizer(params.learning_rate)
    global_step = tf.train.get_global_step()

    # With batch normalization the moving mean and variance updates must run
    # before the training step; otherwise the dependency list is empty, which
    # adds no control dependencies at all.
    if params.use_batch_norm:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    else:
        update_ops = []
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss, global_step=global_step)

    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def __init__(self, params):
    """Build the multi-modal graph: placeholders, embeddings, losses, train op.

    Placeholders for feed_dict (created here as attributes):
        is_training: bool
        anchor_mode: int32 scalar choosing the anchor modality
            (0 -> tweet, 1 -> image, any other value -> user)
        opt_mode: int32 scalar choosing the second modality, with the same
            encoding as anchor_mode
        images: float32 input batch of image features, shape (None, 2048)
        tweets: float32 input batch of tweet features, shape (None, 768)
        user_ids: int32 ids, shape (None,), looked up in an embedding table
        team: int64 labels, shape (None,), used for the triplet loss and as
            the target of the 16-way classifier

    Attributes set for the caller: loss, class_loss, total_loss, pred_y, pred,
    class_acc, eval_metric_ops and train_op.

    Args:
        params: contains hyperparameters of the model (ex: `params.learning_rate`);
            also read here: triplet_strategy, margin, squared, use_batch_norm

    Raises:
        ValueError: if `params.triplet_strategy` is neither "batch_all" nor
            "batch_hard"
    """
    # NOTE(review): this placeholder is only referenced by the disabled
    # (quoted-out) code below; nothing active in this method consumes it.
    self.is_training = tf.placeholder(dtype=tf.bool, name='is_training')
    self.anchor_mode = tf.placeholder(dtype=tf.int32,
                                      shape=[],
                                      name='anchor_mode')
    self.opt_mode = tf.placeholder(dtype=tf.int32, shape=[], name='opt_mode')
    self.images = tf.placeholder(shape=(None, 2048),
                                 dtype=tf.float32,
                                 name='input_images')
    self.tweets = tf.placeholder(shape=(None, 768),
                                 dtype=tf.float32,
                                 name='input_tweets')
    self.user_ids = tf.placeholder(shape=(None, ),
                                   dtype=tf.int32,
                                   name='user_ids')
    # NOTE(review): requests the same graph name 'user_ids' as the placeholder
    # above - looks like a copy-paste slip. Confirm nothing feeds this tensor
    # by name before renaming it.
    self.team = tf.placeholder(shape=(None, ),
                               dtype=tf.int64,
                               name='user_ids')

    # Dense Layers to unify dimensionalities
    self.tweets_dense = tf.keras.layers.Dense(128, activation='relu')(
        self.tweets)
    # The bare string below is disabled code (an earlier image model), kept as-is.
    """ images_reshaped = tf.reshape(self.images, [-1, params.image_size, params.image_size, 1]) assert images_reshaped.shape[1:] == [params.image_size, params.image_size, 1], "{}".format(images_reshaped.shape) # ----------------------------------------------------------- # MODEL: define the layers of the model with tf.variable_scope('model'): # Compute the embeddings with the model print("images_dense.shape:", self.images.shape) embeddings_images = build_model(self.is_training, self.images, params) """
    self.images_dense = tf.keras.layers.Dense(128, activation='relu')(
        self.images)
    self.user_dense = tf.keras.layers.Embedding(200000, 128)(self.user_ids)
    #self.users_dense = tf.keras.layers.Dense(128, activation='relu')(self.user_emb)

    # One 128-dimensional embedding per modality, keyed by modality name.
    embeddings = {
        "image": self.images_dense,
        "tweet": self.tweets_dense,
        "user": self.user_dense
    }
    embedding_images_mean_norm = tf.reduce_mean(
        tf.norm(self.images_dense, axis=1))
    tf.summary.scalar("embedding_images_mean_norm",
                      embedding_images_mean_norm)
    embedding_tweets_mean_norm = tf.reduce_mean(
        tf.norm(self.tweets_dense, axis=1))
    tf.summary.scalar("embedding_tweets_mean_norm",
                      embedding_tweets_mean_norm)
    # The bare string below is disabled code (an early-return for inference), kept as-is.
    """ if not self.is_training: predictions = {'tweets embeddings': tweets_dense, "images embeddings": images_dense} return predictions """
    labels = tf.cast(self.team, tf.int64)

    # Pick the anchor modality at run time: 0 -> tweet, 1 -> image, else user.
    anchor_emb = tf.cond(
        tf.equal(self.anchor_mode, tf.constant(0, dtype=tf.int32)),
        lambda: embeddings["tweet"], lambda: tf.cond(
            tf.equal(self.anchor_mode, tf.constant(1, dtype=tf.int32)),
            lambda: embeddings["image"], lambda: embeddings["user"]))
    # Pick the second modality with the same encoding.
    opt_emb = tf.cond(
        tf.equal(self.opt_mode, tf.constant(0, dtype=tf.int32)),
        lambda: embeddings["tweet"], lambda: tf.cond(
            tf.equal(self.opt_mode, tf.constant(1, dtype=tf.int32)),
            lambda: embeddings["image"], lambda: embeddings["user"]))

    # Define triplet loss
    if params.triplet_strategy == "batch_all":
        self.loss, fraction = batch_all_triplet_loss(
            labels,
            anchor_emb,
            opt_emb,
            margin=params.margin,
            squared=params.squared)
        #self.loss, fraction = batch_all_triplet_loss(labels, embeddings["image"], margin=params.margin, squared=params.squared)
    elif params.triplet_strategy == "batch_hard":
        self.loss = batch_hard_triplet_loss(labels,
                                            anchor_emb,
                                            opt_emb,
                                            margin=params.margin,
                                            squared=params.squared)
        #self.loss = batch_hard_triplet_loss(labels, embeddings["image"], margin=params.margin, squared=params.squared)
    else:
        raise ValueError("Triplet strategy not recognized: {}".format(
            params.triplet_strategy))

    # Classification head: concatenate the three modality embeddings and
    # apply one linear layer with 16 outputs.
    all_modes_hidden = tf.concat(
        [self.tweets_dense, self.images_dense, self.user_dense], 1)
    print("all_modes_hidden.shape:", all_modes_hidden.shape)
    # Both the weights and the bias of the output layer start at zero.
    weights_output_layer = tf.Variable(initial_value=np.zeros(
        (3 * 128, 16)),
                                       dtype='float32')
    bias_output_layer = tf.Variable(initial_value=np.zeros((1, 16)),
                                    dtype='float32')
    output_logits = tf.matmul(all_modes_hidden,
                              weights_output_layer) + bias_output_layer
    self.pred_y = tf.nn.softmax(output_logits)
    self.pred = tf.argmax(self.pred_y, axis=-1)
    one_hot_labels = tf.one_hot(self.team, depth=16, axis=-1)
    self.class_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=one_hot_labels,
                                                   logits=output_logits))
    print("self.pred:", self.pred)
    print("self.team:", self.team)
    # Fraction of the batch whose predicted class equals the team label.
    self.class_acc = tf.reduce_mean(
        (tf.cast(tf.equal(self.pred, self.team), tf.float32)))

    # -----------------------------------------------------------
    # METRICS AND SUMMARIES
    # Metrics for evaluation using tf.metrics (average over whole dataset)
    # TODO: some other metrics like rank-1 accuracy?
    with tf.variable_scope("metrics"):
        norm_dict = {
            "image": embedding_images_mean_norm,
            "tweet": embedding_tweets_mean_norm
        }
        # Only the image norm is reported; the "tweet" entry is never read.
        norm = norm_dict["image"]
        self.eval_metric_ops = {
            "embedding_images_mean_norm": tf.metrics.mean(norm)
        }

        if params.triplet_strategy == "batch_all":
            self.eval_metric_ops[
                'fraction_positive_triplets'] = tf.metrics.mean(fraction)

    # Summaries for training
    tf.summary.scalar('loss', self.loss)
    if params.triplet_strategy == "batch_all":
        tf.summary.scalar('fraction_positive_triplets', fraction)

    # Define training step that minimizes the loss with the Adam optimizer
    optimizer = tf.train.AdamOptimizer(params.learning_rate)
    # NOTE(review): no global step is created in this method; presumably the
    # caller creates one beforehand, otherwise this is None - confirm.
    global_step = tf.train.get_global_step()
    # The optimized objective is the triplet loss plus the classification loss.
    self.total_loss = self.loss + self.class_loss
    if params.use_batch_norm:
        # Add a dependency to update the moving mean and variance for batch normalization
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            self.train_op = optimizer.minimize(self.total_loss,
                                               global_step=global_step)
    else:
        self.train_op = optimizer.minimize(self.total_loss,
                                           global_step=global_step)
def _build_attr_net(attrs, cf, num_hidden_attr_net, is_training):
    """Map the attribute vectors `attrs` to `cf.embedding_size` dimensions."""
    hidden_step = int(
        (cf.attr_dim - cf.embedding_size) / (num_hidden_attr_net + 1))
    for i in range(num_hidden_attr_net):
        hidden_units = cf.attr_dim - (hidden_step * (i + 1))
        print(hidden_units)
        # NOTE(review): every hidden layer reads from `attrs`, not from the
        # previous hidden layer, so with num_hidden_attr_net > 1 only the last
        # one feeds the output (and 0 leaves `attr_net` undefined). Kept as in
        # the original to preserve the graph - confirm the intent.
        attr_net = tf.layers.dense(attrs,
                                   hidden_units,
                                   tf.nn.relu,
                                   trainable=is_training)
        attr_net = tf.layers.dropout(attr_net, training=is_training)
    return tf.layers.dense(attr_net,
                           cf.embedding_size,
                           tf.nn.relu,
                           trainable=is_training)


def build_model(features,
                labels,
                cf,
                attrs=None,
                is_training=True,
                use_attr_net=False,
                num_hidden_attr_net=1,
                num_examples=None,
                global_step=None,
                use_old_model=False):
    """Build the embedding network and, when training, its loss and train op.

    Args:
        features: input batch of images
        labels: labels of the images (cast to int64 before the loss)
        cf: configuration object read by attribute (ex: `cf.margin`,
            `cf.triplet_strategy`, `cf.weight_decay`)
        attrs: optional attribute vectors passed to the batch_all / batch_hard
            losses; None disables them
        is_training: when False, only the embeddings are built and returned
        use_attr_net: when True (and `attrs` is given), `attrs` is first
            passed through a small dense network
        num_hidden_attr_net: number of hidden layers of that network
        num_examples: forwarded to `train_op_fun`
        global_step: forwarded to `train_op_fun`
        use_old_model: when True, the layers are created inside the 'model'
            variable scope

    Returns:
        If `is_training` is False: `embeddings`, or `(embeddings, attrs)` when
        `attrs` is not None. Otherwise: `(loss, end_points, train_op)`.

    Raises:
        NotImplementedError: when training with a strategy this function
            names but does not implement ("contrastive", "npairs",
            "npairs_multilabel").
        ValueError: when training with an unrecognized strategy.
    """
    if is_training:
        # Fail fast, before any graph is built. The strategies in
        # `unimplemented` used to fall through without defining `loss` and
        # crashed later with an UnboundLocalError.
        implemented = ("batch_all", "batch_hard", "semihard", "cluster",
                       "lifted_struct")
        unimplemented = ("contrastive", "npairs", "npairs_multilabel")
        if cf.triplet_strategy in unimplemented:
            raise NotImplementedError(
                "Triplet strategy not implemented in build_model: {}".format(
                    cf.triplet_strategy))
        if cf.triplet_strategy not in implemented:
            raise ValueError("Triplet strategy not recognized: {}".format(
                cf.triplet_strategy))

    images = features

    # -----------------------------------------------------------
    # MODEL: define the layers of the model
    # Compute the embeddings with the model
    if use_old_model:
        # Old models keep every variable under the 'model' scope.
        with tf.variable_scope('model'):
            embeddings, end_points = build_slim_model(is_training, images, cf)
            if attrs is not None and use_attr_net:
                attrs = _build_attr_net(attrs, cf, num_hidden_attr_net,
                                        is_training)
    else:
        embeddings, end_points = build_slim_model(is_training, images, cf)
        if attrs is not None and use_attr_net:
            attrs = _build_attr_net(attrs, cf, num_hidden_attr_net,
                                    is_training)

    if not is_training:
        # Inference: no normalization, loss or train op is built.
        if attrs is not None:
            return embeddings, attrs
        return embeddings

    if cf.l2norm:
        embeddings = tf.nn.l2_normalize(embeddings, axis=1)
        if attrs is not None:
            attrs = tf.nn.l2_normalize(attrs, axis=1)

    embedding_mean_norm = tf.reduce_mean(tf.norm(embeddings, axis=1))
    tf.summary.scalar("embedding_mean_norm", embedding_mean_norm)

    labels = tf.cast(labels, tf.int64)

    # Define triplet loss
    if cf.triplet_strategy == "batch_all":
        loss, fraction = batch_all_triplet_loss(
            labels,
            embeddings,
            margin=cf.margin,
            attrs=attrs,
            attr_weight=cf.attr_loss_weight,
            squared=cf.squared)
    elif cf.triplet_strategy == "batch_hard":
        loss = batch_hard_triplet_loss(labels,
                                       embeddings,
                                       margin=cf.margin,
                                       attrs=attrs,
                                       attr_weight=cf.attr_loss_weight,
                                       squared=cf.squared)
    elif cf.triplet_strategy == "semihard":
        loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(
            labels, embeddings, margin=cf.margin)
    elif cf.triplet_strategy == "cluster":
        loss = tf.contrib.losses.metric_learning.cluster_loss(
            labels, embeddings, 1.0)
    else:
        # "lifted_struct": the only strategy left after the check above.
        loss = tf.contrib.losses.metric_learning.lifted_struct_loss(
            labels, embeddings, margin=cf.margin)

    # L2 weight decay on every trainable variable except biases; skipped when
    # no decay factor is configured.
    if cf.weight_decay is not None:
        trainable_vars = tf.trainable_variables()
        loss += tf.add_n([
            tf.nn.l2_loss(v) for v in trainable_vars if 'bias' not in v.name
        ]) * cf.weight_decay

    # -----------------------------------------------------------
    # METRICS AND SUMMARIES
    # Metrics for evaluation using tf.metrics (average over whole dataset)
    # TODO: some other metrics like rank-1 accuracy?
    # The metric ops are created in the graph but not returned.
    with tf.variable_scope("metrics"):
        eval_metric_ops = {
            "embedding_mean_norm": tf.metrics.mean(embedding_mean_norm)
        }

        if cf.triplet_strategy == "batch_all":
            eval_metric_ops['fraction_positive_triplets'] = tf.metrics.mean(
                fraction)

    # Summaries for training
    tf.summary.scalar('loss', loss)
    if cf.triplet_strategy == "batch_all":
        tf.summary.scalar('fraction_positive_triplets', fraction)

    tf.summary.image('train_image', images, max_outputs=1)

    train_op = train_op_fun(loss, global_step, num_examples, cf)
    return loss, end_points, train_op