def hard_sampler(y_none, y_pred):
    # Obtain the video-caption similarity matrix and its diagonal (positive pairs)
    v, c, y_true = tf.split(y_pred, [2048, 2048, 768], axis=1)
    S = cos_similarity(v, c)
    St = tf.transpose(S)
    diagonal = tf.linalg.diag_part(S)
    reshaped = tf.expand_dims(diagonal, axis=1)
    # Set the diagonal to -1 so it is never chosen as the next best candidate
    S = tf.linalg.set_diag(S, -tf.math.pow(diagonal, 0))
    St = tf.linalg.set_diag(St, -tf.math.pow(diagonal, 0))
    # Subtract the positive-pair similarity from the n hardest negatives per row
    values_s = tf.math.top_k(S, k=n)[0]
    vid_contrast = values_s - reshaped
    values_st = tf.math.top_k(St, k=n)[0]
    sen_contrast = values_st - reshaped
    b_loss = tf.maximum(0.0, vid_contrast + margin) + tf.maximum(0.0, sen_contrast + margin)
    b_sum = tf.reduce_sum(b_loss, axis=-1)  # Should be mean
    return tf.reduce_mean(b_sum)
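# NOTE: cos_similarity is a helper defined elsewhere in this repo; the losses here
# rely on it returning the full pairwise cosine-similarity matrix for two 2-D
# batches (elsewhere it is also called on 1-D vectors and with an extra scaling
# argument). A minimal sketch of that assumed pairwise behaviour, under a
# hypothetical name so it does not shadow the real helper:
def _pairwise_cos_similarity_sketch(a, b):
    # S[i, j] = <a_i, b_j> / (||a_i|| * ||b_j||)
    a_norm = tf.math.l2_normalize(a, axis=-1)
    b_norm = tf.math.l2_normalize(b, axis=-1)
    return tf.matmul(a_norm, b_norm, transpose_b=True)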
def gen_topics_embedding(corpus, num_topics, dictionary, word_dim, w2v_model):
    lda_model = train_lda(True, corpus, num_topics, dictionary)
    temp = dictionary[0]  # Access one entry so gensim lazily builds dictionary.id2token
    topics_emb = np.zeros((num_topics, word_dim))
    for topic_id in range(num_topics):
        topic = lda_model.get_topic_terms(topicid=topic_id, topn=10)
        topic_emb = np.zeros(word_dim)
        weights = []
        vecs = []
        for idx in range(len(topic)):
            word = dictionary.id2token[topic[idx][0]]
            weights.append(topic[idx][1])
            vecs.append(w2v_model[word])
        # Normalize the topic-term weights and take the weighted average of word vectors
        weights = np.array(weights)
        weights = weights / weights.sum()
        for idx in range(len(topic)):
            topic_emb += weights[idx] * vecs[idx]
        topics_emb[topic_id] = topic_emb
    sim_mat = np.zeros((num_topics, num_topics))
    for i in range(num_topics):
        for j in range(num_topics):
            sim_mat[i][j] = cos_similarity(topics_emb[i], topics_emb[j])
    print("Topics similarity matrix:\n{}".format(sim_mat))
    return topics_emb
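# Usage sketch (hypothetical helper, not part of the original pipeline): builds a toy
# gensim corpus and word2vec model just to show the inputs gen_topics_embedding
# expects. Assumes gensim >= 4 and that train_lda (called inside
# gen_topics_embedding) is available in this module.
def _demo_gen_topics_embedding():
    from gensim.corpora import Dictionary
    from gensim.models import Word2Vec
    toy_docs = [["fake", "news", "claim"], ["video", "caption", "match"],
                ["topic", "model", "news"], ["caption", "claim", "topic"]]
    toy_dictionary = Dictionary(toy_docs)
    toy_corpus = [toy_dictionary.doc2bow(doc) for doc in toy_docs]
    toy_w2v = Word2Vec(sentences=toy_docs, vector_size=50, min_count=1).wv
    # Returns an array of shape (num_topics, word_dim)
    return gen_topics_embedding(toy_corpus, num_topics=2, dictionary=toy_dictionary,
                                word_dim=50, w2v_model=toy_w2v)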
def validate_classification(train_idxs, test_idxs, raw_tasks, topics_emb, net, theta, dataset):
    train_ids = [raw_tasks[train_idx] for train_idx in train_idxs]
    test_ids = [raw_tasks[test_idx] for test_idx in test_idxs]
    train_embs = [topics_emb[train_idx] for train_idx in train_idxs]
    test_embs = [topics_emb[test_idx] for test_idx in test_idxs]
    train_emb_distrib = compute_mix_emb(train_ids, train_embs)

    support_shots = num_training_samples_per_class
    query_shots = num_classes_per_task

    metrics = []
    for idx in range(len(test_ids)):
        task_ids = test_ids[idx]
        task_embs = test_embs[idx]
        net.load_weights(INNER_MODEL_PATH)
        task_sample_ids = sample_task_from_raw_task_by_label(task_ids, support_shots, query_shots, False)
        task_s = load_struct_data([task_sample_ids['s']], dataset)[0]
        task_q = load_struct_data([task_sample_ids['q']], dataset)[0]

        zeta = cos_similarity(train_emb_distrib, task_embs)
        drop_theta(zeta, theta)
        adapt_to_task(task_s, net, theta)

        WX_q, X_q, cnnX_q, MX_q, y_q = parse_task_data(task_q)
        scores = net.evaluate([WX_q, X_q, cnnX_q, MX_q], y_q, verbose=0)
        y_pred = net.predict([WX_q, X_q, cnnX_q, MX_q])
        print("y_q:{} y_pred:{}".format(y_q.reshape(-1), y_pred.reshape(-1)))
        metrics.append(scores)
    return metrics
def proxy_sampler_2(y_true, y_pred):
    # Proxy similarities on the ground-truth sentence embeddings (computed but not used below)
    proxy = cos_similarity(y_true, y_true)
    v, c = tf.split(y_pred, 2, axis=1)
    S = cos_similarity(v, c)
    St = tf.transpose(S)
    diagonal = tf.linalg.diag_part(S)
    reshaped = tf.expand_dims(diagonal, axis=1)
    # Subtract the positive-pair diagonal from the similarity matrices
    vid_contrast = S - reshaped + margin
    sen_contrast = St - reshaped + margin
    b_loss = tf.maximum(0.0, vid_contrast) + tf.maximum(0.0, sen_contrast)
    b_sum = tf.reduce_sum(b_loss, axis=-1)  # Should be mean
    return tf.reduce_mean(b_sum)
def task_metalearn(inp, reuse=True):
    """Perform gradient descent for one task in the meta-batch."""
    img, label, features = inp
    task_outputbs, task_lossesb = [], []
    task_accuraciesb = []

    if FLAGS.dropout_ratio > 0:
        dropout_indices = tf.random.shuffle(tf.range(FLAGS.dict_size))[:int((1 - FLAGS.dropout_ratio) * FLAGS.dict_size)]
    else:
        dropout_indices = tf.range(FLAGS.dict_size)
    k_dropped = tf.gather(memo_weights['k'], indices=dropout_indices, axis=0) if 'train' in prefix else memo_weights['k']
    v_dropped = tf.gather(memo_weights['v'], indices=dropout_indices, axis=0) if 'train' in prefix else memo_weights['v']

    sim = cos_similarity(features, k_dropped, memo_weights['alpha'])
    task_outputa = self.fc_forward(k_dropped, weights)
    task_lossa = self.loss_func_weighted(task_outputa, v_dropped, sim)

    grads = tf.gradients(task_lossa, list(weights.values()))
    gradients = dict(zip(weights.keys(), grads))
    fast_weights = dict()
    for key in weights.keys():
        if key in ['w1', 'b1']:
            if FLAGS.scalar_lr:
                fast_weights[key] = weights[key] - memo_weights['adaptive_lr'] * gradients[key]
            else:
                fast_weights[key] = weights[key] - tf.matmul(memo_weights['adaptive_lr_diag'], gradients[key])
        else:
            fast_weights[key] = weights[key]
    output = self.fc_forward(features, fast_weights)
    task_outputbs.append(output)
    task_lossesb.append(self.loss_func(output, label))

    for j in range(num_updates - 1):
        loss = self.loss_func_weighted(self.fc_forward(k_dropped, fast_weights), v_dropped, sim)
        grads = tf.gradients(loss, list(fast_weights.values()))
        gradients = dict(zip(fast_weights.keys(), grads))
        for key in fast_weights.keys():
            if key in ['w1', 'b1']:
                if FLAGS.scalar_lr:
                    fast_weights[key] = fast_weights[key] - memo_weights['adaptive_lr'] * gradients[key]
                else:
                    fast_weights[key] = fast_weights[key] - tf.matmul(memo_weights['adaptive_lr_diag'], gradients[key])
            else:
                fast_weights[key] = fast_weights[key]
        output = self.fc_forward(features, fast_weights)
        task_outputbs.append(output)
        task_lossesb.append(self.loss_func(output, label))

    task_output = [task_outputa, task_outputbs, task_lossa, task_lossesb]
    task_accuracya = tf.contrib.metrics.accuracy(tf.argmax(tf.nn.softmax(task_outputa), 1), tf.argmax(v_dropped, 1))
    for j in range(num_updates):
        task_accuraciesb.append(tf.contrib.metrics.accuracy(tf.argmax(tf.nn.softmax(task_outputbs[j]), 1), tf.argmax(label, 1)))
    task_output.extend([task_accuracya, task_accuraciesb])

    if FLAGS.visualize:
        task_output.extend([memo_weights['v'], memo_weights['k'], sim, features])
    return task_output
def naive_max_ranking_roll(y_none, y_pred):
    vidp, senp, y_true = tf.split(y_pred, [2048, 2048, 768], axis=1)
    # "Roll" because each sample's batch neighbour is treated as its negative
    vidn = tf.roll(vidp, shift=1, axis=0)
    senn = tf.roll(senp, shift=1, axis=0)
    vp_sn = cos_similarity(vidp, senn)
    d_vp_sn = tf.linalg.diag_part(vp_sn)
    r_vp_sn = tf.expand_dims(d_vp_sn, axis=1)
    vp_sp = cos_similarity(vidp, senp)
    d_vp_sp = tf.linalg.diag_part(vp_sp)
    r_vp_sp = tf.expand_dims(d_vp_sp, axis=1)
    vn_sp = cos_similarity(vidn, senp)
    d_vn_sp = tf.linalg.diag_part(vn_sp)
    r_vn_sp = tf.expand_dims(d_vn_sp, axis=1)
    # Max-margin ranking loss
    loss = tf.maximum(0.0, margin + r_vp_sn - r_vp_sp) + tf.maximum(0.0, margin + r_vn_sp - r_vp_sp)
    loss = tf.reduce_mean(loss) + 1e-12
    return loss
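# Wiring sketch (hypothetical model, not the repo's network): shows how these ranking
# losses plug into Keras. Assumption taken from the tf.split calls above: y_pred is
# the concatenation [video_emb(2048) | sentence_emb(2048) | ground-truth text emb(768)],
# and the module-level globals `margin` and `n` are set before compiling.
def _build_demo_retrieval_model(video_dim=4096, text_dim=768):
    video_in = tf.keras.Input(shape=(video_dim,))
    text_in = tf.keras.Input(shape=(text_dim,))
    video_emb = tf.keras.layers.Dense(2048)(video_in)
    sent_emb = tf.keras.layers.Dense(2048)(text_in)
    joint = tf.keras.layers.Concatenate(axis=1)([video_emb, sent_emb, text_in])
    model = tf.keras.Model([video_in, text_in], joint)
    # naive_max_ranking_roll ignores y_true, so any dummy target of matching
    # batch size works during fit().
    model.compile(optimizer="adam", loss=naive_max_ranking_roll)
    return model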
def proxy_sampler_3(y_none, y_pred):
    v, c, y_true = tf.split(y_pred, [2048, 2048, 768], axis=1)
    # Proxy: for each ground-truth sentence, the indices of its n most dissimilar sentences
    C = cos_similarity(y_true, y_true)
    values_p, indices_p = tf.math.top_k(-C, k=n)
    S = cos_similarity(v, c)
    St = tf.transpose(S)
    diagonal = tf.linalg.diag_part(S)
    reshaped = tf.expand_dims(diagonal, axis=1)
    values_s = tf.gather(S, indices_p, batch_dims=1)
    values_st = tf.gather(St, indices_p, batch_dims=1)
    vid_contrast = values_s - reshaped + margin
    sen_contrast = values_st - reshaped + margin
    b_loss = tf.maximum(0.0, vid_contrast) + tf.maximum(0.0, sen_contrast)
    b_sum = tf.reduce_sum(b_loss, axis=-1)
    return b_sum
def proxy_sampler(y_true, y_pred):
    # Sentence-sentence similarity matrix; negate it so top_k returns the
    # positions of the most dissimilar sentences
    C = cos_similarity(y_true, y_true)
    indices_p = tf.math.top_k(-C, k=n)[1]
    # Video-caption similarity and its diagonal (positive pairs)
    v, c = tf.split(y_pred, 2, axis=1)
    S = cos_similarity(v, c)
    St = tf.transpose(S)
    diagonal = tf.linalg.diag_part(S)
    reshaped = tf.expand_dims(diagonal, axis=1)
    # Extract from the video-caption similarity matrix the positions chosen by the proxy
    values_s = tf.gather(S, indices_p, batch_dims=1)
    values_st = tf.gather(St, indices_p, batch_dims=1)
    vid_contrast = values_s - reshaped
    sen_contrast = values_st - reshaped
    b_loss = tf.maximum(0.0, vid_contrast + margin) + tf.maximum(0.0, sen_contrast + margin)
    b_sum = tf.reduce_sum(b_loss, axis=-1)  # Should be mean
    return tf.reduce_mean(b_sum)
def hard_sampler_full(y_none, y_pred):
    # Obtain the video-caption similarity matrix and its diagonal; unlike hard_sampler,
    # every off-diagonal entry contributes as a negative (no top-k selection)
    v, c, y_true = tf.split(y_pred, [2048, 2048, 768], axis=1)
    S = cos_similarity(v, c)
    St = tf.transpose(S)
    diagonal = tf.linalg.diag_part(S)
    reshaped = tf.expand_dims(diagonal, axis=1)
    # Subtract the positive-pair diagonal from the similarity matrices
    vid_contrast = S - reshaped + margin
    sen_contrast = St - reshaped + margin
    b_loss = tf.maximum(0.0, vid_contrast) + tf.maximum(0.0, sen_contrast)
    b_sum = tf.reduce_sum(b_loss, axis=-1)  # Should be mean
    return tf.reduce_mean(b_sum)
def iterative_loss(y_true, y_p):
    # v and c are the video and caption tensors from the batch, shape (b, repr_dim);
    # n is the number of negatives sampled per positive pair.
    v, c = tf.split(y_p, 2, axis=1)
    # D is the b x b video-caption similarity matrix
    D = cos_similarity(v, c)
    s = tf.shape(D)
    # Positive-pair similarities, repeated n times so they align with the n negatives
    diagonal = tf.linalg.diag_part(D)
    reshaped = tf.reshape(diagonal, (s[0], 1))
    repeated = tf.tile(reshaped, [1, n])
    vp_cp = tf.reshape(repeated, [n * s[0]])
    # Remove the diagonal so only negatives are chosen when ranking rows and columns
    rows = D - tf.linalg.diag(tf.linalg.diag_part(D)) - tf.eye(s[0])  # rows represent videos
    columns = tf.transpose(rows)  # columns represent captions
    values_r = tf.math.top_k(rows, k=n)[0]
    s_r = tf.shape(values_r)
    values_c = tf.math.top_k(columns, k=n)[0]
    s_c = tf.shape(values_c)
    vp_cn = tf.reshape(values_r, [n * s_r[0]])
    vn_cp = tf.reshape(values_c, [n * s_c[0]])
    loss = tf.maximum(0.0, margin + vp_cn - vp_cp) + tf.maximum(0.0, margin + vn_cp - vp_cp)
    loss = tf.reduce_mean(loss) + 1e-12
    return loss
def rank_matrix(a, b):
    sm = cos_similarity(a, b)
    return tf.argsort(sm, direction="DESCENDING")
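# Usage sketch for rank_matrix (toy tensors, hypothetical sizes): ranks[i, 0] is the
# index of the caption most similar to video i, so recall@1 can be read off directly.
def _demo_recall_at_1(num_pairs=8, dim=2048):
    videos = tf.random.normal((num_pairs, dim))
    captions = tf.random.normal((num_pairs, dim))
    ranks = rank_matrix(videos, captions)
    hits = tf.equal(ranks[:, 0], tf.range(num_pairs, dtype=ranks.dtype))
    return tf.reduce_mean(tf.cast(hits, tf.float32))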
def meta_train(train_idxs, val_idxs, test_idxs, raw_tasks, topics_emb, net, theta, dataset):
    train_ids = [raw_tasks[train_idx] for train_idx in train_idxs]
    val_ids = [raw_tasks[val_idx] for val_idx in val_idxs]
    test_ids = [raw_tasks[test_idx] for test_idx in test_idxs]
    train_embs = [topics_emb[train_idx] for train_idx in train_idxs]
    val_embs = [topics_emb[val_idx] for val_idx in val_idxs]
    test_embs = [topics_emb[test_idx] for test_idx in test_idxs]
    train_emb_distrib = compute_mix_emb(train_ids, train_embs)

    support_shots = num_training_samples_per_class * sample_rate
    query_shots = num_classes_per_task * sample_rate

    for epoch in range(resume_epoch, resume_epoch + num_epochs):
        print("Meta epoch_{}:".format(epoch + 1))
        task_num = len(train_ids)

        # Initialize gradient accumulators with zeros
        gradients = []
        for item in theta['mean']:
            gradients.append(np.zeros(item.shape))

        batch_num = batch_per_epoch
        for batch in range(batch_num):
            np.random.seed()
            batch_idxs = np.random.choice(np.arange(task_num), size=batch_size, replace=False, p=None)
            batch_ids = [train_ids[batch_idx] for batch_idx in batch_idxs]
            batch_embs = [train_embs[batch_idx] for batch_idx in batch_idxs]
            for idx in range(len(batch_ids)):
                # Run one task: load its data and reset the inner model
                task_ids = batch_ids[idx]
                net.load_weights(INNER_MODEL_PATH)
                task_sample_ids = sample_task_from_raw_task_by_label(task_ids, support_shots, query_shots)
                task_s = load_struct_data([task_sample_ids['s']], dataset)[0]
                task_q = load_struct_data([task_sample_ids['q']], dataset)[0]

                zeta = cos_similarity(train_emb_distrib, batch_embs[idx])
                drop_theta(zeta, theta)
                adapt_to_task(task_s, net, theta)

                # Evaluate the adapted model on the task's query set
                WX_q, X_q, cnnX_q, MX_q, y_q = parse_task_data(task_q)
                weights_1 = net.get_weights().copy()
                net.fit([WX_q, X_q, cnnX_q, MX_q], y_q, epochs=1)
                weights_2 = net.get_weights().copy()
                # Accumulate meta-gradients from the weight change of the inner update
                for idx in range(len(weights_1)):
                    gradients[idx] += ((weights_1[idx] - weights_2[idx]) / inner_lr)

        # Update theta
        for idx in range(len(theta['mean'])):
            theta['mean'][idx] = theta['mean'][idx] - gradients[idx] * (meta_lr * pow(lr_decay, epoch)) / batch_size
            theta['logSigma'][idx] = theta['logSigma'][idx] - (
                gradients[idx] * (meta_lr * pow(lr_decay, epoch)) / batch_size
                * np.exp(theta['logSigma'][idx]))

        # Evaluate the model on the validation tasks
        metrics = validate_classification(train_idxs, val_idxs, raw_tasks, topics_emb, net, theta, dataset)
        for score in metrics:
            print(score)
        print("\nVal average: [loss, accuracy, f1_score, precision, recall]\n{}\n".format(np.mean(metrics, axis=0)))
        with open(log_file, 'a+') as f:
            f.write("Val average epoch={}: {}\n".format(epoch + 1, np.mean(metrics, axis=0)))

        if (epoch + 1) % num_epochs_save == 0:
            checkpoint = {'theta': theta}
            print('SAVING WEIGHTS...')
            checkpoint_filename = 'Epoch_{0:d}.pt'.format(epoch + 1)
            print(checkpoint_filename)
            torch.save(checkpoint, os.path.join(dst_folder, checkpoint_filename))
        print()
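# Restore sketch (hypothetical helper): loads the {'theta': theta} dict that
# meta_train above saves to dst_folder as 'Epoch_<n>.pt'. Folder and epoch
# arguments are placeholders supplied by the caller.
def load_theta_checkpoint(folder, epoch):
    checkpoint_path = os.path.join(folder, 'Epoch_{0:d}.pt'.format(epoch))
    checkpoint = torch.load(checkpoint_path)
    return checkpoint['theta']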