beg_token = beg_token.to(device)

with ptan.common.utils.TBMeanTracker(writer, 100) as tb_tracker:
    optimiser = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)
    batch_idx = 0
    best_bleu = None
    for epoch in range(MAX_EPOCHES):
        random.shuffle(train_data)
        dial_shown = False

        total_samples = 0
        skipped_samples = 0
        bleus_argmax = []
        bleus_sample = []

        for batch in data.iterate_batches(train_data, BATCH_SIZE):
            batch_idx += 1
            optimiser.zero_grad()
            input_seq, input_batch, output_batch = \
                model.pack_batch_no_out(batch, net.emb, device)
            enc = net.encode(input_seq)

            net_policies = []
            net_actions = []
            net_advantages = []
            beg_embedding = net.emb(beg_token)

            for idx, inp_idx in enumerate(input_batch):
                total_samples += 1
                # Strip the leading BOS token from every reference sequence.
                ref_indices = [
                    indices[1:]
                    for indices in output_batch[idx]
                ]
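# The loop above accumulates per-step logits, sampled actions and advantages
# before the actual update. A minimal, self-contained sketch of how such
# net_policies / net_actions / net_advantages lists are typically turned into
# a REINFORCE-style policy-gradient loss. The tensor shapes and the
# log_softmax estimator are assumptions for illustration, not a verbatim copy
# of this training script.
import torch
import torch.nn.functional as F

def policy_gradient_loss(net_policies, net_actions, net_advantages):
    """REINFORCE loss: -mean(advantage * log pi(action))."""
    policies_v = torch.cat(net_policies)                  # (steps, vocab) logits
    actions_t = torch.as_tensor(net_actions, dtype=torch.long,
                                device=policies_v.device)  # (steps,)
    adv_v = torch.as_tensor(net_advantages, dtype=torch.float32,
                            device=policies_v.device)      # (steps,)
    log_prob_v = F.log_softmax(policies_v, dim=1)
    chosen_log_prob = log_prob_v[range(len(net_actions)), actions_t]
    return -(adv_v * chosen_log_prob).mean()

# Example with dummy data: a vocabulary of 10 tokens, three decoding steps.
if __name__ == "__main__":
    pols = [torch.randn(1, 10, requires_grad=True) for _ in range(3)]
    loss_v = policy_gradient_loss(pols, [2, 5, 7], [0.1, -0.3, 0.2])
    loss_v.backward()  # gradients flow back into whatever produced the logits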
maml_batch_idx = 0
maml_batch_count = 0
best_true_reward = None
time_start = time.time()

# Loop over epochs.
for epoch in range(MAX_EPOCHES):
    dial_shown = False
    random.shuffle(train_data)
    total_samples = 0
    skipped_samples = 0
    true_reward_argmax = []
    true_reward_sample = []

    # Stage 1.
    for batch in data.iterate_batches(train_data, args.batches):
        # The dict stores the initial parameters of the modules.
        old_param_dict = metaLearner.get_net_named_parameter()
        # temp_param_dict = metaLearner.get_net_parameter()
        maml_batch_idx += 1
        # Each batch performs one gradient update.
        maml_batch_count += 1
        # A batch here is a batch of tasks in the MAML sense;
        # for each task, a minibatch of its support set is established.
        meta_losses, running_vars, meta_total_samples, meta_skipped_samples, \
            true_reward_argmax_batch, true_reward_sample_batch = \
            metaLearner.reptile_sample(
                batch, old_param_dict=old_param_dict, dial_shown=dial_shown,
                epoch_count=epoch, batch_count=maml_batch_count,
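# metaLearner.reptile_sample() is not shown here. A minimal sketch of the
# Reptile outer update it feeds into: after adapting a copy of the network on
# each task's support set, the initial parameters are moved a small step
# toward the adapted ones. The names (meta_lr, the dict-of-tensors parameter
# format) are illustrative assumptions, not this repository's exact API.
import torch

@torch.no_grad()
def reptile_outer_update(net, old_param_dict, adapted_param_dicts, meta_lr=0.1):
    """theta <- theta + meta_lr * mean_k(theta'_k - theta), averaged over tasks."""
    for name, param in net.named_parameters():
        old = old_param_dict[name]
        # Average parameter displacement across all tasks in the meta-batch.
        delta = torch.stack([apd[name] - old
                             for apd in adapted_param_dicts]).mean(dim=0)
        param.copy_(old + meta_lr * delta)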
train_data, test_data = data.split_train_test(train_data)
log.info("Train set has %d phrases, test %d", len(train_data), len(test_data))

net = model.PhraseModel(emb_size=model.EMBEDDING_DIM, dict_size=len(emb_dict),
                        hid_size=model.HIDDEN_STATE_SIZE).to(device)
log.info("Model: %s", net)

writer = SummaryWriter(comment="-" + args.name)

optimiser = optim.Adam(net.parameters(), lr=LEARNING_RATE)
best_bleu = None
for epoch in range(MAX_EPOCHES):
    losses = []
    bleu_sum = 0.0
    bleu_count = 0
    for batch in data.iterate_batches(train_data, BATCH_SIZE):
        optimiser.zero_grad()
        input_seq, out_seq_list, _, out_idx = model.pack_batch(batch, net.emb, device)
        enc = net.encode(input_seq)

        net_results = []
        net_targets = []
        for idx, out_seq in enumerate(out_seq_list):
            ref_indices = out_idx[idx][1:]
            enc_item = net.get_encoded_item(enc, idx)
            if random.random() < TEACHER_PROB:
                # Teacher forcing: feed ground-truth tokens into the decoder.
                r = net.decode_teacher(enc_item, out_seq)
                bleu_sum += model.seq_bleu(r, ref_indices)
            else:
                # Otherwise feed the decoder's own argmax predictions back in.
                r, seq = net.decode_chain_argmax(enc_item, out_seq.data[0:1],
                                                 len(ref_indices))
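# model.seq_bleu() is used above but not defined in this listing. A plausible
# minimal implementation, assuming it scores the decoder's argmax tokens
# against a single reference with NLTK's smoothed sentence-level BLEU
# (the smoothing choice is an assumption):
import torch
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def seq_bleu(model_out, ref_seq):
    """model_out: (seq_len, vocab) logits; ref_seq: list of reference token ids."""
    model_seq = torch.max(model_out.data, dim=1)[1]  # argmax token at each step
    model_seq = model_seq.cpu().tolist()
    return sentence_bleu([ref_seq], model_seq,
                         smoothing_function=SmoothingFunction().method1)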
def establish_positive_question_documents_pair(MAX_TOKENS):
    # Dict: docID -> index.
    docID_dict, _ = data.get_docID_indices(
        data.get_ordered_docID_document(ORDERED_QID_QUESTION_DICT))
    # Index -> qid.
    rev_docID_dict = {id: doc for doc, id in docID_dict.items()}
    # List of (question, {question information and answer}) pairs;
    # the training pairs are in 1:1 format.
    phrase_pairs, emb_dict = data.load_data_MAML(TRAIN_QUESTION_ANSWER_PATH,
                                                 DIC_PATH, MAX_TOKENS)
    print("Obtained %d phrase pairs with %d unique words from %s."
          % (len(phrase_pairs), len(emb_dict), TRAIN_QUESTION_ANSWER_PATH))
    phrase_pairs_944K = data.load_data_MAML(TRAIN_944K_QUESTION_ANSWER_PATH,
                                            max_tokens=MAX_TOKENS)
    print("Obtained %d phrase pairs from %s."
          % (len(phrase_pairs_944K), TRAIN_944K_QUESTION_ANSWER_PATH))
    # Transform tokens into indices in the dictionary.
    train_data = data.encode_phrase_pairs_RLTR(phrase_pairs, emb_dict)
    # train_data = data.group_train_data(train_data)
    train_data = data.group_train_data_RLTR(train_data)
    train_data_944K = data.encode_phrase_pairs_RLTR(phrase_pairs_944K, emb_dict)
    train_data_944K = data.group_train_data_RLTR_for_support(train_data_944K)

    dict944k = data.get944k(DICT_944K)
    print("Reading dict944k from %s is done. %d pairs in dict944k."
          % (DICT_944K, len(dict944k)))
    dict944k_weak = data.get944k(DICT_944K_WEAK)
    print("Reading dict944k_weak from %s is done. %d pairs in dict944k_weak."
          % (DICT_944K_WEAK, len(dict944k_weak)))

    metaLearner = metalearner.MetaLearner(
        samples=5, train_data_support_944K=train_data_944K,
        dict=dict944k, dict_weak=dict944k_weak, steps=5, weak_flag=True)

    question_documents_pair_list = {}
    idx = 0
    for temp_batch in data.iterate_batches(train_data, 1):
        task = temp_batch[0]
        if len(task) == 2 and 'qid' in task[1]:
            # print("Task %s is training..." % (str(task[1]['qid'])))
            # Establish the support set for this task.
            support_set = metaLearner.establish_support_set(
                task, metaLearner.steps, metaLearner.weak_flag,
                metaLearner.train_data_support_944K)
            documents = []
            if len(support_set) > 0:
                for support_sample in support_set:
                    if len(support_sample) == 2 and 'qid' in support_sample[1]:
                        documents.append(support_sample[1]['qid'])
            else:
                print('Task %s has no support set!' % (str(task[1]['qid'])))
                documents.append(task[1]['qid'])
            question_documents_pair_list[task[1]['qid']] = documents
            if idx % 100 == 0:
                print(idx)
            idx += 1
        else:
            print('Task has no qid or len(task) != 2:')
            print(task)

    fw = open('../data/auto_QA_data/retriever_question_documents_pair.json',
              'w', encoding="UTF-8")
    fw.write(json.dumps(question_documents_pair_list, indent=1, ensure_ascii=False))
    fw.close()
    print('Writing retriever_question_documents_pair.json is done!')
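# The JSON written above maps each training qid to the qids of its support
# documents. A hedged example of reading it back; the path mirrors the one
# used above, while the consuming retriever code is assumed, not shown here.
import json

with open('../data/auto_QA_data/retriever_question_documents_pair.json',
          encoding="UTF-8") as fr:
    qid2docs = json.load(fr)

some_qid = next(iter(qid2docs))
print("Question %s has %d positive documents." % (some_qid, len(qid2docs[some_qid])))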
import logging
import random

from libbots import data, model, utils

import torch

log = logging.getLogger("data_test")

DIC_PATH = '../data/auto_QA_data/share.question'

if __name__ == "__main__":
    for epoch in range(3):
        # Renamed from "list" to avoid shadowing the built-in.
        numbers = [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19
        ]
        random.shuffle(numbers)
        for batch in data.iterate_batches(numbers, 5):
            print(batch)
        print(str(epoch) + '------------------------------------')
        # Iterating again without reshuffling yields the same batches.
        for batch in data.iterate_batches(numbers, 5):
            print(batch)
        print(str(epoch) + '------------------------------------')

    device = torch.device("cuda")
    a = torch.tensor(1.0).to(device)
    b = torch.tensor(2.0).to(device)
    temp_list = [a, b]
    print(temp_list)
    # Stack the scalars into one tensor so they can be reduced together.
    temp_list = torch.stack(temp_list).to(device)
    print(temp_list)
    loss = temp_list.mean().to(device)
    print(loss)
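# data.iterate_batches() is exercised above but defined elsewhere in libbots.
# A minimal sketch consistent with the behaviour the test exercises
# (fixed-size chunks taken in order); whether the real implementation yields
# or drops a final short chunk is an assumption, since 20 items split evenly
# into batches of 5 here.
def iterate_batches(items, batch_size):
    assert isinstance(batch_size, int) and batch_size > 0
    for ofs in range(0, len(items), batch_size):
        yield items[ofs:ofs + batch_size]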