def train(self, x_set, y_set):
    """
    Train the model. The training set is first split into train and
    validation subsets; at the start of every epoch the original dataset
    is shuffled and split again. At the end of each epoch the validation
    function reports accuracy and average loss for that epoch.
    :param x_set: the complete training dataset.
    :param y_set: the corresponding classes.
    """
    loss_sum = 0  # accumulates training loss across all epochs
    for i in range(EPOCHS):
        x_set, y_set = utils.shuffle(x_set, y_set)
        train_x, train_y, val_x, val_y = utils.split_validation(
            x_set, y_set, VALIDATION_SIZE)
        train_x, train_y = utils.shuffle(train_x, train_y)
        # Run every example from the train dataset through the network.
        for x, y in zip(train_x, train_y):
            x = np.reshape(x, (1, x.shape[0]))
            z1, h1, z2 = self.feedforward(x)
            probs = utils.softmax(self.weights2, h1, self.bias2, CLASSES)
            loss = utils.loss(probs[int(y)])
            loss_sum += loss
            self.backprop(x, y, z1, h1, z2, probs)
        val_loss, acc = self.validation(val_x, val_y)

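# The helpers above live in a project-local `utils` module that is not shown.
# A minimal sketch of `utils.split_validation` consistent with the call
# signature used here -- the body and the assumption that VALIDATION_SIZE is
# a fraction in [0, 1) are mine, not from the original code:
def split_validation(x_set, y_set, validation_size):
    # Hold out the trailing `validation_size` fraction of the (already
    # shuffled) dataset for validation; the leading part is used for training.
    n_train = int(len(x_set) * (1.0 - validation_size))
    return x_set[:n_train], y_set[:n_train], x_set[n_train:], y_set[n_train:]
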
def main():
    if opt.truncate:
        train_data = pickle.load(
            open('../datasets/' + opt.dataset + '/train_shortonly.txt', 'rb'))
    else:
        train_data = pickle.load(
            open('../datasets/' + opt.dataset + '/train.txt', 'rb'))
    if opt.validation:
        train_data, valid_data = split_validation(train_data, opt.valid_portion)
        test_data = valid_data
    else:
        if opt.truncate:
            test_data = pickle.load(
                open('../datasets/' + opt.dataset + '/test_shortonly.txt', 'rb'))
        else:
            test_data = pickle.load(
                open('../datasets/' + opt.dataset + '/test.txt', 'rb'))
    # all_train_seq = pickle.load(open('../datasets/' + opt.dataset + '/all_train_seq.txt', 'rb'))
    # g = build_graph(all_train_seq)
    train_data = Data(opt, train_data, shuffle=True)
    test_data = Data(opt, test_data, shuffle=False)
    # del all_train_seq, g
    if opt.dataset == 'diginetica':
        n_node = 43098
    elif opt.dataset == 'yoochoose1_64' or opt.dataset == 'yoochoose1_4':
        n_node = 37484
    else:
        n_node = 310
    model = trans_to_cuda(SessionGraph(opt, n_node))
    model = torch.nn.DataParallel(model, device_ids=[0, 1])
    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0
    for epoch in range(opt.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        hit, mrr = train_test(model, train_data, test_data)
        flag = 0
        if (hit - best_result[0]) > 0.0001:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if (mrr - best_result[1]) > 0.0001:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        print('Best Result:')
        print('\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tEpoch:\t%d,\t%d' %
              (best_result[0], best_result[1], best_epoch[0], best_epoch[1]))
        bad_counter += 1 - flag
        if bad_counter >= opt.patience:
            break
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))

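# `split_validation` is imported from the project's utils in these SR-GNN-style
# scripts. A minimal sketch under the SR-GNN convention, where train_data is a
# (sequences, targets) tuple and valid_portion is the held-out fraction -- the
# body below is illustrative, not quoted verbatim from any of the repos:
import numpy as np

def split_validation(train_set, valid_portion):
    train_set_x, train_set_y = train_set
    n_samples = len(train_set_x)
    # Shuffle indices, then carve the tail off as the validation split.
    sidx = np.arange(n_samples, dtype='int32')
    np.random.shuffle(sidx)
    n_train = int(np.round(n_samples * (1.0 - valid_portion)))
    valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
    valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
    train_set_x = [train_set_x[s] for s in sidx[:n_train]]
    train_set_y = [train_set_y[s] for s in sidx[:n_train]]
    return (train_set_x, train_set_y), (valid_set_x, valid_set_y)
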
def main():
    doc_content_list, doc_train_list, doc_test_list, vocab_dic, labels_dic, \
        max_num_sentence, keywords_dic, class_weights = read_file(
            args.dataset, args.use_LDA)
    pre_trained_weight = []
    if args.dataset == 'mr':
        gloveFile = 'data/glove.6B.300d.txt'
        if not os.path.exists(gloveFile):
            print('Please download the pretrained GloVe embeddings from '
                  'https://nlp.stanford.edu/projects/glove/')
            return
        pre_trained_weight = loadGloveModel(gloveFile, vocab_dic,
                                            len(vocab_dic) + 1)
    train_data, valid_data = split_validation(doc_train_list,
                                              args.valid_portion, SEED)
    # with valid_portion=0.0 this project's variant returns a single split
    test_data = split_validation(doc_test_list, 0.0, SEED)
    num_categories = len(labels_dic)
    train_data = Data(train_data, max_num_sentence, keywords_dic,
                      num_categories, args.use_LDA)
    valid_data = Data(valid_data, max_num_sentence, keywords_dic,
                      num_categories, args.use_LDA)
    test_data = Data(test_data, max_num_sentence, keywords_dic,
                     num_categories, args.use_LDA)
    model = trans_to_cuda(
        DocumentGraph(args, pre_trained_weight, class_weights,
                      len(vocab_dic) + 1, len(labels_dic)))
    for epoch in range(args.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        train_model(model, train_data, args)
        valid_detail, valid_acc = test_model(model, valid_data, args, False)
        detail, acc = test_model(model, test_data, args, False)
        print('Validation Accuracy:\t%.4f, Test Accuracy:\t%.4f' %
              (valid_acc, acc))

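# This project's split_validation differs from the SR-GNN one above: it takes
# a seed, and the two call sites imply it returns a single split when the
# validation portion is 0. An illustrative sketch with the behavior inferred
# from those call sites (names and body are mine):
import random

def split_validation(doc_list, valid_portion, seed):
    rng = random.Random(seed)
    docs = list(doc_list)
    rng.shuffle(docs)
    if valid_portion <= 0.0:
        # Used for the test list: return everything as one split.
        return docs
    n_train = int(len(docs) * (1.0 - valid_portion))
    return docs[:n_train], docs[n_train:]
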
def main():
    train_data = pickle.load(
        open('../datasets/' + opt.dataset + '/train.txt', 'rb'))
    if opt.validation:
        train_data, valid_data = split_validation(train_data, opt.valid_portion)
        test_data = valid_data
    else:
        test_data = pickle.load(
            open('../datasets/' + opt.dataset + '/test.txt', 'rb'))
    # all_train_seq = pickle.load(open('../datasets/' + opt.dataset + '/all_train_seq.txt', 'rb'))
    # g = build_graph(all_train_seq)
    train_data = Data(train_data, shuffle=True, opt=opt)
    test_data = Data(test_data, shuffle=False, opt=opt)
    # del all_train_seq, g
    if opt.dataset == 'diginetica':
        n_node = 43098
    elif opt.dataset == 'yoochoose1_64' or opt.dataset == 'yoochoose1_4':
        n_node = 37484
    elif opt.dataset == 'diginetica_users':
        n_node = 57070
    else:
        n_node = 310
    model = trans_to_cuda(
        SessionGraph(opt, n_node, max(train_data.len_max, test_data.len_max)))
    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0
    for epoch in range(opt.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        hit, mrr = train_test(model, train_data, test_data)
        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        print('Best Result:')
        print('\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tEpoch:\t%d,\t%d' %
              (best_result[0], best_result[1], best_epoch[0], best_epoch[1]))
        bad_counter += 1 - flag
        if bad_counter >= opt.patience:
            break
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))

def main():
    train_data = pickle.load(
        open('./datasets/' + opt.dataset + '/train.txt', 'rb'))
    if opt.validation:
        train_data, valid_data = split_validation(train_data, opt.valid_portion)
        test_data = valid_data
    else:
        test_data = pickle.load(
            open('./datasets/' + opt.dataset + '/test.txt', 'rb'))
    train_data = Data(train_data, shuffle=True)
    test_data = Data(test_data, shuffle=False)
    if opt.dataset == 'diginetica':
        n_node = 43098
    else:
        n_node = 37484
    model = trans_to_cuda(SelfAttentionNetwork(opt, n_node))
    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0
    for epoch in range(opt.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        hit, mrr = train_test(model, train_data, test_data)
        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        print('Best Result:')
        print('\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tEpoch:\t%d,\t%d' %
              (best_result[0], best_result[1], best_epoch[0], best_epoch[1]))
        bad_counter += 1 - flag
        if bad_counter >= opt.patience:
            break
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))

def main():
    train_data = pickle.load(
        open('../datasets/' + opt.dataset + '/train.txt', 'rb'))
    if opt.validation:
        train_data, valid_data = split_validation(train_data, opt.valid_portion)
        test_data = valid_data
    else:
        test_data = pickle.load(
            open('../datasets/' + opt.dataset + '/test.txt', 'rb'))
    # all_train_seq = pickle.load(open('../datasets/' + opt.dataset + '/all_train_seq.txt', 'rb'))
    # g = build_graph(all_train_seq)
    # train_data example -- <class 'tuple'>: ([[282], [281, 308], [281],
    # [58, 58, 58, 230, 230, 230, 246, 230], [58, 58, 58, 230, 230, 230, 246],
    # [58, 58, 58, 230, 230, 230], [58, 58, 58, 230, 230],
    train_data = Data(train_data, shuffle=True)
    test_data = Data(test_data, shuffle=False)
    # del all_train_seq, g
    if opt.dataset == 'diginetica':
        n_node = 43098
    elif opt.dataset == 'yoochoose1_64' or opt.dataset == 'yoochoose1_4':
        n_node = 37484
    else:
        n_node = 310
    '''
    SessionGraph(
      (embedding): Embedding(310, 100)
      (gnn): GNN(
        (linear_edge_in): Linear(in_features=100, out_features=100, bias=True)
        (linear_edge_out): Linear(in_features=100, out_features=100, bias=True)
        (linear_edge_f): Linear(in_features=100, out_features=100, bias=True)
      )
      (linear_one): Linear(in_features=100, out_features=100, bias=True)
      (linear_two): Linear(in_features=100, out_features=100, bias=True)
      (linear_three): Linear(in_features=100, out_features=1, bias=False)
      (linear_transform): Linear(in_features=200, out_features=100, bias=True)
      (loss_function): CrossEntropyLoss()
    )
    '''
    # opt: the config options; n_node = 310 is the number of nodes appearing
    # in the sessions, i.e. the number of distinct items involved.
    model = trans_to_cuda(SessionGraph(opt, n_node))
    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0
    for epoch in range(opt.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        hit, mrr = train_test(model, train_data, test_data)
        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        print('Best Result:')
        print('\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tEpoch:\t%d,\t%d' %
              (best_result[0], best_result[1], best_epoch[0], best_epoch[1]))
        bad_counter += 1 - flag
        if bad_counter >= opt.patience:
            break
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))

def main(run):
    train_data = pickle.load(
        open(os.path.join(opt.dataset_folder, 'train.dat'), 'rb'))
    if opt.validation:
        train_data, valid_data = split_validation(train_data, opt.valid_portion)
        test_data = valid_data
    else:
        test_data = pickle.load(
            open(os.path.join(opt.dataset_folder, 'test.dat'), 'rb'))
    print(test_data[0][0], test_data[1][0])
    cars = pickle.load(
        open(os.path.join(opt.dataset_folder, 'reg_no_item_id.dat'), 'rb'))
    item_features = pickle.load(
        open(os.path.join(opt.dataset_folder, 'itemid_features.dat'), 'rb'))
    train_data = Data(train_data, shuffle=True, features=item_features)
    test_data = Data(test_data, shuffle=False, features=item_features)
    n_node = len(cars) + 1  # unique cars: 1149 / 6176 / 5933
    n_feature_columns = len(item_features[1])
    features_vector = get_feature_vectors(n_node, item_features)
    run.log("Unique No. of Cars", n_node)
    model = trans_to_cuda(
        SessionGraph(opt, n_node,
                     n_feature_columns=n_feature_columns,
                     features=features_vector))
    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0
    # Before training, record baseline scores on the test set.
    hit, mrr = predict_scores(model, test_data)
    run.log(f'Recall@{opt.top_k}', hit)
    run.log(f'MRR@{opt.top_k}', mrr)
    for epoch in range(opt.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        hit, mrr, mean_loss = train_test(model, train_data, test_data)
        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        # Metrics capture.
        run.log(f'Recall@{opt.top_k}', hit)
        run.log(f'MRR@{opt.top_k}', mrr)
        run.log('Mean Loss', mean_loss)
        print('Current Result:')
        print('\tRecall@%d:\t%.4f\tMRR@%d:\t%.4f\tMean Loss:\t%.4f,\tEpoch:\t%d' %
              (opt.top_k, hit, opt.top_k, mrr, mean_loss, epoch))
        print('Best Result:')
        print('\tRecall@%d:\t%.4f\tMRR@%d:\t%.4f\tEpoch:\t%d,\t%d' %
              (opt.top_k, best_result[0], opt.top_k, best_result[1],
               best_epoch[0], best_epoch[1]))
        bad_counter += 1 - flag
        if bad_counter >= opt.patience:
            break
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))
    run.log('Training Time (s)', (end - start))
    # Save the model and copy the item/vehicle mapping files alongside it.
    output_folder = opt.output_folder
    os.makedirs(output_folder, exist_ok=True)
    torch.save(model, f'{output_folder}/{opt.model_name}_full.pth')
    torch.save(model.state_dict(), f'{output_folder}/{opt.model_name}.pt')
    shutil.copy(
        os.path.join(opt.dataset_folder, 'itemid_to_vehicle_mapping.dat'),
        f'{output_folder}/{opt.model_name}_item_veh_mapping.dat')
    shutil.copy(os.path.join(opt.dataset_folder, 'reg_no_item_id.dat'),
                f'{output_folder}/{opt.model_name}_veh_item_mapping.dat')
    shutil.copy(os.path.join(opt.dataset_folder, 'itemid_features.dat'),
                f'{output_folder}/itemid_features.dat')
    run.log("Model Saved in Outputs", True)

def main():
    if args.wandb_on:
        wandb.init(project=args.wandb_project,
                   name=args.model_name + '-' + args.dataset)
        wandb.config.update(
            {'hostname': os.popen('hostname').read().split('.')[0]})
        wandb.config.update(args)
    train_data = pickle.load(open(args.data_folder + args.train_data, 'rb'))
    if args.validation:
        train_data, valid_data = split_validation(train_data, args.valid_portion)
        test_data = valid_data
    else:
        test_data = pickle.load(open(args.data_folder + args.valid_data, 'rb'))
    # all_train_seq = pickle.load(open('../../_data/' + args.dataset + '/all_train_seq.txt', 'rb'))
    # g = build_graph(all_train_seq)
    train_data = Data(train_data, shuffle=True)
    test_data = Data(test_data, shuffle=False)
    # del all_train_seq, g
    if args.dataset == 'diginetica':
        n_node = 43098
    elif args.dataset == 'yoochoose1_64' or args.dataset == 'yoochoose1_4':
        n_node = 37484
    else:
        n_node = 310
    model = trans_to_cuda(SessionGraph(args, n_node))
    if args.wandb_on:
        wandb.watch(model, log="all")
    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0
    for epoch in range(args.n_epochs):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        hit, mrr = train_test(epoch, model, train_data, test_data, args)
        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        print('Best Result:')
        print(f'\tRecall@{args.top_k}:\t{best_result[0]:.4f}'
              f'\tMRR@{args.top_k}:\t{best_result[1]:.4f}'
              f'\tEpoch:\t{best_epoch[0]},\t{best_epoch[1]}')
        if args.wandb_on:
            wandb.log({
                "best_recall": best_result[0],
                "best_mrr": best_result[1],
                "best_recall_epoch": best_epoch[0],
                "best_mrr_epoch": best_epoch[1]
            })
        bad_counter += 1 - flag
        if bad_counter >= args.patience:
            break
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))

def main():
    train_data = pickle.load(
        open('../datasets/' + opt.dataset + '/train.txt', 'rb'))
    if opt.validation:
        train_data, valid_data = split_validation(train_data, opt.valid_portion)
        test_data = valid_data
    else:
        test_data = pickle.load(
            open('../datasets/' + opt.dataset + '/test.txt', 'rb'))
    # all_train_seq = pickle.load(open('../datasets/' + opt.dataset + '/all_train_seq.txt', 'rb'))
    # g = build_graph(all_train_seq)
    train_data = Data(train_data, shuffle=True)
    test_data = Data(test_data, shuffle=False)
    # del all_train_seq, g
    if opt.dataset == 'diginetica':
        n_node = 43098
    elif opt.dataset == 'yoochoose1_64' or opt.dataset == 'yoochoose1_4':
        n_node = 37484
    else:
        n_node = 310
    model = trans_to_cuda(SessionGraph(opt, n_node))
    start = time.time()
    best_result = [0, 0]
    best_epoch = [0, 0]
    bad_counter = 0
    for epoch in range(opt.epoch):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        start_paper = time.time()
        hit, mrr = train_test(model, train_data, test_data)
        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        print('Best Result:')
        result = '\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tEpoch:\t%d,\t%d' % (
            best_result[0], best_result[1], best_epoch[0], best_epoch[1])
        print(result)
        # file_path = "../logs/" + opt.dataset + 'batch' + str(opt.batchSize) + opt.method + str(opt.k) + str(opt.nonhybrid) + opt.method_net_last + opt.method_net_last_n1 + ".txt"
        file_path = ("../logs/" + opt.dataset + 'batch' + str(opt.batchSize) +
                     opt.method + str(opt.k) + opt.distance + ".txt")
        with open(file_path, "a") as f:
            f.write(str(result))
            f.write('\n')
        bad_counter += 1 - flag
        if bad_counter >= opt.patience:
            break
        end_paper = time.time()
        time_paper = end_paper - start_paper
        print("epoch wall-clock time ->", end_paper - start_paper)
        file_path_time = ("../logs_time/" + opt.dataset + 'batch' +
                          str(opt.batchSize) + opt.method + str(opt.k) +
                          opt.distance + ".txt")
        with open(file_path_time, "a") as f:
            f.write(str(time_paper))
            f.write('\n')
        # Append a separating newline to each of the loss logs per epoch.
        file_path_two = ("../logs_loss/" + opt.dataset + 'batch' +
                         str(opt.batchSize) + opt.method + str(opt.k) +
                         opt.distance + ".txt")
        with open(file_path_two, "a") as f:
            f.write('\n')
        file_path_three = ("../logs_loss/" + opt.dataset + 'batch' +
                           str(opt.batchSize) + opt.method + str(opt.k) +
                           opt.distance + "all" + ".txt")
        with open(file_path_three, "a") as f:
            f.write('\n')
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))

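# The best-result / patience bookkeeping above is duplicated across every
# main() variant in this section. A small helper could consolidate it; this is
# an illustrative refactoring sketch (BestTracker is my name, not from any of
# the original repositories), keeping the originals' semantics where the
# bad counter accumulates and is never reset on improvement:
class BestTracker:
    """Tracks best Recall@K / MRR@K and signals early stopping."""

    def __init__(self, patience):
        self.best = [0.0, 0.0]      # best hit (Recall@K), best MRR@K
        self.best_epoch = [0, 0]
        self.patience = patience
        self.bad_counter = 0

    def update(self, hit, mrr, epoch):
        improved = False
        if hit >= self.best[0]:
            self.best[0], self.best_epoch[0] = hit, epoch
            improved = True
        if mrr >= self.best[1]:
            self.best[1], self.best_epoch[1] = mrr, epoch
            improved = True
        if not improved:
            self.bad_counter += 1
        return self.bad_counter >= self.patience  # True => stop training

# Usage inside any of the training loops above:
#     tracker = BestTracker(opt.patience)
#     for epoch in range(opt.epoch):
#         hit, mrr = train_test(model, train_data, test_data)
#         if tracker.update(hit, mrr, epoch):
#             break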