def main():
    """Run one GNN experiment: optional training, validation and testing.

    Command-line settings are parsed from the module-level ``parser`` and
    stored in the global ``args``. Target statistics (mean, std and the
    accuracy used as denominator of the error ratio) are read from a
    hard-coded pickle file and indexed by ``args.task``.

    NOTE: datasets and statistics are read with ``pickle.load`` and the model
    with ``torch.load``; only point these at trusted files.

    Returns:
        ``(test_error, ratio_test)`` when the test phase is enabled
        (``args.test``); ``None`` otherwise.

    Raises:
        ValueError: if a model has to be loaded but ``args.model_path`` is
            not set.
    """
    global args
    args = parser.parse_args()

    if args.log_path is None:
        args.log_path = ('log/gnn/lg_' + str(args.lg)
                         + '_up_' + str(args.update)
                         + '_bs_' + str(args.batch_size)
                         + '_ep_' + str(args.max_epoch)
                         + '_st_' + str(args.epoch_step)
                         + '_op_' + str(args.optim)
                         + '_lr_' + str(args.lr)
                         + '_da_' + str(args.lrdamping)
                         + '_L_' + str(args.layers)
                         + '_h_' + str(args.nfeatures)
                         + '_ta_' + str(args.task)
                         + '_' + str(time.time())[-3:]
                         + '.pickle')
    # Log the effective path; the local `log_path` only existed when the
    # path was auto-generated.
    log.info("Log path : " + str(args.log_path))

    # logger
    logger = logs.Logger(args.log_path)
    logger.write_settings(args)

    # Check if CUDA is enabled.
    # Flags are compared with `== True` on purpose: their argparse types are
    # declared outside this function, so truthiness could behave differently.
    if args.cuda == True and torch.cuda.is_available():
        log.info('Working on GPU')
    else:
        log.info('Working on CPU')
        args.cuda = False

    # Target stats
    stats_path = '/misc/vlgscratch4/BrunaGroup/sulem/chem/data/tensors/target_stat.pickle'
    with open(stats_path, 'rb') as handle:
        M, S, A = pickle.load(handle)
    mean = M[args.task].item()
    std = S[args.task].item()
    accuracy = A[args.task].item()

    # Load training, validation and test datasets (only those requested)
    if args.train == True:
        with open(args.train_path, 'rb') as handle:
            train_set = pickle.load(handle)
        Ntrain = len(train_set)
        log.info("Number of training instances : " + str(Ntrain))
        logger.add_info('Training set size : ' + str(Ntrain))

    if args.val == True:
        with open(args.valid_path, 'rb') as handle:
            valid_set = pickle.load(handle)
        Nvalid = len(valid_set)
        log.info("Number of validation instances : " + str(Nvalid))
        logger.add_info('Validation set size : ' + str(Nvalid))

    if args.test == True:
        with open(args.test_path, 'rb') as handle:
            test_set = pickle.load(handle)
        Ntest = len(test_set)
        log.info("Number of test instances : " + str(Ntest))
        logger.add_info('Test set size : ' + str(Ntest))

    logger.add_info('Number of features of the inputs : ' + str(args.dim_input))

    # Creates or loads model
    if args.train == False or args.model_path is not None:
        if args.model_path is None:
            # Without training there is nothing to evaluate unless a saved
            # model is supplied; fail early with an explicit message.
            raise ValueError(
                'A model path is required when training is disabled')
        gnn = torch.load(args.model_path)
        log.info('Network loaded')
    else:
        if args.lg == False:
            gnn = model_mnb.GNN_simple(args.task, args.nfeatures, args.layers,
                                       args.dim_input, 1, args.J)
            logger.add_model('gnn simple')
        else:
            gnn = model_mnb.GNN_lg(args.task, args.nfeatures, args.layers,
                                   args.dim_input, args.J, 1, args.update)
            logger.add_model('gnn with LG')
        log.info('Network created')

    # Criterion and optimizer (anything other than 'sgd'/'adamax' -> Adam)
    criterion = nn.MSELoss()
    if args.optim == 'sgd':
        optimizer = torch.optim.SGD(gnn.parameters(), lr=args.lr,
                                    momentum=args.momentum)
    elif args.optim == 'adamax':
        optimizer = torch.optim.Adamax(gnn.parameters(), lr=args.lr)
    else:
        optimizer = torch.optim.Adam(gnn.parameters(), lr=args.lr)

    if args.cuda == True:
        gnn = gnn.cuda()
        criterion = criterion.cuda()

    # Training
    if args.train == True:
        gnn.train()
        log.info('Training the GNN')
        logger.add_res('Training phase')
        run_loss = utils.RunningAverage()
        run_error = utils.RunningAverage()

        for epoch in range(args.max_epoch):
            t0 = time.time()

            # Step decay: multiply the learning rate by `lrdamping` every
            # `epoch_step` epochs and push it into the live optimizer.
            if epoch != 0 and epoch % args.epoch_step == 0:
                args.lr = args.lr * args.lrdamping
                for param_group in optimizer.param_groups:
                    param_group['lr'] = args.lr

            loss, error = train_mnb.train_with_mnb(
                gnn, train_set, args.task, criterion, optimizer,
                args.cuda, args.batch_size, mean, std)

            dur = int(time.time() - t0)
            run_loss.update(loss)
            run_error.update(error)
            logger.add_epoch_info(epoch + 1, run_loss.val, run_error.val, dur)
            log.info(
                'Epoch {} : Avg Error {:.3f}; Average Loss {:.3f} Time : {}'
                .format(epoch + 1, run_error.val, run_loss.val, dur))

        training_time = sum(logger.time_epoch)
        ratio = run_error.val / accuracy
        logger.add_train_info(run_loss.val, run_error.val, ratio,
                              training_time)
        log.info(
            'Training finished : Duration {} secs, Avg Loss {:.3f}, Mean Average Error {:.3f}, Error ratio {:.3f}'
            .format(training_time, run_loss.val, run_error.val, ratio))
        logger.save_model(gnn)

    # Validating
    if args.val == True:
        log.info('Evaluating on the validation set...')
        logger.add_res('Validation phase')
        val_loss, val_error = test_mnb.test_with_mnb(
            gnn, valid_set, args.task, criterion, args.cuda,
            args.batch_size, mean, std, logger)
        ratio_val = val_error / accuracy
        log.info(
            'Validation finished : Avg loss {:.3f}, Mean Average Error {:.3f}, Error ratio {:.3f}'
            .format(val_loss, val_error, ratio_val))
        logger.add_test_perf(val_loss, val_error, ratio_val)
        logger.plot_train_logs()
        logger.plot_test_logs()

    # Testing
    if args.test == True:
        log.info('Evaluating on the test set...')
        logger.add_res('Test phase')
        test_loss, test_error = test_mnb.test_with_mnb(
            gnn, test_set, args.task, criterion, args.cuda,
            args.batch_size, mean, std, logger)
        ratio_test = test_error / accuracy
        log.info(
            'Test finished : Avg loss {:.3f}, Mean Average Error {:.3f}, Error ratio {:.3f}'
            .format(test_loss, test_error, ratio_test))
        logger.add_test_perf(test_loss, test_error, ratio_test)
        logger.plot_train_logs()
        # The test metrics only exist in this branch, so return them here.
        return test_error, ratio_test

    return None
def main():
    """Run one QM9 GNN experiment: optional training, validation and testing.

    Command-line settings are parsed from the module-level ``parser`` and
    stored in the global ``args``. A single pickled data set is split into
    training, validation and test sets by
    ``loading.prepare_experiment_sets``. Target statistics (mean, std and the
    accuracy used as denominator of the error ratio) are read from a
    hard-coded pickle file and indexed by ``args.task``.

    NOTE: data and statistics are read with ``pickle.load`` and the model
    with ``torch.load``; only point these at trusted files.

    Returns:
        ``(test_error, ratio_test)`` when the test phase is enabled
        (``args.test``); ``None`` otherwise.
    """
    global args
    args = parser.parse_args()

    # Setting log path
    if args.log_path is None:
        args.log_path = ('log/qm9/lg_' + str(args.lg)
                         + '_up_' + str(args.update)
                         + '_gru_' + str(args.gru)
                         + '_bs_' + str(args.batch_size)
                         + '_ep_' + str(args.max_epoch)
                         + '_st_' + str(args.epoch_step)
                         + '_op_' + str(args.optim)
                         + '_lr_' + str(args.lr)
                         + '_da_' + str(args.lrdamping)
                         + '_L_' + str(args.layers)
                         + '_h_' + str(args.nfeatures)
                         + '_ta_' + str(args.task)
                         + '_' + str(time.time())[-3:]
                         + '.pickle')
    # Log the effective path; the local `log_path` only existed when the
    # path was auto-generated.
    log.info("Log path : " + str(args.log_path))

    # Initializing logger
    logger = logs.Logger(args.log_path)
    logger.write_settings(args)

    # Check if CUDA is enabled.
    # Flags are compared with `== True` on purpose: their argparse types are
    # declared outside this function, so truthiness could behave differently.
    if args.cuda == True and torch.cuda.is_available():
        log.info('Working on GPU')
    else:
        log.info('Working on CPU')
        args.cuda = False

    # Loading population statistics for the task
    stats_path = '/misc/vlgscratch4/BrunaGroup/sulem/chem/data/tensors/target_stat.pickle'
    with open(stats_path, 'rb') as handle:
        M, S, A = pickle.load(handle)
    mean = M[args.task].item()
    std = S[args.task].item()
    accuracy = A[args.task].item()

    # Loading experiment sets (same logger object as the rest of the function)
    log.info("Loading data...")
    with open(args.data_path, 'rb') as handle:
        data_set = pickle.load(handle)
    train_set, valid_set, test_set = loading.prepare_experiment_sets(
        data_set, args.shuffle)

    if args.train == True:
        Ntrain = len(train_set)
        log.info("Number of training instances : " + str(Ntrain))
        logger.add_info('Training set size : ' + str(Ntrain))

    if args.val == True:
        Nvalid = len(valid_set)
        log.info("Number of validation instances : " + str(Nvalid))
        logger.add_info('Validation set size : ' + str(Nvalid))

    if args.test == True:
        Ntest = len(test_set)
        log.info("Number of test instances : " + str(Ntest))
        logger.add_info('Test set size : ' + str(Ntest))

    # Creating or loading model
    if args.model_path is not None:
        gnn = torch.load(args.model_path)
        log.info('Network loaded')
    else:
        if args.lg == False:
            gnn = model_mnb.GNN_simple(args.task, args.nfeatures, args.layers,
                                       args.dim_input, 1, args.J, args.gru)
            logger.add_model('gnn simple')
        else:
            gnn = model_mnb.GNN_lg(args.task, args.nfeatures, args.layers,
                                   args.dim_input, args.J, 1, args.update)
            logger.add_model('gnn with LG')
        log.info('Network created')

    # Criterion
    criterion = nn.MSELoss()

    if args.cuda == True:
        gnn = gnn.cuda()
        criterion = criterion.cuda()

    # Training
    if args.train == True:
        gnn.train()
        log.info('Training the GNN')
        logger.add_res('Training phase')
        run_loss = utils.RunningAverage()
        run_error = utils.RunningAverage()

        # Build the optimizer once so its running statistics survive across
        # epochs; re-creating it every epoch would reset them.
        optimizer = torch.optim.Adamax(gnn.parameters(), lr=args.lr)
        # Epoch durations are accumulated here because nothing in this
        # function records them into the logger.
        total_seconds = 0

        # Per-epoch evaluation on the validation/test sets is currently
        # disabled; only training statistics are tracked in this loop.
        for epoch in range(args.max_epoch):
            t0 = time.time()
            loss, error = train_mnb.train_with_mnb(
                gnn, train_set, args.task, criterion, optimizer,
                args.cuda, args.batch_size, mean, std)

            dur = int(time.time() - t0)
            total_seconds += dur
            run_loss.update(loss)
            run_error.update(error)

            # Step decay applied after the epoch, so the damped rate takes
            # effect from the next epoch on (same schedule as before).
            if epoch != 0 and epoch % args.epoch_step == 0:
                args.lr = args.lr * args.lrdamping
                for param_group in optimizer.param_groups:
                    param_group['lr'] = args.lr

            # Arguments follow the label order: loss first, then error.
            log.info('Epoch {} : Train loss {:.3f} error {:.3f} Time : {}'
                     .format(epoch + 1, run_loss.val, run_error.val, dur))

        training_time = total_seconds // 60  # whole minutes
        ratio = run_error.val / accuracy

        logger.add_train_info(run_loss.val, run_error.val, ratio,
                              training_time)
        log.info('Training finished : Duration {} minutes, Loss {:.3f}, MAE {:.3f}, Error ratio {:.3f}'
                 .format(training_time, run_loss.val, run_error.val, ratio))
        logger.save_model(gnn)

    # Validating
    if args.val == True:
        log.info('Evaluating on the validation set...')
        logger.add_res('Validation phase')
        val_loss, val_error = test_mnb.test_with_mnb(
            gnn, valid_set, args.task, criterion, args.cuda,
            args.batch_size, mean, std, logger)
        ratio_val = val_error / accuracy
        log.info('Validation finished : Avg loss {:.3f}, Mean Average Error {:.3f}, Error ratio {:.3f}'
                 .format(val_loss, val_error, ratio_val))
        logger.add_test_perf(val_loss, val_error, ratio_val)
        logger.plot_train_logs()
        logger.plot_test_logs()

    # Testing
    if args.test == True:
        log.info('Evaluating on the test set...')
        logger.add_res('Test phase')
        test_loss, test_error = test_mnb.test_with_mnb(
            gnn, test_set, args.task, criterion, args.cuda,
            args.batch_size, mean, std, logger)
        ratio_test = test_error / accuracy
        log.info('Test finished : Avg loss {:.3f}, Mean Average Error {:.3f}, Error ratio {:.3f}'
                 .format(test_loss, test_error, ratio_test))
        logger.add_test_perf(test_loss, test_error, ratio_test)
        logger.plot_train_logs()
        # The test metrics only exist in this branch, so return them here.
        return test_error, ratio_test

    return None
def _train_and_summarize(model, optimizer, title, first_entry, train_set,
                         criterion, mean, std, accuracy, logger):
    """Train ``model`` for ``args.max_epoch`` epochs and log a summary.

    Args:
        model: network to train; switched to training mode here.
        optimizer: optimizer bound to ``model``'s parameters.
        title: heading written to both the console log and the result log.
        first_entry: index into the logger's per-epoch lists at which this
            run's entries start, so earlier runs are left out of the averages.
        train_set, criterion, mean, std: forwarded to
            ``train_mnb.train_with_mnb``.
        accuracy: factor multiplied with the average error to get the MAE.
        logger: experiment logger receiving per-epoch and summary values.
    """
    model.train()
    log.info(title)
    logger.add_res(title)

    # Each run decays its own copy of the configured learning rate, so a
    # later model is not affected by the decay applied to an earlier one.
    lr = args.lr
    for epoch in range(args.max_epoch):
        t0 = time.time()

        if epoch != 0 and epoch % args.epoch_step == 0:
            lr = lr * args.lrdamping
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        loss, error = train_mnb.train_with_mnb(
            model, train_set, args.task, criterion, optimizer,
            args.cuda, args.batch_size, mean, std)

        dur = int(time.time() - t0)
        logger.add_train_loss(epoch + 1, loss)
        logger.add_train_error(epoch + 1, error)
        logger.add_epoch_time(epoch + 1, dur)
        log.info('Epoch {} : Avg Error Ratio {:.3f}; Average Loss {:.3f} Time : {}'
                 .format(epoch + 1, error, loss, dur))

    avg_loss = np.mean(np.array(logger.loss_train[first_entry:]))
    avg_error = np.mean(np.array(logger.error_train[first_entry:]))
    training_time = sum(logger.time_epoch[first_entry:])
    mae = avg_error * accuracy

    logger.add_res('Average training loss : {:.3f}'.format(avg_loss))
    logger.add_res('Average training error : {:.3f}'.format(avg_error))
    logger.add_res('Mean Absolute error : {:.3f}'.format(mae))
    logger.add_res('Training time : {} seconds'.format(training_time))
    log.info('Avg Error Ratio {err:.3f}; Mean Average Error {MAE:.3f}'
             .format(err=avg_error, MAE=mae))


def main():
    """Train a simple GNN and a line-graph GNN, then compare them.

    Settings are parsed from the module-level ``parser`` into the global
    ``args``. Both models are trained one after the other on the training
    set with separate Adamax optimizers, then evaluated on the validation
    set. Target mean and std are taken from ``utils.data_stats`` on the
    training targets (entries 2 and 3 of its result).

    NOTE: data sets are read with ``pickle.load``; only use trusted files.

    Returns:
        ``(MAE_test, MAE_test_lg)``: validation error ratio of each model
        multiplied by the chemical accuracy of the task.
    """
    chem_acc = torch.tensor([0.1, 0.05, 0.043, 0.043, 0.043, 0.043, 0.043,
                             0.1, 10.0, 1.2, 0.043, 0.043, 0.0012])

    global args
    args = parser.parse_args()
    accuracy = chem_acc[args.task]

    # logger
    logger = logs.Logger(args.log_path)
    # Write experiment settings
    logger.write_settings(args)
    logger.add_info('Chemical accuracy for task {} : {:.2f}'.format(args.task, accuracy))

    # Check if CUDA is enabled.
    # The flag is compared with `== True` on purpose: its argparse type is
    # declared outside this function.
    if args.cuda == True and torch.cuda.is_available():
        log.info('Working on GPU')
        torch.cuda.manual_seed(0)
    else:
        log.info('Working on CPU')
        args.cuda = False
        torch.manual_seed(0)

    # load training and validation datasets
    with open(args.train_path, 'rb') as handle:
        train_set = pickle.load(handle)
    with open(args.valid_path, 'rb') as handle:
        valid_set = pickle.load(handle)

    Ntrain = len(train_set)
    Nvalid = len(valid_set)

    logger.add_info('Training set size : ' + str(Ntrain))
    logger.add_info('Validation set size : ' + str(Nvalid))

    # Collect the task target of every sample (element 2 of each instance)
    train_target = torch.zeros(Ntrain)
    valid_target = torch.zeros(Nvalid)

    for i in range(Ntrain):
        train_target[i] = train_set[i][2][args.task]
    t_stats = utils.data_stats(train_target)
    mean = t_stats[2]
    std = t_stats[3]

    for i in range(Nvalid):
        valid_target[i] = valid_set[i][2][args.task]
    v_stats = utils.data_stats(valid_target)

    logger.add_info('Stats on the training set task [min, max, mean, std] : ' + str(t_stats))
    logger.add_info('Stats on the validation set task [min, max, mean, std] : ' + str(v_stats))

    # Input feature dimension read from the first training instance
    dim_input = train_set[0][0].size()[1]

    # Creates 2 GNNs : 1 not using the line graph, 1 using it
    gnn = model_mnb.GNN_simple(args.task, args.nfeatures, args.layers, dim_input, args.J)
    logger.add_model('gnn simple')
    gnn_lg = model_mnb.GNN_lg(args.task, args.nfeatures, args.layers, dim_input, args.J)
    logger.add_model('gnn with LG')
    log.info('2 networks created')

    # Criterion
    criterion = nn.MSELoss()

    # Optimizers
    optimizer = torch.optim.Adamax(gnn.parameters(), lr=args.lr)
    optimizer_lg = torch.optim.Adamax(gnn_lg.parameters(), lr=args.lr)

    if args.cuda == True:
        gnn = gnn.cuda()
        gnn_lg = gnn_lg.cuda()

    # Training the 2 models successively. The second run's logger entries
    # start after the `max_epoch` entries written by the first run.
    _train_and_summarize(gnn, optimizer,
                         'Training the GNN without line graph', 0,
                         train_set, criterion, mean, std, accuracy, logger)
    _train_and_summarize(gnn_lg, optimizer_lg,
                         'Training the GNN with the line graph',
                         args.max_epoch,
                         train_set, criterion, mean, std, accuracy, logger)

    # Testing
    # NOTE(review): unlike the training call, no criterion is passed here;
    # confirm this matches the signature of test_mnb.test_with_mnb.
    loss, error_ratio = test_mnb.test_with_mnb(gnn, valid_set, args.task, args.cuda,
                                               args.batch_size, mean, std, logger)

    loss_lg, error_ratio_lg = test_mnb.test_with_mnb(gnn_lg, valid_set, args.task, args.cuda,
                                                     args.batch_size, mean, std, logger)

    logger.write_test_perf()
    logger.plot_train_logs()
    logger.plot_test_logs()

    MAE_test = error_ratio * accuracy
    MAE_test_lg = error_ratio_lg * accuracy

    log.info('Error ratio of the simple gnn : {:.5f} of the gnn using line graph : {:.5f} '
             .format(error_ratio, error_ratio_lg))

    return MAE_test, MAE_test_lg