示例#1
0
def main():
    """Run one GNN experiment: optional training, validation and testing.

    Command-line settings are parsed with the module-level ``parser`` and
    stored in the global ``args``. Depending on ``args.train``, ``args.val``
    and ``args.test`` the function loads the matching pickled datasets,
    creates or loads the network, trains it and evaluates it, recording
    everything through a ``logs.Logger`` bound to ``args.log_path``.

    The explicit ``== True`` / ``== False`` flag comparisons are kept as in
    the original because the parser definition is not visible here and
    truthiness tests could behave differently for non-bool flag values.

    Returns:
        ``(test_error, ratio_test)`` when ``args.test`` is set; ``None``
        otherwise.
    """
    global args
    args = parser.parse_args()

    # Build a default log path from the hyper-parameters when none is given.
    if args.log_path is None:
        args.log_path = (
            'log/gnn/lg_{}_up_{}_bs_{}_ep_{}_st_{}_op_{}_lr_{}_da_{}'
            '_L_{}_h_{}_ta_{}_{}.pickle'.format(
                args.lg, args.update, args.batch_size, args.max_epoch,
                args.epoch_step, args.optim, args.lr, args.lrdamping,
                args.layers, args.nfeatures, args.task,
                str(time.time())[-3:]))
    # Log the path from args: a local ``log_path`` only exists when the
    # default was generated, so using it here failed for user-supplied paths.
    log.info("Log path : " + str(args.log_path))

    # logger
    logger = logs.Logger(args.log_path)
    logger.write_settings(args)

    # Check if CUDA is enabled; fall back to CPU when it is unavailable.
    if args.cuda == True and torch.cuda.is_available():
        log.info('Working on GPU')
    else:
        log.info('Working on CPU')
        args.cuda = False

    # Target stats (per-task mean, std and accuracy used for the error ratio).
    # NOTE: pickle.load can execute arbitrary code; only use trusted files
    # for this and for the dataset paths below.
    stats_path = '/misc/vlgscratch4/BrunaGroup/sulem/chem/data/tensors/target_stat.pickle'
    with open(stats_path, 'rb') as file:
        M, S, A = pickle.load(file)
    mean = M[args.task].item()
    std = S[args.task].item()
    accuracy = A[args.task].item()

    # load training, validation and test datasets
    if args.train == True:
        with open(args.train_path, 'rb') as file:
            train_set = pickle.load(file)
        Ntrain = len(train_set)
        log.info("Number of training instances : " + str(Ntrain))
        logger.add_info('Training set size : ' + str(Ntrain))

    if args.val == True:
        with open(args.valid_path, 'rb') as file:
            valid_set = pickle.load(file)
        Nvalid = len(valid_set)
        log.info("Number of validation instances : " + str(Nvalid))
        logger.add_info('Validation set size : ' + str(Nvalid))

    if args.test == True:
        with open(args.test_path, 'rb') as file:
            test_set = pickle.load(file)
        Ntest = len(test_set)
        log.info("Number of test instances : " + str(Ntest))
        logger.add_info('Test set size : ' + str(Ntest))

    logger.add_info('Number of features of the inputs : ' +
                    str(args.dim_input))

    # Creates or loads model: a saved model is loaded whenever no training is
    # requested or a model path is given.
    if args.train == False or args.model_path is not None:
        gnn = torch.load(args.model_path)
        log.info('Network loaded')
    else:
        if args.lg == False:
            gnn = model_mnb.GNN_simple(args.task, args.nfeatures, args.layers,
                                       args.dim_input, 1, args.J)
            logger.add_model('gnn simple')
        else:
            gnn = model_mnb.GNN_lg(args.task, args.nfeatures, args.layers,
                                   args.dim_input, args.J, 1, args.update)
            logger.add_model('gnn with LG')
        log.info('Network created')

    # Criterion and optimizer (Adam is the fallback for unknown names).
    criterion = nn.MSELoss()

    if args.optim == 'sgd':
        optimizer = torch.optim.SGD(gnn.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum)
    elif args.optim == 'adamax':
        optimizer = torch.optim.Adamax(gnn.parameters(), lr=args.lr)
    else:
        optimizer = torch.optim.Adam(gnn.parameters(), lr=args.lr)

    if args.cuda == True:
        gnn = gnn.cuda()
        criterion = criterion.cuda()

    # Training
    if args.train == True:
        gnn.train()

        log.info('Training the GNN')
        logger.add_res('Training phase')

        run_loss = utils.RunningAverage()
        run_error = utils.RunningAverage()

        for epoch in range(args.max_epoch):
            t0 = time.time()

            # Step decay: every ``epoch_step`` epochs the learning rate is
            # multiplied by ``lrdamping`` and pushed into the optimizer.
            if epoch != 0 and epoch % args.epoch_step == 0:
                args.lr = args.lr * args.lrdamping
                for param_group in optimizer.param_groups:
                    param_group['lr'] = args.lr

            loss, error = train_mnb.train_with_mnb(gnn, train_set, args.task,
                                                   criterion, optimizer,
                                                   args.cuda, args.batch_size,
                                                   mean, std)

            dur = int(time.time() - t0)

            run_loss.update(loss)
            run_error.update(error)

            logger.add_epoch_info(epoch + 1, run_loss.val, run_error.val, dur)
            log.info(
                'Epoch {} : Avg Error {:.3f}; Average Loss {:.3f} Time : {}'.
                format(epoch + 1, run_error.val, run_loss.val, dur))

        training_time = sum(logger.time_epoch)
        ratio = run_error.val / accuracy

        logger.add_train_info(run_loss.val, run_error.val, ratio,
                              training_time)
        log.info(
            'Training finished : Duration {} secs, Avg Loss {:.3f}, Mean Average Error {:.3f}, Error ratio {:.3f}'
            .format(training_time, run_loss.val, run_error.val, ratio))

        logger.save_model(gnn)

    # Validating
    if args.val == True:
        log.info('Evaluating on the validation set...')
        logger.add_res('Validation phase')
        val_loss, val_error = test_mnb.test_with_mnb(gnn, valid_set, args.task,
                                                     criterion, args.cuda,
                                                     args.batch_size, mean,
                                                     std, logger)
        ratio_val = val_error / accuracy
        log.info(
            'Validation finished : Avg loss {:.3f}, Mean Average Error {:.3f}, Error ratio {:.3f}'
            .format(val_loss, val_error, ratio_val))
        logger.add_test_perf(val_loss, val_error, ratio_val)

        logger.plot_train_logs()
        logger.plot_test_logs()

    # Testing
    if args.test == True:
        log.info('Evaluating on the test set...')
        logger.add_res('Test phase')
        test_loss, test_error = test_mnb.test_with_mnb(gnn, test_set,
                                                       args.task, criterion,
                                                       args.cuda,
                                                       args.batch_size, mean,
                                                       std, logger)
        ratio_test = test_error / accuracy
        log.info(
            'Test finished : Avg loss {:.3f}, Mean Average Error {:.3f}, Error ratio {:.3f}'
            .format(test_loss, test_error, ratio_test))
        logger.add_test_perf(test_loss, test_error, ratio_test)

        logger.plot_train_logs()

        return test_error, ratio_test
示例#2
0
def main():
    """Run one QM9 GNN experiment: optional training, validation and testing.

    Command-line settings are parsed with the module-level ``parser`` and
    stored in the global ``args``. A single pickled dataset is loaded from
    ``args.data_path`` and split by ``loading.prepare_experiment_sets``; the
    network is created or loaded, optionally trained with Adamax, and then
    evaluated on the validation and/or test split. Results are recorded
    through a ``logs.Logger`` bound to ``args.log_path``.

    The explicit ``== True`` / ``== False`` flag comparisons are kept as in
    the original because the parser definition is not visible here.

    Returns:
        ``(test_error, ratio_test)`` when ``args.test`` is set; ``None``
        otherwise.
    """
    global args
    args = parser.parse_args()

    # Setting log path: derive one from the hyper-parameters if none is given.
    if args.log_path is None:
        args.log_path = (
            'log/qm9/lg_{}_up_{}_gru_{}_bs_{}_ep_{}_st_{}_op_{}_lr_{}_da_{}'
            '_L_{}_h_{}_ta_{}_{}.pickle'.format(
                args.lg, args.update, args.gru, args.batch_size,
                args.max_epoch, args.epoch_step, args.optim, args.lr,
                args.lrdamping, args.layers, args.nfeatures, args.task,
                str(time.time())[-3:]))
    # Log the path from args: a local ``log_path`` only exists when the
    # default was generated, so using it here failed for user-supplied paths.
    log.info("Log path : " + str(args.log_path))

    # Initializing logger
    logger = logs.Logger(args.log_path)
    logger.write_settings(args)

    # Check if CUDA is enabled; fall back to CPU when it is unavailable.
    if args.cuda == True and torch.cuda.is_available():
        log.info('Working on GPU')
    else:
        log.info('Working on CPU')
        args.cuda = False

    # Loading population statistics for the task.
    # NOTE: pickle.load can execute arbitrary code; only use trusted files
    # for this and for the dataset path below.
    stats_path = '/misc/vlgscratch4/BrunaGroup/sulem/chem/data/tensors/target_stat.pickle'
    with open(stats_path, 'rb') as file:
        M, S, A = pickle.load(file)
    mean = M[args.task].item()
    std = S[args.task].item()
    accuracy = A[args.task].item()

    # Loading experiment sets
    log.info("Loading data...")
    with open(args.data_path, 'rb') as file:
        data_set = pickle.load(file)

    train_set, valid_set, test_set = loading.prepare_experiment_sets(
        data_set, args.shuffle)

    if args.train == True:
        Ntrain = len(train_set)
        log.info("Number of training instances : " + str(Ntrain))
        logger.add_info('Training set size : ' + str(Ntrain))

    if args.val == True:
        Nvalid = len(valid_set)
        log.info("Number of validation instances : " + str(Nvalid))
        logger.add_info('Validation set size : ' + str(Nvalid))

    if args.test == True:
        Ntest = len(test_set)
        log.info("Number of test instances : " + str(Ntest))
        logger.add_info('Test set size : ' + str(Ntest))

    # Creating or loading model
    if args.model_path is not None:
        gnn = torch.load(args.model_path)
        log.info('Network loaded')
    else:
        if args.lg == False:
            gnn = model_mnb.GNN_simple(args.task, args.nfeatures, args.layers,
                                       args.dim_input, 1, args.J, args.gru)
            logger.add_model('gnn simple')
        else:
            gnn = model_mnb.GNN_lg(args.task, args.nfeatures, args.layers,
                                   args.dim_input, args.J, 1, args.update)
            logger.add_model('gnn with LG')
        log.info('Network created')

    # Criterion
    criterion = nn.MSELoss()

    if args.cuda == True:
        gnn = gnn.cuda()
        criterion = criterion.cuda()

    # Training
    if args.train == True:
        gnn.train()

        log.info('Training the GNN')
        logger.add_res('Training phase')

        run_loss = utils.RunningAverage()
        run_error = utils.RunningAverage()

        # Build the optimizer once so its running statistics survive across
        # epochs; previously a fresh Adamax was created at every epoch.
        # NOTE(review): ``args.optim`` appears in the log path but Adamax is
        # always used here - confirm this is intended.
        optimizer = torch.optim.Adamax(gnn.parameters(), lr=args.lr)

        # No per-epoch logger call is made in this loop, so the training
        # duration is accumulated locally instead of being read back from
        # ``logger.time_epoch``.
        total_seconds = 0

        for epoch in range(args.max_epoch):
            t0 = time.time()

            loss, error = train_mnb.train_with_mnb(gnn, train_set, args.task,
                                                   criterion, optimizer,
                                                   args.cuda, args.batch_size,
                                                   mean, std)

            dur = int(time.time() - t0)
            total_seconds += dur

            run_loss.update(loss)
            run_error.update(error)

            # Step decay applied after the epoch, so it takes effect from the
            # next one (same schedule as before).
            if epoch != 0 and epoch % args.epoch_step == 0:
                args.lr = args.lr * args.lrdamping
                for param_group in optimizer.param_groups:
                    param_group['lr'] = args.lr

            # Arguments follow the order of the labels (loss, then error);
            # they used to be swapped.
            log.info('Epoch {} : Train loss {:.3f} error {:.3f} Time : {}'
                     .format(epoch + 1, run_loss.val, run_error.val, dur))

        training_time = total_seconds // 60
        ratio = run_error.val / accuracy

        logger.add_train_info(run_loss.val, run_error.val, ratio,
                              training_time)

        log.info('Training finished : Duration {} minutes, Loss {:.3f}, MAE {:.3f}, Error ratio {:.3f}'
                 .format(training_time, run_loss.val, run_error.val, ratio))

        logger.save_model(gnn)

    # Validating
    if args.val == True:
        log.info('Evaluating on the validation set...')
        logger.add_res('Validation phase')
        val_loss, val_error = test_mnb.test_with_mnb(gnn, valid_set, args.task,
                                                     criterion, args.cuda,
                                                     args.batch_size,
                                                     mean, std, logger)
        ratio_val = val_error / accuracy
        log.info('Validation finished : Avg loss {:.3f}, Mean Average Error {:.3f}, Error ratio {:.3f}'
                 .format(val_loss, val_error, ratio_val))
        logger.add_test_perf(val_loss, val_error, ratio_val)

        logger.plot_train_logs()
        logger.plot_test_logs()

    # Testing
    if args.test == True:
        log.info('Evaluating on the test set...')
        logger.add_res('Test phase')
        test_loss, test_error = test_mnb.test_with_mnb(gnn, test_set,
                                                       args.task, criterion,
                                                       args.cuda,
                                                       args.batch_size,
                                                       mean, std, logger)
        ratio_test = test_error / accuracy
        log.info('Test finished : Avg loss {:.3f}, Mean Average Error {:.3f}, Error ratio {:.3f}'
                 .format(test_loss, test_error, ratio_test))
        logger.add_test_perf(test_loss, test_error, ratio_test)

        logger.plot_train_logs()

        return test_error, ratio_test
示例#3
0
def _train_and_report(model, optimizer, title, train_set, criterion, mean,
                      std, accuracy, logger, initial_lr, offset):
    """Train ``model`` for ``args.max_epoch`` epochs and log a summary.

    Args:
        model: network to train.
        optimizer: optimizer bound to ``model``'s parameters; its learning
            rate is decayed in place.
        title: message written to both ``log`` and ``logger`` before training.
        train_set: training data passed to ``train_mnb.train_with_mnb``.
        criterion: loss module passed to ``train_mnb.train_with_mnb``.
        mean, std: target statistics passed to ``train_mnb.train_with_mnb``.
        accuracy: value the average training error is multiplied by to
            obtain the reported mean absolute error.
        logger: ``logs.Logger`` receiving per-epoch and summary records.
        initial_lr: learning rate the decay schedule starts from.
        offset: index in the logger's per-epoch lists where this run's
            entries begin.
    """
    # Every run starts its decay schedule from the same learning rate.
    args.lr = initial_lr

    model.train()
    log.info(title)
    logger.add_res(title)

    for epoch in range(args.max_epoch):
        t0 = time.time()

        # Step decay: every ``epoch_step`` epochs the learning rate is
        # multiplied by ``lrdamping`` and pushed into this run's optimizer.
        if epoch != 0 and epoch % args.epoch_step == 0:
            args.lr = args.lr * args.lrdamping
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        loss, error = train_mnb.train_with_mnb(model, train_set, args.task,
                                               criterion, optimizer,
                                               args.cuda, args.batch_size,
                                               mean, std)

        dur = int(time.time() - t0)

        logger.add_train_loss(epoch + 1, loss)
        logger.add_train_error(epoch + 1, error)
        logger.add_epoch_time(epoch + 1, dur)

        log.info('Epoch {} : Avg Error Ratio {:.3f}; Average Loss {:.3f} Time : {}'
                 .format(epoch + 1, error, loss, dur))

    # Summarise only the entries recorded by this run.
    avg_loss = np.mean(np.array(logger.loss_train[offset:]))
    avg_error = np.mean(np.array(logger.error_train[offset:]))
    training_time = sum(logger.time_epoch[offset:])
    mae = avg_error * accuracy

    logger.add_res('Average training loss : {:.3f}'.format(avg_loss))
    logger.add_res('Average training error : {:.3f}'.format(avg_error))
    logger.add_res('Mean Absolute error : {:.3f}'.format(mae))
    logger.add_res('Training time : {} seconds'.format(training_time))

    log.info('Avg Error Ratio {err:.3f}; Mean Average Error {MAE:.3f}'
             .format(err=avg_error, MAE=mae))


def main():
    """Train a simple GNN and a line-graph GNN, then compare them.

    Command-line settings are parsed with the module-level ``parser`` and
    stored in the global ``args``. Both networks are trained one after the
    other on the pickled training set with identical hyper-parameters and
    evaluated on the validation set.

    Returns:
        ``(MAE_test, MAE_test_lg)``: the validation error ratio of each model
        multiplied by the chemical accuracy of the task.
    """
    chem_acc = torch.tensor([0.1, 0.05, 0.043, 0.043, 0.043, 0.043,
                             0.043, 0.1, 10.0, 1.2, 0.043, 0.043, 0.0012])

    global args
    args = parser.parse_args()

    accuracy = chem_acc[args.task]

    # logger
    logger = logs.Logger(args.log_path)
    # Write experiment settings
    logger.write_settings(args)
    # Four decimals so that small accuracies (e.g. 0.0012) are not shown as 0.00.
    logger.add_info('Chemical accuracy for task {} : {:.4f}'.format(args.task, accuracy))

    # Check if CUDA is enabled and seed the matching random generator.
    if args.cuda == True and torch.cuda.is_available():
        log.info('Working on GPU')
        torch.cuda.manual_seed(0)
    else:
        log.info('Working on CPU')
        args.cuda = False
        torch.manual_seed(0)

    # load training and validation datasets
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    with open(args.train_path, 'rb') as file:
        train_set = pickle.load(file)

    with open(args.valid_path, 'rb') as file:
        valid_set = pickle.load(file)

    Ntrain = len(train_set)
    Nvalid = len(valid_set)
    logger.add_info('Training set size : ' + str(Ntrain))
    logger.add_info('Validation set size : ' + str(Nvalid))

    # Collect the target of the selected task for every instance.
    train_target = torch.zeros(Ntrain)
    valid_target = torch.zeros(Nvalid)

    for i in range(Ntrain):
        train_target[i] = train_set[i][2][args.task]
    t_stats = utils.data_stats(train_target)
    mean = t_stats[2]
    std = t_stats[3]

    for i in range(Nvalid):
        valid_target[i] = valid_set[i][2][args.task]
    v_stats = utils.data_stats(valid_target)

    logger.add_info('Stats on the training set task [min, max, mean, std] : ' + str(t_stats))
    logger.add_info('Stats on the validation set task [min, max, mean, std] : ' + str(v_stats))

    dim_input = train_set[0][0].size()[1]

    # Creates 2 GNNs : 1 not using the line graph, 1 using it
    gnn = model_mnb.GNN_simple(args.task, args.nfeatures, args.layers, dim_input, args.J)
    logger.add_model('gnn simple')
    gnn_lg = model_mnb.GNN_lg(args.task, args.nfeatures, args.layers, dim_input, args.J)
    logger.add_model('gnn with LG')
    log.info('2 networks created')

    # Criterion
    criterion = nn.MSELoss()
    # Optimizers
    optimizer = torch.optim.Adamax(gnn.parameters(), lr=args.lr)
    optimizer_lg = torch.optim.Adamax(gnn_lg.parameters(), lr=args.lr)

    if args.cuda == True:
        gnn = gnn.cuda()
        gnn_lg = gnn_lg.cuda()

    # Training the 2 models successively. Both runs start from the same
    # learning rate and each decays its own optimizer; previously the second
    # run kept decaying the first model's optimizer from an already-decayed
    # value, leaving the line-graph optimizer at the initial rate.
    initial_lr = args.lr

    _train_and_report(gnn, optimizer, 'Training the GNN without line graph',
                      train_set, criterion, mean, std, accuracy, logger,
                      initial_lr, 0)

    _train_and_report(gnn_lg, optimizer_lg,
                      'Training the GNN with the line graph',
                      train_set, criterion, mean, std, accuracy, logger,
                      initial_lr, args.max_epoch)

    # Testing
    # NOTE(review): unlike the training call, no criterion is passed here;
    # confirm this matches the signature of test_mnb.test_with_mnb.
    loss, error_ratio = test_mnb.test_with_mnb(gnn, valid_set, args.task, args.cuda, args.batch_size, mean, std, logger)
    loss_lg, error_ratio_lg = test_mnb.test_with_mnb(gnn_lg, valid_set, args.task, args.cuda, args.batch_size, mean, std, logger)

    logger.write_test_perf()

    logger.plot_train_logs()
    logger.plot_test_logs()

    MAE_test = error_ratio * accuracy
    MAE_test_lg = error_ratio_lg * accuracy

    log.info('Error ratio of the simple gnn : {:.5f} of the gnn using line graph : {:.5f} '
             .format(error_ratio, error_ratio_lg))

    return MAE_test, MAE_test_lg