Example #1
def main(args):

    best_er1 = float('inf')  # lower relative error is better

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Load data
    root = args.datasetPath

    print('Prepare files')
    files = [
        f for f in os.listdir(root) if os.path.isfile(os.path.join(root, f))
    ]

    idx = np.random.permutation(len(files))
    idx = idx.tolist()

    valid_ids = [files[i] for i in idx[0:10000]]
    test_ids = [files[i] for i in idx[10000:20000]]
    train_ids = [files[i] for i in idx[20000:]]
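    # Random QM9 split: 10k molecules for validation, 10k for test, the rest for training.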

    data_train = utils.Qm9(root,
                           train_ids,
                           edge_transform=datasets.qm9_edges,
                           e_representation=args.edge_rep)
    data_valid = utils.Qm9(root,
                           valid_ids,
                           edge_transform=datasets.qm9_edges,
                           e_representation=args.edge_rep)
    data_test = utils.Qm9(root,
                          test_ids,
                          edge_transform=datasets.qm9_edges,
                          e_representation=args.edge_rep)

    # Define model and optimizer
    print('Define model')
    # Select one graph
    g_tuple, l = data_train[0]
    g, h_t, e = g_tuple

    print('\tStatistics')
    stat_dict = datasets.get_graph_stats(data_valid,
                                         ['target_mean', 'target_std'])

    data_train.set_target_transform(lambda x: datasets.normalize_data(
        x, stat_dict['target_mean'], stat_dict['target_std']))
    data_valid.set_target_transform(lambda x: datasets.normalize_data(
        x, stat_dict['target_mean'], stat_dict['target_std']))
    data_test.set_target_transform(lambda x: datasets.normalize_data(
        x, stat_dict['target_mean'], stat_dict['target_std']))

    # Data Loader
    train_loader = torch.utils.data.DataLoader(data_train,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               collate_fn=datasets.collate_g,
                                               num_workers=args.prefetch,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(data_valid,
                                               batch_size=args.batch_size,
                                               collate_fn=datasets.collate_g,
                                               num_workers=args.prefetch,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(data_test,
                                              batch_size=args.batch_size,
                                              collate_fn=datasets.collate_g,
                                              num_workers=args.prefetch,
                                              pin_memory=True)

    print('\tCreate model')
    in_n = [len(h_t[0]), len(list(e.values())[0])]
    hidden_state_size = 73
    message_size = 73
    n_layers = 3
    l_target = len(l)
    type = 'regression'
    if args.model == 'MPNNv2':
        model = MPNNv2(in_n, [5, 15, 15], [10, 20, 20], l_target, type=type)
    elif args.model == 'MPNNv3':
        model = MPNNv3([1, 2, 3, 4],
                       in_n, [5, 15, 15],
                       30,
                       l_target,
                       type=type)
    else:
        model = MPNN(in_n,
                     hidden_state_size,
                     message_size,
                     n_layers,
                     l_target,
                     type=type)
    del in_n, hidden_state_size, message_size, n_layers, l_target, type

    print('Optimizer')
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    criterion = nn.MSELoss()

    evaluation = lambda output, target: torch.mean(
        torch.abs(output - target) / torch.abs(target))
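    # Error metric: mean absolute relative error |output - target| / |target|,
    # averaged over the batch (lower is better).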

    print('Logger')
    logger = Logger(args.logPath)

    lr_step = (args.lr - args.lr * args.lr_decay) / (
        args.epochs * args.schedule[1] - args.epochs * args.schedule[0])
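    # Linear learning-rate decay: lr_step is sized so the rate falls from args.lr
    # to args.lr * args.lr_decay while the epoch lies between
    # args.epochs * schedule[0] and args.epochs * schedule[1].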

    # get the best checkpoint if available without training
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_er1 = checkpoint['best_er1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {})".format(
                best_model_file, checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    print('Check cuda')
    if args.cuda:
        print('\t* Cuda')
        model = model.cuda()
        criterion = criterion.cuda()

    # Epoch for loop
    for epoch in range(0, args.epochs):

        if epoch > args.epochs * args.schedule[
                0] and epoch < args.epochs * args.schedule[1]:
            args.lr -= lr_step
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, evaluation,
              logger)

        # evaluate on the validation set
        er1 = validate(valid_loader, model, criterion, evaluation, logger)

        is_best = er1 < best_er1
        best_er1 = min(er1, best_er1)
        datasets.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_er1': best_er1,
                'optimizer': optimizer.state_dict(),
            },
            is_best=is_best,
            directory=args.resume)

        # Logger step
        logger.log_value('learning_rate', args.lr).step()

    # get the best checkpoint and test it with test set
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_er1 = checkpoint['best_er1']
            model.load_state_dict(checkpoint['state_dict'])
            if args.cuda:
                model.cuda()
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {})".format(
                best_model_file, checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    # For testing
    validate(test_loader, model, criterion, evaluation)
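Example #1 expects a pre-parsed `args` namespace rather than calling `parser.parse_args()` itself. The parser is not shown; the sketch below is a hypothetical one that supplies every attribute the function reads (the attribute names come from the code above, while the flag spellings and defaults are assumptions):

import argparse

# Hypothetical parser covering the attributes used by main(args) in Example #1.
parser = argparse.ArgumentParser(description='MPNN on QM9 (illustrative sketch)')
parser.add_argument('--datasetPath', default='data/qm9/')
parser.add_argument('--logPath', default='log/qm9/mpnn/')
parser.add_argument('--resume', default='checkpoint/qm9/mpnn/')
parser.add_argument('--model', default='MPNN', choices=['MPNN', 'MPNNv2', 'MPNNv3'])
parser.add_argument('--edge-rep', dest='edge_rep', default='raw_distance')
parser.add_argument('--batch-size', dest='batch_size', type=int, default=20)
parser.add_argument('--prefetch', type=int, default=2)
parser.add_argument('--epochs', type=int, default=100)
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--lr-decay', dest='lr_decay', type=float, default=0.6)
parser.add_argument('--schedule', type=float, nargs=2, default=[0.1, 0.9])
parser.add_argument('--start-epoch', dest='start_epoch', type=int, default=0)
parser.add_argument('--no-cuda', dest='no_cuda', action='store_true')

if __name__ == '__main__':
    main(parser.parse_args())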
Example #2
def main():

    global args, best_acc1
    args = parser.parse_args()

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Load data
    root = args.datasetPath

    print('Prepare files')

    train_classes, train_ids = read_cxl(os.path.join(root, 'data/train.cxl'))
    test_classes, test_ids = read_cxl(os.path.join(root, 'data/test.cxl'))
    valid_classes, valid_ids = read_cxl(os.path.join(root, 'data/valid.cxl'))

    num_classes = len(list(set(train_classes + test_classes)))

    data_train = datasets.GREC(root, train_ids, train_classes)
    data_valid = datasets.GREC(root, valid_ids, valid_classes)
    data_test = datasets.GREC(root, test_ids, test_classes)
    
    # Define model and optimizer
    print('Define model')
    # Select one graph
    g_tuple, l = data_train[0]
    g, h_t, e = g_tuple

    # Data Loader
    train_loader = torch.utils.data.DataLoader(data_train,
                                               batch_size=args.batch_size, shuffle=True, collate_fn=datasets.utils.collate_g,
                                               num_workers=args.prefetch, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(data_valid,
                                               batch_size=args.batch_size, collate_fn=datasets.utils.collate_g,
                                               num_workers=args.prefetch, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(data_test,
                                              batch_size=args.batch_size, collate_fn=datasets.utils.collate_g,
                                              num_workers=args.prefetch, pin_memory=True)

    print('\tCreate model')
    model = MPNN([len(h_t[0]), len(list(e.values())[0])], 25, 15, 2, num_classes, type='classification')

    print('Optimizer')
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    criterion = nn.NLLLoss()

    evaluation = utils.accuracy

    print('Logger')
    logger = Logger(args.logPath)

    lr_step = (args.lr-args.lr*args.lr_decay)/(args.epochs*args.schedule[1] - args.epochs*args.schedule[0])

    # get the best checkpoint if available without training
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {}; accuracy {})".format(best_model_file, checkpoint['epoch'],
                                                                             best_acc1))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    print('Check cuda')
    if args.cuda:
        print('\t* Cuda')
        model = model.cuda()
        criterion = criterion.cuda()

    # Epoch for loop
    for epoch in range(0, args.epochs):

        if epoch > args.epochs * args.schedule[0] and epoch < args.epochs * args.schedule[1]:
            args.lr -= lr_step
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, evaluation, logger)

        # evaluate on the validation set
        acc1 = validate(valid_loader, model, criterion, evaluation, logger)

        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        utils.save_checkpoint({'epoch': epoch + 1, 'state_dict': model.state_dict(), 'best_acc1': best_acc1,
                               'optimizer': optimizer.state_dict(), }, is_best=is_best, directory=args.resume)

        # Logger step
        logger.log_value('learning_rate', args.lr).step()

    # get the best checkpoint and test it with test set
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {}; accuracy {})".format(best_model_file, checkpoint['epoch'],
                                                                             best_acc1))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    # For testing
    validate(test_loader, model, criterion, evaluation)
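Example #2 switches to classification: `nn.NLLLoss` is the criterion and `utils.accuracy` is the evaluation function. The helper itself is not shown; a minimal sketch, assuming the model emits per-class log-probabilities and the targets are class indices:

import torch

def accuracy(output, target):
    # Hypothetical top-1 accuracy: fraction of samples whose arg-max class
    # matches the target index; output has shape (batch, num_classes).
    pred = output.argmax(dim=1)
    return (pred == target).float().mean()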
Example #3
def main(args):
    best_er1 = 0

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Load data
    root = args.datasetPath

    files = []
    train_ids = []
    test_ids = []

    if os.path.isfile('file_list.txt'):
        print("File exists")
    else:
        print("File does not exist")
        print("Prepare files")
        files = [
            f for f in os.listdir(root)
            if os.path.isfile(os.path.join(root, f))
        ]

        idx = np.random.permutation(len(files))
        idx = idx.tolist()
        files = [files[i] for i in idx[:]]

        with open('file_list.txt', "w") as myfile:
            for f in files:
                myfile.write("%s\n" % f)

    file2 = open("file_list.txt")
    files = [line[:-1] for line in file2]
    file2.close()

    chunk = int(len(files) / 10)

    train_ids = []
    test_ids = []

    print(len(files))
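    # 10-fold split: the chunk whose index equals args.fold becomes the test
    # set; the remaining nine chunks are concatenated into the training set
    # (this example keeps no separate validation fold).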
    for i in range(10):
        if i == int(args.fold):
            test_ids = files[i * chunk:i * chunk + chunk]
            print("test: " + str(i * chunk) + ":" + str(i * chunk + chunk))
            continue

        train_ids += files[i * chunk:i * chunk + chunk]
        print("train: " + str(i * chunk) + ":" + str(i * chunk + chunk))

    all_data = utils.Qm9(
        root,
        files,
        edge_transform=datasets.qm9_edges,
        e_representation="raw_distance",
    )

    data_train = utils.Qm9(
        root,
        train_ids,
        edge_transform=datasets.qm9_edges,
        e_representation="raw_distance",
    )

    data_test = utils.Qm9(
        root,
        test_ids,
        edge_transform=datasets.qm9_edges,
        e_representation="raw_distance",
    )

    # Define model and optimizer
    print("Define model")
    # Select one graph
    g_tuple, l = data_train[1]
    g, h_t, e = g_tuple

    print("\tStatistics")
    stat_dict = datasets.get_graph_stats(all_data,
                                         ["target_mean", "target_std"])

    # Identify atoms in all files

    data_train.set_target_transform(lambda x: datasets.normalize_data(
        x, stat_dict["target_mean"], stat_dict["target_std"]))

    data_test.set_target_transform(lambda x: datasets.normalize_data(
        x, stat_dict["target_mean"], stat_dict["target_std"]))

    # Data Loader
    train_loader = torch.utils.data.DataLoader(
        data_train,
        batch_size=args.batch_size,
        shuffle=True,
        collate_fn=datasets.collate_g,
        num_workers=args.prefetch,
        pin_memory=True,
    )

    test_loader = torch.utils.data.DataLoader(
        data_test,
        batch_size=args.batch_size,
        collate_fn=datasets.collate_g,
        num_workers=args.prefetch,
        pin_memory=True,
    )

    print("\tCreate model")
    in_n = [len(h_t[0]), len(list(e.values())[0])]
    hidden_state_size = 73
    message_size = 73
    n_layers = 3
    l_target = len(l)
    type = "regression"
    if args.model == "MPNNv2":
        model = MPNNv2(in_n, [5, 15, 15], [10, 20, 20], l_target, type=type)
    elif args.model == "MPNNv3":
        model = MPNNv3([1, 2, 3, 4],
                       in_n, [5, 15, 15],
                       30,
                       l_target,
                       type=type)
    elif args.model == "LSTM":
        model = LSTM(in_n,
                     hidden_state_size,
                     message_size,
                     n_layers,
                     l_target,
                     type=type)
    else:
        model = MPNN(in_n,
                     hidden_state_size,
                     message_size,
                     n_layers,
                     l_target,
                     type=type)

    del in_n, hidden_state_size, message_size, n_layers, l_target, type

    print("Optimizer")
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    criterion = nn.MSELoss()

    evaluation = lambda output, target: torch.mean(
        torch.abs(output - target) / torch.abs(target))

    print("Logger")
    logger = Logger(args.logPath)

    lr_step = (args.lr - args.lr * args.lr_decay) / (
        args.epochs * args.schedule[1] - args.epochs * args.schedule[0])

    # get the best checkpoint if available without training
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, "model_best.pth")
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint["epoch"]
            best_acc1 = checkpoint["best_er1"]
            model.load_state_dict(checkpoint["state_dict"])
            optimizer.load_state_dict(checkpoint["optimizer"])
            print("=> loaded best model '{}' (epoch {})".format(
                best_model_file, checkpoint["epoch"]))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    print("Check cuda")
    if args.cuda:
        print("\t* Cuda")
        model = model.cuda()
        criterion = criterion.cuda()

    # Epoch for loop
    for epoch in range(0, args.epochs):

        if (epoch > args.epochs * args.schedule[0]
                and epoch < args.epochs * args.schedule[1]):
            args.lr -= lr_step
            for param_group in optimizer.param_groups:
                param_group["lr"] = args.lr

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, evaluation,
              logger)

        # evaluate on test set
        #er1 = validate(valid_loader, model, criterion, evaluation, logger)

        #is_best = er1 > best_er1
        #best_er1 = min(er1, best_er1)

        is_best = True
        best_er1 = 1

        datasets.save_checkpoint(
            {
                "epoch": epoch + 1,
                "state_dict": model.state_dict(),
                "best_er1": best_er1,
                "optimizer": optimizer.state_dict(),
            },
            is_best=is_best,
            directory=args.resume,
        )

        # Logger step
        logger.log_value("learning_rate", args.lr).step()

    # get the best checkpoint and test it with test set
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, "model_best.pth")
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint["epoch"]
            best_acc1 = checkpoint["best_er1"]
            model.load_state_dict(checkpoint["state_dict"])
            if args.cuda:
                model.cuda()
            optimizer.load_state_dict(checkpoint["optimizer"])
            print("=> loaded best model '{}' (epoch {})".format(
                best_model_file, checkpoint["epoch"]))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    # For testing
    validate(test_loader, model, criterion, evaluation)
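All of the examples persist training state through a `save_checkpoint(state, is_best, directory)` helper whose body is not shown. A plausible sketch follows; the `model_best.pth` file name matches what the resume blocks above load, the rest (including `checkpoint.pth`) is an assumption:

import os
import shutil
import torch

def save_checkpoint(state, is_best, directory):
    # Write the latest state and, when it is the best seen so far, copy it to
    # model_best.pth (the file the resume blocks look for).
    if not os.path.isdir(directory):
        os.makedirs(directory)
    checkpoint_file = os.path.join(directory, 'checkpoint.pth')
    best_model_file = os.path.join(directory, 'model_best.pth')
    torch.save(state, checkpoint_file)
    if is_best:
        shutil.copyfile(checkpoint_file, best_model_file)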
Example #4
def main():

    global args, best_er1
    args = parser.parse_args()

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # Load data
    root = args.datasetPath

    print('Prepare files')
    files = [f for f in os.listdir(root) if os.path.isfile(os.path.join(root, f))]

    idx = np.random.permutation(len(files))
    idx = idx.tolist()

    valid_ids = [files[i] for i in idx[0:10000]]
    test_ids = [files[i] for i in idx[10000:20000]]
    train_ids = [files[i] for i in idx[20000:]]

    if args.labels == 'all':
        args.labels = label_names

    data_train = datasets.Qm9(root, train_ids,
            edge_transform=utils.qm9_edges, e_representation='raw_distance',
            labels=args.labels)
    data_valid = datasets.Qm9(root, valid_ids,
            edge_transform=utils.qm9_edges, e_representation='raw_distance',
            labels=args.labels)

    data_test = datasets.Qm9(root, test_ids,
            edge_transform=utils.qm9_edges, e_representation='raw_distance',
            labels=args.labels)

    if len(args.aichemy_path):
        aichemy_files = [f for f in os.listdir(args.aichemy_path) \
                if os.path.isfile(os.path.join(args.aichemy_path, f)) and f.find("swp") == -1]
        aichemy_data = datasets.AIChemy(args.aichemy_path, aichemy_files,
            edge_transform=utils.qm9_edges, e_representation='raw_distance',
            labels=args.labels)

    # Define model and optimizer
    print('Define model')
    # Select one graph
    g_tuple, l = data_train[0]
    g, h_t, e = g_tuple

    print('\tStatistics')
    #stat_dict = datasets.utils.get_graph_stats(data_valid, ['target_mean', 'target_std'])
    stat_dict = datasets.utils.get_graph_stats(data_train, ['target_mean', 'target_std'])
    print(stat_dict)

    data_train.set_target_transform(lambda x: datasets.utils.normalize_data(x,stat_dict['target_mean'],
                                                                            stat_dict['target_std']))
    data_valid.set_target_transform(lambda x: datasets.utils.normalize_data(x, stat_dict['target_mean'],
                                                                            stat_dict['target_std']))
    data_test.set_target_transform(lambda x: datasets.utils.normalize_data(x, stat_dict['target_mean'],
                                                                           stat_dict['target_std']))

    if len(args.aichemy_path):
        args.epochs = 0
        aichemy_data.set_target_transform(lambda x: datasets.utils.normalize_data(x,stat_dict['target_mean'],
                                                                           stat_dict['target_std']))
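        # With an AIChemy path given, epochs is set to 0 above, so the training
        # loop below is skipped and the (resumed) model is only evaluated.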

    # Data Loader
    train_loader = torch.utils.data.DataLoader(data_train,
                                               batch_size=args.batch_size, shuffle=True,
                                               collate_fn=datasets.utils.collate_g,
                                               num_workers=args.prefetch, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(data_valid,
                                               batch_size=args.batch_size, collate_fn=datasets.utils.collate_g,
                                               num_workers=args.prefetch, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(data_test,
                                              batch_size=args.batch_size, collate_fn=datasets.utils.collate_g,
                                              num_workers=args.prefetch, pin_memory=True)

    if len(args.aichemy_path):
        aichemy_loader = torch.utils.data.DataLoader(aichemy_data,
                                              batch_size=args.batch_size, collate_fn=datasets.utils.collate_g,
                                              num_workers=args.prefetch, pin_memory=True)
    print('\tCreate model')
    in_n = [len(h_t[0]), len(list(e.values())[0])]
    hidden_state_size = 128
    message_size = 128
    n_layers = 3
    #n_layers = 6
    l_target = len(l)
    print("in_n=%d, %d, hidden_state_size=%d, message_size=%d, n_layers=%d, l_target=%d" %\
            (in_n[0], in_n[1], hidden_state_size, message_size, n_layers, l_target))
    type = 'regression'
    model = MPNN(
            in_n,
            hidden_state_size=args.node_dim,
            message_size=args.node_dim,
            n_layers=args.edge_num_layers,
            l_target=l_target, type=type,
            edge_hidden_dim=args.edge_hidden_dim,
            set2set_comps=args.set2set_comps,
            hidden_dim=args.hidden_dim
            )
    del in_n, hidden_state_size, message_size, n_layers, l_target, type


    criterion = nn.MSELoss()

    #evaluation = lambda output, target: torch.mean(torch.abs(output - target) / torch.abs(target))
    evaluation = lambda output, target: torch.mean(torch.abs(output - target))

    print('Logger')
    logger = Logger(args.logPath)

    lr_step = (args.lr-args.lr*args.lr_decay)/(args.epochs*args.schedule[1] - args.epochs*args.schedule[0]) \
            if args.epochs > 0 else args.lr

    # get the best checkpoint if available without training
    if len(args.resume):
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_er1 = checkpoint['best_er1']
            model.load_state_dict(checkpoint['state_dict'])
#            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {})".format(best_model_file, checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    print('Optimizer')
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    print('Check cuda')
    if args.cuda:
        print('\t* Cuda')
        model = model.cuda()
        criterion = criterion.cuda()

    # Epoch for loop
    for epoch in range(0, args.epochs):

        if epoch > args.epochs * args.schedule[0] and epoch < args.epochs * args.schedule[1]:
            args.lr -= lr_step
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, evaluation, logger)

        # evaluate on the validation set
        er1 = validate(valid_loader, model, criterion, evaluation, logger)

        is_best = er1 < best_er1
        best_er1 = min(er1, best_er1)
        utils.save_checkpoint({'epoch': epoch + 1, 'state_dict': model.state_dict(), 'best_er1': best_er1,
                               'optimizer': optimizer.state_dict(), }, is_best=is_best, directory=args.resume)

        validate(test_loader, model, criterion, evaluation)
        # Logger step
        logger.log_value('learning_rate', args.lr).step()

    # get the best checkpoint and test it with test set
    if args.resume and args.epochs:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_er1 = checkpoint['best_er1']
            model.load_state_dict(checkpoint['state_dict'])
            if args.cuda:
                model.cuda()
#            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {})".format(best_model_file, checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    # For testing
    er_test = validate(test_loader, model, criterion, evaluation)
    print("test:", er_test * stat_dict['target_std'])
    if len(args.aichemy_path):
        er_aichemy = validate(aichemy_loader, model, criterion, evaluation)
        print("aichemy_test:", er_aichemy * stat_dict['target_std'])
Example #5
def main():

    global args, best_er1
    args = parser.parse_args()

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Load data
    root = args.datasetPath

    print('Prepare files')
    files = [f for f in os.listdir(root) if os.path.isfile(os.path.join(root, f))]

    idx = np.random.permutation(len(files))
    idx = idx.tolist()

    valid_ids = [files[i] for i in idx[0:10000]]
    test_ids = [files[i] for i in idx[10000:20000]]
    train_ids = [files[i] for i in idx[20000:]]

    data_train = datasets.Qm9(root, train_ids)
    data_valid = datasets.Qm9(root, valid_ids)
    data_test = datasets.Qm9(root, test_ids)

    # Define model and optimizer
    print('Define model')
    # Select one graph
    g_tuple, l = data_train[0]
    g, h_t, e = g_tuple

    print('\tStatistics')
    # stat_dict = datasets.utils.get_graph_stats(data_valid, ['degrees', 'target_mean', 'target_std', 'edge_labels'])

    stat_dict = {}

    stat_dict['degrees'] = [1, 2, 3, 4]
    stat_dict['target_mean'] = np.array([2.71802732e+00,   7.51685080e+01,  -2.40259300e-01,   1.09503300e-02,
                                         2.51209430e-01,   1.18997445e+03,   1.48493130e-01,  -4.11609491e+02,
                                        -4.11601022e+02,  -4.11600078e+02,  -4.11642909e+02,   3.15894998e+01])
    stat_dict['target_std'] = np.array([1.58422291e+00,   8.29443552e+00,   2.23854977e-02,   4.71030547e-02,
                                        4.77156393e-02,   2.80754665e+02,   3.37238236e-02,   3.97717205e+01,
                                        3.97715029e+01,   3.97715029e+01,   3.97722334e+01,   4.09458852e+00])
    stat_dict['edge_labels'] = [1, 2, 3, 4]
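    # Hard-coded QM9 statistics (mean and std for each of the 12 regression
    # targets) used instead of recomputing get_graph_stats on every run.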

    data_train.set_target_transform(lambda x: datasets.utils.normalize_data(x,stat_dict['target_mean'],
                                                                            stat_dict['target_std']))
    data_valid.set_target_transform(lambda x: datasets.utils.normalize_data(x, stat_dict['target_mean'],
                                                                            stat_dict['target_std']))
    data_test.set_target_transform(lambda x: datasets.utils.normalize_data(x, stat_dict['target_mean'],
                                                                           stat_dict['target_std']))

    # Data Loader
    train_loader = torch.utils.data.DataLoader(data_train,
                                               batch_size=args.batch_size, shuffle=True,
                                               collate_fn=datasets.utils.collate_g,
                                               num_workers=args.prefetch, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(data_valid,
                                               batch_size=args.batch_size, collate_fn=datasets.utils.collate_g,
                                               num_workers=args.prefetch, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(data_test,
                                              batch_size=args.batch_size, collate_fn=datasets.utils.collate_g,
                                              num_workers=args.prefetch, pin_memory=True)

    print('\tCreate model')
    model = MPNN([len(h_t[0]), len(list(e.values())[0])], 73, 15, 2, len(l), type='regression')

    print('Optimizer')
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    criterion = nn.MSELoss()
    # evaluation = nn.L1Loss()
    evaluation = lambda output, target: torch.mean(torch.abs(output - target) / torch.abs(target))

    print('Logger')
    logger = Logger(args.logPath)

    lr_step = (args.lr-args.lr*args.lr_decay)/(args.epochs*args.schedule[1] - args.epochs*args.schedule[0])

    # get the best checkpoint if available without training
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_er1 = checkpoint['best_er1']
            model.load_state_dict(checkpoint['state_dict'])
            if args.cuda:
                model.cuda()
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {})".format(best_model_file, checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    print('Check cuda')
    if args.cuda:
        print('\t* Cuda')
        model = model.cuda()
        criterion = criterion.cuda()
    

    # Epoch for loop
    for epoch in range(0, args.epochs):

        if epoch > args.epochs * args.schedule[0] and epoch < args.epochs * args.schedule[1]:
            args.lr -= lr_step
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, evaluation, logger)

        # evaluate on the validation set
        er1 = validate(valid_loader, model, criterion, evaluation, logger)

        is_best = er1 < best_er1
        best_er1 = min(er1, best_er1)
        utils.save_checkpoint({'epoch': epoch + 1, 'state_dict': model.state_dict(), 'best_er1': best_er1,
                               'optimizer': optimizer.state_dict(), }, is_best=is_best, directory=args.resume)

        # Logger step
        logger.log_value('learning_rate', args.lr).step()

    # get the best checkpoint and test it with test set
    if args.resume:
        checkpoint_dir = args.resume
        best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if os.path.isfile(best_model_file):
            print("=> loading best model '{}'".format(best_model_file))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_er1 = checkpoint['best_er1']
            model.load_state_dict(checkpoint['state_dict'])
            if args.cuda:
                model.cuda()
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded best model '{}' (epoch {})".format(best_model_file, checkpoint['epoch']))
        else:
            print("=> no best model found at '{}'".format(best_model_file))

    # For testing
    validate(test_loader, model, criterion, evaluation)
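Every example decays the learning rate linearly while the epoch lies between two fractions of the total budget (`args.schedule`). A standalone sketch that approximates the resulting schedule (the function name and the numbers in the usage comment are illustrative):

def lr_at_epoch(epoch, epochs, base_lr, lr_decay, schedule):
    # Constant at base_lr before epochs*schedule[0], then a fixed step down
    # per epoch until base_lr*lr_decay is reached at epochs*schedule[1].
    start, end = epochs * schedule[0], epochs * schedule[1]
    if epoch <= start:
        return base_lr
    if epoch >= end:
        return base_lr * lr_decay
    step = (base_lr - base_lr * lr_decay) / (end - start)
    return base_lr - step * (epoch - start)

# e.g. lr_at_epoch(180, 360, 1e-3, 0.6, [0.1, 0.9]) -> 0.0008 (halfway through the decay window).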