# Optional arguments: where the LETTER dataset lives and which distortion
# level ('LOW'/'MED'/'HIGH') to use.
parser.add_argument(
    '--root', nargs=1, help='Specify the data directory.',
    default=['/home/adutta/Workspace/Datasets/STDGraphs/Letter'])
parser.add_argument('--subset', nargs=1, help='Specify the sub dataset.',
                    default=['LOW'])
args = parser.parse_args()

# nargs=1 wraps each value in a single-element list; unwrap it.
root = args.root[0]
subset = args.subset[0]

# Each .cxl index file yields (class labels, graph file ids) for one split.
train_classes, train_ids = read_cxl(os.path.join(root, subset, 'train.cxl'))
test_classes, test_ids = read_cxl(os.path.join(root, subset, 'test.cxl'))
valid_classes, valid_ids = read_cxl(
    os.path.join(root, subset, 'validation.cxl'))

# Count the distinct labels across all three splits.
# (Was len(list(set(...))) — the intermediate list is redundant.)
num_classes = len(set(train_classes + valid_classes + test_classes))

data_train = LETTER(root, subset, train_ids, train_classes, num_classes)
data_valid = LETTER(root, subset, valid_ids, valid_classes, num_classes)
data_test = LETTER(root, subset, test_ids, test_classes, num_classes)

# Quick sanity output: split sizes, then every training sample.
print(len(data_train))
print(len(data_valid))
print(len(data_test))

for i in range(len(train_ids)):
    # The loop body was truncated in the source; restored to mirror the
    # equivalent GREC smoke-test script, which prints each sample.
    print(data_train[i])
def _load_best_checkpoint(model, optimizer):
    """Restore the best saved checkpoint (if any) into *model*/*optimizer*.

    Side effects: creates ``args.resume`` (the checkpoint directory) when it
    does not exist, and on success updates the module-level ``args.start_epoch``
    and ``best_acc1`` from the checkpoint contents.
    """
    global best_acc1
    checkpoint_dir = args.resume
    best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
    # BUG FIX: the original tested os.path.isdir(best_model_file) — always
    # False for a file path — so os.makedirs(checkpoint_dir) ran even when
    # the directory existed and raised FileExistsError on every re-run.
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    if os.path.isfile(best_model_file):
        print("=> loading best model '{}'".format(best_model_file))
        checkpoint = torch.load(best_model_file)
        args.start_epoch = checkpoint['epoch']
        best_acc1 = checkpoint['best_acc1']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded best model '{}' (epoch {}; accuracy {})".format(
            best_model_file, checkpoint['epoch'], best_acc1))
    else:
        print("=> no best model found at '{}'".format(best_model_file))


def main():
    """Train an MPNN classifier on the LETTER graph dataset.

    Reads command-line arguments from the module-level ``parser``, builds the
    train/valid/test loaders, trains for ``args.epochs`` epochs with linear
    learning-rate decay between the two ``args.schedule`` fractions,
    checkpoints the best validation accuracy, and finally evaluates the best
    checkpoint on the test split.
    """
    global args, best_acc1
    args = parser.parse_args()

    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Load data
    root = args.datasetPath
    subset = args.subSet

    print('Prepare files')
    train_classes, train_ids = read_cxl(
        os.path.join(root, subset, 'train.cxl'))
    test_classes, test_ids = read_cxl(os.path.join(root, subset, 'test.cxl'))
    valid_classes, valid_ids = read_cxl(
        os.path.join(root, subset, 'validation.cxl'))

    # NOTE(review): class_list deliberately(?) omits valid_classes; this
    # assumes validation labels are a subset of train+test — confirm against
    # the dataset before changing.
    class_list = list(set(train_classes + test_classes))
    num_classes = len(class_list)
    data_train = datasets.LETTER(root, subset, train_ids, train_classes,
                                 class_list)
    data_valid = datasets.LETTER(root, subset, valid_ids, valid_classes,
                                 class_list)
    data_test = datasets.LETTER(root, subset, test_ids, test_classes,
                                class_list)

    # Define model and optimizer
    print('Define model')
    # Select one graph to size the model's input layers from its node (h_t)
    # and edge (e) feature dimensions.
    g_tuple, l = data_train[0]
    g, h_t, e = g_tuple

    # TODO: Need attention
    print('\tStatistics')
    stat_dict = {}
    # stat_dict = datasets.utils.get_graph_stats(data_train, ['edge_labels'])
    stat_dict['edge_labels'] = [1]

    # Data Loader
    train_loader = torch.utils.data.DataLoader(
        data_train, batch_size=args.batch_size, shuffle=True,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        data_valid, batch_size=args.batch_size,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        data_test, batch_size=args.batch_size,
        collate_fn=datasets.utils.collate_g,
        num_workers=args.prefetch, pin_memory=True)

    print('\tCreate model')
    model = MpnnIntNet([len(h_t[0]), len(list(e.values())[0])],
                       [5, 15, 15], [10, 20, 20],
                       num_classes, type='classification')

    print('Optimizer')
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    criterion = nn.NLLLoss()
    evaluation = utils.accuracy

    print('Logger')
    logger = Logger(args.logPath)

    # Linear LR decay: step size spread over the epochs between
    # schedule[0] and schedule[1] (fractions of total epochs).
    lr_step = (args.lr - args.lr * args.lr_decay) / (
        args.epochs * args.schedule[1] - args.epochs * args.schedule[0])

    # get the best checkpoint if available without training
    if args.resume:
        _load_best_checkpoint(model, optimizer)

    print('Check cuda')
    if args.cuda:
        print('\t* Cuda')
        model = model.cuda()
        criterion = criterion.cuda()

    # Epoch for loop
    for epoch in range(0, args.epochs):
        if (args.epochs * args.schedule[0] < epoch
                < args.epochs * args.schedule[1]):
            args.lr -= lr_step
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, evaluation,
              logger)

        # evaluate on the validation set (original comment wrongly said
        # "test set")
        acc1 = validate(valid_loader, model, criterion, evaluation, logger)

        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            },
            is_best=is_best, directory=args.resume)

        # Logger step
        logger.log_value('learning_rate', args.lr).step()

    # get the best checkpoint and test it with test set
    if args.resume:
        _load_best_checkpoint(model, optimizer)

    # For testing
    validate(test_loader, model, criterion, evaluation)
if __name__ == '__main__':
    # Command-line options: only the dataset root is configurable.
    parser = argparse.ArgumentParser(description='GREC Object.')
    parser.add_argument(
        '--root', nargs=1, help='Specify the data directory.',
        default=['/home/adutta/Workspace/Datasets/Graphs/GREC'])
    args = parser.parse_args()
    root = args.root[0]

    # Load (class labels, graph ids) for each split from its .cxl index.
    train_classes, train_ids = read_cxl(os.path.join(root, 'data/train.cxl'))
    test_classes, test_ids = read_cxl(os.path.join(root, 'data/test.cxl'))
    valid_classes, valid_ids = read_cxl(os.path.join(root, 'data/valid.cxl'))

    # Distinct labels seen anywhere across the three splits.
    all_classes = set(train_classes) | set(valid_classes) | set(test_classes)
    num_classes = len(all_classes)

    data_train = GREC(root, train_ids, train_classes, num_classes)
    data_valid = GREC(root, valid_ids, valid_classes, num_classes)
    data_test = GREC(root, test_ids, test_classes, num_classes)

    # Smoke test: report split sizes, then dump every training sample.
    for split in (data_train, data_valid, data_test):
        print(len(split))

    for idx in range(len(train_ids)):
        print(data_train[idx])