with open(args.normal, 'rb') as f:
        data = pickle.load(f)
elif args.adv:
    print("======================= Loading Adversarial Dataset")
    with open(args.adv, 'rb') as f:
        data = pickle.load(f)
else:
    raise Exception('Must provide normal or adversarial dataset')

# Ground truth labels are the same no matter what
with open(args.labels, 'rb') as f:
    labels = pickle.load(f)

# Use their class to load the dataset
test_set = VisitSequenceWithLabelDataset(data,
                                         labels,
                                         args.num_features,
                                         reverse=True)
test_loader = DataLoader(dataset=test_set,
                         batch_size=1,
                         shuffle=False,
                         collate_fn=visit_collate_fn,
                         num_workers=0)

# Load the model
# Check to see if it was trained onf a GPU or not, act accordingly
try:
    model = torch.load(args.model)
except RuntimeError:
    model = torch.load(args.model, map_location=device)

model = model.cpu()
# Example #2 (scrape artifact: site example header and vote count "0")
def main(argv):
    """Train an RNN on visit-sequence data and checkpoint the best model.

    Loads the train/valid/test splits from ``args.data_path``, trains with
    SGD + ReduceLROnPlateau, and whenever validation loss improves evaluates
    on the test set and saves the model (plus a metrics summary) under
    ``args.save``.

    Args:
        argv: command-line argument list handed to the module-level parser.
    """
    global args
    args = parser.parse_args(argv)
    if args.threads == -1:
        # Use every core but one; `or 1` keeps at least one worker on a
        # single-core machine (where cpu_count() - 1 == 0).
        args.threads = torch.multiprocessing.cpu_count() - 1 or 1
    print('===> Configuration')
    print(args)

    cuda = args.cuda
    if cuda:
        if torch.cuda.is_available():
            print('===> {} GPUs are available'.format(
                torch.cuda.device_count()))
        else:
            raise Exception("No GPU found, please run with --no-cuda")

    # Fix the random seed for reproducibility
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    # Data loading.
    # NOTE: args.data_path is a raw filename prefix (plain concatenation),
    # so both 'dir/' and 'dir/prefix_' styles work; do not os.path.join it.
    print('===> Loading entire datasets')

    def _load_split(name):
        # Load one pickled split component relative to the data-path prefix.
        with open(args.data_path + name, 'rb') as f:
            return pickle.load(f)

    train_seqs = _load_split('train.seqs')
    train_labels = _load_split('train.labels')
    valid_seqs = _load_split('valid.seqs')
    valid_labels = _load_split('valid.labels')
    test_seqs = _load_split('test.seqs')
    test_labels = _load_split('test.labels')

    # Vocabulary size = largest code index seen in any split, plus one
    # (code indices are assumed 0-based).
    max_code = max(max(visit)
                   for patient in train_seqs + valid_seqs + test_seqs
                   for visit in patient)
    num_features = max_code + 1

    print("     ===> Construct train set")
    train_set = VisitSequenceWithLabelDataset(train_seqs,
                                              train_labels,
                                              num_features,
                                              reverse=False)
    print("     ===> Construct validation set")
    valid_set = VisitSequenceWithLabelDataset(valid_seqs,
                                              valid_labels,
                                              num_features,
                                              reverse=False)
    print("     ===> Construct test set")
    test_set = VisitSequenceWithLabelDataset(test_seqs,
                                             test_labels,
                                             num_features,
                                             reverse=False)

    train_loader = DataLoader(dataset=train_set,
                              batch_size=args.batch_size,
                              shuffle=True,
                              collate_fn=visit_collate_fn,
                              num_workers=args.threads)
    valid_loader = DataLoader(dataset=valid_set,
                              batch_size=args.eval_batch_size,
                              shuffle=False,
                              collate_fn=visit_collate_fn,
                              num_workers=args.threads)
    test_loader = DataLoader(dataset=test_set,
                             batch_size=args.eval_batch_size,
                             shuffle=False,
                             collate_fn=visit_collate_fn,
                             num_workers=args.threads)
    print('===> Dataset loaded!')

    # Create model
    print('===> Building a Model')
    model = RNN(dim_input=num_features, dim_emb=128, dim_hidden=128)
    if cuda:
        model = model.cuda()
    print(model)
    print('===> Model built!')

    # Class weights: each class is weighted by the prevalence of the
    # *other* class so the rarer class contributes more to the loss.
    weight_class0 = torch.mean(torch.FloatTensor(train_set.labels))
    weight_class1 = 1.0 - weight_class0
    weight = torch.FloatTensor([weight_class0, weight_class1])

    criterion = nn.CrossEntropyLoss(weight=weight)
    if cuda:
        criterion = criterion.cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                nesterov=False,
                                weight_decay=args.weight_decay)
    scheduler = ReduceLROnPlateau(optimizer, 'min')

    best_valid_epoch = 0
    best_valid_loss = sys.float_info.max

    train_losses = []
    valid_losses = []

    # Pre-bind the reported metrics so the summary prints at the bottom
    # cannot raise NameError when args.epochs == 0.
    train_loss = test_loss = float('nan')
    test_auc = test_aupr = float('nan')

    if not os.path.exists(args.save):
        os.makedirs(args.save)

    for ei in trange(args.epochs, desc="Epochs"):
        # Train
        _, _, train_loss = rnn_epoch(train_loader,
                                     model,
                                     criterion=criterion,
                                     optimizer=optimizer,
                                     train=True)
        train_losses.append(train_loss)

        # Eval
        _, _, valid_loss = rnn_epoch(valid_loader, model, criterion=criterion)
        valid_losses.append(valid_loss)

        scheduler.step(valid_loss)

        if valid_loss < best_valid_loss:
            best_valid_epoch = ei
            best_valid_loss = valid_loss

            # Evaluate on the test set only when validation improves, so
            # the reported test metrics always match the saved checkpoint.
            test_y_true, test_y_pred, test_loss = rnn_epoch(
                test_loader, model, criterion=criterion)

            if cuda:
                test_y_true = test_y_true.cpu()
                test_y_pred = test_y_pred.cpu()

            # Column 1 of the prediction matrix holds the positive-class
            # score.
            test_auc = roc_auc_score(test_y_true.numpy(),
                                     test_y_pred.numpy()[:, 1],
                                     average="weighted")
            test_aupr = average_precision_score(test_y_true.numpy(),
                                                test_y_pred.numpy()[:, 1],
                                                average="weighted")

            # os.path.join works whether or not args.save has a trailing
            # separator (plain `args.save + name` silently produced wrong
            # paths without one) and matches the evaluation script's style.
            with open(os.path.join(args.save, 'train_result.txt'), 'w') as f:
                f.write('Best Validation Epoch: {}\n'.format(ei))
                f.write('Best Validation Loss: {}\n'.format(valid_loss))
                f.write('Train Loss: {}\n'.format(train_loss))
                f.write('Test Loss: {}\n'.format(test_loss))
                f.write('Test AUROC: {}\n'.format(test_auc))
                f.write('Test AUPR: {}\n'.format(test_aupr))

            torch.save(model, os.path.join(args.save, 'best_model.pth'))
            torch.save(model.state_dict(),
                       os.path.join(args.save, 'best_model_params.pth'))

        # Re-plot the loss curves every epoch so the figure stays current
        # even if training is interrupted.
        if args.plot:
            plt.figure(figsize=(12, 9))
            plt.plot(np.arange(len(train_losses)),
                     np.array(train_losses),
                     label='Training Loss')
            plt.plot(np.arange(len(valid_losses)),
                     np.array(valid_losses),
                     label='Validation Loss')
            plt.xlabel('epoch')
            plt.ylabel('Loss')
            plt.legend(loc="best")
            plt.tight_layout()
            plt.savefig(os.path.join(args.save, 'loss_plot.eps'),
                        format='eps')
            plt.close()

    print('Best Validation Epoch: {}\n'.format(best_valid_epoch))
    print('Best Validation Loss: {}\n'.format(best_valid_loss))
    print('Train Loss: {}\n'.format(train_loss))
    print('Test Loss: {}\n'.format(test_loss))
    print('Test AUROC: {}\n'.format(test_auc))
    print('Test AUPR: {}\n'.format(test_aupr))
def main(argv):
    """Evaluate a saved model on a pickled test set and report metrics.

    Loads sequences/labels from ``args.seqs_path``/``args.labels_path``,
    runs one evaluation epoch with an unweighted cross-entropy loss, and
    writes loss, AUROC and AUPR to ``<args.save>/test_result.txt``.

    Args:
        argv: command-line argument list handed to the module-level parser.
    """
    global args
    args = parser.parse_args(argv)
    if args.threads == -1:
        # Use every core but one; `or 1` keeps at least one worker on a
        # single-core machine (where cpu_count() - 1 == 0).
        args.threads = torch.multiprocessing.cpu_count() - 1 or 1
    if args.save == '':
        # Default the output directory to wherever the sequences live.
        args.save = os.path.dirname(args.seqs_path)
    print('===> Configuration')
    print(args)

    cuda = args.cuda
    if cuda:
        if torch.cuda.is_available():
            print('===> {} GPUs are available'.format(
                torch.cuda.device_count()))
        else:
            raise Exception("No GPU found, please run with --no-cuda")

    # Fix the random seed for reproducibility
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    # Data loading
    print('===> Loading test dataset')
    with open(args.seqs_path, 'rb') as f:
        test_seqs = pickle.load(f)
    with open(args.labels_path, 'rb') as f:
        test_labels = pickle.load(f)
    print("     ===> Num features: {}".format(args.num_features))

    # reverse=True: visits are fed in reversed order, matching how this
    # model family was trained elsewhere in the project.
    test_set = VisitSequenceWithLabelDataset(test_seqs,
                                             test_labels,
                                             args.num_features,
                                             reverse=True)
    test_loader = DataLoader(dataset=test_set,
                             batch_size=args.eval_batch_size,
                             shuffle=False,
                             collate_fn=visit_collate_fn,
                             num_workers=args.threads)
    print('===> Dataset loaded!')

    # Load model. map_location='cpu' lets a GPU-trained checkpoint load on
    # a CPU-only machine; the old extra `model = model.cpu()` call right
    # after it was redundant and has been removed.
    print('===> Loading a Model')
    model = torch.load(args.model_path, map_location=torch.device('cpu'))
    if cuda:
        model = model.cuda()
    print(model)
    print('===> Model built!')

    # No loss weight for test
    criterion = nn.CrossEntropyLoss()
    if cuda:
        criterion = criterion.cuda()

    if not os.path.exists(args.save):
        os.makedirs(args.save)

    # evaluate on the test set
    test_y_true, test_y_pred, test_loss = retain_epoch(test_loader,
                                                       model,
                                                       criterion=criterion)

    if cuda:
        test_y_true = test_y_true.cpu()
        test_y_pred = test_y_pred.cpu()

    # Column 1 of the prediction matrix holds the positive-class score.
    test_auc = roc_auc_score(test_y_true.numpy(),
                             test_y_pred.numpy()[:, 1],
                             average="weighted")
    test_aupr = average_precision_score(test_y_true.numpy(),
                                        test_y_pred.numpy()[:, 1],
                                        average="weighted")

    with open(os.path.join(args.save, 'test_result.txt'), 'w') as f:
        f.write('Test Loss: {}\n'.format(test_loss))
        f.write('Test AUROC: {}\n'.format(test_auc))
        f.write('Test AUPR: {}\n'.format(test_aupr))

    print("Done!")
    print('Test Loss: {}\n'.format(test_loss))
    print('Test AUROC: {}\n'.format(test_auc))
    print('Test AUPR: {}\n'.format(test_aupr))
# Example #4 (scrape artifact: site example header and vote count "0")
    # Fix the random seed for reproducibility
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    # Data loading
    with open(args.data_path + 'test.seqs', 'rb') as f:
        test_seqs = pickle.load(f)
    with open(args.data_path + 'test.labels', 'rb') as f:
        test_labels = pickle.load(f)

    print("     ===> Construct a clean test set")
    # NOTE: reverse=True since we use RETAIN
    clean_test_set = VisitSequenceWithLabelDataset(test_seqs,
                                                   test_labels,
                                                   args.num_features,
                                                   reverse=True)
    clean_loader = DataLoader(dataset=clean_test_set,
                              batch_size=1,
                              shuffle=False,
                              collate_fn=visit_collate_fn,
                              num_workers=args.threads)
    print('===> Dataset loaded!')

    # Create model
    print('===> Building a Model')
    source_model = torch.load(args.model_path,
                              map_location=torch.device('cpu'))
    source_model = source_model.cpu()
    if args.cuda:
        source_model = source_model.cuda()