Example #1
def main(num_epochs, buffer_size, batch_size, datasets_path=None, output_resolution=512,
         max_load_resolution=512, num_classes=2, num_gpu=None, use_tpu=False):
    physical_gpus = tf.config.experimental.list_physical_devices('GPU')
    if num_gpu is None:
        num_gpu = len(physical_gpus)
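    # Enable on-demand GPU memory growth so TensorFlow does not pre-allocate all device memory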
    for gpu in physical_gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices("GPU")
    try:
        # TPU detection
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver() if use_tpu else None
    except ValueError:
        tpu = None
    # Select appropriate distribution strategy
    if use_tpu and tpu:
        tf.tpu.experimental.initialize_tpu_system(tpu)
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
        tf.get_logger().info('Running on TPU %s', tpu.cluster_spec().as_dict()['worker'])
    elif len(logical_gpus) > 1:
        strategy = tf.distribute.MirroredStrategy(
            devices=['/gpu:{}'.format(i) for i in range(num_gpu)]
        )
        tf.get_logger().info('Running on multiple GPUs.')
    elif len(logical_gpus) == 1:
        strategy = tf.distribute.get_strategy()
        tf.get_logger().info('Running on single GPU.')
    else:
        strategy = tf.distribute.get_strategy()
        tf.get_logger().info('Running on single CPU.')
    tf.get_logger().info('Number of devices: {}'.format(strategy.num_replicas_in_sync))
    tf.get_logger().info('num_classes: {}'.format(num_classes))
    tf.get_logger().info('batch_size: {}'.format(batch_size))
    tf.get_logger().info('output_resolution: {}'.format(output_resolution))
    checkpoint_path = "training/cp-{epoch:04d}-{step:04d}.ckpt"
    checkpoint_dir = os.path.dirname(checkpoint_path)
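    # Scale the per-replica batch size by the replica count to get the global batch size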
    dataset_loader = DatasetLoader(buffer_size=buffer_size, batch_size=batch_size * strategy.num_replicas_in_sync,
                                   output_resolution=output_resolution,
                                   max_load_resolution=max_load_resolution)
    train_dataset, test_dataset, train_num_datasets, test_num_datasets = dataset_loader.load(
        datasets_path=datasets_path, train_dir_name="train", test_dir_name="test")
    tf.get_logger().info("train_num_datasets:{}".format(train_num_datasets))
    tf.get_logger().info("test_num_datasets:{}".format(test_num_datasets))
    with strategy.scope():
        model = Model(output_resolution=output_resolution, num_classes=num_classes)
        train_len = tf.data.experimental.cardinality(train_dataset)
        train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)
        test_dist_dataset = strategy.experimental_distribute_dataset(test_dataset)
        trainer = Train(batch_size=batch_size, strategy=strategy, num_epochs=num_epochs, model=model,
                        train_num_datasets=train_num_datasets,
                        test_num_datasets=test_num_datasets,
                        checkpoint_path=checkpoint_path,
                        train_len=train_len,
                        num_classes=num_classes,
                        num_gpu=num_gpu,
                        checkpoint_dir=checkpoint_dir)
        trainer.custom_loop(train_dist_dataset,
                            test_dist_dataset,
                            strategy)
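A minimal sketch of how this entry point might be invoked, assuming DatasetLoader, Model, and Train are project-local helpers importable in the same module; the argument values are illustrative only:

if __name__ == '__main__':
    # Hypothetical values; adjust to the dataset layout and available hardware.
    main(num_epochs=10, buffer_size=1000, batch_size=8,
         datasets_path='./datasets', output_resolution=512, num_classes=2)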
Example #2
def preprocess_data():
    save_path = './new_features.csv'
    survival_data_save_path = './new_survival_data.csv'
    data_loader = DatasetLoader()
    features = data_loader.load_cell_features(DATASET_ROOT_DIR, save_path=save_path)
    print(features.shape)
    print(data_loader.patient_id_list)
    survival_data = data_loader.load_survial_data(DATASET_ROOT_DIR, save_path=survival_data_save_path)
    print(survival_data.shape)
def test_disjoint_samples_train_5_tasks(task, dataset, ind_task):
    path = "./Archives/Data/Tasks/{}/{}_5_train.pt".format(dataset, task)
    data = torch.load(path)
    data_set = DatasetLoader(data,
                             current_task=0,
                             transform=None,
                             load_images=False,
                             path=None)

    data_set.set_task(ind_task)
    folder = "./Samples/5_tasks/"

    if not os.path.exists(folder):
        os.makedirs(folder)

    path_out = os.path.join(
        folder, "{}_{}_task_{}.png".format(dataset, task, ind_task))

    if task == "permutations":
        permutations = torch.load(
            "../Archives/Data/Tasks/{}/ind_permutations_5_train.pt".format(
                dataset))
        data_set.visualize_reordered(path_out,
                                     number=100,
                                     shape=[28, 28, 1],
                                     permutations=permutations)
    else:
        data_set.visualize_sample(path_out, number=100, shape=[28, 28, 1])
Example #4
def test_DataLoader_init_label_size(get_fake_dataset):
    """
    Test if the dictionary of labels has the correct size
    :param get_fake_dataset:
    :return:
    """
    fake_dataset = get_fake_dataset
    dataset = DatasetLoader(fake_dataset)

    if not len(dataset.labels) == dataset_size:
        raise AssertionError("Test fail")
Example #5
def test_DataLoader_init_label_is_dict(get_fake_dataset):
    """
    Test if the dictionary of labels is really a dictionary
    :param get_fake_dataset:
    :return:
    """
    fake_dataset = get_fake_dataset
    dataset = DatasetLoader(fake_dataset)

    if not isinstance(dataset.labels, dict):
        raise AssertionError("Test fail")
def test_disjoint_samples_disjoint_classes_permutations(ind_task, dataset):
    index_permutation = 2  # empirically chosen
    permutation = torch.load("permutation_classes.t")[index_permutation]

    name = ''
    for i in range(10):
        name += str(int(permutation[i]))

    path = "./Archives/Data/Tasks/{}/disjoint_{}_10_train.pt".format(
        dataset, name)
    data = torch.load(path)
    data_set = DatasetLoader(data,
                             current_task=0,
                             transform=None,
                             load_images=False,
                             path=None)

    data_set.set_task(ind_task)
    folder = "./Samples/disjoint_classes_permutations/"

    if not os.path.exists(folder):
        os.makedirs(folder)

    path_out = os.path.join(
        folder, "disjoint_classes_permutations_{}.png".format(ind_task))

    data_set.visualize_sample(path_out, number=100, shape=[28, 28, 1])
Example #7
def test_DataLoader_with_torch(get_fake_dataset):
    """
    Test if the dataloader can be used with torch.utils.data.DataLoader
    :param get_fake_dataset:
    :return:
    """
    fake_dataset = get_fake_dataset
    dataset = DatasetLoader(fake_dataset)
    train_loader = data.DataLoader(dataset,
                                   batch_size=10,
                                   shuffle=True,
                                   num_workers=6)

    for _, (_, _) in enumerate(train_loader):
        break
Example #8
def test_DataLoader_with_torch_loader(get_fake_dataset):
    """
    Test if the dataset, wrapped in torch.utils.data.DataLoader, yields batches and labels of the correct types
    :param get_fake_dataset:
    :return:
    """
    fake_dataset = get_fake_dataset
    dataset = DatasetLoader(fake_dataset)
    train_loader = data.DataLoader(dataset,
                                   batch_size=10,
                                   shuffle=True,
                                   num_workers=6)

    for _, (batch, label) in enumerate(train_loader):

        if not isinstance(label, torch.LongTensor):
            raise AssertionError("Test fail")

        if not isinstance(batch, torch.FloatTensor):
            raise AssertionError("Test fail")
        break
def test_disjoint_samples_train_10_tasks(dataset, ind_task):
    path = "./Archives/Data/Tasks/{}/disjoint_10_train.pt".format(dataset)
    data = torch.load(path)
    data_set = DatasetLoader(data,
                             current_task=0,
                             transform=None,
                             load_images=False,
                             path=None)

    data_set.set_task(ind_task)

    folder = "./Samples/disjoint_10_tasks/"

    if not os.path.exists(folder):
        os.makedirs(folder)

    path_out = os.path.join(folder, "{}_task_{}.png".format(dataset, ind_task))
    data_set.visualize_sample(path_out, number=100, shape=[28, 28, 1])
Example #10
def test_DataLoader_init_current_task(get_fake_dataset, init_current_task):
    fake_dataset = get_fake_dataset
    dataset = DatasetLoader(fake_dataset, current_task=init_current_task)

    if not dataset.current_task == init_current_task:
        raise AssertionError("Test fail")
Example #11
def main():
    print(torch.cuda.is_available())
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile
    lossgradient = options.lossgradient

    cfg = {}
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))

    cfg.update(config.parse_variable_assignments(options.var))

    outfile = options.outfile
    sample_rate = cfg['sample_rate']
    frame_len = cfg['frame_len']
    fps = cfg['fps']
    mel_bands = cfg['mel_bands']
    mel_min = cfg['mel_min']
    mel_max = cfg['mel_max']
    blocklen = cfg['blocklen']
    batchsize = cfg['batchsize']

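    # frame_len // 2 + 1 STFT bins span frequencies up to Nyquist; bin_mel_max is
    # (approximately) the highest bin index that still lies below mel_max.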
    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    print("Preparing data reading...")
    datadir = os.path.join(os.path.dirname(__file__), os.path.pardir,
                           'datasets', options.dataset)

    # - load filelist
    with io.open(os.path.join(datadir, 'filelists', 'valid')) as f:
        filelist = [l.rstrip() for l in f if l.rstrip()]
    with io.open(os.path.join(datadir, 'filelists', 'test')) as f:
        filelist += [l.rstrip() for l in f if l.rstrip()]

    # - load mean/std
    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % options.dataset)

    dataloader = DatasetLoader(options.dataset,
                               options.cache_spectra,
                               datadir,
                               input_type=options.input_type,
                               filelist=filelist)
    mel_spects, labels = dataloader.prepare_batches(sample_rate,
                                                    frame_len,
                                                    fps,
                                                    mel_bands,
                                                    mel_min,
                                                    mel_max,
                                                    blocklen,
                                                    batchsize,
                                                    batch_data=False)

    with np.load(meanstd_file) as f:
        mean = f['mean']
        std = f['std']
    mean = mean.astype(floatX)
    istd = np.reciprocal(std).astype(floatX)

    mdl = model.CNNModel(input_type='mel_spects_norm',
                         is_zeromean=False,
                         meanstd_file=meanstd_file,
                         device=device)
    mdl.load_state_dict(torch.load(modelfile))
    mdl.to(device)
    mdl.eval()

    if (lossgradient != 'None'):
        mdl_lossgrad = model.CNNModel(input_type=options.input_type,
                                      is_zeromean=False,
                                      sample_rate=sample_rate,
                                      frame_len=frame_len,
                                      fps=fps,
                                      mel_bands=mel_bands,
                                      mel_min=mel_min,
                                      mel_max=mel_max,
                                      bin_mel_max=bin_mel_max,
                                      meanstd_file=meanstd_file,
                                      device=device)
        mdl_lossgrad.load_state_dict(torch.load(lossgradient))
        mdl_lossgrad.to(device)
        mdl_lossgrad.eval()
        criterion = torch.nn.BCELoss()
        loss_grad_val = dataloader.prepare_loss_grad_batches(
            options.loss_grad_save, mel_spects, labels, mdl_lossgrad,
            criterion, blocklen, batchsize, device)

    # run prediction loop
    print("Predicting:")
    predictions = []
    #for spect, g in zip(mel_spects, loss_grad_val):
    c = 0
    for spect in progress(mel_spects, total=len(filelist), desc='File '):
        if (lossgradient != 'None'):
            g = loss_grad_val[c]
        c += 1
        # naive way: pass excerpts of the size used during training
        # - view spectrogram memory as a 3-tensor of overlapping excerpts
        num_excerpts = len(spect) - blocklen + 1
        excerpts = np.lib.stride_tricks.as_strided(
            spect.astype(floatX),
            shape=(num_excerpts, blocklen, spect.shape[1]),
            strides=(spect.strides[0], spect.strides[0], spect.strides[1]))
        preds = np.zeros((num_excerpts, 1))
        count = 0
        for pos in range(0, num_excerpts, batchsize):
            input_data = np.transpose(
                excerpts[pos:pos + batchsize, :, :, np.newaxis], (0, 3, 1, 2))
            input_data = (input_data - mean) * istd
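            # Occlude cfg['occlude'] time-frequency bins per excerpt, ranked by the loss
            # gradient when one is available (top-ranked bins for ROAR, bottom-ranked otherwise)
            # or picked at random; v // 80 and v % 80 map the flat index to (time, mel band).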
            if lossgradient != 'None':
                for i in range(input_data.shape[0]):
                    if (options.lossgrad_algorithm == 'grad'):
                        rank_matrix = np.abs(g[i + pos])
                    elif (options.lossgrad_algorithm == 'gradxinp'):
                        rank_matrix = np.squeeze(g[i + pos] *
                                                 input_data[i, :, :, :])
                    elif (options.lossgrad_algorithm == 'gradorig'):
                        rank_matrix = g[i + pos]
                    if (options.ROAR == 1):
                        v = np.argsort(rank_matrix,
                                       axis=None)[-cfg['occlude']:]
                    else:
                        v = np.argsort(rank_matrix, axis=None)[:cfg['occlude']]
                    input_data[i, :, v // 80, v % 80] = 0
            else:
                for i in range(input_data.shape[0]):
                    #print('random')
                    v = np.random.choice(115 * 80,
                                         cfg['occlude'],
                                         replace=False)
                    input_data[i, :, v // 80, v % 80] = 0

            count += 1

            #print('Here')
            #preds = np.vstack(mdl.forward(torch.from_numpy(
            #            np.transpose(excerpts[pos:pos + batchsize,:,:,
            #            np.newaxis],(0,3,1,2))).to(device)).cpu().detach().numpy()
            #        for pos in range(0, num_excerpts, batchsize))

            preds[pos:pos + batchsize, :] = mdl(
                torch.from_numpy(input_data).to(
                    device)).cpu().detach().numpy()
        print('Here')
        predictions.append(preds)
    # save predictions
    print("Saving predictions")
    np.savez(outfile, **{fn: pred for fn, pred in zip(filelist, predictions)})
def main(args):
    # Load train & val data
    # adj, features, labels, idx_train, idx_val, idx_test = load_data()
    train_kwargs = {
        'root_dir': args.root_dir,
        'data_file': args.train_file,
        'corpus_file': args.corpus_file,
        'label_file': args.label_file
    }
    train_data = DatasetLoader(kwargs=train_kwargs, transform=True)
    train_loader = DataLoader(train_data,
                              batch_size=args.batch_size,
                              shuffle=True)

    val_kwargs = {
        'root_dir': args.root_dir,
        'data_file': args.val_file,
        'corpus_file': args.corpus_file,
        'label_file': args.label_file
    }
    val_data = DatasetLoader(kwargs=val_kwargs, transform=True)
    val_loader = DataLoader(val_data, batch_size=1, shuffle=True)

    # Model and optimizer
    model = EGAT(node_feat=len(train_data.corpus),
                 edge_feat=8,
                 nclass=len(CLASSES),
                 nhidden=args.hidden,
                 dropout=args.dropout,
                 alpha=args.alpha,
                 nheads=args.nb_heads)
    model = model.to(DEVICE)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)

    best_loss = 1000
    best_acc = 0.0

    for epoch in range(0, args.epochs):
        model.train()
        train_loss_mean = []
        train_acc_mean = []
        start_time = time.time()
        for in_data in train_loader:
            optimizer.zero_grad()
            output = model(in_data)
            label = in_data['graph_lbl'].to(DEVICE)
            loss_train = F.nll_loss(output, label)
            loss_train.backward()
            optimizer.step()

            acc_train = accuracy(output, label)
            train_loss_mean.append(loss_train.data.item())
            train_acc_mean.append(acc_train)

        print('Epoch: {:04d}'.format(epoch + 1),
              'loss_train: {:.4f}'.format(np.mean(train_loss_mean)),
              'acc_train: {:.4f}'.format(np.mean(train_acc_mean)),
              'time: {:.4f}s'.format(time.time() - start_time))

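        # Note: in this snippet validation runs only once, when epoch equals args.patience.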
        if epoch == args.patience:
            model.eval()
            val_loss_mean = []
            val_acc_mean = []
            for in_data in val_loader:
                output = model(in_data)
                label = in_data['graph_lbl']
                loss_val = F.nll_loss(output, label)

                acc_val = accuracy(output, label)
                val_loss_mean.append(loss_val.data.item())
                val_acc_mean.append(acc_val)

            print("*" * 20)
            print('Epoch: {:04d}'.format(epoch + 1),
                  'loss_val: {:.4f}'.format(np.mean(val_loss_mean)),
                  'acc_val: {:.4f}'.format(np.mean(val_acc_mean)))

            if (np.mean(val_acc_mean) > best_acc and np.mean(val_loss_mean)):
                torch.save(
                    {
                        "state_dict": model.state_dict(),
                        "configs": args,
                        "epoch": epoch,
                        "train_acc": np.mean(train_loss_mean),
                        "val_acc": np.mean(val_loss_mean),
                    }, "{0}_epoch_{1}.pt".format(args.save_path, epoch))
Example #13
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile

    cfg = {}
    print(options.vars)
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))

    cfg.update(config.parse_variable_assignments(options.var))

    sample_rate = cfg['sample_rate']
    frame_len = cfg['frame_len']
    fps = cfg['fps']
    mel_bands = cfg['mel_bands']
    mel_min = cfg['mel_min']
    mel_max = cfg['mel_max']
    blocklen = cfg['blocklen']
    batchsize = cfg['batchsize']

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__), os.path.pardir,
                           'datasets', options.dataset)

    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % options.dataset)

    if (options.input_type == 'audio'):
        dataloader = DatasetLoader(options.dataset,
                                   options.cache_spectra,
                                   datadir,
                                   input_type=options.input_type)
        batches = dataloader.prepare_audio_batches(sample_rate, frame_len, fps,
                                                   blocklen, batchsize)
    else:
        dataloader = DatasetLoader(options.dataset,
                                   options.cache_spectra,
                                   datadir,
                                   input_type=options.input_type)
        batches = dataloader.prepare_batches(sample_rate, frame_len, fps,
                                             mel_bands, mel_min, mel_max,
                                             blocklen, batchsize)

    validation_data = DatasetLoader(options.dataset,
                                    '../ismir2015/experiments/mel_data/',
                                    datadir,
                                    dataset_split='valid',
                                    input_type='mel_spects')
    mel_spects_val, labels_val = validation_data.prepare_batches(
        sample_rate,
        frame_len,
        fps,
        mel_bands,
        mel_min,
        mel_max,
        blocklen,
        batchsize,
        batch_data=False)

    mdl = model.CNNModel(model_type=options.model_type,
                         input_type=options.input_type,
                         is_zeromean=False,
                         sample_rate=sample_rate,
                         frame_len=frame_len,
                         fps=fps,
                         mel_bands=mel_bands,
                         mel_min=mel_min,
                         mel_max=mel_max,
                         bin_mel_max=bin_mel_max,
                         meanstd_file=meanstd_file,
                         device=device)
    mdl = mdl.to(device)

    #Setting up learning rate and learning rate parameters
    initial_eta = cfg['initial_eta']
    eta_decay = cfg['eta_decay']
    momentum = cfg['momentum']
    eta_decay_every = cfg.get('eta_decay_every', 1)
    eta = initial_eta

    #set up loss
    criterion = torch.nn.BCELoss()

    #set up optimizer
    optimizer = torch.optim.SGD(mdl.parameters(),
                                lr=eta,
                                momentum=momentum,
                                nesterov=True)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=eta_decay_every,
                                                gamma=eta_decay)

    #set up tensorboard writer
    writer = SummaryWriter(os.path.join(modelfile, 'runs'))

    epochs = cfg['epochs']
    epochsize = cfg['epochsize']
    batches = iter(batches)

    #conditions to save model
    best_val_loss = 100000.
    best_val_error = 1.

    for epoch in range(epochs):
        # - Initialize certain parameters that are used to monitor training
        err = 0
        total_norm = 0
        loss_accum = 0
        mdl.train(True)
        # - Compute the L-2 norm of the gradients
        for p in mdl.parameters():
            if p.grad is not None:
                param_norm = p.grad.data.norm(2)
                total_norm += param_norm.item()**2
        total_norm = total_norm**(1. / 2)

        # - Start the training for this epoch
        for batch in progress(range(epochsize),
                              min_delay=0.5,
                              desc='Epoch %d/%d: Batch ' %
                              (epoch + 1, epochs)):
            data = next(batches)
            if (options.input_type == 'audio' or options.input_type == 'stft'):
                input_data = data[0]
            else:
                input_data = np.transpose(data[0][:, :, :, np.newaxis],
                                          (0, 3, 1, 2))
            labels = data[1][:, np.newaxis].astype(np.float32)

            #map labels to make them softer
            if not options.adversarial_training:
                labels = (0.02 + 0.96 * labels)
            optimizer.zero_grad()

            if (options.adversarial_training):
                mdl.train(False)
                if (options.input_type == 'stft'):
                    input_data_adv = attacks.PGD(
                        mdl,
                        torch.from_numpy(input_data).to(device),
                        target=torch.from_numpy(labels).to(device),
                        eps=cfg['eps'],
                        step_size=cfg['eps_iter'],
                        iterations=cfg['nb_iter'],
                        use_best=True,
                        random_start=True,
                        clip_min=0,
                        clip_max=1e8).cpu().detach().numpy()
                else:
                    input_data_adv = attacks.PGD(
                        mdl,
                        torch.from_numpy(input_data).to(device),
                        target=torch.from_numpy(labels).to(device),
                        eps=cfg['eps'],
                        step_size=cfg['eps_iter'],
                        iterations=cfg['nb_iter'],
                        use_best=True,
                        random_start=True).cpu().detach().numpy()

                mdl.train(True)
                optimizer.zero_grad()
                outputs = mdl(torch.from_numpy(input_data_adv).to(device))
            else:
                optimizer.zero_grad()
                outputs = mdl(torch.from_numpy(input_data).to(device))
            #input(outputs.size())
            #input(mdl.conv(torch.from_numpy(input_data).to(device)).cpu().detach().numpy().shape)
            loss = criterion(outputs, torch.from_numpy(labels).to(device))
            loss.backward()
            optimizer.step()
            print(loss.item())
            loss_accum += loss.item()

        # - Compute validation loss and error if desired
        if options.validate:
            mdl.input_type = 'mel_spects'
            from eval import evaluate
            mdl.train(False)
            val_loss = 0
            preds = []
            labs = []
            max_len = fps

            num_iter = 0

            for spect, label in zip(mel_spects_val, labels_val):
                num_excerpts = len(spect) - blocklen + 1
                excerpts = np.lib.stride_tricks.as_strided(
                    spect,
                    shape=(num_excerpts, blocklen, spect.shape[1]),
                    strides=(spect.strides[0], spect.strides[0],
                             spect.strides[1]))
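                # excerpts[k] is the window spect[k:k + blocklen]; the strided view
                # builds the overlapping excerpts without copying the spectrogram.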
                # - Pass mini-batches through the network and concatenate results
                for pos in range(0, num_excerpts, batchsize):
                    input_data = np.transpose(
                        excerpts[pos:pos + batchsize, :, :, np.newaxis],
                        (0, 3, 1, 2))
                    #if (pos+batchsize>num_excerpts):
                    #    label_batch = label[blocklen//2+pos:blocklen//2+num_excerpts,
                    #            np.newaxis].astype(np.float32)
                    #else:
                    #    label_batch = label[blocklen//2+pos:blocklen//2+pos+batchsize,
                    #            np.newaxis].astype(np.float32)
                    if (pos + batchsize > num_excerpts):
                        label_batch = label[pos:num_excerpts,
                                            np.newaxis].astype(np.float32)
                    else:
                        label_batch = label[pos:pos + batchsize,
                                            np.newaxis].astype(np.float32)

                    pred = mdl(torch.from_numpy(input_data).to(device))
                    e = criterion(pred,
                                  torch.from_numpy(label_batch).to(device))
                    preds = np.append(preds, pred[:, 0].cpu().detach().numpy())
                    labs = np.append(labs, label_batch)
                    val_loss += e.item()
                    num_iter += 1
            mdl.input_type = options.input_type
            print("Validation loss: %.3f" % (val_loss / num_iter))
            _, results = evaluate(preds, labs)
            print("Validation error: %.3f" % (1 - results['accuracy']))

            if (1 - results['accuracy'] < best_val_error):
                torch.save(mdl.state_dict(),
                           os.path.join(modelfile, 'model.pth'))
                best_val_loss = val_loss / num_iter
                best_val_error = 1 - results['accuracy']
                print('New saved model', best_val_loss, best_val_error)

        #Update the learning rate
        scheduler.step()

        print('Training Loss per epoch', loss_accum / epochsize)

        # - Save parameters for examining
        writer.add_scalar('Training Loss', loss_accum / epochsize, epoch)
        writer.add_scalar('Gradient norm', total_norm, epoch)
        if options.validate:
            writer.add_scalar('Validation loss', val_loss / num_iter, epoch)
            writer.add_scalar('Validation error', 1 - results['accuracy'],
                              epoch)
        #for param_group in optimizer.param_groups:
        #print(param_group['lr'])

    if not options.validate:
        torch.save(mdl.state_dict(), os.path.join(modelfile, 'model.pth'))
    with io.open(os.path.join(modelfile, 'model.vars'), 'w') as f:
        f.writelines('%s=%s\n' % kv for kv in cfg.items())
Example #14
dir_path = "/backtesting_data"
file_name = "validation_data_Training_31a3e6e41cef24188ab2121d55be07ab98f7ccaf_2018-05-08_production.db"
validation_db = join(dir_path, file_name)

transform = Transform(db_file=validation_db,
                      min_start_date='2015-01-01',
                      max_end_date='2017-12-30',
                      training_transformation=True)

train_db = DatasetReader(path_to_training_db=validation_db,
                         transform=transform,
                         num_csku_per_query=1000,
                         shuffle_transform=True)

dataloader = DatasetLoader(train_db,
                           collate_fn=append_lists,
                           mini_batch_size=100,
                           num_workers=0)
st = time.time()

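# Time one pass over the dataloader, printing the batch index every 99 batches.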
for epoch in range(1):
    print("epoch %d" % epoch)
    for i_batch, sample_batched in enumerate(dataloader):
        if i_batch % 99 == 0: print(i_batch)
        d = 1
    end_t = time.time()
    print(i_batch, end_t - st)
    st = time.time()

print(time.time() - st)
Example #15
def main():
    # data_dir = './train_data/'
    train_image_dir = './train_data/DUTS/DUTS-TR-Image/'
    train_label_dir = './train_data/DUTS/DUTS-TR-Mask/'

    model_dir = './saved_models/'

    resume_train = True
    saved_model_path = model_dir + 'model.pth'

    validation = True
    save_every = 1
    epoch_num = 100000
    batch_size_train = 16
    batch_size_val = 1
    train_num = 0
    val_num = 0

    if validation:
        val_image_dir = 'test_data/val/images/'
        val_label_dir = 'test_data/val/gts/'
        prediction_dir = './val_results/'

        val_img_name_list = glob.glob(val_image_dir + '*.jpg')
        val_lbl_name_list = glob.glob(val_label_dir + '*.png')

        val_dataset = DatasetLoader(img_name_list=val_img_name_list,
                                    lbl_name_list=val_lbl_name_list,
                                    transform=transforms.Compose(
                                        [Rescale(256),
                                         ToTensor()]))

        val_dataloader = DataLoader(val_dataset,
                                    batch_size=1,
                                    shuffle=False,
                                    num_workers=4)

    train_img_name_list = glob.glob(train_image_dir + '*.jpg')
    train_lbl_name_list = []

    for img_path in train_img_name_list:
        img_path = img_path.replace('.jpg', '.png')
        img_path = img_path.replace('DUTS-TR-Image', 'DUTS-TR-Mask')
        train_lbl_name_list.append(img_path)

    if len(train_img_name_list) == 0 or (validation and len(val_img_name_list) == 0):
        print('0 images found.')
        assert False

    print('Train images: ', len(train_img_name_list))
    print('Train labels: ', len(train_lbl_name_list))

    train_num = len(train_img_name_list)

    dataset = DatasetLoader(img_name_list=train_img_name_list,
                            lbl_name_list=train_lbl_name_list,
                            transform=transforms.Compose([
                                RandomHorizontalFlip(0.5),
                                RandomVerticalFlip(0.5),
                                Rescale(300),
                                RandomCrop(256),
                                ToTensor()
                            ]))
    dataloader = DataLoader(dataset,
                            batch_size=batch_size_train,
                            shuffle=True,
                            num_workers=4)

    model = MYNet(3, 1)
    model.cuda()

    from torchsummary import summary
    summary(model, input_size=(3, 256, 256))

    # optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.00001, nesterov=False)
    optimizer = optim.Adam(model.parameters(),
                           lr=0.01,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[200000, 350000],
                                               gamma=0.1,
                                               last_epoch=-1)
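    # Note: scheduler.step() is called once per batch in the loop below, so the
    # MultiStepLR milestones (200000, 350000) are iteration counts, not epochs.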

    # scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0001,
    #     max_lr=0.01, step_size_up=8000, mode='triangular2')

    i_num_tot = 0
    loss_output = 0.0
    loss_pre_ref = 0.0
    i_num_epoch = 0
    epoch_init = 0

    if resume_train:
        print('Loading checkpoint: ', saved_model_path)
        checkpoint = torch.load(saved_model_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        epoch_init = checkpoint['epoch'] + 1
        i_num_tot = checkpoint['i_num_tot'] + 1
        i_num_epoch = checkpoint['i_num_epoch']
        loss_output = checkpoint['loss_output']
        # loss_pre_ref = checkpoint['loss_pre_ref']

    log_file = open('logs/log.txt', 'a+')
    log_file.write(str(model) + '\n')
    log_file.close()

    print('Training...')
    _s = time.time()
    for epoch in range(epoch_init, epoch_num):
        model.train()
        print('Epoch {}...'.format(epoch))
        _time_epoch = time.time()
        for i, data in enumerate(dataloader):
            i_num_tot += 1
            i_num_epoch += 1

            inputs, labels = data

            inputs = inputs.cuda()
            labels = labels.cuda()

            optimizer.zero_grad()

            out = model(inputs)
            loss = muti_bce_loss_fusion(out, labels)
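            # muti_bce_loss_fusion returns a sequence of losses; its first element is the
            # fused total used for backpropagation and logging.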

            loss[0].backward()
            optimizer.step()
            scheduler.step()

            loss_output += loss[0].item()
            # loss_pre_ref += loss[1].item()

            del out, inputs, labels

        print('Epoch time: {}'.format(time.time() - _time_epoch))
        if epoch % save_every == 0:  # save the model every X epochs
            state_dic = {
                'epoch': epoch,
                'i_num_tot': i_num_tot,
                'i_num_epoch': i_num_epoch,
                'loss_output': loss_output,
                # 'loss_pre_ref': loss_pre_ref,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
            }
            torch.save(state_dic, model_dir + 'model.pth')

        log = '[epoch: {:d}/{:d}, ite: {:d}] loss_output: {:.6f}, l: {:.6f}\n'.format(
            epoch, epoch_num, i_num_tot, loss_output / i_num_epoch,
            loss[0].item())

        del loss

        loss_output = 0
        loss_pre_ref = 0
        i_num_epoch = 0
        log_file = open('logs/log.txt', 'a+')
        log_file.write(log + '\n')
        log_file.close()
        print(log)

        if validation:
            model.eval()
            # val_i_num_tot = 0
            val_i_num_epoch = 0
            val_loss_output = 0
            # val_loss_pre_ref = 0
            val_log_file = open('logs/log_val.txt', 'a+')
            print('Evaluating...')
            with torch.no_grad():
                for val_i, val_data in enumerate(val_dataloader):
                    # val_i_num_tot += 1
                    val_i_num_epoch += 1

                    val_inputs, val_labels = val_data

                    val_inputs = val_inputs.cuda()
                    val_labels = val_labels.cuda()

                    val_out = model(val_inputs)

                    val_loss = muti_bce_loss_fusion(val_out, val_labels)

                    val_loss_output += val_loss[0].item()
                    # val_loss_pre_ref += val_loss0.item()

                    pred = val_out[0][:, 0, :, :]
                    pred = normPRED(pred)

                    save_output(val_img_name_list[val_i], pred, prediction_dir)

                    del val_out, val_inputs, val_labels, val_loss

            log_val = '[val: epoch: {:d}, ite: {:d}] loss_output: {:.6f}\n'.format(
                epoch, i_num_tot, val_loss_output / val_i_num_epoch)
            val_log_file.write(log_val + '\n')
            val_log_file.close()

    _t = 'Training time: ' + str(time.time() - _s) + '\n'
    print(_t)
    log_file = open('logs/log.txt', 'a+')
    log_file.write(_t)
    log_file.close()
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile
    lossgradient = options.lossgradient
    cfg = {}
    print(options.vars)
    print('Model save file:', modelfile)
    print('Lossgrad file:', lossgradient)
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))

    cfg.update(config.parse_variable_assignments(options.var))
    
    sample_rate = cfg['sample_rate']
    frame_len = cfg['frame_len']
    fps = cfg['fps']
    mel_bands = cfg['mel_bands']
    mel_min = cfg['mel_min']
    mel_max = cfg['mel_max']
    blocklen = cfg['blocklen']
    batchsize = cfg['batchsize']
    print('Occluded amount:',cfg['occlude'])
    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__),
                           os.path.pardir, 'datasets', options.dataset)
    
    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % options.dataset)
 
    dataloader = DatasetLoader(options.dataset, options.cache_spectra, datadir,
                               input_type=options.input_type)
    batches = dataloader.prepare_batches(sample_rate, frame_len, fps,
                                         mel_bands, mel_min, mel_max,
                                         blocklen, batchsize)

    validation_data = DatasetLoader(options.dataset,
                                    '../ismir2015/experiments/mel_data/',
                                    datadir,
                                    dataset_split='valid',
                                    input_type='mel_spects')
    mel_spects_val, labels_val = validation_data.prepare_batches(
        sample_rate, frame_len, fps, mel_bands, mel_min, mel_max,
        blocklen, batchsize, batch_data=False)

    with np.load(meanstd_file) as f:
        mean = f['mean']
        std = f['std']
    mean = mean.astype(floatX)
    istd = np.reciprocal(std).astype(floatX)
    if options.input_type == 'mel_spects':
        mdl = model.CNNModel(input_type='mel_spects_norm', is_zeromean=False,
                             sample_rate=sample_rate, frame_len=frame_len,
                             fps=fps, mel_bands=mel_bands, mel_min=mel_min,
                             mel_max=mel_max, bin_mel_max=bin_mel_max,
                             meanstd_file=meanstd_file, device=device)
        if lossgradient != 'None':
            mdl_lossgrad = model.CNNModel(input_type=options.input_type,
                                          is_zeromean=False,
                                          sample_rate=sample_rate,
                                          frame_len=frame_len, fps=fps,
                                          mel_bands=mel_bands, mel_min=mel_min,
                                          mel_max=mel_max,
                                          bin_mel_max=bin_mel_max,
                                          meanstd_file=meanstd_file,
                                          device=device)
            mdl_lossgrad.load_state_dict(torch.load(lossgradient))
            mdl_lossgrad.to(device)
            mdl_lossgrad.eval()

    mdl = mdl.to(device)
    
    #Setting up learning rate and learning rate parameters
    initial_eta = cfg['initial_eta']
    eta_decay = cfg['eta_decay']
    momentum = cfg['momentum']
    eta_decay_every = cfg.get('eta_decay_every', 1)
    eta = initial_eta

    #set up loss
    criterion = torch.nn.BCELoss()

    #set up optimizer
    optimizer = torch.optim.SGD(mdl.parameters(), lr=eta, momentum=momentum,
                                nesterov=True)
    #optimizer = torch.optim.Adam(mdl.parameters(), lr=eta, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=eta_decay_every,
                                                gamma=eta_decay)

    #set up tensorboard writer
    writer = SummaryWriter(os.path.join(modelfile, 'runs'))

    
    epochs = cfg['epochs']
    epochsize = cfg['epochsize']
    batches = iter(batches)
    
    #conditions to save model
    best_val_loss = 100000.
    best_val_error = 1.
    
    #loss gradient values for validation data
    loss_grad_val = validation_data.prepare_loss_grad_batches(
        options.loss_grad_save, mel_spects_val, labels_val, mdl_lossgrad,
        criterion, blocklen, batchsize, device)
    for epoch in range(epochs):
        # - Initialize certain parameters that are used to monitor training
        err = 0
        total_norm = 0
        loss_accum = 0
        mdl.train(True)
        # - Compute the L-2 norm of the gradients
        for p in mdl.parameters():
            if p.grad is not None:
                param_norm = p.grad.data.norm(2)
                total_norm += param_norm.item() ** 2
        total_norm = total_norm ** (1. / 2)
        
        # - Start the training for this epoch
        for batch in progress(range(epochsize),
                              min_delay=0.5,
                              desc='Epoch %d/%d: Batch ' %
                              (epoch + 1, epochs)):
            data = next(batches)
            if (options.input_type == 'audio' or options.input_type == 'stft'):
                input_data = data[0]
            else:
                input_data = np.transpose(data[0][:, :, :, np.newaxis],
                                          (0, 3, 1, 2))
            labels = data[1][:, np.newaxis].astype(np.float32)
            input_data_loss = input_data

            if lossgradient != 'None':
                g = loss_grad(mdl_lossgrad,
                              torch.from_numpy(input_data_loss).to(device).requires_grad_(True),
                              torch.from_numpy(labels).to(device),
                              criterion)
                g = np.squeeze(g)
                input_data = (input_data - mean) * istd
                for i in range(batchsize):
                    if (options.lossgrad_algorithm == 'grad'):
                        rank_matrix = np.abs(g[i])
                    elif (options.lossgrad_algorithm == 'gradxinp'):
                        rank_matrix = np.squeeze(g[i] * input_data[i, :, :, :])
                    elif (options.lossgrad_algorithm == 'gradorig'):
                        rank_matrix = g[i]
                    v = np.argsort(rank_matrix, axis=None)[-cfg['occlude']:]
                    input_data[i, :, v // 80, v % 80] = 0

            else:
                for i in range(batchsize):
                    #print('random')
                    v = np.random.choice(115 * 80, cfg['occlude'],
                                         replace=False)
                    input_data[i, :, v // 80, v % 80] = 0
          
            input_data = input_data.astype(floatX)

            labels = (0.02 + 0.96*labels)
            
            optimizer.zero_grad()
            outputs = mdl(torch.from_numpy(input_data).to(device))
        
            loss = criterion(outputs, torch.from_numpy(labels).to(device))
            loss.backward()
            optimizer.step()
            #print(loss.item())
            loss_accum += loss.item()
   
        # - Compute validation loss and error if desired
        if options.validate:
            #mdl.model_type = 'mel_spects'
            from eval import evaluate
            mdl.train(False) 
            val_loss = 0
            preds = []
            labs = []
            max_len = fps
            
            num_iter = 0 

            for spect, label, g in zip(mel_spects_val, labels_val,
                                       loss_grad_val):
                num_excerpts = len(spect) - blocklen + 1
                excerpts = np.lib.stride_tricks.as_strided(
                    spect,
                    shape=(num_excerpts, blocklen, spect.shape[1]),
                    strides=(spect.strides[0], spect.strides[0],
                             spect.strides[1]))

                # - Pass mini-batches through the network and concatenate results
                for pos in range(0, num_excerpts, batchsize):
                    input_data = np.transpose(
                        excerpts[pos:pos + batchsize, :, :, np.newaxis],
                        (0, 3, 1, 2))
                    #if (pos+batchsize>num_excerpts):
                    #    label_batch = label[blocklen//2+pos:blocklen//2+num_excerpts,
                    #            np.newaxis].astype(np.float32)
                    #else:
                    #    label_batch = label[blocklen//2+pos:blocklen//2+pos+batchsize,
                    #            np.newaxis].astype(np.float32)
                    if (pos + batchsize > num_excerpts):
                        label_batch = label[pos:num_excerpts,
                                            np.newaxis].astype(np.float32)
                    else:
                        label_batch = label[pos:pos + batchsize,
                                            np.newaxis].astype(np.float32)

                    #input_data_loss = input_data
                    if lossgradient != 'None':
                        #grads = loss_grad(mdl_lossgrad, torch.from_numpy(input_data_loss).to(device).requires_grad_(True), torch.from_numpy(label_batch).to(device), criterion)
                        input_data = (input_data - mean) * istd
                        for i in range(input_data.shape[0]):
                            if (options.lossgrad_algorithm == 'grad'):
                                rank_matrix = np.abs(g[i])
                            elif (options.lossgrad_algorithm == 'gradxinp'):
                                rank_matrix = np.squeeze(g[i] *
                                                         input_data[i, :, :, :])
                            elif (options.lossgrad_algorithm == 'gradorig'):
                                rank_matrix = g[i]

                            v = np.argsort(np.abs(rank_matrix),
                                           axis=None)[-cfg['occlude']:]
                            input_data[i, :, v // 80, v % 80] = 0
                    else:
                        for i in range(input_data.shape[0]):
                            #print('random')
                            v = np.random.choice(115 * 80, cfg['occlude'],
                                                 replace=False)
                            input_data[i, :, v // 80, v % 80] = 0

                    input_data = input_data.astype(floatX)

                    pred = mdl(torch.from_numpy(input_data).to(device))
                    e = criterion(pred,
                                  torch.from_numpy(label_batch).to(device))
                    preds = np.append(preds, pred[:, 0].cpu().detach().numpy())
                    labs = np.append(labs, label_batch)
                    val_loss += e.item()
                    num_iter += 1
            #mdl.model_type = 'mel_spects_norm'
            print("Validation loss: %.3f" % (val_loss / num_iter))
            _, results = evaluate(preds, labs)
            print("Validation error: %.3f" % (1 - results['accuracy']))

            if (1 - results['accuracy'] < best_val_error):
                torch.save(mdl.state_dict(),
                           os.path.join(modelfile, 'model.pth'))
                best_val_loss = val_loss / num_iter
                best_val_error = 1 - results['accuracy']
                print('New saved model', best_val_loss, best_val_error)

        #Update the learning rate
        scheduler.step()

        print('Training Loss per epoch', loss_accum / epochsize)

        # - Save parameters for examining
        writer.add_scalar('Training Loss', loss_accum / epochsize, epoch)
        if options.validate:
            writer.add_scalar('Validation loss', val_loss / num_iter, epoch)
            writer.add_scalar('Gradient norm', total_norm, epoch)
            writer.add_scalar('Validation error', 1 - results['accuracy'],
                              epoch)
        #for param_group in optimizer.param_groups:
            #print(param_group['lr'])
    
    if not options.validate:
        torch.save(mdl.state_dict(), os.path.join(modelfile, 'model.pth'))
    with io.open(os.path.join(modelfile, 'model.vars'), 'w') as f:
        f.writelines('%s=%s\n' % kv for kv in cfg.items())
Example #17
def main():
    global args, best_prec1, use_gpu
    args = parser.parse_args()

    use_gpu = torch.cuda.is_available()
    num_classes = 79

    # define state params
    state = {
        'batch_size': args.batch_size,
        'image_size': args.image_size,
        'max_epochs': args.epochs,
        'evaluate': args.evaluate,
        'resume': args.resume,
        'num_classes': num_classes,
        'load': args.load,
        'test': args.test
    }
    state['difficult_examples'] = True
    state['save_model_path'] = args.checkpoint
    state['workers'] = args.workers
    state['epoch_step'] = args.epoch_step
    state['lr'] = args.lr
    state['device_ids'] = args.device_ids
    if args.evaluate:
        state['evaluate'] = True
    if args.test:
        state['test'] = True

    if not args.test:
        #TODO: Make annotation paths more general
        train_dataset = DatasetLoader(
            args.data,
            img_set='train_symm',
            annotation=os.path.join(
                '/srv/data1/ashishsingh/Half_and_Half_Data/I2L/annotation',
                'symm_trainset_annotation.json'))

        if args.val_hnh:
            val_dataset = DatasetLoader_HNH(
                args.data,
                img_set='val',
                annotation=os.path.join(
                    '/srv/data1/ashishsingh/Half_and_Half_Data/I2L/annotation',
                    'i2l_valset_annotation.json'))
        else:
            val_dataset = DatasetLoader(
                args.data,
                img_set='val_coco',
                annotation=os.path.join(
                    '/srv/data1/ashishsingh/Half_and_Half_Data',
                    'valset_complete_metadata.json'))

        print("Initializing model: {}".format(args.arch))
        if args.pretrained:
            model = models.init_model(name=args.arch,
                                      num_classes=num_classes,
                                      pretrained='imagenet',
                                      use_gpu=use_gpu)
        else:
            model = models.init_model(name=args.arch,
                                      num_classes=num_classes,
                                      pretrained=None,
                                      use_gpu=use_gpu)
        print("Model size: {:.3f} M".format(count_num_param(model)))

        # define loss function (criterion)
        #criterion = nn.MultiLabelSoftMarginLoss()
        criterion = MultiLabelSoftmaxLoss()
        if args.val_hnh or args.test:
            criterion_val = nn.CrossEntropyLoss()
        else:
            criterion_val = nn.MultiLabelSoftMarginLoss()

        # define optimizer
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        #Validate using test-like objective
        if args.val_hnh:
            engine = SymmetricMultiLabelHNHEngine(state)
        else:
            engine = SymmetricMultiLabelMAPEngine(state)

        engine.learning(model, criterion, criterion_val, train_dataset,
                        val_dataset, optimizer)

    else:
        test_dataset = DatasetLoader_HNH(
            args.data,
            img_set='test_cleaned',
            annotation=os.path.join(
                '/srv/data1/ashishsingh/Half_and_Half_Data/I2L/annotation',
                'i2l_testset_annotation.json'))

        print("Initializing model: {}".format(args.arch))
        model = models.init_model(name=args.arch,
                                  num_classes=num_classes,
                                  pretrained=None,
                                  use_gpu=use_gpu)
        criterion_test = nn.CrossEntropyLoss()
        engine = SymmetricMultiLabelHNHEngine(state)
        engine.test(model, criterion_test, test_dataset)
Example #18
def test_DataLoader_init(get_fake_dataset):
    fake_dataset = get_fake_dataset
    dataset = DatasetLoader(fake_dataset)

    if not dataset.current_task == 0:
        raise AssertionError("Test fail")
Example #19
    def load_data(self, data_param, verbose=False):
        # load data
        if data_param['dataset'] == 'old':
            data_path = data_param['data_path'] + data_param['phase'] + '/'
        else:
            data_path = data_param['data_path']
        data = DatasetLoader(data_path=data_path,
                             n_sites=data_param['n_sites'],
                             train_size=data_param['train_size'],
                             val_size=data_param['val_size'],
                             test_size=data_param['test_size'])

        # normalization
        data.normalization(x=data_param['normalize_input'],
                           y=data_param['normalize_output'])

        # convert to torch tensor
        data.torch_tensor(device=data_param['device'])

        # print out
        statistics = \
        """
==========================================================================
Action: load data.
Time: %s
Task Id: %s

Training Inputs:    %s
Training Outputs:   %s
Validation Inputs:  %s
Validation Outputs: %s
Test Inputs:  %s
Test Outputs: %s

X Scaler: %s
Y Scaler: %s
Device:   %s
        Status: Successful
--------------------------------------------------------------------------
            
        """ % \
        (
            self.str_now(),
            self.current_hash,
            data.X_train.shape,
            data.y_train.shape,
            data.X_val.shape,
            data.y_val.shape,
            data.X_test.shape,
            data.y_test.shape,
            data.std_scaler_x,
            data.std_scaler_y,
            data_param['device']
        )

        if verbose:
            print(statistics)

        # write log file
        log_file = open(self.path_log + 'log_' + self.current_hash + '.txt',
                        "a")
        log_file.write(statistics)
        log_file.close()

        return data
Example #20
File: train.py Project: evu/VDCNN
flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)")
flags.DEFINE_integer("num_epochs", 100, "Number of training epochs")
flags.DEFINE_integer(
    "evaluate_every", 500,
    "Evaluate model on validation dataset after this many steps")
flags.DEFINE_float("lr", 0.001, "Learning rate")

FLAGS = flags.FLAGS
FLAGS(sys.argv)
print("\nParameters:")
print("-" * 20)
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr, value.value))
print("")

loader = DatasetLoader(sequence_max_length=FLAGS.sequence_length)


def preprocess():
    # Load data
    print("Loading data...")
    train_data, train_label, test_data, test_label = loader.load_dataset(
        dataset_path=FLAGS.dataset_path, dataset_type=FLAGS.dataset_type)
    print("Loading data succeeded...")

    # Preprocessing steps can go here

    return train_data, train_label, test_data, test_label


def train(x_train, y_train, x_test, y_test):