# NOTE(review): this line is a whitespace-collapsed fragment of a longer
# script. `folds`, `train_ids` and `valid_ids` are read here but assigned
# outside the visible text, and the final DataGenerator call is cut off
# after `full_batch=False,`.
#
# In order, the statements on this line:
#   - build all_ids by concatenating folds[0] through folds[4];
#   - set bad_ids to an empty list, so the two filters that follow keep every
#     element of train_ids and valid_ids (they only rebuild them as lists);
#   - create test_ids and test2_ids as np.arange(40669) and np.arange(20522)
#     (presumably the sizes of two test sets -- TODO confirm);
#   - construct train_data_iterator over train_ids on the 'train-jpg'
#     dataset, using data_prep_function_train and label_prep_function, with
#     full_batch=True, random=True, infinite=True;
#   - begin constructing feat_data_iterator over all_ids on the same dataset,
#     using data_prep_function_valid, with full_batch=False.
all_ids = folds[0] + folds[1] + folds[2] + folds[3] + folds[4] bad_ids = [] train_ids = [x for x in train_ids if x not in bad_ids] valid_ids = [x for x in valid_ids if x not in bad_ids] test_ids = np.arange(40669) test2_ids = np.arange(20522) train_data_iterator = data_iterators.DataGenerator( dataset='train-jpg', batch_size=chunk_size, img_ids=train_ids, p_transform=p_transform, data_prep_fun=data_prep_function_train, label_prep_fun=label_prep_function, rng=rng, full_batch=True, random=True, infinite=True) feat_data_iterator = data_iterators.DataGenerator( dataset='train-jpg', batch_size=chunk_size, img_ids=all_ids, p_transform=p_transform, data_prep_fun=data_prep_function_valid, label_prep_fun=label_prep_function, rng=rng, full_batch=False,
# Train/validation split: the first four folds returned by
# app.make_stratified_split are concatenated for training and the fifth is
# kept for validation.
folds = app.make_stratified_split(no_folds=5)
# Parenthesised so the line means the same under Python 2 and Python 3.
print(len(folds))
train_ids = folds[0] + folds[1] + folds[2] + folds[3]
valid_ids = folds[4]

# Image ids dropped from both subsets (the reason is not recorded here).
bad_ids = [18772, 28173, 5023]
train_ids = [x for x in train_ids if x not in bad_ids]
valid_ids = [x for x in valid_ids if x not in bad_ids]

# Training iterator: full_batch=True, random=True, infinite=True
# (presumably a shuffled, never-ending stream of full chunks -- confirm
# against data_iterators.DataGenerator).
train_data_iterator = data_iterators.DataGenerator(
    dataset='train',
    batch_size=chunk_size,
    img_ids=train_ids,
    p_transform=p_transform,
    data_prep_fun=data_prep_function_train,
    rng=rng,
    full_batch=True,
    random=True,
    infinite=True)

# Validation iterator: same dataset, but fed the validation ids and the
# validation preprocessing function, with full_batch/random/infinite all off.
valid_data_iterator = data_iterators.DataGenerator(
    dataset='train',
    batch_size=chunk_size,
    img_ids=valid_ids,
    p_transform=p_transform,
    data_prep_fun=data_prep_function_valid,
    rng=rng,
    full_batch=False,
    random=False,
    infinite=False)

# Floor division keeps the chunk count an integer on Python 3 as well; for
# integer operands it equals what `/` produced under Python 2.
# NOTE(review): assumes nsamples and chunk_size are both ints -- confirm.
nchunks_per_epoch = train_data_iterator.nsamples // chunk_size
# Upper bound on the number of chunks: 100 epochs' worth.
max_nchunks = nchunks_per_epoch * 100