def test_train_dataset():
    """A train-set item exposes exactly 4 fields when labels are loaded."""
    parquet_path = load_train_df(
        dataset_dir='/store/tellus/train',
        output='/store/tmp/train.pqt',
    )
    dataset = TellusDataset(
        df=pd.read_parquet(parquet_path),
        has_y=True,
    )
    assert len(dataset[0]) == 4
def test_kfold():
    """Each of the 10 folds keeps positives/negatives strictly separated.

    For every fold dict: `train_pos`/`val_pos` must contain no label-0
    samples and `train_neg`/`val_neg` no label-1 samples (checked on the
    first 100 items of each split), and the fold has exactly 4 keys.
    """
    output = load_train_df(
        dataset_dir='/store/tellus/train',
        output='/store/tmp/train.pqt',
    )
    df = pd.read_parquet(output)
    sets = kfold(df, n_splits=10)

    def count_label(samples, label):
        # Number of samples with the given (unexpected) label among the
        # first 100 items — should always be 0.
        return pipe(
            samples,
            take(100),
            map(lambda x: x['label']),
            filter(lambda x: x == label),
            list,
            len,
        )

    for s in sets:
        assert count_label(s['train_pos'], 0) == 0
        assert count_label(s['val_pos'], 0) == 0
        assert count_label(s['train_neg'], 1) == 0
        assert count_label(s['val_neg'], 1) == 0
        assert len(s) == 4
def test_aug(idx):
    """Render before/after image pairs for sample *idx* to TensorBoard.

    NOTE(review): *idx* has no fixture in view — presumably provided by a
    pytest fixture or parametrization elsewhere; confirm before running.
    Side-effect-only test: it writes image grids, asserts nothing.
    """
    output = load_train_df(
        dataset_dir='/store/tellus/train',
        output='/store/tellus/train.pqt',
    )
    df = pd.read_parquet(output)
    dataset = TellusDataset(
        df=df,
        has_y=True,
    )
    writer = SummaryWriter(f'{config["TENSORBORAD_LOG_DIR"]}/test')
    # id/label are fixed metadata, safe to read once; the image tensors are
    # re-fetched inside add_pair so any per-access augmentation still runs.
    sample = dataset[idx]

    def add_pair(tag, before_key, after_key):
        # Log one grid holding the before/after tensor pair for this sample.
        writer.add_image(
            tag,
            vutils.make_grid(
                pipe(
                    range(1),
                    map(lambda _: dataset[idx]),
                    map(lambda x: [x[before_key], x[after_key]]),
                    concat,
                    list,
                )
            ),
        )

    add_pair(
        f"palser/{sample['id']}/{sample['label']}",
        'palser_before',
        'palser_after',
    )
    add_pair(
        f"landsat/{sample['id']}/{sample['label']}",
        'landsat_before',
        'landsat_after',
    )
def test_test_dataset():
    """Log augmented vs. raw landsat/palsar batches to TensorBoard.

    Side-effect-only check: builds one batch of 8, applies `batch_aug`
    twice per modality (each call may draw fresh random augmentation
    parameters), and writes comparison grids. Asserts nothing.

    NOTE(review): despite the name, this loads the *train* split with
    has_y=True — confirm that is intended.
    """
    output = load_train_df(
        dataset_dir='/store/tellus/train',
        output='/store/tmp/train.pqt',
    )
    df = pd.read_parquet(output)
    dataset = TellusDataset(
        df=df,
        has_y=True,
    )
    loader = DataLoader(
        dataset=dataset,
        batch_size=8,
        shuffle=False,
    )
    sample = pipe(
        loader,
        first,
    )
    aug = Augment()
    writer = SummaryWriter(f'{config["TENSORBORAD_LOG_DIR"]}/test/aug')
    # Fix: the tag strings had a redundant f-prefix with no placeholders.
    writer.add_image(
        "brightness/landsat",
        vutils.make_grid([
            *batch_aug(aug, sample['landsat'], ch=3)[:, 0:3, :, :],
            *batch_aug(aug, sample['landsat'], ch=3)[:, 3:6, :, :],
            *sample['landsat'][:, 0:3, :, :],
            *sample['landsat'][:, 3:6, :, :],
        ]),
    )
    writer.add_image(
        "brightness/palsar",
        vutils.make_grid([
            *batch_aug(aug, sample['palsar'], ch=1)[:, 0:1, :, :],
            *batch_aug(aug, sample['palsar'], ch=1)[:, 1:2, :, :],
            *sample['palsar'][:, 0:1, :, :],
            *sample['palsar'][:, 1:2, :, :],
        ]),
    )
def test_esampler():
    """ChunkSampler yields epoch_size indices per epoch, re-usable 11 times.

    A 100-item Subset is sampled in chunks of epoch_size=10; with
    batch_size=2 each pass over the loader must produce exactly
    epoch_size // batch_size = 5 full batches, and the sampler must keep
    working when iterated more times (11) than the subset holds chunks (10).
    """
    output = load_train_df(
        dataset_dir='/store/tellus/train',
        output='/store/tmp/train.pqt',
    )
    df = pd.read_parquet(output)
    dataset = TellusDataset(
        df=df,
        has_y=True,
    )
    subset = Subset(
        dataset,
        list(range(1500, 1600)),
    )
    epoch_size = 10
    s = ChunkSampler(
        epoch_size=epoch_size,
        len_indices=len(subset),
        shuffle=True,
    )
    batch_size = 2
    train_loader = DataLoader(
        subset,
        sampler=s,
        batch_size=batch_size,
        pin_memory=True,
    )
    # Loop variable is unused — renamed to `_` to make that explicit.
    for _ in range(11):
        samples = pipe(
            train_loader,
            map(lambda x: x['id']),
            filter(lambda x: len(x) == batch_size),
            list,
        )
        assert len(samples) == epoch_size // batch_size