Example #1
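All five examples appear to come from the same test module and share a common set of imports. A minimal sketch, assuming SummaryWriter comes from tensorboardX and that pipe/map/filter/take/first/concat are the curried variants from toolz; the project-local module paths in the commented lines are assumptions:

import pandas as pd
import pytest
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
from torch.utils.data import DataLoader, Subset
from toolz.curried import concat, filter, first, map, pipe, take

# Project-local helpers; the module paths below are hypothetical:
# from .data import (load_train_df, TellusDataset, kfold,
#                    ChunkSampler, Augment, batch_aug)
# from .config import config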
def test_train_dataset():
    # Build the cached train dataframe, then wrap it in a dataset.
    output = load_train_df(
        dataset_dir='/store/tellus/train',
        output='/store/tmp/train.pqt'
    )
    df = pd.read_parquet(output)
    dataset = TellusDataset(
        df=df,
        has_y=True,
    )
    # Each labelled sample is expected to have four entries.
    assert len(dataset[0]) == 4
Example #2
def test_kfold():
    output = load_train_df(
        dataset_dir='/store/tellus/train',
        output='/store/tmp/train.pqt'
    )
    df = pd.read_parquet(output)
    sets = kfold(df, n_splits=10)
    for s in sets:
        # Positive splits must contain no label-0 rows and negative splits
        # no label-1 rows; only the first 100 samples of each are inspected.
        for key, forbidden_label in [('train_pos', 0), ('val_pos', 0),
                                     ('train_neg', 1), ('val_neg', 1)]:
            assert pipe(
                s[key],
                take(100),
                map(lambda x: x['label']),
                filter(lambda x: x == forbidden_label),
                list,
                len
            ) == 0
        # Each fold carries exactly these four splits.
        assert len(s) == 4
Example #3
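# test_aug takes idx as a parameter; in the original suite it is presumably
# supplied via pytest parametrization. A hypothetical decorator:
@pytest.mark.parametrize('idx', [0])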
def test_aug(idx):
    output = load_train_df(
        dataset_dir='/store/tellus/train',
        output='/store/tellus/train.pqt'
    )
    df = pd.read_parquet(output)

    dataset = TellusDataset(
        df=df,
        has_y=True,
    )

    writer = SummaryWriter(f'{config["TENSORBORAD_LOG_DIR"]}/test')
    # Log the selected sample's before/after PALSAR pair as one image grid
    # ('palser' is the field name the dataset uses).
    writer.add_image(
        f"palser/{dataset[idx]['id']}/{dataset[idx]['label']}",
        vutils.make_grid(
            pipe(range(1),
                 map(lambda x: dataset[idx]),
                 map(lambda x: [
                     x['palser_before'],
                     x['palser_after'],
                 ]),
                 concat,
                 list)
        ),
    )

    # Same for the Landsat pair.
    writer.add_image(
        f"landsat/{dataset[idx]['id']}/{dataset[idx]['label']}",
        vutils.make_grid(
            pipe(range(1),
                 map(lambda x: dataset[idx]),
                 map(lambda x: [
                     x['landsat_before'],
                     x['landsat_after']
                 ]),
                 concat,
                 list)
        ),
    )
Example #4
def test_test_dataset():
    output = load_train_df(dataset_dir='/store/tellus/train',
                           output='/store/tmp/train.pqt')
    df = pd.read_parquet(output)
    dataset = TellusDataset(
        df=df,
        has_y=True,
    )
    loader = DataLoader(
        dataset=dataset,
        batch_size=8,
        shuffle=False,
    )
    # Take only the first batch.
    sample = pipe(
        loader,
        first,
    )
    aug = Augment()

    writer = SummaryWriter(f'{config["TENSORBORAD_LOG_DIR"]}/test/aug')
    # Grid rows: augmented before/after channels, then the originals.
    writer.add_image(
        "brightness/landsat",
        vutils.make_grid([
            *batch_aug(aug, sample['landsat'], ch=3)[:, 0:3, :, :],
            *batch_aug(aug, sample['landsat'], ch=3)[:, 3:6, :, :],
            *sample['landsat'][:, 0:3, :, :],
            *sample['landsat'][:, 3:6, :, :],
        ]),
    )

    writer.add_image(
        "brightness/palsar",
        vutils.make_grid([
            *batch_aug(aug, sample['palsar'], ch=1)[:, 0:1, :, :],
            *batch_aug(aug, sample['palsar'], ch=1)[:, 1:2, :, :],
            *sample['palsar'][:, 0:1, :, :],
            *sample['palsar'][:, 1:2, :, :],
        ]),
    )
Example #5
def test_esampler():
    output = load_train_df(
        dataset_dir='/store/tellus/train',
        output='/store/tmp/train.pqt'
    )
    df = pd.read_parquet(output)
    dataset = TellusDataset(
        df=df,
        has_y=True,
    )
    # Restrict the dataset to 100 indices.
    subset = Subset(
        dataset,
        list(range(1500, 1600))
    )

    epoch_size = 10
    s = ChunkSampler(
        epoch_size=epoch_size,
        len_indices=len(subset),
        shuffle=True,
    )

    batch_size = 2
    train_loader = DataLoader(
        subset,
        sampler=s,
        batch_size=batch_size,
        pin_memory=True,
    )
    # Drain the loader repeatedly; each pass should yield exactly one
    # epoch's worth of full batches: epoch_size // batch_size = 5.
    for i in range(11):
        samples = pipe(
            train_loader,
            map(lambda x: x['id']),
            filter(lambda x: len(x) == batch_size),
            list
        )
        assert len(samples) == epoch_size//batch_size
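Worth noting: with 100 subset indices and epoch_size=10, the sampler can serve 100 // 10 = 10 distinct chunks, so the eleventh pass presumably forces ChunkSampler to reshuffle and start over once the indices are exhausted; the filter on len(x) == batch_size guards the count against any trailing partial batch.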