def _test_learn2learning_training_strategies(gpus, accelerator, training_strategy, tmpdir):
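    """Tests that an ImageClassifier fits with the given learn2learn training strategy on a tiny synthetic image dataset."""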
    train_dir = Path(tmpdir / "train")
    train_dir.mkdir()

    (train_dir / "a").mkdir()
    (train_dir / "b").mkdir()

    image_size = (96, 96)
    pa_1 = train_dir / "a" / "1.png"
    pa_2 = train_dir / "a" / "2.png"
    pb_1 = train_dir / "b" / "1.png"
    pb_2 = train_dir / "b" / "2.png"

    _rand_image(image_size).save(pa_1)
    _rand_image(image_size).save(pa_2)
    _rand_image(image_size).save(pb_1)
    _rand_image(image_size).save(pb_2)

    n = 5

    dm = ImageClassificationData.from_files(
        train_files=[str(pa_1)] * n + [str(pa_2)] * n + [str(pb_1)] * n + [str(pb_2)] * n,
        train_targets=[0] * n + [1] * n + [2] * n + [3] * n,
        batch_size=1,
        num_workers=0,
        transform_kwargs=dict(image_size=image_size),
    )

    model = ImageClassifier(
        backbone="resnet18",
        training_strategy=training_strategy,
        training_strategy_kwargs={"ways": dm.num_classes, "shots": 4, "meta_batch_size": 4},
    )

    trainer = Trainer(fast_dev_run=2, gpus=gpus, accelerator=accelerator)
    trainer.fit(model, datamodule=dm)
Example #2
def test_saving_with_serializers(tmpdir):
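    """Tests that the serializer state (class labels) survives a checkpoint save/load round trip."""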

    checkpoint_file = os.path.join(tmpdir, 'tmp.ckpt')

    class CustomModel(Task):
        def __init__(self):
            super().__init__(model=torch.nn.Linear(1, 1),
                             loss_fn=torch.nn.MSELoss())

    serializer = Labels(["a", "b"])
    model = CustomModel()
    trainer = Trainer(fast_dev_run=True)
    data_pipeline = DataPipeline(DefaultPreprocess(), serializer=serializer)
    data_pipeline.initialize()
    model.data_pipeline = data_pipeline
    assert isinstance(model.preprocess, DefaultPreprocess)
    dummy_data = DataLoader(
        list(
            zip(torch.arange(10, dtype=torch.float),
                torch.arange(10, dtype=torch.float))))
    trainer.fit(model, train_dataloader=dummy_data)
    trainer.save_checkpoint(checkpoint_file)
    model = CustomModel.load_from_checkpoint(checkpoint_file)
    assert isinstance(model.preprocess._data_pipeline_state, DataPipelineState)
    assert model.preprocess._data_pipeline_state._state[
        ClassificationState] == ClassificationState(['a', 'b'])
def test_default_strategies(tmpdir):
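    """Tests that an ImageClassifier with the default training strategy can be trained."""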
    num_classes = 10
    ds = DummyDataset()
    model = ImageClassifier(num_classes, backbone="resnet50")

    trainer = Trainer(fast_dev_run=2)
    trainer.fit(model, train_dataloader=DataLoader(ds))
Example #4
def test_pointcloud_object_detection_data(tmpdir):
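    """Tests fitting a PointCloudObjectDetector on the KITTI micro dataset and sanity-checks batches and predictions."""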

    seed_everything(52)

    download_data("https://pl-flash-data.s3.amazonaws.com/KITTI_micro.zip",
                  tmpdir)

    dm = PointCloudObjectDetectorData.from_folders(
        train_folder=join(tmpdir, "KITTI_Micro", "Kitti", "train"))

    class MockModel(PointCloudObjectDetector):
        def training_step(self, batch, batch_idx: int):
            assert isinstance(batch, ObjectDetectBatchCollator)
            assert len(batch.point) == 2
            assert batch.point[0][1].shape == torch.Size([4])
            assert len(batch.bboxes) > 1
            assert batch.attr[0]["name"] in ("000000.bin", "000001.bin")
            assert batch.attr[1]["name"] in ("000000.bin", "000001.bin")

    num_classes = 19
    model = MockModel(backbone="pointpillars_kitti", num_classes=num_classes)
    trainer = Trainer(max_epochs=1, limit_train_batches=1, limit_val_batches=0)
    trainer.fit(model, dm)

    predict_path = join(tmpdir, "KITTI_Micro", "Kitti", "predict")
    model.eval()

    predictions = model.predict([join(predict_path, "scans/000000.bin")])
    assert predictions[0][DefaultDataKeys.INPUT].shape[1] == 4
    assert len(predictions[0][DefaultDataKeys.PREDS]) == 158
Example #5
def test_train(tmpdir):
    """Tests that the model can be trained on our ``DummyDataset``."""
    model = TemplateSKLearnClassifier(num_features=DummyDataset.num_features,
                                      num_classes=DummyDataset.num_classes)
    train_dl = torch.utils.data.DataLoader(DummyDataset(), batch_size=4)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model, train_dl)
Example #6
def test_datapipeline_transformations_overridden_by_task():
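    """Tests that a transform set on the task overrides the one passed to the DataModule."""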
    # define input transforms
    class ImageInput(Input):
        def load_data(self, folder):
            # from folder -> return files paths
            return ["a.jpg", "b.jpg"]

        def load_sample(self, path):
            # from a file path, load the associated image
            return np.random.uniform(0, 1, (64, 64, 3))

    class ImageClassificationInputTransform(InputTransform):
        def per_sample_transform(self) -> Callable:
            return T.Compose([T.ToTensor()])

        def per_batch_transform_on_device(self) -> Callable:
            return T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    class OverrideInputTransform(InputTransform):
        def per_sample_transform(self) -> Callable:
            return T.Compose([T.ToTensor(), T.Resize(128)])

    # define a task which overrides the default transform via its input_transform attribute
    class CustomModel(Task):
        def __init__(self):
            super().__init__(model=torch.nn.Linear(1, 1),
                             loss_fn=torch.nn.MSELoss())

            # override default transform to resize images
            self.input_transform = OverrideInputTransform

        def training_step(self, batch, batch_idx):
            assert batch.shape == torch.Size([2, 3, 128, 128])
            assert torch.max(batch) <= 1.0
            assert torch.min(batch) >= 0.0

        def validation_step(self, batch, batch_idx):
            assert batch.shape == torch.Size([2, 3, 128, 128])
            assert torch.max(batch) <= 1.0
            assert torch.min(batch) >= 0.0

    transform = ImageClassificationInputTransform()
    datamodule = DataModule(
        ImageInput(RunningStage.TRAINING, [1]),
        ImageInput(RunningStage.VALIDATING, [1]),
        transform=transform,
        batch_size=2,
        num_workers=0,
    )

    # call trainer
    model = CustomModel()
    trainer = Trainer(
        max_epochs=1,
        limit_train_batches=2,
        limit_val_batches=1,
        num_sanity_val_steps=1,
    )
    trainer.fit(model, datamodule=datamodule)
Example #7
def test_train(tmpdir):
    """Tests that the model can be trained on a pytorch geometric dataset."""
    tudataset = datasets.TUDataset(root=tmpdir, name="KKI")
    model = GraphClassifier(num_features=tudataset.num_features, num_classes=tudataset.num_classes)
    model.data_pipeline = DataPipeline(preprocess=GraphClassificationPreprocess())
    train_dl = torch.utils.data.DataLoader(tudataset, batch_size=4)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model, train_dl)
Example #8
def test_task_fit(tmpdir: str):
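    """Tests that a ClassificationTask wrapping a plain nn.Sequential can be fitted with train and val dataloaders."""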
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10),
                          nn.LogSoftmax(dim=1))
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    val_dl = torch.utils.data.DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.fit(task, train_dl, val_dl)
Example #9
def test_init_train(tmpdir):
    if os.name == "nt":
        # TODO: huggingface stuff timing out on windows
        pytest.skip("huggingface stuff timing out on windows")
    model = TranslationTask(TEST_BACKBONE)
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model, train_dl)
def test_trainer_fit(tmpdir, callbacks, should_warn):
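    """Tests that fitting warns about a fine-tuning callback only when one is passed to the trainer."""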
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax(dim=1))
    train_dl = DataLoader(DummyDataset())
    val_dl = DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir, callbacks=callbacks)

    if should_warn:
        with pytest.warns(UserWarning, match="trainer is using a fine-tuning callback"):
            trainer.fit(task, train_dl, val_dl)
    else:
        trainer.fit(task, train_dl, val_dl)
Example #11
def test_ort_callback_fails_no_model(tmpdir):
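    """Tests that the ORT callback raises when the module does not expose a single model to wrap."""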
    model = BoringModel()
    trainer = Trainer(default_root_dir=tmpdir,
                      fast_dev_run=True,
                      callbacks=ORTCallback())
    with pytest.raises(MisconfigurationException,
                       match="Torch ORT requires to wrap a single model"):
        trainer.fit(
            model,
            train_dataloader=torch.utils.data.DataLoader(DummyDataset()),
            val_dataloaders=torch.utils.data.DataLoader(DummyDataset()),
        )
Example #12
def test_classification_json(tmpdir):
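    """Tests fitting a SpeechRecognition model on data loaded from a JSON file with "file" and "text" fields."""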
    json_path = json_data(tmpdir)

    data = SpeechRecognitionData.from_json(
        "file",
        "text",
        train_file=json_path,
        num_workers=0,
        batch_size=2,
    )
    model = SpeechRecognition(backbone=TEST_BACKBONE)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model, datamodule=data)
Example #13
def test_transformations(tmpdir):
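    """Tests the per-stage transforms end to end: dataset values, collated batches, and the transform call flags."""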

    transform = TestInputTransform()
    datamodule = DataModule(
        TestInput(RunningStage.TRAINING, [1]),
        TestInput(RunningStage.VALIDATING, [1]),
        TestInput(RunningStage.TESTING, [1]),
        transform=transform,
        batch_size=2,
        num_workers=0,
    )

    assert datamodule.train_dataloader().dataset[0] == (0, 1, 2, 3)
    batch = next(iter(datamodule.train_dataloader()))
    assert torch.equal(batch, torch.tensor([[0, 1, 2, 3, 5], [0, 1, 2, 3, 5]]))

    assert datamodule.val_dataloader().dataset[0] == {"a": 0, "b": 1}
    assert datamodule.val_dataloader().dataset[1] == {"a": 1, "b": 2}
    batch = next(iter(datamodule.val_dataloader()))

    datamodule = DataModule(
        TestInput(RunningStage.TRAINING, [1]),
        TestInput(RunningStage.VALIDATING, [1]),
        TestInput(RunningStage.TESTING, [1]),
        transform=TestInputTransform2,
        batch_size=2,
        num_workers=0,
    )
    batch = next(iter(datamodule.val_dataloader()))
    assert torch.equal(batch["a"], torch.tensor([0, 1]))
    assert torch.equal(batch["b"], torch.tensor([1, 2]))

    model = CustomModel()
    trainer = Trainer(
        max_epochs=1,
        limit_train_batches=2,
        limit_val_batches=1,
        limit_test_batches=2,
        limit_predict_batches=2,
        num_sanity_val_steps=1,
    )
    trainer.fit(model, datamodule=datamodule)
    trainer.test(model, datamodule=datamodule)

    assert datamodule.input_transform.train_per_sample_transform_called
    assert datamodule.input_transform.train_collate_called
    assert datamodule.input_transform.train_per_batch_transform_on_device_called
    assert datamodule.input_transform.val_per_sample_transform_called
    assert datamodule.input_transform.val_collate_called
    assert datamodule.input_transform.val_per_batch_transform_on_device_called
    assert datamodule.input_transform.test_per_sample_transform_called
Example #14
def test_model(coco_instances, backbone, head):
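    """Tests fitting and predicting with an InstanceSegmentation model on a small COCO dataset."""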
    datamodule = InstanceSegmentationData.from_coco(
        train_folder=coco_instances.train_folder,
        train_ann_file=coco_instances.train_ann_file,
        predict_folder=coco_instances.predict_folder,
        transform_kwargs=dict(image_size=(128, 128)),
        batch_size=2,
    )

    assert datamodule.num_classes == 3
    assert datamodule.labels == ["background", "cat", "dog"]

    model = InstanceSegmentation(num_classes=datamodule.num_classes, backbone=backbone, head=head)
    trainer = Trainer(fast_dev_run=True)
    trainer.fit(model, datamodule=datamodule)
    trainer.predict(model, datamodule=datamodule)
Example #15
def test_init_train_enable_ort(tmpdir):
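    """Tests that enabling ORT wraps the model in an ORTModule before training starts."""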
    class TestCallback(Callback):
        def on_train_start(self, trainer: Trainer,
                           pl_module: LightningModule) -> None:
            assert isinstance(pl_module.model, ORTModule)

    model = TextClassifier(2, TEST_BACKBONE, enable_ort=True)
    trainer = Trainer(default_root_dir=tmpdir,
                      fast_dev_run=True,
                      callbacks=TestCallback())
    trainer.fit(
        model,
        train_dataloader=torch.utils.data.DataLoader(DummyDataset()),
        val_dataloaders=torch.utils.data.DataLoader(DummyDataset()),
    )
    trainer.test(model,
                 test_dataloaders=torch.utils.data.DataLoader(DummyDataset()))
Example #16
def test_pointcloud_segmentation_data(tmpdir):
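    """Tests fitting a PointCloudSegmentation model on the SemanticKITTI micro dataset and sanity-checks batch shapes and predictions."""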

    seed_everything(52)

    download_data(
        "https://pl-flash-data.s3.amazonaws.com/SemanticKittiMicro.zip",
        tmpdir)

    datamodule = PointCloudSegmentationData.from_folders(
        train_folder=join(tmpdir, "SemanticKittiMicro", "train"),
        predict_folder=join(tmpdir, "SemanticKittiMicro", "predict"),
        batch_size=4,
    )

    class MockModel(PointCloudSegmentation):
        def training_step(self, batch, batch_idx: int):
            assert batch[DataKeys.INPUT]["xyz"][0].shape == torch.Size(
                [2, 45056, 3])
            assert batch[DataKeys.INPUT]["xyz"][1].shape == torch.Size(
                [2, 11264, 3])
            assert batch[DataKeys.INPUT]["xyz"][2].shape == torch.Size(
                [2, 2816, 3])
            assert batch[DataKeys.INPUT]["xyz"][3].shape == torch.Size(
                [2, 704, 3])
            assert batch[DataKeys.INPUT]["labels"].shape == torch.Size(
                [2, 45056])
            assert batch[DataKeys.INPUT]["labels"].max() == 19
            assert batch[DataKeys.INPUT]["labels"].min() == 0
            assert batch[DataKeys.METADATA][0]["name"] in ("00_000000",
                                                           "00_000001")
            assert batch[DataKeys.METADATA][1]["name"] in ("00_000000",
                                                           "00_000001")

    num_classes = 19
    model = MockModel(backbone="randlanet", num_classes=num_classes)
    trainer = Trainer(max_epochs=1, limit_train_batches=1, limit_val_batches=0)
    trainer.fit(model, datamodule=datamodule)

    predictions = trainer.predict(model, datamodule=datamodule)[0]
    assert predictions[0][DataKeys.INPUT].shape == torch.Size([45056, 3])
    assert predictions[0][DataKeys.PREDS].shape == torch.Size([45056, 19])
    assert predictions[0][DataKeys.TARGET].shape == torch.Size([45056])
Example #17
def test_not_trainable(tmpdir):
    """Tests that the model gives an error when training, validating, or testing."""
    tudataset = datasets.TUDataset(root=tmpdir, name="KKI")
    model = GraphEmbedder(
        GraphClassifier(num_features=1, num_classes=1).backbone)
    datamodule = DataModule(
        GraphClassificationDatasetInput(RunningStage.TRAINING, tudataset),
        GraphClassificationDatasetInput(RunningStage.VALIDATING, tudataset),
        GraphClassificationDatasetInput(RunningStage.TESTING, tudataset),
        transform=GraphClassificationInputTransform,
        batch_size=4,
    )
    trainer = Trainer(default_root_dir=tmpdir, num_sanity_val_steps=0)
    with pytest.raises(NotImplementedError,
                       match="Training a `GraphEmbedder` is not supported."):
        trainer.fit(model, datamodule=datamodule)

    with pytest.raises(NotImplementedError,
                       match="Validating a `GraphEmbedder` is not supported."):
        trainer.validate(model, datamodule=datamodule)

    with pytest.raises(NotImplementedError,
                       match="Testing a `GraphEmbedder` is not supported."):
        trainer.test(model, datamodule=datamodule)
Example #18
def test_data_module():
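    """Tests the DataModule end to end: per-stage inputs and device transforms, compatibility with a plain LightningModule, and stage-specific per-sample transforms."""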
    seed_everything(42)

    def train_fn(data):
        return data - 100

    def val_fn(data):
        return data + 100

    def test_fn(data):
        return data - 1000

    def predict_fn(data):
        return data + 1000

    @dataclass
    class TestTransform(InputTransform):
        def per_sample_transform(self):
            def fn(x):
                return x

            return fn

        def train_per_batch_transform_on_device(self) -> Callable:
            return train_fn

        def val_per_batch_transform_on_device(self) -> Callable:
            return val_fn

        def test_per_batch_transform_on_device(self) -> Callable:
            return test_fn

        def predict_per_batch_transform_on_device(self) -> Callable:
            return predict_fn

    transform = TestTransform()
    assert transform._transform is not None

    train_dataset = Input(RunningStage.TRAINING, np.arange(10,
                                                           dtype=np.float32))
    assert train_dataset.running_stage == RunningStage.TRAINING

    val_dataset = Input(RunningStage.VALIDATING, np.arange(10,
                                                           dtype=np.float32))
    assert val_dataset.running_stage == RunningStage.VALIDATING

    test_dataset = Input(RunningStage.TESTING, np.arange(10, dtype=np.float32))
    assert test_dataset.running_stage == RunningStage.TESTING

    predict_dataset = Input(RunningStage.PREDICTING,
                            np.arange(10, dtype=np.float32))
    assert predict_dataset.running_stage == RunningStage.PREDICTING

    dm = DataModule(
        train_input=train_dataset,
        val_input=val_dataset,
        test_input=test_dataset,
        predict_input=predict_dataset,
        transform=transform,
        batch_size=2,
    )
    assert len(dm.train_dataloader()) == 5
    batch = next(iter(dm.train_dataloader()))
    assert batch.shape == torch.Size([2])
    assert batch.min() >= 0 and batch.max() < 10

    assert len(dm.val_dataloader()) == 5
    batch = next(iter(dm.val_dataloader()))
    assert batch.shape == torch.Size([2])
    assert batch.min() >= 0 and batch.max() < 10

    class TestModel(Task):
        def training_step(self, batch, batch_idx):
            assert sum(batch < 0) == 2

        def validation_step(self, batch, batch_idx):
            assert sum(batch > 0) == 2

        def test_step(self, batch, batch_idx):
            assert sum(batch < 500) == 2

        def predict_step(self, batch, *args, **kwargs):
            assert sum(batch > 500) == 2
            assert torch.equal(batch, torch.tensor([1000.0, 1001.0]))

        def on_train_dataloader(self) -> None:
            pass

        def on_val_dataloader(self) -> None:
            pass

        def on_test_dataloader(self, *_) -> None:
            pass

        def on_predict_dataloader(self) -> None:
            pass

        def on_predict_end(self) -> None:
            pass

        def on_fit_end(self) -> None:
            pass

    model = TestModel(torch.nn.Linear(1, 1))
    trainer = Trainer(fast_dev_run=True)
    trainer.fit(model, datamodule=dm)
    trainer.validate(model, datamodule=dm)
    trainer.test(model, datamodule=dm)
    trainer.predict(model, datamodule=dm)

    # Test that plain lightning module works with FlashDataModule
    class SampleBoringModel(BoringModel):
        def __init__(self):
            super().__init__()
            self.layer = torch.nn.Linear(2, 1)

    model = SampleBoringModel()
    trainer = Trainer(fast_dev_run=True)
    trainer.fit(model, datamodule=dm)
    trainer.validate(model, datamodule=dm)
    trainer.test(model, datamodule=dm)
    trainer.predict(model, datamodule=dm)

    transform = TestTransform()
    input = Input(RunningStage.TRAINING)
    dm = DataModule(train_input=input, batch_size=1, transform=transform)
    assert isinstance(dm.input_transform, TestTransform)

    class RandomDataset(Dataset):
        def __init__(self, size: int, length: int):
            self.len = length
            self.data = torch.ones(length, size)

        def __getitem__(self, index):
            return self.data[index]

        def __len__(self):
            return self.len

    def _add_hundred(x):
        if isinstance(x, Dict):
            x["input"] += 100
        else:
            x += 100
        return x

    class TrainInputTransform(InputTransform):
        def _add_one(self, x):
            if isinstance(x, Dict):
                x["input"] += 1
            else:
                x += 1
            return x

        def per_sample_transform(self) -> Callable:
            return self._add_one

        def val_per_sample_transform(self) -> Callable:
            return _add_hundred

    dm = DataModule(
        train_input=DatasetInput(RunningStage.TRAINING, RandomDataset(64, 32)),
        val_input=DatasetInput(RunningStage.VALIDATING, RandomDataset(64, 32)),
        test_input=DatasetInput(RunningStage.TESTING, RandomDataset(64, 32)),
        batch_size=3,
        transform=TrainInputTransform(),
    )
    batch = next(iter(dm.train_dataloader()))
    assert batch["input"][0][0] == 2
    batch = next(iter(dm.val_dataloader()))
    assert batch["input"][0][0] == 101
    batch = next(iter(dm.test_dataloader()))
    assert batch["input"][0][0] == 2
def test_init_train(tmpdir):
    model = SummarizationTask(TEST_BACKBONE)
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model, train_dl)
parser.add_argument("--submission_path", type=str, required=True)
parser.add_argument("--test_data_path", type=str, required=True)
parser.add_argument("--best_model_path", type=str, required=True)
# Optional
parser.add_argument("--backbone", type=str, default="resnet18")
parser.add_argument("--learning_rate", type=float, default=0.01)
args = parser.parse_args()

datamodule = ImageClassificationData.from_folders(
    train_folder=args.train_data_path,
    batch_size=8,
)

model = ImageClassifier(datamodule.num_classes, backbone=args.backbone)
trainer = Trainer(fast_dev_run=True)
trainer.fit(model, datamodule=datamodule)
trainer.save_checkpoint(args.best_model_path)

datamodule = ImageClassificationData.from_folders(
    predict_folder=args.test_data_path,
    batch_size=8,
)

predictions = Trainer().predict(model, datamodule=datamodule)
submission_data = [{
    "filename": os.path.basename(p["metadata"]["filepath"]),
    "label": torch.argmax(p["preds"]).item()
} for batch in predictions for p in batch]
df = pd.DataFrame(submission_data)
df.to_csv(args.submission_path, index=False)
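
# A hypothetical invocation (the script name and all paths are placeholders):
#   python train_and_submit.py \
#       --train_data_path data/train --test_data_path data/test \
#       --best_model_path best_model.pt --submission_path submission.csv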
Example #21
def test_init_train(tmpdir):
    model = SpeechRecognition(backbone=TEST_BACKBONE)
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model, train_dl)
def test_init_train(tmpdir):
    model = TextClassifier(2, TEST_BACKBONE)
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model, train_dl)