def test_faster_rcnn_train_all(mock_loss, mock_train_one_epoch, config, dataset):
    """test train on all epochs."""
    loss_val = 0.1
    mock_loss.return_value = loss_val
    log_dir = os.path.join(tmp_name, "train")
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    writer = MagicMock()
    # XXX This is just a hot fix to prevent a mysterious folder such as
    # <MagicMock name='mock.logdir' id='140420520377936'> from showing up
    # after running this test.
    writer.logdir = tmp_name
    kfp_writer = MagicMock()
    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        checkpoint_dir=log_dir,
        distributed=False,
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
        logdir="/tmp",
    )
    estimator.writer = writer
    estimator.kfp_writer = kfp_writer
    estimator.checkpointer = checkpointer
    estimator.device = torch.device("cpu")
    checkpointer.save = MagicMock()
    train_dataset = dataset
    val_dataset = dataset
    label_mappings = train_dataset.label_mappings
    is_distributed = False
    train_sampler = FasterRCNN.create_sampler(
        is_distributed=is_distributed, dataset=train_dataset, is_train=True
    )
    val_sampler = FasterRCNN.create_sampler(
        is_distributed=is_distributed, dataset=val_dataset, is_train=False
    )
    train_loader = dataloader_creator(
        config, train_dataset, train_sampler, TRAIN, is_distributed
    )
    val_loader = dataloader_creator(
        config, val_dataset, val_sampler, VAL, is_distributed
    )
    epoch = 0
    estimator.train_loop(
        train_dataloader=train_loader,
        label_mappings=label_mappings,
        val_dataloader=val_loader,
        train_sampler=train_sampler,
    )
    writer.add_scalar.assert_called_with("val/loss", loss_val, epoch)
    mock_train_one_epoch.assert_called_once()
def test_faster_rcnn_save(config):
    """test save model."""
    log_dir = tmp_name + "/train/"
    kfp_writer = MagicMock()
    writer = MagicMock()
    # XXX This is just a hot fix to prevent a mysterious folder such as
    # <MagicMock name='mock.logdir' id='140420520377936'> from showing up
    # after running this test.
    writer.logdir = tmp_name
    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        checkpoint_dir=log_dir,
        distributed=False,
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
        logdir="/tmp",
    )
    estimator.writer = writer
    estimator.kfp_writer = kfp_writer
    estimator.checkpointer = checkpointer
    estimator.device = torch.device("cpu")
    estimator.save(log_dir + "FasterRCNN.estimator")
    assert any(
        name.startswith("FasterRCNN.estimator")
        for name in os.listdir(log_dir)
    )
def test_faster_rcnn_load(config):
    """test load model."""
    ckpt_dir = tmp_name + "/train/FasterRCNN.estimator"
    config.checkpoint_file = ckpt_dir
    log_dir = tmp_name + "/load/"
    config.logdir = log_dir
    kfp_writer = MagicMock()
    writer = SummaryWriter(config.logdir, write_to_disk=True)
    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        checkpoint_dir=log_dir,
        distributed=False,
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
        logdir="/tmp",
    )
    estimator.writer = writer
    estimator.kfp_writer = kfp_writer
    estimator.checkpointer = checkpointer
    estimator.device = torch.device("cpu")
    estimator.load(ckpt_dir)
    assert os.listdir(log_dir)[0].startswith("events.out.tfevents")
def test_faster_rcnn_predict(mock_create, config, dataset):
    """test predict."""
    mock_create.return_value = dataset
    ckpt_dir = tmp_name + "/train/FasterRCNN.estimator"
    config.checkpoint_file = ckpt_dir
    kfp_writer = MagicMock()
    writer = MagicMock()
    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        log_dir=config.system.logdir,
        distributed=config.system["distributed"],
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        device=torch.device("cpu"),
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
    )
    image_size = (256, 256)
    # PIL mode "L" expects 8-bit pixels, so scale the random floats to uint8.
    image = Image.fromarray(
        (np.random.random(image_size) * 255).astype(np.uint8), "L"
    )
    image = torchvision.transforms.functional.to_tensor(image)
    result = estimator.predict(image)
    assert result == []
def test_create_writer_when_checkpoint_dir_gcs():
    mock_gcs_writer = Mock()
    with patch(
        "datasetinsights.io.checkpoint.GCSEstimatorWriter",
        MagicMock(return_value=mock_gcs_writer),
    ):
        writer = EstimatorCheckpoint._create_writer("gs://abucket/path", "def")
        assert writer == mock_gcs_writer
def test_create_writer():
    mock_local_writer = Mock()
    with patch(
        "datasetinsights.io.checkpoint.LocalEstimatorWriter",
        MagicMock(return_value=mock_local_writer),
    ):
        writer = EstimatorCheckpoint._create_writer("/path/to/folder", "abc")
        assert writer == mock_local_writer

    mock_gcs_writer = Mock()
    with patch(
        "datasetinsights.io.checkpoint.GCSEstimatorWriter",
        MagicMock(return_value=mock_gcs_writer),
    ):
        writer = EstimatorCheckpoint._create_writer("gs://abucket/path", "def")
        assert writer == mock_gcs_writer
def test_create_writer_when_checkpoint_dir_local():
    mock_local_writer = Mock()
    with patch(
        "datasetinsights.io.checkpoint.LocalEstimatorWriter",
        MagicMock(return_value=mock_local_writer),
    ):
        with tempfile.TemporaryDirectory() as tmp:
            writer = EstimatorCheckpoint._create_writer(tmp, "abc")
            assert writer == mock_local_writer
def test_create_writer_when_checkpoint_dir_none():
    mock_local_writer = Mock()
    with patch(
        "datasetinsights.io.checkpoint.LocalEstimatorWriter",
        MagicMock(return_value=mock_local_writer),
    ):
        writer = EstimatorCheckpoint._create_writer(
            checkpoint_dir=None, estimator_name="abc"
        )
        assert writer == mock_local_writer
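# The writer tests above pin down a prefix-based dispatch contract for
# EstimatorCheckpoint._create_writer: "gs://" paths get a GCSEstimatorWriter,
# a local directory or None gets a LocalEstimatorWriter, and unsupported
# schemes such as "http://" raise ValueError (see test_create_raises_value_error
# below). The helper below is a minimal sketch of that contract for reference
# only; its name, the constructor signatures, and the error message are
# assumptions, not the library's actual implementation.
def _create_writer_sketch(checkpoint_dir, estimator_name):
    from datasetinsights.io.checkpoint import (
        GCSEstimatorWriter,
        LocalEstimatorWriter,
    )

    if checkpoint_dir is None:
        # No directory given: fall back to a local writer with its defaults.
        return LocalEstimatorWriter(estimator_name)
    if checkpoint_dir.startswith("gs://"):
        return GCSEstimatorWriter(checkpoint_dir, estimator_name)
    if checkpoint_dir.startswith(("http://", "https://")):
        # HTTP(S) locations are read-only sources, not writable checkpoint dirs.
        raise ValueError(f"Cannot use {checkpoint_dir} as a checkpoint directory.")
    return LocalEstimatorWriter(checkpoint_dir, estimator_name)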
def test_faster_rcnn_train_all(
    mock_create, mock_loss, mock_train_one_epoch, config, dataset
):
    """test train on all epochs."""
    loss_val = 0.1
    mock_create.return_value = dataset
    mock_loss.return_value = loss_val
    log_dir = tmp_name + "/train/"
    config.system.logdir = log_dir
    writer = MagicMock()
    kfp_writer = MagicMock()
    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        log_dir=log_dir,
        distributed=config.system["distributed"],
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        device=torch.device("cpu"),
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
    )
    train_dataset = create_dataset(config, TRAIN)
    val_dataset = create_dataset(config, VAL)
    label_mappings = train_dataset.label_mappings
    is_distributed = config.system.distributed
    train_sampler = FasterRCNN.create_sampler(
        is_distributed=is_distributed, dataset=train_dataset, is_train=True
    )
    val_sampler = FasterRCNN.create_sampler(
        is_distributed=is_distributed, dataset=val_dataset, is_train=False
    )
    train_loader = dataloader_creator(
        config, train_dataset, train_sampler, TRAIN
    )
    val_loader = dataloader_creator(config, val_dataset, val_sampler, VAL)
    epoch = 0
    estimator.train_loop(
        train_dataloader=train_loader,
        label_mappings=label_mappings,
        val_dataloader=val_loader,
        train_sampler=train_sampler,
    )
    writer.add_scalar.assert_called_with("val/loss", loss_val, epoch)
    mock_train_one_epoch.assert_called_once()
def create_estimator(
    name,
    config,
    *,
    tb_log_dir=None,
    no_cuda=None,
    checkpoint_dir=None,
    kfp_metrics_dir=const.DEFAULT_KFP_METRICS_DIR,
    kfp_metrics_filename=const.DEFAULT_KFP_METRICS_FILENAME,
    no_val=None,
    **kwargs,
):
    """Create a new instance of an estimator subclass.

    Args:
        name (str): unique identifier for an estimator subclass
        config (dict): parameters specific to each estimator subclass,
            used to create an estimator instance

    Returns:
        an instance of the specified estimator subclass
    """
    estimators_cls = _find_estimator(name)
    # TODO: this loses the tensorboard writer of non-master processes,
    # which could make debugging harder.
    writer = SummaryWriter(tb_log_dir)
    kfp_writer = KubeflowPipelineWriter(
        filename=kfp_metrics_filename,
        filepath=kfp_metrics_dir,
    )
    checkpointer = EstimatorCheckpoint(
        estimator_name=name,
        checkpoint_dir=checkpoint_dir,
        distributed=False,
    )
    return estimators_cls(
        config=config,
        writer=writer,
        kfp_writer=kfp_writer,
        checkpointer=checkpointer,
        logdir=tb_log_dir,
        no_cuda=no_cuda,
        no_val=no_val,
        kfp_metrics_dir=kfp_metrics_dir,
        kfp_metrics_filename=kfp_metrics_filename,
        **kwargs,
    )
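# A hedged usage sketch for create_estimator(). The estimator name
# "FasterRCNN" and the /tmp paths are illustrative assumptions; consult the
# registry consulted by _find_estimator for the actual identifiers.
def _example_create_estimator(config):
    """Illustrative only: wire up an estimator with local log/checkpoint dirs."""
    return create_estimator(
        "FasterRCNN",
        config,
        tb_log_dir="/tmp/tb",
        checkpoint_dir="/tmp/checkpoints",
        no_cuda=True,
        no_val=False,
    )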
def test_get_loader_from_path():
    loader = EstimatorCheckpoint._get_loader_from_path("gs://some/path")
    assert loader == load_from_gcs

    loader = EstimatorCheckpoint._get_loader_from_path("http://some/path")
    assert loader == load_from_http

    loader = EstimatorCheckpoint._get_loader_from_path("https://some/path")
    assert loader == load_from_http

    loader = EstimatorCheckpoint._get_loader_from_path("/path/to/folder")
    assert loader == load_local

    with pytest.raises(ValueError, match=r"Given path:"):
        EstimatorCheckpoint._get_loader_from_path("dfdge")
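# test_get_loader_from_path fixes the loader-selection contract: "gs://" maps
# to load_from_gcs, "http(s)://" to load_from_http, an absolute local path to
# load_local, and anything else raises ValueError("Given path: ..."). The
# function below is a minimal sketch of such a dispatcher for reference; the
# real _get_loader_from_path may be implemented differently.
def _get_loader_from_path_sketch(path):
    if path.startswith("gs://"):
        return load_from_gcs
    if path.startswith(("http://", "https://")):
        return load_from_http
    if os.path.isabs(path) or os.path.isfile(path):
        return load_local
    raise ValueError(f"Given path: {path} is not supported.")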
def test_faster_rcnn_load(mock_create, config, dataset):
    """test load model."""
    mock_create.return_value = dataset
    ckpt_dir = tmp_name + "/train/FasterRCNN.estimator"
    config.checkpoint_file = ckpt_dir
    log_dir = tmp_name + "/load/"
    config.system.logdir = log_dir
    kfp_writer = MagicMock()
    writer = SummaryWriter(config.system.logdir, write_to_disk=True)
    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        log_dir=log_dir,
        distributed=config.system["distributed"],
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        device=torch.device("cpu"),
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
    )
    estimator.load(ckpt_dir)
    assert os.listdir(log_dir)[0].startswith("events.out.tfevents")
def test_faster_rcnn_predict(config, dataset):
    """test predict."""
    checkpoint_file = tmp_name + "/train/FasterRCNN.estimator"
    kfp_writer = MagicMock()
    writer = MagicMock()
    # XXX This is just a hot fix to prevent a mysterious folder such as
    # <MagicMock name='mock.logdir' id='140420520377936'> from showing up
    # after running this test.
    writer.logdir = tmp_name
    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        checkpoint_dir="/tmp",
        distributed=False,
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
        checkpoint_file=checkpoint_file,
        logdir="/tmp",
        no_cuda=True,
    )
    estimator.writer = writer
    estimator.kfp_writer = kfp_writer
    estimator.checkpointer = checkpointer
    estimator.device = torch.device("cpu")
    image_size = (256, 256)
    # PIL mode "L" expects 8-bit pixels, so scale the random floats to uint8.
    image = Image.fromarray(
        (np.random.random(image_size) * 255).astype(np.uint8), "L"
    )
    result = estimator.predict(image)
    assert result == []
def test_faster_rcnn_save(mock_create, config, dataset):
    """test save model."""
    mock_create.return_value = dataset
    log_dir = tmp_name + "/test_save/"
    config.system.logdir = log_dir
    kfp_writer = MagicMock()
    writer = MagicMock()
    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        log_dir=log_dir,
        distributed=config.system["distributed"],
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        device=torch.device("cpu"),
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
    )
    estimator.save(log_dir + "FasterRCNN_test")
    assert any(
        name.startswith("FasterRCNN_test") for name in os.listdir(log_dir)
    )
def test_get_gcs_loader_from_path():
    loader = EstimatorCheckpoint._get_loader_from_path("gs://some/path")
    assert loader == load_from_gcs
def test_get_loader_raises_error():
    filepath = "some/wrong/path"
    with pytest.raises(ValueError, match=r"Given path:"):
        EstimatorCheckpoint._get_loader_from_path(filepath)
def test_create_raises_value_error():
    incorrect_checkpoint_dir = "http://some/path"
    with pytest.raises(ValueError):
        EstimatorCheckpoint._create_writer(incorrect_checkpoint_dir, "abc")
def test_get_http_loader_from_path(filepath):
    loader = EstimatorCheckpoint._get_loader_from_path(filepath)
    assert loader == load_from_http
def test_get_local_loader_from_path():
    file_name = "FasterRCNN.estimator"
    with tempfile.TemporaryDirectory() as tmp:
        with open(os.path.join(tmp, file_name), "w") as f:
            loader = EstimatorCheckpoint._get_loader_from_path(f.name)
            assert loader == load_local
# `args` is required here because the body reads args.gpu and args.rank; the
# original signature omitted it, which would raise NameError.
def run(command, cfg, args):
    if cfg.system.verbose:
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.DEBUG)
    logger.info("Run command: %s with config: %s\n", command, cfg)
    if torch.cuda.is_available() and not cfg.system.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    logdir = cfg.system.logdir
    if logdir == const.NULL_STRING:
        # Use logdir=None to force using SummaryWriter default logdir,
        # which points to ./runs/<model>_<timestamp>
        logdir = None
    # TODO: this loses the tensorboard writer of non-master processes,
    # which could make debugging harder.
    writer = SummaryWriter(logdir, write_to_disk=is_master())
    kfp_writer = KubeflowPipelineWriter(
        filename=cfg.system.metricsfilename,
        filepath=cfg.system.metricsdir,
    )
    checkpointer = EstimatorCheckpoint(
        estimator_name=cfg.estimator,
        log_dir=writer.logdir,
        distributed=cfg.system.distributed,
    )
    estimator = Estimator.create(
        cfg.estimator,
        config=cfg,
        writer=writer,
        kfp_writer=kfp_writer,
        device=device,
        checkpointer=checkpointer,
        gpu=args.gpu,
        rank=args.rank,
    )
    if command == "train":
        estimator.train()
    elif command == "evaluate":
        estimator.evaluate()
    elif command == "download-train":
        # TODO (YC)
        # We should remove references to auth-token in various places to
        # enable downloading synthetic datasets. Usim is working on a
        # solution that will enable customers to specify a cloud storage
        # path to store simulations. In the future, we should simply rely
        # on GCS service accounts to access simulation data for a given
        # run execution id.
        Dataset.create(
            cfg.train.dataset.name,
            data_root=cfg.system.data_root,
            auth_token=cfg.system.auth_token,  # XXX(YC) This should be removed
            **cfg.train.dataset.args,
        )
        Dataset.create(
            cfg.val.dataset.name,
            data_root=cfg.system.data_root,
            auth_token=cfg.system.auth_token,  # XXX(YC) This should be removed
            **cfg.val.dataset.args,
        )
    elif command == "download-evaluate":
        Dataset.create(
            cfg.test.dataset.name,
            data_root=cfg.system.data_root,
            auth_token=cfg.system.auth_token,  # XXX(YC) This should be removed
            **cfg.test.dataset.args,
        )
    writer.close()
    kfp_writer.write_metric()
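# Hedged sketch of a CLI entry point for run(). Only the gpu/rank argument
# names are grounded in what run() reads from `args`; the command choices
# mirror the branches above. load_config() is a hypothetical placeholder,
# since this snippet does not know the project's real config-loading code.
def _example_cli():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "command",
        choices=["train", "evaluate", "download-train", "download-evaluate"],
    )
    parser.add_argument("--gpu", type=int, default=0)
    parser.add_argument("--rank", type=int, default=0)
    args = parser.parse_args()
    cfg = load_config()  # placeholder: build the cfg object run() expects
    run(args.command, cfg, args)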