def log_metric(self, log_name, x, y=None, timestamp=None, dvc=False):
    if dvc:
        if y is not None:
            dvclive.log(log_name, y, x)
        else:
            dvclive.log(log_name, x)
    self.experiment.log_metric(log_name, x, y, timestamp)
def on_epoch_end(self, epoch: int, logs: dict = None):
    logs = logs or {}
    for metric, value in logs.items():
        # Convert NumPy scalars so dvclive writes plain floats.
        if isinstance(value, np.float32):
            value = float(value)
        dvclive.log(metric, value)
    dvclive.next_step()
def main():
    """Train model and evaluate on test data."""
    torch.manual_seed(0)
    model = ConvNet()

    # Load model.
    if os.path.exists("model.pt"):
        model.load_state_dict(torch.load("model.pt"))

    # Load train and test data.
    mnist_train = torchvision.datasets.MNIST("data", download=True)
    x_train, y_train = transform(mnist_train)
    mnist_test = torchvision.datasets.MNIST("data", download=True, train=False)
    x_test, y_test = transform(mnist_test)

    try:
        # Iterate over training epochs.
        for i in range(1, EPOCHS + 1):
            # Train in batches.
            train_loader = torch.utils.data.DataLoader(
                dataset=list(zip(x_train, y_train)),
                batch_size=512,
                shuffle=True,
            )
            for x_batch, y_batch in train_loader:
                train(model, x_batch, y_batch)
            torch.save(model.state_dict(), "model.pt")

            # Evaluate and checkpoint.
            metrics = evaluate(model, x_test, y_test)
            for metric, value in metrics.items():
                dvclive.log(metric, value)
            dvclive.next_step()
    except KeyboardInterrupt:
        pass
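# Hedged sketch (not from the source): `main()` above assumes helpers named
# `transform`, `train`, and `evaluate` whose definitions are not shown. A
# minimal version compatible with the call sites might look like this; the
# metric name "acc" and the loss/optimizer choices are assumptions.
import torch


def train(model, x, y):
    """One optimization pass over a batch (assumed signature)."""
    # A real script would construct the optimizer once, outside this helper;
    # plain SGD is stateless, so rebuilding it per call still works in a sketch.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()
    optimizer.step()


def evaluate(model, x, y):
    """Return a dict of metric name -> value (metric names assumed)."""
    model.eval()
    with torch.no_grad():
        preds = model(x).argmax(dim=1)
    model.train()
    return {"acc": (preds == y).float().mean().item()}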
def __call__(self, env):
    for eval_result in env.evaluation_result_list:
        metric = eval_result[1]
        value = eval_result[2]
        dvclive.log(metric, value)
    if self.model_file:
        env.model.save_model(self.model_file)
    dvclive.next_step()
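# Hedged usage sketch: the `env` argument with `evaluation_result_list` and
# `env.model.save_model` matches LightGBM's callback protocol. Assuming the
# method above belongs to a callable class named `DvcLiveCallback` (name
# assumed), it would be attached via `lgb.train`:
import lightgbm as lgb
import numpy as np

X = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)
train_data = lgb.Dataset(X, label=y)

lgb.train(
    {"objective": "binary"},
    train_data,
    valid_sets=[train_data],  # callbacks only see metrics for valid_sets
    callbacks=[DvcLiveCallback(model_file="model.txt")],
)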
def after_iteration(self, model, epoch, evals_log):
    for key, values in evals_log[self._metric_data].items():
        if values:
            latest_metric = values[-1]
            dvclive.log(key, latest_metric)
    if self.model_file:
        model.save_model(self.model_file)
    dvclive.next_step()
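# Hedged usage sketch: `after_iteration(self, model, epoch, evals_log)` is the
# xgboost.callback.TrainingCallback interface. Assuming the class is named
# `DVCLiveCallback` and `self._metric_data` holds an eval name set in the
# constructor (both assumptions), usage could look like:
import numpy as np
import xgboost as xgb

X = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)
dtrain = xgb.DMatrix(X, label=y)

xgb.train(
    {"objective": "binary:logistic"},
    dtrain,
    num_boost_round=5,
    evals=[(dtrain, "train")],  # populates evals_log["train"]
    callbacks=[DVCLiveCallback("train", model_file="model.json")],
)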
def after_epoch(self):
    for key, value in zip(self.learn.recorder.metric_names, self.learn.recorder.log):
        key = key.replace("_", "/")
        dvclive.log(key, float(value), self.learn.epoch)
    if self.model_file:
        self.learn.save(self.model_file)
    dvclive.next_step()
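# Hedged usage sketch: `self.learn.recorder` and `self.learn.save` point at
# fastai's Callback API. Assuming the class is named `DvcLiveCallback` (name
# assumed), it is passed through `cbs`; the tiny MNIST sample below is just a
# stand-in dataset.
from fastai.vision.all import ImageDataLoaders, cnn_learner, resnet18, untar_data, URLs

path = untar_data(URLs.MNIST_TINY)
dls = ImageDataLoaders.from_folder(path)
learn = cnn_learner(dls, resnet18, cbs=[DvcLiveCallback(model_file="model")])
learn.fit_one_cycle(1)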
def test_log_reset_with_step_0(tmp_dir):
    for i in range(3):
        dvclive.log("train_m", 1, step=i)

    for i in range(3):
        dvclive.log("val_m", 1, step=i)

    assert read_history("dvclive", "train_m") == ([0, 1, 2], [1, 1, 1])
    assert read_history("dvclive", "val_m") == ([0, 1, 2], [1, 1, 1])
def test_get_step_control_flow(tmp_dir):
    dvclive.init("logs")

    while dvclive.get_step() < 10:
        dvclive.log("i", dvclive.get_step())
        dvclive.next_step()

    steps, values = read_history("logs", "i")
    assert steps == list(range(10))
    assert values == [float(x) for x in range(10)]
def on_epoch_end(self, epoch: int, logs: dict = None):
    logs = logs or {}
    for metric, value in logs.items():
        dvclive.log(metric, value)
    if self.model_file:
        if self.save_weights_only:
            self.model.save_weights(self.model_file)
        else:
            self.model.save(self.model_file)
    dvclive.next_step()
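# Hedged usage sketch: `on_epoch_end` plus `self.model.save(_weights)` is the
# Keras callback interface. Assuming the class is named `DvcLiveCallback` with
# a `save_weights_only` flag (names assumed), it plugs into `model.fit`:
import numpy as np
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer="adam", loss="mse")
model.fit(
    np.random.rand(32, 4),
    np.random.rand(32, 1),
    epochs=2,
    callbacks=[DvcLiveCallback(model_file="model.h5")],
)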
def test_custom_steps(tmp_dir):
    dvclive.init("logs")

    steps = [0, 62, 1000]
    metrics = [0.9, 0.8, 0.7]

    for step, metric in zip(steps, metrics):
        dvclive.log("m", metric, step=step)

    assert read_history("logs", "m") == (steps, metrics)
def test_init_from_env(tmp_dir, summary, html, monkeypatch):
    monkeypatch.setenv(env.DVCLIVE_PATH, "logs")
    monkeypatch.setenv(env.DVCLIVE_SUMMARY, str(int(summary)))
    monkeypatch.setenv(env.DVCLIVE_HTML, str(int(html)))

    dvclive.log("m", 0.1)

    assert dvclive._metric_logger._path == "logs"
    assert dvclive._metric_logger._summary == summary
    assert dvclive._metric_logger._html == html
def test_html(tmp_dir, dvc_repo, html, signal_exists, monkeypatch):
    if dvc_repo:
        from dvc.repo import Repo

        Repo.init(no_scm=True)

    monkeypatch.setenv(env.DVCLIVE_PATH, "logs")
    monkeypatch.setenv(env.DVCLIVE_HTML, str(int(html)))

    dvclive.log("m1", 1)
    dvclive.next_step()

    assert (tmp_dir / ".dvc" / "tmp" / SIGNAL_FILE).is_file() == signal_exists
def test_logging(tmp_dir, summary):
    dvclive.init("logs", summary=summary)

    dvclive.log("m1", 1)

    assert (tmp_dir / "logs").is_dir()
    assert (tmp_dir / "logs" / "m1.tsv").is_file()
    assert not (tmp_dir / "logs.json").is_file()

    dvclive.next_step()
    assert (tmp_dir / "logs.json").is_file() == summary
def test_html(tmp_dir, dvc_repo, html, signal_exists):
    if dvc_repo:
        from dvc.repo import Repo

        Repo.init(no_scm=True)

    dvclive.init("logs", html=html)
    dvclive.log("m1", 1)
    dvclive.next_step()

    assert (tmp_dir / ".dvc" / "tmp" / SIGNAL_FILE).is_file() == signal_exists
def on_epoch_end(self, epoch: int, logs: dict = None):
    logs = logs or {}
    for metric, value in logs.items():
        float_value = value
        if isinstance(value, np.float32):
            float_value = float(value)
        dvclive.log(metric, float_value)
    # psutil reports bytes; divide by 1024**3 to get GiB.
    mem = psutil.virtual_memory().used / 1024 ** 3
    dvclive.log('memory_use_GB', mem)
    mlflow.log_metric('memory_use_GB', mem)
    dvclive.next_step()
def test_get_step_resume(tmp_dir):
    dvclive.init("logs")

    for metric in [0.9, 0.8]:
        dvclive.log("metric", metric)
        dvclive.next_step()

    assert dvclive.get_step() == 2

    dvclive.init("logs", resume=True)
    assert dvclive.get_step() == 2

    dvclive.init("logs", resume=False)
    assert dvclive.get_step() == 0
def on_epoch_end(self, runner) -> None:
    step = runner.stage_epoch_step
    for loader_key, per_loader_metrics in runner.epoch_metrics.items():
        for key, value in per_loader_metrics.items():
            key = key.replace("/", "_")
            dvclive.log(f"{loader_key}/{key}", float(value), step)
    if self.model_file:
        checkpoint = runner.engine.pack_checkpoint(
            model=runner.model,
            criterion=runner.criterion,
            optimizer=runner.optimizer,
            scheduler=runner.scheduler,
        )
        runner.engine.save_checkpoint(checkpoint, self.model_file)
    dvclive.next_step()
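# Hedged usage sketch: `runner.epoch_metrics` and `runner.engine.pack_checkpoint`
# match Catalyst's Callback/IRunner API. Assuming the class is named
# `DVCLiveCallback` (name assumed), it is passed to `runner.train`:
import torch
from torch.utils.data import DataLoader, TensorDataset
from catalyst import dl

model = torch.nn.Linear(4, 2)
loader = DataLoader(
    TensorDataset(torch.rand(32, 4), torch.randint(0, 2, (32,))),
    batch_size=8,
)

runner = dl.SupervisedRunner()
runner.train(
    model=model,
    criterion=torch.nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam(model.parameters()),
    loaders={"train": loader, "valid": loader},
    num_epochs=2,
    callbacks=[DVCLiveCallback(model_file="model.pth")],
)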
def test_nested_logging(tmp_dir):
    dvclive.init("logs", summary=True)

    dvclive.log("train/m1", 1)
    dvclive.log("val/val_1/m1", 1)

    assert (tmp_dir / "logs").is_dir()
    assert (tmp_dir / "logs" / "train").is_dir()
    assert (tmp_dir / "logs" / "val" / "val_1").is_dir()

    assert (tmp_dir / "logs" / "train" / "m1.tsv").is_file()
    assert (tmp_dir / "logs" / "val" / "val_1" / "m1.tsv").is_file()

    dvclive.next_step()

    _, summary = read_logs("logs")
    assert summary["train"]["m1"] == 1
    assert summary["val"]["val_1"]["m1"] == 1
def test_continue(tmp_dir, resume, steps, metrics):
    dvclive.init("logs")

    for metric in [0.9, 0.8]:
        dvclive.log("metric", metric)
        dvclive.next_step()

    assert read_history("logs", "metric") == ([0, 1], [0.9, 0.8])
    assert read_latest("logs", "metric") == (1, 0.8)

    dvclive.init("logs", resume=resume)

    for new_metric in [0.7, 0.6]:
        dvclive.log("metric", new_metric)
        dvclive.next_step()

    assert read_history("logs", "metric") == (steps, metrics)
    assert read_latest("logs", "metric") == (last(steps), last(metrics))
def test_cleanup(tmp_dir, summary, html):
    dvclive.init("logs", summary=summary)
    dvclive.log("m1", 1)
    dvclive.next_step()

    if html:
        (tmp_dir / "logs.html").touch()
    (tmp_dir / "logs" / "some_user_file.txt").touch()

    assert (tmp_dir / "logs" / "m1.tsv").is_file()
    assert (tmp_dir / "logs.json").is_file() == summary
    assert (tmp_dir / "logs.html").is_file() == html

    dvclive.init("logs")

    assert (tmp_dir / "logs" / "some_user_file.txt").is_file()
    assert not (tmp_dir / "logs" / "m1.tsv").is_file()
    assert not (tmp_dir / "logs.json").is_file()
    assert not (tmp_dir / "logs.html").is_file()
def main():
    """Train model and evaluate on test data."""
    torch.manual_seed(0)
    model = ConvNet()

    # Load model.
    if os.path.exists("model.pt"):
        model.load_state_dict(torch.load("model.pt"))

    # Load train and test data.
    mnist_train = torchvision.datasets.MNIST("data", download=True)
    x_train, y_train = transform(mnist_train)
    mnist_test = torchvision.datasets.MNIST("data", download=True, train=False)
    x_test, y_test = transform(mnist_test)

    # Iterate over training epochs.
    for i in range(1, EPOCHS + 1):
        train(model, x_train, y_train)
        torch.save(model.state_dict(), "model.pt")

        # Evaluate and checkpoint.
        metrics = evaluate(model, x_test, y_test)
        for metric, value in metrics.items():
            dvclive.log(metric, value)
        dvclive.next_step()
def test_infer_next_step(tmp_dir, mocker):
    dvclive.init("logs")
    m = mocker.spy(dvclive.metrics.MetricLogger, "next_step")

    dvclive.log("m1", 1.0)
    dvclive.log("m1", 2.0)
    dvclive.log("m1", 3.0)

    assert read_history("logs", "m1") == ([0, 1, 2], [1.0, 2.0, 3.0])
    assert m.call_count == 2
def test_get_step_custom_steps(tmp_dir):
    dvclive.init("logs")

    steps = [0, 62, 1000]
    metrics = [0.9, 0.8, 0.7]

    for step, metric in zip(steps, metrics):
        dvclive.log("x", metric, step=step)
        assert dvclive.get_step() == step

        dvclive.log("y", metric, step=step)
        assert dvclive.get_step() == step

        dvclive.log("z", metric)
        assert dvclive.get_step() == step

    # Read back each logged metric, not just "x".
    for metric in ["x", "y", "z"]:
        assert read_history("logs", metric) == (steps, metrics)
                          collate_fn=collator)
valid_loader = DataLoader(valid_subset, batch_size=256,
                          collate_fn=collator)

model = SimpleConv(n_channels=1, n_classes=1)
criterion = torch.nn.BCEWithLogitsLoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

for n in range(nepochs):
    # Training pass.
    global_loss = 0.0
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        y = model(batch['light_curve'])
        loss = criterion(y.squeeze(1), batch['label'].float())
        loss.backward()
        optimizer.step()
        global_loss += loss.item()
    dvclive.log('train/loss', global_loss / len(train_subset))

    # Validation pass; no_grad avoids building the autograd graph.
    global_loss = 0.0
    model.eval()
    with torch.no_grad():
        for batch in valid_loader:
            y = model(batch['light_curve'])
            loss = criterion(y.squeeze(1), batch['label'].float())
            global_loss += loss.item()
    dvclive.log('valid/loss', global_loss / len(valid_subset))

    dvclive.next_step()

torch.save(model, model_path)
def test_no_init(tmp_dir):
    dvclive.log("m", 0.1)

    assert os.path.isdir("dvclive")
def test_fail_on_conflict(tmp_dir, monkeypatch):
    dvclive.init("some_dir")
    monkeypatch.setenv(env.DVCLIVE_PATH, "logs")

    with pytest.raises(ConfigMismatchError):
        dvclive.log("m", 0.1)
def test_invalid_metric_type(tmp_dir, invalid_type):
    with pytest.raises(
        InvalidMetricTypeError,
        match=f"Metrics 'm' has not supported type {type(invalid_type)}",
    ):
        dvclive.log("m", invalid_type)
def log(self, runner):
    tags = self.get_loggable_tags(runner)
    if tags:
        # The iteration is the same for every tag; look it up once.
        step = self.get_iter(runner)
        for k, v in tags.items():
            dvclive.log(k, v, step=step)
def on_epoch_end(self, epoch: int, logs: dict = None):
    logs = logs or {}
    for metric, value in logs.items():
        dvclive.log(metric, value)
    dvclive.next_step()