def test_report_metrics_to_others(self):
    """Check that tensorboard channel catches errors when model has Inf or NaN
    weights.

    Reports a single ``Stage.OTHERS`` result for a model built by
    ``FCModelWithNanAndInfWts``. Nothing is asserted explicitly: the test
    passes as long as ``report`` returns without raising.
    """
    tensorboard_channel = TensorBoardChannel()
    try:
        # create simple model and optimizers
        model = FCModelWithNanAndInfWts()
        optimizer = optim.SGD(model.parameters(), lr=0.1)

        tensorboard_channel.report(
            stage=Stage.OTHERS,
            epoch=1,
            metrics=0.0,
            model_select_metric=0.0,
            loss=1.0,
            preds=[1],
            targets=[1],
            scores=[1],
            context={},
            meta={},
            model=model,
            optimizer=optimizer,
            log_gradient=False,
            gradients={},
        )
    finally:
        # Close the channel even when report() raises, the same way the
        # workflow entry points close their metric channels.
        tensorboard_channel.close()
def test(context, model_snapshot, test_path, use_cuda, use_tensorboard):
    """Test a trained model snapshot.

    If model-snapshot is provided, the models and configuration will then be
    loaded from the snapshot rather than any passed config file.
    Otherwise, a config file will be loaded.
    """
    # NOTE(review): the docstring text is left untouched because it may be
    # surfaced as command help text; confirm before rewording it.
    #
    # context:         only ``context.obj.load_config()`` is used, and only
    #                  when no snapshot is given.
    # model_snapshot:  snapshot to test; falsy means "take it from the config".
    # test_path:       forwarded unchanged to test_model_from_snapshot_path.
    # use_cuda:        must not be None when model_snapshot is given (raises
    #                  Exception otherwise); overridden by the config when
    #                  no snapshot is given.
    # use_tensorboard: honoured as passed when model_snapshot is given;
    #                  overridden by the config when no snapshot is given.
    if model_snapshot:
        print(f"Loading model snapshot and config from {model_snapshot}")
        if use_cuda is None:
            raise Exception(
                "if --model-snapshot is set --use-cuda/--no-cuda must be set"
            )
    else:
        print("No model snapshot provided, loading from config")
        config = context.obj.load_config()
        model_snapshot = config.save_snapshot_path
        use_cuda = config.use_cuda_if_available
        # `config` is bound on this branch only, so copy the flag into a name
        # that exists on both branches instead of reading `config` later.
        use_tensorboard = config.use_tensorboard
        print(f"Configured model snapshot {model_snapshot}")

    print("\n=== Starting testing...")
    metric_channels = []
    if use_tensorboard:
        metric_channels.append(TensorBoardChannel())
    try:
        test_model_from_snapshot_path(
            model_snapshot, use_cuda, test_path, metric_channels
        )
    finally:
        # Always release the channels, even if testing fails.
        for mc in metric_channels:
            mc.close()
def test(context, model_snapshot, test_path, use_cuda, use_tensorboard, field_names):
    """Test a trained model snapshot.

    If model-snapshot is provided, the models and configuration will then be
    loaded from the snapshot rather than any passed config file.
    Otherwise, a config file will be loaded.
    """
    # NOTE(review): the docstring text is left untouched because it may be
    # surfaced as command help text; confirm before rewording it.
    #
    # _get_model_snapshot hands back the three settings to use for this run,
    # in the order (model_snapshot, use_cuda, use_tensorboard).
    resolved = _get_model_snapshot(
        context, model_snapshot, use_cuda, use_tensorboard
    )
    model_snapshot, use_cuda, use_tensorboard = resolved

    print("\n=== Starting testing...")

    # Zero or one channel, depending on the resolved TensorBoard flag.
    channels = [TensorBoardChannel()] if use_tensorboard else []
    try:
        test_model_from_snapshot_path(
            model_snapshot, use_cuda, test_path, channels, field_names=field_names
        )
    finally:
        # Channels are closed whether or not testing succeeded.
        for channel in channels:
            channel.close()
def prepare_task(
    config: PyTextConfig,
    dist_init_url: Optional[str] = None,
    device_id: int = 0,
    rank: int = 0,
    world_size: int = 1,
    summary_writer: Optional[SummaryWriter] = None,
    metadata: Optional[CommonMetadata] = None,
) -> Task:
    """Build the task for a run, either from a snapshot or from ``config``.

    Args:
        config: Run configuration. Reads ``use_cuda_if_available``,
            ``load_snapshot_path`` and ``task``.
        dist_init_url: When set together with ``world_size > 1``,
            ``dist_init`` is called with it.
        device_id: Forwarded to ``_set_cuda``.
        rank: Forwarded to ``dist_init``.
        world_size: Forwarded to ``dist_init`` and ``_set_cuda``.
        summary_writer: When given, a ``TensorBoardChannel`` wrapping it is
            added to the task's metric reporter.
        metadata: Passed to ``create_task``; must not be ``None`` when
            distributed initialisation is requested.

    Returns:
        The loaded or newly created task.

    Raises:
        AssertionError: If distributed initialisation is requested without
            ``metadata``.
    """
    if dist_init_url and world_size > 1:
        assert metadata is not None
        dist_init(rank, world_size, dist_init_url)

    print("\nParameters: {}\n".format(config))
    _set_cuda(config.use_cuda_if_available, device_id, world_size)

    if config.load_snapshot_path and os.path.isfile(config.load_snapshot_path):
        # load() is unpacked as (task, train_config) by
        # test_model_from_snapshot_path; keep only the task so this function
        # returns a Task rather than the whole tuple.
        task, _ = load(config.load_snapshot_path)
    else:
        task = create_task(config.task, metadata=metadata)

    if summary_writer:
        task.metric_reporter.add_channel(
            TensorBoardChannel(summary_writer=summary_writer)
        )
    return task
def test_model_from_snapshot_path(
    snapshot_path: str,
    use_cuda_if_available: bool,
    test_path: Optional[str] = None,
    summary_writer: Optional[SummaryWriter] = None,
):
    """Load a task from ``snapshot_path`` and run its test step.

    Args:
        snapshot_path: Passed to ``load`` to obtain the task and its
            training config.
        use_cuda_if_available: Forwarded to ``_set_cuda``.
        test_path: Data to test on. When falsy, the test path stored in the
            loaded training config's data handler is used.
        summary_writer: When given, a ``TensorBoardChannel`` wrapping it is
            added to the task's metric reporter before testing.

    Returns:
        A 2-tuple of the value returned by ``task.test`` and the metric
        reporter ``output_path`` from the loaded training config.
    """
    _set_cuda(use_cuda_if_available)
    task, train_config = load(snapshot_path)

    # Fall back to the test set recorded in the snapshot's training config.
    test_path = test_path or train_config.task.data_handler.test_path

    if summary_writer:
        reporter = task.metric_reporter
        reporter.add_channel(TensorBoardChannel(summary_writer=summary_writer))

    test_result = task.test(test_path)
    output_path = train_config.task.metric_reporter.output_path
    return (test_result, output_path)
def train(context):
    """Train a model and save the best snapshot."""
    # NOTE(review): the docstring text is left untouched because it may be
    # surfaced as command help text; confirm before rewording it.
    config = context.obj.load_config()
    print("\n===Starting training...")

    # Zero or one channel, depending on the config's TensorBoard flag.
    channels = [TensorBoardChannel()] if config.use_tensorboard else []
    try:
        if config.distributed_world_size != 1:
            train_model_distributed(config, channels)
        else:
            train_model(config, metric_channels=channels)

        # Testing reuses the same channels and reads the snapshot location
        # from the config's save_snapshot_path.
        print("\n=== Starting testing...")
        test_model_from_snapshot_path(
            config.save_snapshot_path,
            config.use_cuda_if_available,
            test_path=None,
            metric_channels=channels,
        )
    finally:
        # Channels are closed whether training/testing succeeded or not.
        for channel in channels:
            channel.close()
def from_config(cls, config, tensorizers):
    """Create a ``MyTaggingMetricReporter`` with console and TensorBoard channels.

    ``cls`` and ``config`` are accepted but not used. Label names are the
    ``vocab`` of the ``"slots"`` tensorizer, passed through as-is.
    """
    reporting_channels = [ConsoleChannel(), TensorBoardChannel()]
    slot_vocab = tensorizers["slots"].vocab
    return MyTaggingMetricReporter(
        channels=reporting_channels, label_names=slot_vocab
    )
def from_config0(cls, config, vocab):
    """Create a ``MyTaggingMetricReporter`` with console and TensorBoard channels.

    ``cls`` and ``config`` are accepted but not used. ``vocab`` is passed
    through unchanged as the reporter's label names.
    """
    reporting_channels = [ConsoleChannel(), TensorBoardChannel()]
    return MyTaggingMetricReporter(
        channels=reporting_channels, label_names=vocab
    )
def create_metric_reporter(cls, config, tensorizers):
    """Create a ``MyTaggingMetricReporter`` with console and TensorBoard channels.

    ``cls`` and ``config`` are accepted but not used. Label names are a
    ``list`` built from the ``vocab`` of the ``"slots"`` tensorizer.
    """
    reporting_channels = [ConsoleChannel(), TensorBoardChannel()]
    slot_labels = list(tensorizers["slots"].vocab)
    return MyTaggingMetricReporter(
        channels=reporting_channels, label_names=slot_labels
    )