def create_run(experiment, command_name, config_updates=None, log_level=None,
               named_configs=()):
    """Assemble a ``Run`` for ``command_name`` of ``experiment``.

    Config updates and named configs are distributed over the scaffolding,
    every scaffold sets up its config (in scaffolding order) and then its
    seed (in reverse order).  The resulting configuration is handed to a new
    ``Run`` together with the experiment info, host info, the command
    function, the experiment's observers and a logger.  Finally each
    scaffold receives the run via ``finalize_initialization``.

    Returns the created ``Run``.
    """
    scaffolds = create_scaffolding(experiment)
    distribute_config_updates(scaffolds, config_updates)
    distribute_named_configs(scaffolds, named_configs)

    for entry in scaffolds.values():
        entry.set_up_config()

    # Seeds are set up in the opposite order (partially recursive).
    for entry in list(scaffolds.values())[::-1]:
        entry.set_up_seed()

    final_config = get_configuration(scaffolds)
    modifications = get_config_modifications(scaffolds)

    info = experiment._get_info()
    host = get_host_info()
    command = get_command(scaffolds, command_name)

    run_logger = initialize_logging(experiment, scaffolds, log_level)
    new_run = Run(final_config, modifications, command,
                  experiment.observers, run_logger, info, host)

    for entry in scaffolds.values():
        entry.finalize_initialization(run=new_run)

    return new_run
def create_run(experiment, command_name, config_updates=None,
               named_configs=(), force=False):
    """Assemble a ``Run`` for ``command_name`` of ``experiment``.

    Ingredients are gathered in topological order and turned into
    scaffolding.  Named configs are distributed first; then each scaffold
    picks its relevant config updates, gathers fallbacks, sets up its config
    and runs its config hooks (which return the config updates used for the
    following scaffolds).  Seeds are set up in reverse scaffolding order.

    The ``Run`` receives a copy of the experiment's observers, both loggers,
    the pre/post run hooks of all ingredients and the experiment's
    ``captured_out_filter``.  ``run.force`` is set from ``force`` and
    ``run.unobserved`` is copied from the command if it defines it.

    Returns the created ``Run``.
    """
    ingredients = gather_ingredients_topological(experiment)
    scaffolds = create_scaffolding(experiment, ingredients)

    # --------- configuration process -------------------
    distribute_named_configs(scaffolds, named_configs)
    config_updates = convert_to_nested_dict(config_updates or {})
    root_logger, run_logger = initialize_logging(experiment, scaffolds)

    seen_paths = set()
    for entry in scaffolds.values():
        entry.pick_relevant_config_updates(config_updates, seen_paths)
        seen_paths.add(entry.path)
        entry.gather_fallbacks()
        entry.set_up_config()

        # Hooks get the global config as it stands after this scaffold.
        current_config = get_configuration(scaffolds)
        config_updates = entry.run_config_hooks(
            current_config, config_updates, command_name, run_logger)

    # Seeds are set up in the opposite order (partially recursive).
    for entry in list(scaffolds.values())[::-1]:
        entry.set_up_seed()

    final_config = get_configuration(scaffolds)
    modifications = get_config_modifications(scaffolds)

    # ----------------------------------------------------
    info = experiment.get_experiment_info()
    host = get_host_info()
    command = get_command(scaffolds, command_name)

    pre_hooks = []
    for ingredient in ingredients:
        pre_hooks.extend(ingredient.pre_run_hooks)
    post_hooks = []
    for ingredient in ingredients:
        post_hooks.extend(ingredient.post_run_hooks)

    new_run = Run(final_config, modifications, command,
                  copy(experiment.observers), root_logger, run_logger,
                  info, host, pre_hooks, post_hooks,
                  experiment.captured_out_filter)

    if hasattr(command, 'unobserved'):
        new_run.unobserved = command.unobserved

    new_run.force = force

    for entry in scaffolds.values():
        entry.finalize_initialization(run=new_run)

    return new_run
def create_run(experiment, command_name, config_updates=None,
               named_configs=(), force=False):
    """Assemble a ``Run`` for ``command_name`` of ``experiment``.

    Ingredients are gathered in topological order and turned into
    scaffolding.  Named configs are distributed first; then each scaffold
    picks its relevant config updates, gathers fallbacks, sets up its config
    and runs its config hooks (which return the config updates used for the
    following scaffolds).  Seeds are set up in reverse scaffolding order.

    The ``Run`` receives the experiment's observers (passed as-is), both
    loggers and the pre/post run hooks of all ingredients.  ``run.force`` is
    set from ``force`` and ``run.unobserved`` is copied from the command if
    it defines it.

    Returns the created ``Run``.
    """
    ingredients = gather_ingredients_topological(experiment)
    scaffolds = create_scaffolding(experiment, ingredients)

    # --------- configuration process -------------------
    distribute_named_configs(scaffolds, named_configs)
    config_updates = convert_to_nested_dict(config_updates or {})
    root_logger, run_logger = initialize_logging(experiment, scaffolds)

    seen_paths = set()
    for entry in scaffolds.values():
        entry.pick_relevant_config_updates(config_updates, seen_paths)
        seen_paths.add(entry.path)
        entry.gather_fallbacks()
        entry.set_up_config()

        # Hooks get the global config as it stands after this scaffold.
        current_config = get_configuration(scaffolds)
        config_updates = entry.run_config_hooks(
            current_config, config_updates, command_name, run_logger)

    # Seeds are set up in the opposite order (partially recursive).
    for entry in list(scaffolds.values())[::-1]:
        entry.set_up_seed()

    final_config = get_configuration(scaffolds)
    modifications = get_config_modifications(scaffolds)

    # ----------------------------------------------------
    info = experiment.get_experiment_info()
    host = get_host_info()
    command = get_command(scaffolds, command_name)

    pre_hooks = []
    for ingredient in ingredients:
        pre_hooks.extend(ingredient.pre_run_hooks)
    post_hooks = []
    for ingredient in ingredients:
        post_hooks.extend(ingredient.post_run_hooks)

    new_run = Run(final_config, modifications, command,
                  experiment.observers, root_logger, run_logger,
                  info, host, pre_hooks, post_hooks)

    if hasattr(command, 'unobserved'):
        new_run.unobserved = command.unobserved

    new_run.force = force

    for entry in scaffolds.values():
        entry.finalize_initialization(run=new_run)

    return new_run
def test_host_info_decorator():
    """A bare ``@host_info_getter`` registers the function under its own name."""
    key = "greeting"
    try:
        assert key not in host_info_gatherers

        @host_info_getter
        def greeting():
            return "hello"

        assert key in host_info_gatherers
        registered = host_info_gatherers[key]
        assert registered == greeting
        collected = get_host_info()
        assert collected[key] == "hello"
    finally:
        # Remove the entry again so the registry is left as it was found.
        del host_info_gatherers[key]
def test_host_info_decorator():
    """A bare ``@host_info_getter`` registers the function under its own name."""
    key = 'greeting'
    try:
        assert key not in host_info_gatherers

        @host_info_getter
        def greeting():
            return "hello"

        assert key in host_info_gatherers
        registered = host_info_gatherers[key]
        assert registered == greeting
        collected = get_host_info()
        assert collected[key] == 'hello'
    finally:
        # Remove the entry again so the registry is left as it was found.
        del host_info_gatherers[key]
def test_host_info_decorator_with_name():
    """``@host_info_getter(name=...)`` registers under the given name only."""
    custom_key = "foo"
    try:
        assert custom_key not in host_info_gatherers

        @host_info_getter(name="foo")
        def greeting():
            return "hello"

        assert custom_key in host_info_gatherers
        # The function's own name must not be used as a key.
        assert "greeting" not in host_info_gatherers
        registered = host_info_gatherers[custom_key]
        assert registered == greeting
        collected = get_host_info()
        assert collected[custom_key] == "hello"
    finally:
        # Remove the entry again so the registry is left as it was found.
        del host_info_gatherers[custom_key]
def test_host_info_decorator_with_name():
    """``@host_info_getter(name=...)`` registers under the given name only."""
    custom_key = 'foo'
    try:
        assert custom_key not in host_info_gatherers

        @host_info_getter(name='foo')
        def greeting():
            return "hello"

        assert custom_key in host_info_gatherers
        # The function's own name must not be used as a key.
        assert 'greeting' not in host_info_gatherers
        registered = host_info_gatherers[custom_key]
        assert registered == greeting
        collected = get_host_info()
        assert collected[custom_key] == 'hello'
    finally:
        # Remove the entry again so the registry is left as it was found.
        del host_info_gatherers[custom_key]
def create_run(experiment, command_name, config_updates=None,
               named_configs=(), force=False, log_level=None):
    """Assemble a ``Run`` for ``command_name`` of ``experiment``.

    The configuration is built in four phases:

    1. ``config_updates`` are converted to a nested dict and distributed
       over the scaffolding.
    2. Each named config is run on its scaffold; its result is distributed
       as presets and merged into ``config_updates``, which are then
       distributed again.
    3. Every scaffold gathers fallbacks, sets up its config and runs its
       config hooks; the hook results are merged into that scaffold's
       config.
    4. Seeds are set up in reverse scaffolding order.

    The ``Run`` receives a copy of the experiment's observers, both loggers,
    the pre/post run hooks of all ingredients and the experiment's
    ``captured_out_filter``.  ``run.force`` is set from ``force`` and
    ``run.unobserved`` is copied from the command if it defines it.

    Returns the created ``Run``.
    """
    ingredients = gather_ingredients_topological(experiment)
    scaffolds = create_scaffolding(experiment, ingredients)

    # All non-empty scaffold keys split on dots, deepest to shallowest.
    split_keys = (key.split('.') for key in scaffolds if key != '')
    prefixes = sorted(split_keys, key=len, reverse=True)

    # --------- configuration process -------------------
    # Phase 1: Config updates
    config_updates = convert_to_nested_dict(config_updates or {})
    root_logger, run_logger = initialize_logging(experiment, scaffolds,
                                                 log_level)
    distribute_config_updates(prefixes, scaffolds, config_updates)

    # Phase 2: Named Configs
    for named_config in named_configs:
        owner, config_name = get_scaffolding_and_config_name(named_config,
                                                             scaffolds)
        owner.gather_fallbacks()
        named_updates = owner.run_named_config(config_name)
        distribute_presets(prefixes, scaffolds, named_updates)
        for dotted_key, value in iterate_flattened(named_updates):
            target_path = join_paths(owner.path, dotted_key)
            set_by_dotted_path(config_updates, target_path, value)

    distribute_config_updates(prefixes, scaffolds, config_updates)

    # Phase 3: Normal config scopes
    for entry in scaffolds.values():
        entry.gather_fallbacks()
        entry.set_up_config()

        # Hooks get the global config as it stands after this scaffold.
        current_config = get_configuration(scaffolds)
        hook_updates = entry.run_config_hooks(current_config, command_name,
                                              run_logger)
        recursive_update(entry.config, hook_updates)

    # Phase 4: finalize seeding (partially recursive)
    for entry in list(scaffolds.values())[::-1]:
        entry.set_up_seed()

    final_config = get_configuration(scaffolds)
    modifications = get_config_modifications(scaffolds)

    # ----------------------------------------------------
    info = experiment.get_experiment_info()
    host = get_host_info()
    command = get_command(scaffolds, command_name)

    pre_hooks = []
    for ingredient in ingredients:
        pre_hooks.extend(ingredient.pre_run_hooks)
    post_hooks = []
    for ingredient in ingredients:
        post_hooks.extend(ingredient.post_run_hooks)

    new_run = Run(final_config, modifications, command,
                  copy(experiment.observers), root_logger, run_logger,
                  info, host, pre_hooks, post_hooks,
                  experiment.captured_out_filter)

    if hasattr(command, 'unobserved'):
        new_run.unobserved = command.unobserved

    new_run.force = force

    for entry in scaffolds.values():
        entry.finalize_initialization(run=new_run)

    return new_run
def test_get_host_info():
    """``get_host_info`` reports the basic host fields with the expected types."""
    expected_types = (
        ("hostname", str),
        ("cpu", str),
        ("os", (tuple, list)),
        ("python_version", str),
    )
    info = get_host_info()
    for field, accepted in expected_types:
        assert isinstance(info[field], accepted)
def test_get_host_info():
    """``get_host_info`` reports the basic host fields with the expected types."""
    expected_types = (
        ('hostname', basestring),
        ('cpu', basestring),
        ('os', (tuple, list)),
        ('python_version', basestring),
    )
    info = get_host_info()
    for field, accepted in expected_types:
        assert isinstance(info[field], accepted)
def sacred_main(_run: Run, seed, showoff, out_dir, batch_size, epochs, tags, model_desc,
                experiment_id, weights, train_examples, val_examples, deterministic,
                train_datasets, val_datasets, lr, lr_milestones, lr_gamma, optim_algorithm):
    """Train a model for ``epochs`` epochs and return ``_run.result``.

    The model is created from ``model_desc``, or loaded from the checkpoint
    file ``weights`` when that is not None.  Each epoch runs a training pass
    and, if a validation loader exists, a validation pass.  Progress is
    reported through a ``Reporter`` (console, Sacred, and optionally a
    showoff notebook).

    When ``out_dir`` is truthy, the config is written to
    ``<out_dir>/<experiment_id>/config.json``, a checkpoint is saved to
    ``model-latest.pth`` in the same directory after every epoch, and that
    file is attached to the run as an artifact at the end.

    After every epoch ``_run.result`` is set to the first element of
    ``tel['train_pck'].value()``; that attribute is the return value.
    """
    seed_all(seed)
    init_algorithms(deterministic=deterministic)

    exp_out_dir = None
    if out_dir:
        exp_out_dir = path.join(out_dir, experiment_id)
        makedirs(exp_out_dir, exist_ok=True)
    print(f'Experiment ID: {experiment_id}')

    ####
    # Model
    ####

    if weights is None:
        model = create_model(model_desc)
    else:
        details = torch.load(weights)
        # The description stored in the checkpoint replaces the argument.
        model_desc = details['model_desc']
        model = create_model(model_desc)
        model.load_state_dict(details['state_dict'])
    model.to(global_opts['device'])

    print(json.dumps(model_desc, sort_keys=True, indent=2))

    ####
    # Data
    ####

    train_loader = create_train_dataloader(train_datasets, model.data_specs, batch_size, train_examples)
    if len(val_datasets) > 0:
        val_loader = create_val_dataloader(val_datasets, model.data_specs, batch_size, val_examples)
    else:
        # No validation datasets given: validation is skipped entirely.
        val_loader = None

    ####
    # Reporting
    ####

    reporter = Reporter(with_val=(val_loader is not None))

    reporter.setup_console_output()
    reporter.setup_sacred_output(_run)

    notebook = None
    if showoff:
        title = '3D pose model ({}@{})'.format(model_desc['type'], model_desc['version'])
        notebook = create_showoff_notebook(title, tags)
        reporter.setup_showoff_output(notebook)

    def set_progress(value):
        # No-op unless a showoff notebook was created above.
        if notebook is not None:
            notebook.set_progress(value)

    tel = reporter.telemetry

    tel['config'].set_value(_run.config)
    tel['host_info'].set_value(get_host_info())

    ####
    # Optimiser
    ####

    if optim_algorithm == '1cycle':
        from torch import optim
        # lr=0 is only the initial value passed to SGD; make_1cycle receives
        # lr_max=lr (presumably it drives the actual learning rate - confirm).
        optimiser = optim.SGD(model.parameters(), lr=0)
        # epochs * len(train_loader) is presumably the total step count - confirm.
        scheduler = make_1cycle(optimiser, epochs * len(train_loader), lr_max=lr, momentum=0.9)
    else:
        scheduler = learning_schedule(model.parameters(), optim_algorithm, lr, lr_milestones, lr_gamma)

    ####
    # Training
    ####

    model_file = None
    if exp_out_dir:
        model_file = path.join(exp_out_dir, 'model-latest.pth')
        with open(path.join(exp_out_dir, 'config.json'), 'w') as f:
            json.dump(tel['config'].value(), f, sort_keys=True, indent=2)

    for epoch in range(epochs):
        tel['epoch'].set_value(epoch)
        print('> Epoch {:3d}/{:3d}'.format(epoch + 1, epochs))

        def on_train_progress(samples_processed):
            # Overall progress as a fraction of all samples across all epochs.
            # Reads `epoch` from the enclosing loop; it is passed to
            # do_training_pass within the same iteration.
            so_far = epoch * len(train_loader.dataset) + samples_processed
            total = epochs * len(train_loader.dataset)
            set_progress(so_far / total)

        do_training_pass(epoch, model, tel, train_loader, scheduler, on_train_progress)
        # NOTE(review): this is a truthiness test, whereas the Reporter above
        # was configured with `val_loader is not None` - confirm the two are
        # meant to agree for a loader object that evaluates as falsy.
        if val_loader:
            do_validation_pass(epoch, model, tel, val_loader)

        _run.result = tel['train_pck'].value()[0]

        if model_file is not None:
            # Overwrites the same file every epoch.
            state = {
                'state_dict': model.state_dict(),
                'model_desc': model_desc,
                'train_datasets': train_datasets,
                'optimizer': scheduler.optimizer.state_dict(),
                'epoch': epoch + 1,
            }
            torch.save(state, model_file)

        tel.step()

    # Add the final model as a Sacred artifact
    if model_file is not None and path.isfile(model_file):
        _run.add_artifact(model_file)

    set_progress(1.0)
    return _run.result
def create_run(experiment, command_name, config_updates=None,
               named_configs=(), force=False):
    """Assemble a ``Run`` for ``command_name`` of ``experiment``.

    The configuration is built in four phases:

    1. ``config_updates`` are converted to a nested dict and distributed
       over the scaffolding.
    2. Each named config is run on its scaffold; its result is distributed
       as presets and merged into ``config_updates``, which are then
       distributed again.
    3. Every scaffold gathers fallbacks, sets up its config and runs its
       config hooks; the hooks' return value replaces ``config_updates``.
    4. Seeds are set up in reverse scaffolding order.

    The ``Run`` receives a copy of the experiment's observers, both loggers,
    the pre/post run hooks of all ingredients and the experiment's
    ``captured_out_filter``.  ``run.force`` is set from ``force`` and
    ``run.unobserved`` is copied from the command if it defines it.

    Returns the created ``Run``.
    """
    ingredients = gather_ingredients_topological(experiment)
    scaffolds = create_scaffolding(experiment, ingredients)

    # All non-empty scaffold keys split on dots, deepest to shallowest.
    split_keys = (key.split('.') for key in scaffolds if key != '')
    prefixes = sorted(split_keys, key=len, reverse=True)

    # --------- configuration process -------------------
    # Phase 1: Config updates
    config_updates = convert_to_nested_dict(config_updates or {})
    root_logger, run_logger = initialize_logging(experiment, scaffolds)
    distribute_config_updates(prefixes, scaffolds, config_updates)

    # Phase 2: Named Configs
    for named_config in named_configs:
        owner, config_name = get_scaffolding_and_config_name(named_config,
                                                             scaffolds)
        owner.gather_fallbacks()
        named_updates = owner.run_named_config(config_name)
        distribute_presets(prefixes, scaffolds, named_updates)
        for dotted_key, value in iterate_flattened(named_updates):
            target_path = join_paths(owner.path, dotted_key)
            set_by_dotted_path(config_updates, target_path, value)

    distribute_config_updates(prefixes, scaffolds, config_updates)

    # Phase 3: Normal config scopes
    for entry in scaffolds.values():
        entry.gather_fallbacks()
        entry.set_up_config()

        # Hooks get the global config as it stands after this scaffold.
        current_config = get_configuration(scaffolds)
        config_updates = entry.run_config_hooks(
            current_config, config_updates, command_name, run_logger)

    # Phase 4: finalize seeding (partially recursive)
    for entry in list(scaffolds.values())[::-1]:
        entry.set_up_seed()

    final_config = get_configuration(scaffolds)
    modifications = get_config_modifications(scaffolds)

    # ----------------------------------------------------
    info = experiment.get_experiment_info()
    host = get_host_info()
    command = get_command(scaffolds, command_name)

    pre_hooks = []
    for ingredient in ingredients:
        pre_hooks.extend(ingredient.pre_run_hooks)
    post_hooks = []
    for ingredient in ingredients:
        post_hooks.extend(ingredient.post_run_hooks)

    new_run = Run(final_config, modifications, command,
                  copy(experiment.observers), root_logger, run_logger,
                  info, host, pre_hooks, post_hooks,
                  experiment.captured_out_filter)

    if hasattr(command, 'unobserved'):
        new_run.unobserved = command.unobserved

    new_run.force = force

    for entry in scaffolds.values():
        entry.finalize_initialization(run=new_run)

    return new_run
def sacred_main(_run: Run, seed, showoff, batch_size, model_desc, deterministic, train_datasets,
                lr_min, lr_max, max_iters, ema_beta, weight_decay, momentum):
    """Sweep the learning rate and record the smoothed training loss.

    Runs up to ``max_iters`` SGD iterations, one per learning rate in a
    geometric progression from ``lr_min`` to ``lr_max``.  The loss of each
    iteration is smoothed with an exponential moving average (factor
    ``ema_beta``).  The sweep stops early once the smoothed loss exceeds
    four times the smallest smoothed loss seen so far.  Every 10th
    iteration a loss-vs-learning-rate figure is pushed to telemetry.

    The function has no return statement; results are only reported via
    telemetry (and the showoff notebook when ``showoff`` is truthy).
    """
    seed_all(seed)
    init_algorithms(deterministic=deterministic)

    model = create_model(model_desc).to(global_opts['device'])
    # Request max_iters * batch_size examples, i.e. one batch per sweep step.
    data_loader = create_train_dataloader(train_datasets, model.data_specs, batch_size,
                                          examples_per_epoch=(max_iters * batch_size))
    data_iter = iter(data_loader)

    print(json.dumps(model_desc, sort_keys=True, indent=2))

    def do_training_iteration(optimiser):
        # Consumes the next batch from the shared iterator, performs one
        # optimisation step and returns the loss as a Python number.
        batch = next(data_iter)

        in_var = batch['input'].to(global_opts['device'], torch.float32)
        target_var = batch['target'].to(global_opts['device'], torch.float32)
        mask_var = batch['joint_mask'].to(global_opts['device'], torch.float32)

        # Calculate predictions and loss
        out_var = model(in_var)
        loss = forward_loss(model, out_var, target_var, mask_var, batch['valid_depth'])

        # Calculate gradients
        optimiser.zero_grad()
        loss.backward()

        # Update parameters
        optimiser.step()

        return loss.item()

    # lr=1 is a placeholder: the loop below overwrites each param group's
    # 'lr' before every iteration.
    optimiser = SGD(model.parameters(), lr=1, weight_decay=weight_decay, momentum=momentum)

    tel = tele.Telemetry({
        'config': ValueMeter(skip_reset=True),
        'host_info': ValueMeter(skip_reset=True),
        'loss_lr_fig': ValueMeter(),
    })

    notebook = None
    if showoff:
        title = 'Hyperparameter search ({}@{})'.format(model_desc['type'], model_desc['version'])
        notebook = create_showoff_notebook(title, ['lrfinder'])

        from tele.showoff import views

        tel.sink(tele.showoff.Conf(notebook), [
            views.Inspect(['config'], 'Experiment configuration', flatten=True),
            views.Inspect(['host_info'], 'Host information', flatten=True),
            views.FrameContent(['loss_lr_fig'], 'Loss vs learning rate graph', 'plotly'),
        ])

    def set_progress(value):
        # No-op unless a showoff notebook was created above.
        if notebook is not None:
            notebook.set_progress(value)

    tel['config'].set_value(_run.config)
    tel['host_info'].set_value(get_host_info())

    lrs = np.geomspace(lr_min, lr_max, max_iters)
    losses = []
    avg_loss = 0
    min_loss = np.inf
    for i, lr in enumerate(tqdm(lrs, ascii=True)):
        set_progress(i / len(lrs))

        for param_group in optimiser.param_groups:
            param_group['lr'] = lr
        loss = do_training_iteration(optimiser)
        avg_loss = ema_beta * avg_loss + (1 - ema_beta) * loss
        # Dividing by (1 - ema_beta^(i+1)) compensates for avg_loss having
        # started at 0.
        smoothed_loss = avg_loss / (1 - ema_beta ** (i + 1))
        # Stop once the loss has grown well past its best value. On the
        # first iteration min_loss is inf, so this cannot trigger.
        if min_loss > 0 and smoothed_loss > 4 * min_loss:
            break
        min_loss = min(smoothed_loss, min_loss)
        losses.append(smoothed_loss)

        if i % 10 == 0:
            # Plot only the learning rates for which a loss was recorded.
            fig = go.Figure(
                data=[go.Scatter(x=lrs[:len(losses)].tolist(), y=losses, mode='lines')],
                layout=go.Layout(
                    margin=go.Margin(l=60, r=40, b=80, t=20, pad=4),
                    xaxis=go.XAxis(title='Learning rate', type='log', exponentformat='power'),
                    yaxis=go.YAxis(title='Training loss'),
                )
            )
            tel['loss_lr_fig'].set_value(fig)
            tel.step()

    set_progress(1)