Example #1
import os

import plotly.graph_objects as go


def plot_comparison(result_dict: dict,
                    metric_name: str,
                    num_samples: int,
                    width: int,
                    height: int,
                    out_path: str,
                    scatter_params: dict = None,
                    layout_params: dict = None):
    # Avoid mutable default arguments: fall back to fresh empty dicts.
    scatter_params = scatter_params or {}
    layout_params = layout_params or {}
    # Caller-supplied scatter options override the line-width default.
    p = deep_merge(dict(line=dict(width=1.0)), scatter_params)
    fig = go.Figure()
    for name, data in result_dict.items():
        x = data[:, 0]
        y = data[:, 1]
        fig.add_trace(go.Scatter(x=x, y=y, mode='lines', name=name, **p))
    params = dict(
        title=f'{metric_name.title()} Comparison ({num_samples} samples)',
        width=width,
        height=height,
        xaxis_title="Epoch",
        yaxis_title=metric_name.title(),
        font=dict(size=18))
    params = deep_merge(params, layout_params)
    fig.update_layout(**params)
    out_dir = os.path.dirname(out_path)
    if out_dir:
        # exist_ok avoids a crash if the directory already exists.
        os.makedirs(out_dir, exist_ok=True)
    fig.write_image(out_path)
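
All of these examples rely on a project-local deep_merge helper that is not shown on this page. As a point of reference, a minimal sketch of such a recursive dict merge might look like the following; the project's actual semantics (e.g. how lists are handled) may differ:

def deep_merge(base: dict, override: dict) -> dict:
    # Sketch under assumption: values from `override` win, and nested
    # dicts are merged recursively rather than replaced wholesale.
    result = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(result.get(key), dict):
            result[key] = deep_merge(result[key], value)
        else:
            result[key] = value
    return result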
Example #2
def hparam_search(config: dict, run_args: dict):
    import numpy as np
    import ray
    from ray import tune
    run_config = load_config(config['experiment'])
    run_config = generate_run_config(run_config)
    run_config['trainer_params'] = deep_merge(
        run_config['trainer_params'].copy(),
        {
            'max_steps': config['num_train_steps'],
            #'val_check_interval': config['num_train_steps'],
            'limit_val_batches': config['num_val_steps'],
            #'log_every_n_steps': 1,
            #'max_epochs': config.get('num_epochs', 1),
        })
    if config.get('randomize_seed', False):
        print('Warning: randomizing seed for each trial')
        run_config['manual_seed'] = tune.sample_from(
            lambda spec: np.random.randint(0, 64_000))
    ray.init(num_gpus=1)
    print("ray.get_gpu_ids(): {}".format(ray.get_gpu_ids()))
    analysis = tune.run(
        tune.with_parameters(experiment_main,
                             run_args=dict(**run_args, enable_tune=True)),
        name=run_config['entrypoint'],
        config=run_config,
        local_dir=run_args['save_dir'],
        num_samples=config['num_samples'],
        resources_per_trial=deep_merge({
            'cpu': 6,
            'gpu': 1,
        }, config.get('resources_per_trial', {})),
    )
    metric = config.get('metric', 'val/loss')
    scope = config.get('scope', 'last')
    best_config = get_best_config(analysis=analysis,
                                  metric=metric,
                                  scope=scope)
    # Restore original trainer_params, which were overridden
    # so the hparam search is shorter than a full experiment.
    best_config['trainer_params'] = config['trainer_params']

    print('Best config:')
    print(best_config)

    experiment_main(best_config, run_args)
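
get_best_config is another helper not shown here. Assuming Ray Tune's ExperimentAnalysis API (which tune.run returns), a thin wrapper could look roughly like this; mode='min' is an assumption that fits the 'val/loss' default metric:

def get_best_config(analysis, metric: str, scope: str) -> dict:
    # Hypothetical wrapper around Ray Tune's ExperimentAnalysis.
    # mode='min' assumes a loss-like metric; the real helper may
    # instead read the optimization mode from its config.
    return analysis.get_best_config(metric=metric, mode='min', scope=scope)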
Example #3

import os
from typing import List, Union

import yaml


def load_config(path: Union[str, List[str]]) -> Union[dict, List[dict]]:
    paths = path if isinstance(path, list) else [path]
    result = {}
    for path in paths:
        if os.path.isdir(path):
            # If a directory is passed, it's the same as having
            # a yaml with a 'series' list of all the files in
            # the directory.
            configs = []
            for f in os.listdir(path):
                if f == 'include':  # os.listdir already yields basenames
                    # Include folders do not contain any files
                    # that can be executed directly.
                    continue
                fp = os.path.join(path, f)
                if not os.path.isdir(fp) and not f.endswith('.yaml'):
                    # Ignore non-yaml files like README.md
                    continue
                fc = load_config(fp)
                if isinstance(fc, list) and len(fc) == 0:
                    # Exclude empty directories
                    continue
                configs.append(fc)
            return configs
        with open(path, 'r') as f:
            config = yaml.safe_load(f)
        if 'include' in config:
            # Recursively deep merge all the includes
            includes = config['include']
            if not isinstance(includes, list):
                includes = [includes]
            merged = {}
            for include in includes:
                merged = deep_merge(merged, load_config(include))
            # Merge this config file in last
            config = deep_merge(merged, config)
            # Remove the include directive now that the merge has occurred
            del config['include']
        result = deep_merge(result, config)
    return result
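
Note the merge order: included files are merged first, in order, and the including file is merged last, so its values win. A small demonstration with hypothetical in-memory dicts standing in for YAML files on disk:

# Hypothetical configs; on disk these would be two YAML files.
base = {'trainer_params': {'max_epochs': 100, 'gpus': 1}}  # included file
override = {'trainer_params': {'max_epochs': 10}}          # including file

merged = deep_merge(base, override)
assert merged == {'trainer_params': {'max_epochs': 10, 'gpus': 1}}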
Example #4

    def val_dataloader(self):
        # Start from the training dataset params; validation
        # overrides take precedence for keys set in both.
        ds_params = deep_merge(
            self.params['data'].get('training', {}).copy(),
            self.params['data'].get('validation', {}))
        dataset = get_dataset(self.params['data']['name'], ds_params)

        self.sample_dataloader = DataLoader(dataset,
                                            batch_size=self.params['batch_size'],
                                            shuffle=False,
                                            **self.params['data'].get('loader', {}))
        self.num_val_imgs = len(self.sample_dataloader)  # number of batches
        n = len(dataset)
        # Pre-draw random sample indices for each configured plot.
        self.val_indices = [torch.randint(low=0,
                                          high=n,
                                          size=(plot['batch_size'], 1)).squeeze()
                            for plot in self.plots]
        return self.sample_dataloader
Example #5

    def val_dataloader(self):
        ds_params = deep_merge(self.params['data'].get('training', {}).copy(),
                               self.params['data'].get('validation', {}))
        dataset = get_dataset(self.params['data']['name'],
                              ds_params,
                              split=self.params['data'].get('split', None),
                              safe=self.params['data'].get('safe', True),
                              train=False)
        params = self.params['data'].get('loader', {}).copy()
        if 'balanced' in self.params['data']:
            # Swap in a class-balanced sampler for the default one.
            params['sampler'] = balanced_sampler(
                dataset, **self.params['data']['balanced'])
        self.sample_dataloader = DataLoader(
            dataset,
            batch_size=self.params['batch_size'],
            shuffle=False,
            **params)
        self.num_val_imgs = len(self.sample_dataloader)
        self.val_batches = self.get_val_batches(dataset)
        return self.sample_dataloader
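
balanced_sampler is not defined in this excerpt. A common implementation is inverse-frequency class weighting via torch's WeightedRandomSampler; a minimal sketch under that assumption (the real helper's signature and options may differ):

from collections import Counter

import torch
from torch.utils.data import WeightedRandomSampler


def balanced_sampler(dataset, num_samples=None):
    # Hypothetical sketch: weight each sample by the inverse frequency
    # of its class so every class is drawn roughly equally often.
    labels = [int(dataset[i][1]) for i in range(len(dataset))]
    counts = Counter(labels)
    weights = torch.tensor([1.0 / counts[y] for y in labels],
                           dtype=torch.double)
    return WeightedRandomSampler(weights,
                                 num_samples=num_samples or len(labels),
                                 replacement=True)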