def test_dataset_formats_with_hours():
    """The lookup prefers the newest timestamp, honoring hour suffixes and dashed formats."""
    keys = {'1:20200312': 2, '1:20200312_0545': 3}
    assert get_dataset_from_key('1', keys) == 3
    # Each (new entry, its value, the value we then expect the lookup to return).
    updates = [
        ('1:20200312_0435', 4, 3),     # earlier hour on the same day: no change
        ('1:20200313', 5, 5),          # a later day wins
        ('1:2020-03-13_23-12', 6, 6),  # dashed date-and-time format is parsed too
    ]
    for key, value, expected in updates:
        keys[key] = value
        assert get_dataset_from_key('1', keys) == expected
def read_config(self, config_params, datasets_index, **kwargs):
    """Configure the task from the config parameters and the datasets index.

    Between the config and the dataset index there is enough information to
    configure the backend and the models, and to initialize the data reader.

    :param config_params: The parsed configuration dict
    :param datasets_index: The index of datasets (file path, JSON string, or dict)
    :return: None
    """
    datasets_index = read_config_file_or_json(datasets_index, 'datasets')
    by_label = index_by_label(datasets_index)
    self.config_params = config_params
    # Snapshot the config before we mutate it below; reporting hooks get this copy.
    reporting_view = deepcopy(config_params)
    base_dir = self.get_basedir()
    if base_dir is not None and not os.path.exists(base_dir):
        logger.info('Creating: %s', base_dir)
        os.makedirs(base_dir)
    self.config_params['train']['basedir'] = base_dir
    # Resolve the GPU count from the environment now so the reader has access to it.
    if self.config_params['model'].get('gpus', -1) == -1:
        self.config_params['model']['gpus'] = len(get_env_gpus())
    self._setup_task(**kwargs)
    self._load_user_modules()
    self.dataset = get_dataset_from_key(self.config_params['dataset'], by_label)
    # Some reporting hooks expect the resolved dataset label rather than the raw key.
    reporting_view['dataset'] = self.dataset['label']
    self._configure_reporting(config_params.get('reporting', {}), config_file=reporting_view, **kwargs)
    self.reader = self._create_task_specific_reader()
def read_config(self, config_params, datasets_index, **kwargs):
    """Configure the task from the config parameters and the datasets index.

    Between the config and the dataset index there is enough information to
    configure the backend and the models, and to initialize the data reader.

    :param config_params: The parsed configuration dict
    :param datasets_index: The index of datasets (file path, JSON string, or dict)
    :return: None
    """
    datasets_index = read_config_file_or_json(datasets_index, 'datasets')
    by_label = index_by_label(datasets_index)
    self.config_params = config_params
    base_dir = self.get_basedir()
    if base_dir is not None and not os.path.exists(base_dir):
        logger.info('Creating: %s', base_dir)
        os.makedirs(base_dir)
    self.config_params['train']['basedir'] = base_dir
    # Resolve the GPU count from the environment now so the reader has access to it.
    if self.config_params['model'].get('gpus', -1) == -1:
        self.config_params['model']['gpus'] = len(get_env_gpus())
    # The original config file path (if any) is threaded through kwargs.
    self.config_file = kwargs.get('config_file')
    self._setup_task(**kwargs)
    self._load_user_modules()
    self._configure_reporting(config_params.get('reporting', {}), **kwargs)
    self.dataset = get_dataset_from_key(self.config_params['dataset'], by_label)
    self.reader = self._create_task_specific_reader()
def test_dataset_handle_no_date():
    """Entries whose suffix is not a parseable date are ignored by the lookup."""
    keys = dict([
        ('prefix:20190405', 'bad'),
        ('prefix:not-a-date', 'bad'),   # un-dated entry must be skipped, not crash
        ('prefix:20190506', 'gold'),
    ])
    assert get_dataset_from_key('prefix', keys) == 'gold'
def test_dataset_hard_match():
    """Only exact prefix-plus-date keys match; extra text before or after disqualifies."""
    keys = dict([
        ('prefix:postfix:20190101', 'bad'),      # extra segment after the prefix
        ('prefix:20190103', 'gold'),
        ('prefix:postfix:20190402', 'bad'),      # extra segment, later date — still no match
        ('pre-prefix:prefix:20190801', 'bad'),   # extra segment before the prefix
    ])
    assert get_dataset_from_key('prefix', keys) == 'gold'
def test_dataset_formats():
    """Exact key match wins; otherwise the latest date (in any supported format) wins.

    An unknown key is allowed to either raise or return None.
    """
    keys = {'1': 1, '1:1978': 7, '2:1996': 2, '2:20190327': 3, '2:2019-03-28': 42}
    # Test exact match first
    assert get_dataset_from_key('1', keys) == 1
    # Test where not exact that we get last date
    assert get_dataset_from_key('2', keys) == 42
    # An unknown key must either raise or return None.  The assert lives in the
    # `else:` clause: the previous bare `except: pass` also swallowed the
    # AssertionError itself, so a wrong non-None return could never fail the test.
    try:
        j = get_dataset_from_key('3', keys)
    except Exception:
        pass
    else:
        assert j is None
def putresult(task, config, log, dataset, user, label, cbase, cstore):
    """Put experiment results in the database, optionally storing model files.

    Provide the task name, config file, the reporting log file, and the dataset
    index file used in the experiment.  When ``cbase`` is given, the model
    checkpoint files are also copied to the persistent store ``cstore`` and the
    experiment record is updated with the stored location.

    :param task: Task name (also used to format the log file path)
    :param config: Path to the experiment config file
    :param log: Log file path template; formatted with the task name
    :param dataset: Path to the dataset index file
    :param user: Username recorded on the experiment
    :param label: Label recorded on the experiment
    :param cbase: Checkpoint base path, or None to skip model storage
    :param cstore: Persistent checkpoint store location
    """
    logf = log.format(task)
    # All three input files must exist before we touch the server.
    for name, path in (('log', logf), ('config', config), ('dataset', dataset)):
        if not os.path.exists(path):
            click.echo(click.style("the {} file at {} doesn't exist, provide a valid location".format(name, path), fg='red'))
            return
    config_obj = read_config_file(config)
    datasets_set = index_by_label(read_config_file(dataset))
    # Resolve the config's dataset key to the concrete dataset label.
    dataset_key = config_obj['dataset']
    dataset_key = get_dataset_from_key(dataset_key, datasets_set)
    config_obj['dataset'] = dataset_key['label']
    ServerManager.get()
    result = ServerManager.api.put_result(task, to_swagger_experiment(task, config_obj, log, username=user, label=label))
    if result.response_type != 'success':
        click.echo(click.style(result.message, fg='red'))
        return
    eid = result.message
    click.echo(click.style('results stored with experiment: {}'.format(result.message), fg='green'))
    if cbase is None:
        return
    # Persist the model checkpoint and attach its location to the experiment.
    result = store_model(checkpoint_base=cbase, config_sha1=hash_config(read_config_file(config)), checkpoint_store=cstore, print_fn=click.echo)
    if result is not None:
        click.echo(click.style('model stored at {}'.format(result), fg='green'))
        update_result = ServerManager.api.update_property(task, eid, prop='checkpoint', value=result)
        if update_result.response_type == 'success':
            click.echo(click.style(update_result.message, fg='green'))
        else:
            click.echo(click.style(update_result.message, fg='red'))
    else:
        # was: 'failed to store model'.format(result) — a no-op format call with
        # no placeholder (and result is None here), so the argument was dropped.
        click.echo(click.style('failed to store model', fg='red'))
def _find_dataset(self, dataset):
    """Resolve *dataset* against the dataset labels the server knows for this task."""
    summary = self.api.task_summary(self.task)
    # get_dataset_from_key wants a mapping; the server only gives us labels,
    # so map each label to itself.
    labels = {label: label for label in summary.summary}
    return get_dataset_from_key(dataset, labels)