def hash_parameters(parameters):
    """Return the hash of a set of parameters.

    `parameters` can be given in three forms:
    - a falsy value (e.g. None): hashed as an empty dict,
    - a dict: hashed directly,
    - anything else is treated as a path-like object (it must provide
      `.open()` and `.suffix`) whose content is parsed according to its
      extension: `.yaml` as YAML, `.cde` via `cde.Config`, otherwise JSON.
    """
    if not parameters:
        return make_hash({})
    if isinstance(parameters, dict):
        return make_hash(parameters)

    with parameters.open('r') as handle:
        extension = parameters.suffix
        if extension == '.yaml':
            loaded = yaml.load(handle, Loader=yaml.SafeLoader)
        elif extension == '.cde':
            # imported lazily: only needed for .cde files
            from cde import Config
            loaded = Config.loads(handle.read()).asdict()
        else:
            loaded = json.load(handle)
    return make_hash(loaded)
def load_tuning_search(tuning_search, tuning_search_file):
    """Load the tuning search given either inline or as a file.

    Args:
        tuning_search: JSON string (value of --tuning-search), or a falsy value.
        tuning_search_file: path-like object (value of --tuning-search-file)
            providing `.exists()`, `.open()` and `.suffix`, or a falsy value.

    Returns:
        A `(tuning_search_dict, filetype)` tuple. `filetype` is 'yaml' for
        `.yaml` files, 'cde' for `.cde` files and 'json' otherwise (including
        when no file is given). `tuning_search_dict` is None when neither
        argument is provided.

    Raises:
        SystemExit: with status 1 (after printing an error) when both
            arguments are given, or when the file does not exist.
    """
    if tuning_search and tuning_search_file:
        click.secho('Error: specify only one of --tuning-search or --tuning-search-file', fg='red', err=True)
        # `exit()` is a helper meant for the interactive shell; raise directly
        raise SystemExit(1)

    if not tuning_search_file:
        # we default to json
        tuning_search_dict = json.loads(tuning_search) if tuning_search else None
        return tuning_search_dict, 'json'

    if not tuning_search_file.exists():
        click.secho('Error: could not find the file specified by --tuning-search-file', fg='red', err=True)
        raise SystemExit(1)

    with tuning_search_file.open('r') as f:
        tuning_search = f.read()

    if tuning_search_file.suffix == '.yaml':
        tuning_search_dict = yaml.load(tuning_search, Loader=yaml.SafeLoader)
        filetype = 'yaml'
    elif tuning_search_file.suffix == '.cde':
        from cde import Config
        # Parse the text we already read: the file handle has been consumed
        # (and closed) at this point, so reading from it again cannot work.
        tuning_search_dict = Config.loads(tuning_search).asdict()
        filetype = 'cde'
    else:
        tuning_search_dict = json.loads(tuning_search)
        filetype = 'json'
    return tuning_search_dict, filetype
def iter_parameters(tuning_search=None, filetype='json', extra_parameters=None):
    """Iterate over the tuning configurations described by `tuning_search`.

    Args:
        tuning_search: dict with a 'parameter_search' entry (a mapping, or a
            list of mappings that are each expanded in turn), a 'search_type'
            ('grid' or 'sampler') and optionally 'search_options' (only
            'n_iter' is read here). A falsy value means "no search": a single
            configuration made of the extra parameters is yielded.
        filetype: format used to write each configuration ('json', 'yaml'
            or 'cde').
        extra_parameters: dict of parameters merged into every configuration;
            values coming from the search take precedence.

    Yields:
        `(params_file, params_hash, params)` tuples. `params_file` is located
        under ./configurations/tuning/ and is only written when `params` is
        not empty.

    Raises:
        ValueError: if 'search_type' is neither 'grid' nor 'sampler'.
    """
    extra_params = extra_parameters if extra_parameters else {}
    if not tuning_search:
        tuning_search = {
            'parameter_search': {},
            'search_type': 'grid',
        }

    # A list of searches is handled by recursing on each of them
    if isinstance(tuning_search['parameter_search'], list):
        for param_search in tuning_search['parameter_search']:
            yield from iter_parameters(
                tuning_search={**tuning_search, 'parameter_search': param_search},
                filetype=filetype,
                extra_parameters=extra_parameters,
            )
        return

    parameter_search = tuning_search['parameter_search']
    ## Support for functions/ranges was removed - no one ever used them.

    # `search_options` can be present but empty (None), e.g. in a YAML file
    n_iter = (tuning_search.get('search_options') or {}).get('n_iter')

    if not parameter_search:
        params_iterator = [{}]
    elif tuning_search['search_type'] == 'grid':
        # http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.ParameterSampler.html#sklearn.model_selection.ParameterSampler
        from sklearn.model_selection import ParameterGrid
        params_iterator = ParameterGrid(parameter_search)
    elif tuning_search['search_type'] == 'sampler':
        from sklearn.model_selection import ParameterSampler
        params_iterator = ParameterSampler(parameter_search, n_iter=n_iter)
    else:
        raise ValueError(
            f"Unknown search_type {tuning_search['search_type']!r}: expected 'grid' or 'sampler'"
        )

    for counter, params_ in enumerate(params_iterator):
        if n_iter and counter >= n_iter and n_iter > 0:
            click.secho(
                f"Stopping tuning combination after {n_iter} iterations",
                fg='yellow',
                err=True)
            return

        # the search overrides the extra parameters specified earlier
        params = {**extra_params, **params_}
        # we sort to avoid ordering issues; we want a unique hash per tuning configuration
        params_s = json.dumps(params, sort_keys=True)
        params_hash = make_hash(params)

        working_directory = Path('.')  # can we do something smarter?
        params_file = working_directory / 'configurations' / 'tuning' / make_pretty_tuning_filename(
            params_s, filetype)
        if params:
            params_file.parent.mkdir(parents=True, exist_ok=True)
            with params_file.open('w') as f:
                if filetype == 'json':
                    f.write(params_s)
                elif filetype == 'yaml':
                    yaml.dump(params, f)
                elif filetype == 'cde':
                    from cde import Config
                    config = Config()
                    # This used to reference `config_dict`, a name that is not
                    # defined in this function (NameError for every cde run).
                    config.load_fromdict(params)
                    # NOTE(review): as before, the content is serialized as YAML;
                    # confirm whether a native cde dump is expected here.
                    yaml.dump(params, f)
        yield params_file, params_hash, params
def cli(ctx, platform, configuration, label, tuning, tuning_filepath, dryrun, share, database, input_type, offline):
    """Entrypoint to running your algo, launching batchs..."""
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # surfaced by click as the command's help text - confirm before editing it.
    #
    # This function fills `ctx.obj` with everything downstream commands need:
    # project/user info, the run flags, the batch label, platform, inputs
    # settings, database, configuration(s), tuning parameters and the output
    # directory prefix. It also changes the working directory to the project
    # root, resets the umask and exports the environment variables requested
    # via {ENV: {...}} in configurations or tuning parameters.
    if config_has_error:
        click.secho('Aborting: please first fix the configuration errrors in qatools.yaml', fg='red', err=True, bold=True)
        # `exit()` is a helper meant for the interactive shell
        sys.exit(1)

    # Click passes `ctx.obj` to downstream commands, we can use it as a scratchpad
    # http://click.pocoo.org/6/complex/
    ctx.obj = {}

    # We want all paths to be relative to top-most qatools.yaml
    # it should be located at the root of the git repository
    will_show_help = '-h' in sys.argv or '--help' in sys.argv
    get_command = 'get' in sys.argv
    if root_qatools != Path().resolve() and not will_show_help and not get_command:
        ctx.obj['previous_cwd'] = os.getcwd()
        click.echo(click.style("Working directory changed to: ", fg='cyan') + click.style(str(root_qatools), fg='cyan', bold=True), err=True)
        os.chdir(root_qatools)

    # We want open permissions on outputs and artifacts
    # it makes collaboration among multiple users / automated tools so much easier...
    os.umask(0)

    ctx.obj['project'] = config['project']['name']
    ctx.obj['HOST'] = os.environ.get('HOST', os.environ.get('HOSTNAME'))
    ctx.obj['user'] = user
    ctx.obj['dryrun'] = dryrun
    ctx.obj['share'] = share
    ctx.obj['offline'] = offline
    ctx.obj['commit_ci_dir'] = commit_ci_dir
    # Note: to support multiple databases per project,
    # either use / as database, or somehow we need to hash the db in the output path.
    ctx.obj['raw_batch_label'] = label
    ctx.obj['batch_label'] = label if not share else f"@{user}| {label}"
    ctx.obj['platform'] = platform

    ctx.obj['input_type'] = input_type
    ctx.obj['inputs_settings'] = get_settings(input_type, config)
    ctx.obj['database'] = database if database else get_default_database(ctx.obj['inputs_settings'])
    ctx.obj['configuration'] = configuration if configuration else get_default_configuration(ctx.obj['inputs_settings'])
    ctx.obj['configurations'] = deserialize_config(ctx.obj['configuration'])

    # Tuning parameters come either inline as JSON (`tuning`) or from a file
    # (`tuning_filepath`) parsed according to its extension.
    ctx.obj['extra_parameters'] = {}
    if tuning:
        ctx.obj['extra_parameters'] = json.loads(tuning)
    elif tuning_filepath:
        ctx.obj['tuning_filepath'] = tuning_filepath
        with tuning_filepath.open('r') as f:
            if tuning_filepath.suffix == '.yaml':
                ctx.obj['extra_parameters'] = yaml.load(f, Loader=yaml.SafeLoader)
            elif tuning_filepath.suffix == '.cde':
                from cde import Config
                ctx.obj['extra_parameters'] = Config.loads(f.read()).asdict()
            else:
                ctx.obj['extra_parameters'] = json.load(f)
    # An empty YAML file or a JSON `null` parses to None, which would break
    # the `'ENV' in ...` lookup below: treat it as "no extra parameters".
    if ctx.obj['extra_parameters'] is None:
        ctx.obj['extra_parameters'] = {}

    # batch runs will override this since batches may have different configurations
    ctx.obj['prefix_output_dir'] = make_prefix_outputs_path(
        commit_ci_dir,
        ctx.obj['batch_label'],
        platform,
        ctx.obj['configuration'],
        ctx.obj['extra_parameters'] if tuning else tuning_filepath,
        share,
    )

    # For convenience, we allow users to change environment variables using {ENV: {VAR: value}}
    # in configurations or tuning parameters
    environment_variables = {}
    for c in ctx.obj['configurations']:
        if not isinstance(c, dict):
            continue
        if 'ENV' in c:
            environment_variables.update(c['ENV'])
    if 'ENV' in ctx.obj['extra_parameters']:
        environment_variables.update(ctx.obj['extra_parameters']['ENV'])
    # os.environ only accepts strings, but values parsed from YAML/JSON are
    # often numbers or booleans (e.g. {ENV: {THREADS: 4}}).
    os.environ.update({str(name): str(value) for name, value in environment_variables.items()})

    # we manage stripping ansi color codes ourselves since we redirect std streams
    # to both the original stream and a log file
    ctx.color = True
    # colors in log files colors will be interpreted in the UIs
    ctx.obj['color'] = is_ci or share