def parallel_job_execution(script_func, jobs, num_jobs=1):
    """
    Takes a list of jobs, parsed from a .yml file with structure as follows::

        script: name of script in scripts/ folder
        config: path/to/yml/config.yml
        run_in: 'host' or 'container' (default: host)
        num_gpus: how many gpus (default: 0)
        blocking: whether to block on this job or not (default: false)

    Could also be multiple jobs::

        num_jobs: how many jobs to run in parallel (default: 1)
        jobs:
          - script: script1.py
            config: config1.yml
          - script: script2.py
            config: config2.yml
          ...

    The jobs get executed in parallel, up to num_jobs at a time.

    Args:
        script_func (function): Function to run for each job, taking the job's
            keyword arguments.
        jobs (list): List of dictionaries, each containing the keyword
            arguments for one job.
        num_jobs (int): Maximum number of jobs to run in parallel. Capped at
            the machine's CPU count. Defaults to 1.
    """
    num_jobs = min(cpu_count(), num_jobs)
    logging.info(f"\n Executing scripts with num_jobs: {num_jobs}")
    pool = ScriptRunnerPool(max_workers=num_jobs)
    pool.submit(jobs)
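# Usage sketch (illustrative, not from the repository): `run_script` stands in
# for a real scripts/ entry point, and the job dicts mirror the YAML structure
# described in the docstring above.
def run_script(config=None, **kwargs):
    print(f'Running with {config}')

if __name__ == '__main__':
    jobs = [
        {'config': 'config1.yml'},
        {'config': 'config2.yml'},
    ]
    # Runs both jobs concurrently, capped at the machine's CPU count.
    parallel_job_execution(run_script, jobs, num_jobs=2)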
def manage_superviser():
    global state
    if state.is_running():
        if superviser.start():
            status_socket.set_starttime(utils.get_curr_time())
    else:
        superviser.stop()
        turn_off_fan_f()
        turn_off_heating_f()
        mylogger.info("Superviser stopped successfully")
def run_job():
    job = request.json
    try:
        if check_auth(job['auth']) == 'Error':
            # Check the auth token; if it fails, return a 403 and end.
            return response_build(403, {'error': 'Authorization incorrect'})
        logging.debug(job)
        del job['auth']  # Remove auth so the sent data passes the data check.
        logging.debug('passed auth')
        db_playbook = db.db_lookup(job['name'])  # Data check.
        logging.debug('passed name lookup')
        if db_playbook != 'Error':
            # Parse the dict into a command; returns the command and password.
            ans_command, password = dict_mgm.make_play(job, db_playbook, location)
            if ans_command != 'Error':
                items = db.db_outputid()
                db.db_stdoutinput('No current stdout')
                task = run_command.delay(ans_command, password, items)
                return response_build(202, {'taskid': items})
            else:
                # Handles a data-check error.
                logging.info('[X] Data submitted does not match corresponding blueprint in db')
                return response_build(400, {'error': 'Data submitted does not match corresponding blueprint in db'})
        else:
            logging.info('[X] No playbook by that name in the database')
            return response_build(400, {'error': 'No playbook by that name in the database'})
    except KeyError:
        # Missing name or auth field.
        logging.info('[X] No name or auth contained in request')
        return response_build(400, {'error': 'No name or auth contained in request'})
    except IndexError:
        # Empty dict in request.
        logging.info('[X] Empty dict in params section of request')
        return response_build(400, {'error': 'Empty dict in params section of request'})
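# A hedged sketch of a client request to this endpoint. The route, token,
# playbook name, and params are all hypothetical; only the 'auth'/'name'
# fields are implied by run_job() above.
import requests

if __name__ == '__main__':
    resp = requests.post(
        'http://localhost:5000/run',
        json={
            'auth': 'my-token',
            'name': 'deploy_web',
            'params': {'host': 'web01'},
        },
    )
    print(resp.status_code)  # 202 with {'taskid': ...} on success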
def update_config_with_sweep(config, sweep, combo):
    """
    Updates a configuration with a sweep. The experiment configuration is
    updated using the sweep and combo. The sweep contains every key that needs
    to be updated in the configuration. If something in the sweep is a list,
    then the associated key is updated with only one of the elements of the
    list; which element is specified by ``combo``. Otherwise, the value from
    the sweep is used.

    Args:
        config (dict): The experiment configuration that is being updated.
        sweep (dict): The full sweep that is used to update the configuration.
        combo (dict): The specific values for keys in the sweep that are lists.

    Returns:
        dict: An updated configuration using the sweep and combo arguments.
    """
    multiple_parameters = {}
    keys_to_pop = []
    for key in combo:
        if 'multiple_parameters' in key:
            multiple_parameters.update(combo[key])
            keys_to_pop.append(key)
    combo.update(multiple_parameters)

    this_sweep = copy.deepcopy(sweep)
    this_sweep.update(combo)
    for k in keys_to_pop:
        this_sweep.pop(k)

    logging_str = ''
    for key in this_sweep:
        logging_str += f", {key}: {this_sweep[key]}"
    logging.info(logging_str)

    this_experiment = copy.deepcopy(config)
    notes = this_experiment['info'].pop('notes', '')
    notes += logging_str
    this_experiment['info']['notes'] = notes

    for key in this_sweep:
        if '.' in key:
            # Specific update: the key is a dot-delimited path to the exact
            # value to set in the nested configuration.
            loc = key.split('.')
            nested_set(this_experiment, this_sweep[key], *loc)
        else:
            # Global update: replace every matching key in the configuration.
            this_experiment = replace_item(this_experiment, key, this_sweep[key])
    return this_experiment
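# A small worked example of the two update modes described in the docstring,
# assuming `nested_set` and `replace_item` behave as their names suggest
# (dotted keys set one nested value; bare keys replace every matching key).
# The input dicts are illustrative, not from the repository.
if __name__ == '__main__':
    config = {
        'info': {'notes': ''},
        'model_config': {
            'modules': {'recurrent_stack': {'args': {'hidden_size': 0}}}},
        'train': {'embedding_size': 0},
        'test': {'embedding_size': 0},
    }
    sweep = {
        'model_config.modules.recurrent_stack.args.hidden_size': [50, 100],
        'embedding_size': [10, 20],
    }
    combo = {
        'model_config.modules.recurrent_stack.args.hidden_size': 50,
        'embedding_size': 10,
    }
    updated = update_config_with_sweep(config, sweep, combo)
    # The dotted key sets only the one nested hidden_size (now 50), while the
    # bare 'embedding_size' key is replaced everywhere it appears (both
    # 'train' and 'test' become 10).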
def cmd(script_func, parser_func, exec_func=sequential_job_execution):
    """
    Builds a parser for any script in the scripts/ directory. Scripts should
    have two main functions: 1) a function that actually runs the script and
    2) a build_parser function that builds an ArgumentParser with informative
    help text for the script. This function allows the command line arguments
    to be passed to the script either through the command line as normal, or
    through a YAML file which has matching keyword arguments for the script.
    Positional arguments are discouraged.

    The arguments in the YAML file are checked by passing them back into the
    command line parser function before giving them to the script. This also
    allows default values to be defined in the script's argument parser.

    A script can be called multiple times using a YAML file by having a
    top-level key called 'jobs'. 'jobs' should contain a list where each item
    in the list is a set of arguments to be passed to the script, one by one.

    For each script, simply add this like so::

        if __name__ == "__main__":
            cmd(script_func, parser_func)

    Then to run a script, simply do::

        python -m scripts.[script_name] --yml [path_to_yml_file] # for yml
        python -m scripts.[script_name] [--arg val] # for cmd line

    Arguments:
        script_func (function): A function that will take in the arguments as
            keyword arguments and perform some action.
        parser_func (function): A function that will build up the argument
            parser for the script.
        exec_func (function): A function that executes the parsed jobs.
            Defaults to sequential_job_execution.
    """
    # First, check that the environment variables exist.
    if not os.getenv('DATA_DIRECTORY'):
        logging.info(
            """
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            | It doesn't look like you sourced your environment variables! Make sure to       |
            | run 'source setup/environment/[machine_name]_local.sh' before running scripts,  |
            | as the scripts depend on the environment variables.                             |
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            """
        )
        return

    jobs = []
    yml_parser = build_parser_for_yml_script()
    cmd_parser = parser_func()
    args = vars(yml_parser.parse_known_args()[0])

    if args['help']:
        print('Usage via YML file.')
        yml_parser.print_help()
        if cmd_parser:
            print('\nDirect usage via command line arguments.')
            cmd_parser.print_help()
        return

    extra_args = {}
    if args['yml'] is None:
        # Arguments came directly from the command line.
        args, unknown_args = cmd_parser.parse_known_args()
        unknown_args = [u.replace('--', '') for u in unknown_args]
        unknown_args = dict(zip(unknown_args[:-1:2], unknown_args[1::2]))
        args = vars(args)
        script_signature = inspect.getfullargspec(script_func)
        if script_signature.varkw is not None:
            args.update(unknown_args)
        jobs.append(args)
    else:
        # Arguments came from a YAML file, possibly containing multiple jobs.
        _args = load_yaml(args['yml'])
        _jobs = []
        if 'jobs' in _args:
            _jobs = _args.pop('jobs')
            extra_args = _args
        else:
            _jobs.append(_args)

        for job in _jobs:
            if cmd_parser:
                # Round-trip the YAML arguments through the command line
                # parser so defaults are applied and values are validated.
                args = []
                for key, val in job.items():
                    if isinstance(val, bool):
                        if val:
                            args.append(f'--{key}')
                    else:
                        args.append(f'--{key}')
                        args.append(str(val))
                args, unknown_args = cmd_parser.parse_known_args(args)
                unknown_args = [u.replace('--', '') for u in unknown_args]
                unknown_args = dict(zip(unknown_args[:-1:2], unknown_args[1::2]))
                args = vars(args)
                script_signature = inspect.getfullargspec(script_func)
                if script_signature.varkw is not None:
                    args.update(unknown_args)
                for k in list(args):
                    if k in job:
                        job.pop(k)
                args.update(job)
            else:
                args = job
            jobs.append(args)

    # Only pass along the extra arguments that exec_func actually accepts.
    exec_args = inspect.getfullargspec(exec_func)
    for key in extra_args.copy():
        if key not in exec_args.args:
            extra_args.pop(key)

    exec_func(script_func, jobs, **extra_args)
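# Sketch of the pattern a script in scripts/ would follow, per the docstring
# of cmd() above. The script body and argument names are illustrative.
import argparse

def build_parser():
    parser = argparse.ArgumentParser(description='An example script.')
    parser.add_argument('--path_to_yml_file', type=str,
                        help='Path to the experiment configuration.')
    return parser

def main(path_to_yml_file=None):
    # The real script would do its work here.
    print(f'Running with {path_to_yml_file}')

if __name__ == "__main__":
    cmd(main, build_parser)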
def upload_to_gsheet(results, config, exp=None, upload_source_metrics=False):
    """
    Uploads the analysis to the Google Sheet, if possible.

    Args:
        results (:class:`pandas.DataFrame`): DataFrame containing all the results -
            output by :py:func:`scripts.analyze.analyze`.
        config (dict): Dictionary containing the entire experiment configuration.
        exp (:class:`comet_ml.Experiment`): Experiment given by comet.ml (optional).
        upload_source_metrics (bool): Uploads metrics for each source if True. Defaults
            to False. Can interact with the API limit on Google Sheets. If there are
            too many sources, it will hit the limit and the script will break.
    """
    credentials_path = os.getenv('PATH_TO_GOOGLE_CREDENTIALS', None)
    if not credentials_path:
        logging.info('PATH_TO_GOOGLE_CREDENTIALS not set, cannot proceed.')
        return None

    gc = init_gsheet(credentials_path)

    config = copy.deepcopy(config)
    sheet_name = config['info'].pop('spreadsheet_name', None)
    worksheet_name = config['info'].pop('worksheet_name', None)
    if not sheet_name or not worksheet_name:
        logging.info(
            'Sheet name not specified, not uploading results to Google Sheets.')
        return None

    logging.info(f'Opening {sheet_name} with {worksheet_name}')
    sheet = gc.open(sheet_name)

    try:
        summary_worksheet = sheet.worksheet(worksheet_name)
    except WorksheetNotFound:
        logging.info(
            f'Worksheet not found, creating new sheet w/ name {worksheet_name}')
        template_worksheet = sheet.worksheet('Template')
        summary_worksheet = template_worksheet.duplicate(
            new_sheet_name=worksheet_name)

    datasets = np.unique(results['dataset'])
    metrics = ['SDR', 'SIR', 'SAR']
    notes = config['info'].pop('notes', 'No notes')

    def trunc(values, decs=0):
        # Truncate (rather than round) to `decs` decimal places.
        return np.trunc(values * 10 ** decs) / (10 ** decs)

    existing_rows = summary_worksheet.get_all_values()

    for dataset in datasets:
        logging.info(
            f"Uploading results for {dataset} for {config['info']['experiment_key']} "
            f"@ {worksheet_name} in {summary_worksheet}")
        _results = results[results['dataset'] == dataset]
        dataset_paths = {
            key: config['datasets'][key]['folder']
            for key in config['datasets']
        }
        experiment_key = config['info']['experiment_key']
        experiment_url = 'No link'
        if hasattr(exp, '_get_experiment_url'):
            experiment_url = exp._get_experiment_url()

        row_to_insert = [
            f'=HYPERLINK("{experiment_url}", "{experiment_key}")',
            notes,
            dataset_paths.pop('train', 'No training'),
            dataset_paths.pop('val', 'No validation.'),
            dataset,
            np.unique(_results['file_name']).shape[0],
        ]

        # Check whether this experiment's row is already in the sheet.
        row_exists = False
        row_index = 3
        for j, row in enumerate(existing_rows):
            compared_indices = [2, 3, 4]
            row = [row[0]] + [row[i] for i in compared_indices]
            inserted_row = ([config['info']['experiment_key']] +
                            [str(row_to_insert[i]) for i in compared_indices])
            if row == inserted_row:
                logging.info("Row already exists")
                row_exists = True
                row_index = j + 1
                break

        if not row_exists:
            summary_worksheet.insert_row(
                row_to_insert, index=3, value_input_option='USER_ENTERED')

        overall_metrics = (
            [np.unique(_results['file_name']).shape[0]] +
            [trunc(x, decs=2) for x in _results.mean()[metrics]])
        overall_index = summary_worksheet.find('Overall').col - 1
        for i, value in enumerate(overall_metrics):
            summary_worksheet.update_cell(row_index, overall_index + i, value)

        if upload_source_metrics:
            try:
                source_names = np.unique(_results['source_name']).tolist()
                for source_name in source_names:
                    try:
                        source_name_cell = summary_worksheet.find(source_name)
                    except Exception:
                        # The source doesn't have a column yet; take over the
                        # next placeholder 'Source' column.
                        source_name_cell = summary_worksheet.find('Source')
                        source_name_cell.value = source_name
                        summary_worksheet.update_cells([source_name_cell])
                    for i, metric in enumerate(metrics):
                        value = trunc(
                            _results[_results['source_name'] == source_name].mean()[metric],
                            decs=2)
                        summary_worksheet.update_cell(
                            row_index, source_name_cell.col + i, value)
            except Exception:
                logging.info(
                    "Failure in uploading. Likely too many unique sources and "
                    "we hit an API limit.")
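# init_gsheet() is defined elsewhere; a minimal sketch of what it might do
# with the gspread library, assuming the credentials file is a Google
# service-account JSON key (an assumption, not confirmed by this code).
import gspread

def init_gsheet(credentials_path):
    # Returns an authorized client whose .open(name) call is used by
    # upload_to_gsheet above.
    return gspread.service_account(filename=credentials_path)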
def analyze(path_to_yml_file, use_gsheet=False, upload_source_metrics=False):
    """
    Analyzes the metrics for all the files that were evaluated in the experiment.

    Args:
        path_to_yml_file (str): Path to the yml file that defines the experiment. The
            corresponding results folder for the experiment is what will be analyzed
            and put into a Pandas DataFrame.
        use_gsheet (bool, optional): Whether or not to upload to the Google Sheet.
            Defaults to False.
        upload_source_metrics (bool): Uploads metrics for each source if True. Defaults
            to False. Can interact with the API limit on Google Sheets. If there are
            too many sources, it will hit the limit and the script will break.

    Returns:
        tuple: 3-element tuple containing

            - results (:class:`pandas.DataFrame`): DataFrame containing all of the
              results for every file evaluated in the experiment. The DataFrame also
              has every key in the experiment configuration in flattened format. For
              example, ``model_config_recurrent_stack_args_embedding_size`` is a
              column in the DataFrame.

            - config (*dict*): A dictionary containing the configuration of the
              experiment.

            - exp (:class:`comet_ml.Experiment`): An instantiated experiment if
              comet.ml is needed, otherwise it is None.
    """
    config, exp, path_to_yml_file = load_experiment(path_to_yml_file)

    paths = glob.glob(
        os.path.join(config['info']['output_folder'], 'results', '**.yml'),
        recursive=True)

    results = []
    for _path in paths:
        data = load_yaml(_path, [])
        for _data in data:
            keys = sorted(list(_data.keys()))
            keys.remove('permutation')
            for key in keys:
                flattened = {
                    'experiment_key': config['info']['experiment_key'],
                    'notes': config['info']['notes'],
                    'file_name': _path,
                    'dataset': config['datasets']['test']['folder'],
                    'source_name': key.split('/')[-1],
                }
                flattened.update(flatten(config))
                for metric in _data[key]:
                    flattened[metric] = np.mean(_data[key][metric])
                results.append(flattened)

    results = pd.DataFrame(results)
    logging.info(results.mean())
    logging.info(config['info']['experiment_key'])

    if use_gsheet:
        upload_to_gsheet(results, config, exp, upload_source_metrics)

    return results, config, exp
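# Typical usage, tying analyze() to the uploader above. The path is
# hypothetical.
if __name__ == '__main__':
    results, config, exp = analyze(
        'experiments/my_experiment.yml',
        use_gsheet=True,
        upload_source_metrics=False,
    )
    print(results[['SDR', 'SIR', 'SAR']].mean())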
def _supervise(temp_is, temp_should, running, heating):
    state = HeaterState(temp_is=temp_is, should=temp_should,
                        running=running, heating=heating)
    state.connect_to_socket()
    mylogger.info("Superviser process started")

    # Pick the initial phase from the current temperature reading.
    state.update_temp_is()
    if state.should_preheat():
        phase = Phase.PREHEAT
    elif state.should_approach_heat():
        phase = Phase.APPROACH_HEAT
    else:
        phase = Phase.KEEP_HEAT

    while state.is_running():
        state.update_temp_is()
        if phase == Phase.PREHEAT:
            # Far below the target: heat at full power in 4-degree steps.
            if state.should_preheat():
                state.turn_on_heating()
                state.turn_on_fan()
                time.sleep(state.get_time_to_heat(4.0))
            elif state.should_approach_heat():
                phase = Phase.APPROACH_HEAT
                state.turn_off_heating()
                time.sleep(state.get_time_to_heat(4.0))
            else:
                phase = Phase.KEEP_HEAT
                state.turn_off_heating()
                time.sleep(state.get_time_to_heat(4.0))
        elif phase == Phase.APPROACH_HEAT:
            # Close to the target: heat in smaller 1-degree steps.
            if state.should_preheat():
                phase = Phase.PREHEAT
            elif state.should_approach_heat():
                state.turn_on_heating()
                state.turn_on_fan()
                time.sleep(state.get_time_to_heat(1.0))
            else:
                phase = Phase.KEEP_HEAT
                state.turn_off_heating()
                time.sleep(state.get_time_to_heat(4.0))
        else:  # KEEP_HEAT
            # At the target: top up periodically, then idle for the rest of
            # a ten-minute cycle.
            if state.should_preheat():
                phase = Phase.PREHEAT
            elif state.should_approach_heat():
                phase = Phase.APPROACH_HEAT
            else:
                time_heating = state.get_time_to_reach(state.get_temp_should())
                if time_heating <= 1.0:
                    state.turn_off_heating()
                    state.turn_off_fan()
                    time.sleep(60)
                    continue
                state.turn_on_heating()
                state.turn_on_fan()
                time.sleep(time_heating)
                state.turn_off_heating()
                time.sleep(20)
                state.turn_off_fan()
                time.sleep(600 - 20 - time_heating)
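# The supervisor loop assumes a three-valued Phase enum; here is a minimal
# sketch consistent with the names used above (the real definition may
# differ; only the member names appear in this code).
from enum import Enum, auto

class Phase(Enum):
    PREHEAT = auto()        # far below the target temperature
    APPROACH_HEAT = auto()  # within a few degrees of the target
    KEEP_HEAT = auto()      # holding at the target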
def sweep_experiment(path_to_yml_file, num_jobs=1, num_gpus=0, run_in='host'):
    """
    Takes a base experiment file and sweeps across the 'sweep' key in it,
    replacing values as needed. Results in the Cartesian product of all of the
    parameters that are being swept across. Also creates pipeline files that
    can be passed to :py:mod:`scripts.pipeline` so that everything can be run
    in sequence easily, or in parallel as determined by num_jobs.

    The sweep config is used to replace dictionary keys and create experiments
    on the fly. A separate experiment will be created for each sweep
    discovered. The set of experiments can then be submitted to the job runner
    in parallel or in sequence. If one of the arguments is a list, then the
    script will loop across each of the items in the list, creating a separate
    experiment for each one. There's no real error checking, so be careful
    when setting things up: creating invalid or buggy experiments (e.g.
    num_frequencies and n_fft that don't match) is possible.

    If there is a '.' in the key, then it is an absolute path to the exact
    value to update in the configuration. If there isn't, then it is a global
    update for all matching keys.

    Here's a simple example of a sweep configuration that specifies the STFT
    parameters and sweeps across the number of hidden units and embedding
    size:

    .. code-block:: yaml

       sweep:
         - n_fft: 128
           hop_length: 64
           num_frequencies: 65 # n_fft / 2 + 1
           num_features: 65
           model_config.modules.recurrent_stack.args.hidden_size: [50, 100] # specific sweep, delimited by '.'
           embedding_size: [10, 20] # global sweep
           cache: '${CACHE_DIRECTORY}/musdb_128'
           populate_cache: true # controls whether to create a separate experiment for caching
           num_cache_workers: 60 # how many workers to use when populating the cache

    The above creates 5 experiments, across the Cartesian product of hidden
    size and embedding size, +1 for the caching experiment::

        - caching "experiment" where training data is prepared
        - hidden_size = 50,  embedding_size = 10 # 1st experiment
        - hidden_size = 50,  embedding_size = 20 # 2nd experiment
        - hidden_size = 100, embedding_size = 10 # 3rd experiment
        - hidden_size = 100, embedding_size = 20 # 4th experiment

    Each sweep within an item of the list should use the same cache. The cache
    is created as a separate experiment. For example, if we want to sweep
    across STFT parameters, then we need different caches, as different STFTs
    will result in different training data.

    .. code-block:: yaml

       sweep:
         - n_fft: 128
           hop_length: 64
           num_frequencies: 65 # n_fft / 2 + 1
           num_features: 65
           model_config.modules.recurrent_stack.args.hidden_size: [50, 100] # specific sweep, delimited by '.'
           embedding_size: [10, 20] # global sweep
           cache: '${CACHE_DIRECTORY}/musdb_128'
           populate_cache: true # controls whether to create a separate experiment for caching
           num_cache_workers: 60 # how many workers to use when populating the cache
         - n_fft: 256
           hop_length: 64
           num_frequencies: 129 # n_fft / 2 + 1
           num_features: 129
           model_config.modules.recurrent_stack.args.hidden_size: [50, 100] # specific sweep, delimited by '.'
           embedding_size: [10, 20] # global sweep
           cache: '${CACHE_DIRECTORY}/musdb_256'
           populate_cache: true # controls whether to create a separate experiment for caching
           num_cache_workers: 60 # how many workers to use when populating the cache

    Now we create 10 experiments, 4 for each item in the list, +1 for each
    cache.

    Args:
        path_to_yml_file (str): Path to the configuration for the base
            experiment. This will be expanded by the script, filling in the
            values defined in 'sweep' accordingly, and creating new
            experiments.
        num_jobs (int): Controls the number of jobs to use in the created
            pipelines. Defaults to 1.
        num_gpus (int): Controls the number of GPUs to use in the created
            pipelines. Defaults to 0.
        run_in (str): Run jobs in containers or on the host ('container' or
            'host'). Defaults to 'host'.
    """
    experiments, cache_experiments = create_experiments(path_to_yml_file)
    scripts = ['train', 'evaluate', 'analyze']
    pipeline_ymls = []

    base_dir = os.path.splitext(os.path.abspath(path_to_yml_file))[0]
    base_dir = base_dir.split('/')
    base_dir.insert(-1, 'out')
    base_dir = os.path.join('/', *base_dir)
    os.makedirs(base_dir, exist_ok=True)

    # Block on cache creation.
    if cache_experiments:
        cache_pipeline = create_pipeline(
            cache_experiments, 'train', num_jobs=num_jobs)
        output_path = os.path.join(base_dir, 'cache.yml')
        dump_yaml(cache_pipeline, output_path)
        pipeline_ymls.append(output_path)

    for s in scripts:
        # The analysis step runs on the host, single-threaded, without GPUs.
        num_gpus = 0 if s == 'analyze' else num_gpus
        num_jobs = 1 if s == 'analyze' else num_jobs
        extra_cmd_args = ''
        if s == 'analyze':
            extra_cmd_args += '--use_gsheet'
        run_in = 'host' if s == 'analyze' else run_in

        pipeline = create_pipeline(
            experiments, s, num_jobs=num_jobs, num_gpus=num_gpus,
            run_in=run_in, extra_cmd_args=extra_cmd_args)
        output_path = os.path.join(base_dir, f'{s}.yml')
        dump_yaml(pipeline, output_path)
        pipeline_ymls.append(output_path)

    pipeline = create_pipeline(
        pipeline_ymls, 'pipeline', num_jobs=1, blocking=True,
        run_in='host', prefix='-y')
    output_path = os.path.join(base_dir, 'pipeline.yml')
    dump_yaml(pipeline, output_path)

    logging.info(f'Inspect the created pipeline files'
                 f' before running them! @ {output_path}')
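# Hypothetical invocation: expands 'sweep' in the base config into one
# experiment per combination, plus cache/train/evaluate/analyze pipeline
# YMLs (and a top-level pipeline.yml) in an out/ directory next to the
# config. The path here is illustrative.
if __name__ == '__main__':
    sweep_experiment(
        'experiments/base_config.yml',
        num_jobs=4,
        num_gpus=1,
        run_in='container',
    )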