def load_structure(path: pathlib.Path, expected_type):
    """
    Deserialise a yaml/json document into the given NamedTuple type (with type hints).

    :param path: document to read
    :param expected_type: the class/type you expect to get back.
    """
    document = paths.read_document(path)
    return dict_to_type(document, expected_type)
def ensure_products(index, app_config_files):
    """Load each given app config file and ensure its input/output products exist in the index."""
    for app_config_file in app_config_files:
        # TODO: Add more validation of config?
        click.secho(f"Loading {app_config_file}", bold=True)
        document = paths.read_document(app_config_file)
        # _ensure_products creates the output product if it is missing.
        in_product, out_product = _ensure_products(document, index)
        click.secho(f"Product {in_product.name} → {out_product.name}")
def ensure_products(index, app_config, dry_run):
    """
    Ensure the products exist for the given FC config, creating them if necessary.

    If dry run is disabled, the validated output product definition will be added to the database.

    :param index: datacube index to check/create products in
    :param app_config: path to the FC app config document
    :param dry_run: when True, do not write the output product to the database
    """
    # TODO: Add more validation of config?
    click.secho(f"Loading {app_config}", bold=True)
    # NOTE: this is the loaded config document, not a file path.
    config = paths.read_document(app_config)
    _, out_product = _ensure_products(config, index, dry_run)
    # Fix: message previously read "exits" instead of "exists".
    click.secho(
        f"Output product definition for {out_product.name} product exists in the database for the given "
        f"FC input config file")
def _make_config_and_description(index: Index, task_desc_path: Path) -> Tuple[dict, TaskDescription]:
    """Load a serialised TaskDescription and build the runtime FC config dict from it."""
    task_desc = serialise.load_structure(task_desc_path, TaskDescription)

    config_path = task_desc.runtime_state.config_path
    config = paths.read_document(config_path)

    # TODO: This carries over the old behaviour of each load. Should probably be replaced with *tag*
    config['task_timestamp'] = int(task_desc.task_dt.timestamp())
    config['app_config_file'] = Path(config_path)

    return make_fc_config(index, config), task_desc
def _make_config_and_description(
        index: Index, task_desc_path: Path) -> Tuple[dict, TaskDescription]:
    """Load a serialised TaskDescription and build the runtime stacker config dict from it."""
    task_desc = serialise.load_structure(task_desc_path, TaskDescription)
    config_path = task_desc.runtime_state.config_path
    config = paths.read_document(config_path)

    # Deliberate self-assignment: raises KeyError early if 'output_type' is missing.
    config['output_type'] = config[
        'output_type']  # TODO: Temporary until ODC code is updated
    config['app_config_file'] = str(config_path)

    config = stacker.make_stacker_config(index, config)
    config['taskfile_version'] = make_tag(task_desc)
    config['version'] = digitalearthau.__version__ + ' ' + datacube.__version__
    return config, task_desc
def submit(index: Index,
           app_config: str,
           project: str,
           queue: str,
           no_qsub: bool,
           time_range: Tuple[datetime, datetime],
           tag: str):
    """Create an FC task description and submit the 'generate' PBS sub-job for it."""
    _LOG.info('Tag: %s', tag)

    config_path = Path(app_config).resolve()
    app_config = paths.read_document(config_path)

    task_desc, task_path = init_task_app(
        job_type="fc",
        source_products=[app_config['source_product']],
        output_products=[app_config['output_product']],
        # TODO: Use @datacube.ui.click.parsed_search_expressions to allow params other than time from the cli?
        datacube_query_args=Query(index=index, time=time_range).search_terms,
        app_config_path=config_path,
        pbs_project=project,
        pbs_queue=queue,
    )
    _LOG.info("Created task description: %s", task_path)

    # Allow a dry submission that only writes the task description.
    if no_qsub:
        _LOG.info('Skipping submission due to --no-qsub')
        return 0

    submit_subjob(
        name='generate',
        task_desc=task_desc,
        command=[
            'generate', '-v', '-v',
            '--task-desc', str(task_path),
            '--tag', tag,
        ],
        qsub_params=dict(
            mem='20G',
            wd=True,
            ncpus=1,
            walltime='1h',
            name='fc-generate-{}'.format(tag),
        ),
    )
def generate(index: Index, app_config: str, output_filename: str, dry_run: bool,
             time_range: Tuple[datetime, datetime]):
    """
    Generate Tasks into file and Queue PBS job to process them

    By default, also ensures the Output Product is present in the database.

    --dry-run will still generate a tasks file, but not add the output product to the database.
    """
    config_path = Path(app_config).resolve()
    document = paths.read_document(config_path)

    wofs_config = _make_wofs_config(index, document, dry_run)
    # Patch in config file location, for recording in dataset metadata
    wofs_config['app_config_file'] = config_path

    wofs_tasks = _make_wofs_tasks(index, wofs_config, time_range)
    num_tasks_saved = task_app.save_tasks(wofs_config, wofs_tasks, output_filename)
    _LOG.info('Found %d tasks', num_tasks_saved)
def generate(index: Index, app_config: str, output_filename: str, dry_run: bool):
    """
    Generate Tasks into a queue file.

    By default, also ensures the Output Product is present in the database.

    --dry-run will still generate a tasks file, but not add the output product to the database.
    """
    config_path = Path(app_config).resolve()
    document = paths.read_document(config_path)

    fc_config = _make_fc_config(index, document, dry_run)
    # Patch in config file location, for recording in dataset metadata
    fc_config['app_config_file'] = config_path

    fc_tasks = _make_fc_tasks(index, fc_config)
    num_tasks_saved = save_tasks(fc_config, fc_tasks, output_filename)
    _LOG.info('Found %d tasks', num_tasks_saved)
def submit(index: Index, app_config: str, project: str, queue: str, no_qsub: bool,
           time_range: Tuple[datetime, datetime], tag: str, email_options: str, email_id: str,
           dry_run: bool):
    """
    Kick off two stage PBS job

    Stage 1 (Generate task file):
        The task-app machinery loads a config file, from a path specified on the command line, into a dict.
        If dry is enabled, a dummy DatasetType is created for tasks generation without indexing
        the product in the database.
        If dry run is disabled, generate tasks into file and queue PBS job to process them.

    Stage 2 (Run):
        During normal run, following are performed:
           1) Tasks shall be yielded for dispatch to workers.
           2) Load data
           3) Run FC algorithm
           4) Attach metadata
           5) Write output files and
           6) Finally index the newly created FC output netCDF files

        If dry run is enabled, application only prepares a list of output files to be created and does not
        record anything in the database.
    """
    _LOG.info('Tag: %s', tag)

    config_path = Path(app_config).resolve()
    app_config = paths.read_document(config_path)

    # Only constrain the query by time when a complete time range was supplied.
    has_time = bool(time_range) and all(time_range)
    query = Query(index=index, time=time_range) if has_time else Query(index=index)

    task_desc, task_path = init_task_app(
        job_type="fc",
        source_products=[app_config['source_product']],
        output_products=[app_config['output_product']],
        # TODO: Use @datacube.ui.click.parsed_search_expressions to allow params other than time from the cli?
        datacube_query_args=query.search_terms,
        app_config_path=config_path,
        pbs_project=project,
        pbs_queue=queue)
    _LOG.info("Created task description: %s", task_path)

    if no_qsub:
        _LOG.info('Skipping submission due to --no-qsub')
        return 0

    # If dry run is not enabled just pass verbose option
    dry_run_option = '--dry-run' if dry_run else '-v'

    # Append email options and email id to the PbsParameters dict key, extra_qsub_args
    extra_qsub_args = '-M {0} -m {1}'.format(email_id, email_options)
    task_desc.runtime_state.pbs_parameters.extra_qsub_args.extend(
        extra_qsub_args.split(' '))

    submit_subjob(name='generate',
                  task_desc=task_desc,
                  command=[
                      'generate', '-vv',
                      '--task-desc', str(task_path),
                      '--tag', tag,
                      '--log-queries',
                      '--email-id', email_id,
                      '--email-options', email_options,
                      dry_run_option,
                  ],
                  qsub_params=dict(name='fc-generate-{}'.format(tag),
                                   mem='medium',
                                   wd=True,
                                   nodes=1,
                                   walltime='1h'))