def test_dump_load_task_structure(tmpdir):
    """Round-trip a ``TaskDescription`` through JSON and expect an equal object back."""
    work_dir = Path(str(tmpdir))

    job_parameters = DefaultJobParameters(
        query={'time': [2013, 2015]},
        source_products=['ls5_nbar_albers'],
        output_products=['ls5_nbar_waterman_butterfly'],
    )
    # Task-app framework
    app_state = TaskAppState(
        config_path=Path('config.test.yaml'),
        task_serialisation_path=work_dir / 'generated-tasks.pickle',
    )
    task_description = TaskDescription(
        type_="reproject",
        task_dt=datetime.datetime.utcnow(),
        events_path=work_dir / 'events',
        logs_path=work_dir / 'logs',
        parameters=job_parameters,
        runtime_state=app_state,
    )

    # Write the structure out, then read it straight back in.
    json_path = work_dir / 'task_description.json'
    serialise.dump_structure(json_path, task_description)
    reloaded = serialise.load_structure(json_path, expected_type=TaskDescription)

    assert reloaded == task_description
def run(index,
        dry_run: bool,
        input_filename: str,
        runner: TaskRunner,
        skip_indexing: bool,
        **kwargs):
    """
    Process Fractional Cover tasks from a task file.

    :param index: passed to ``_index_datasets`` when indexing is not skipped
    :param dry_run: when true, only call ``task_app.check_existing_files`` on each
        task's ``'filename_dataset'`` and return without running anything
    :param input_filename: task file loaded with ``task_app.load_tasks``; its parent
        directory is used as the events, logs and jobs path
    :param runner: callable invoked with the task description, tasks, task function
        and result-processing function; ``runner.stop()`` is always called afterwards
    :param skip_indexing: when true, ``_skip_indexing_and_only_log`` handles results
        instead of ``_index_datasets``
    :param kwargs: accepted and ignored
    :return: always ``0``; exceptions raised by the runner are logged, not re-raised
    """
    config, tasks = task_app.load_tasks(input_filename)
    work_dir = Path(input_filename).parent

    # TODO: Get rid of this completely
    task_desc = TaskDescription(
        type_='fc',
        # Build an aware UTC timestamp directly. Calling astimezone() on the naive
        # result of utcnow() would interpret it as local time and shift it by the
        # host's UTC offset.
        task_dt=datetime.now(timezone.utc),
        events_path=work_dir,
        logs_path=work_dir,
        jobs_path=work_dir,
        parameters=None,
        runtime_state=None,
    )

    if dry_run:
        _LOG.info('Starting Fractional Cover Dry Run...')
        task_app.check_existing_files(
            (task['filename_dataset'] for task in tasks))
        return 0

    _LOG.info('Starting Fractional Cover processing...')
    task_func = partial(_do_fc_task, config)
    if skip_indexing:
        process_func = _skip_indexing_and_only_log
    else:
        process_func = partial(_index_datasets, index)

    try:
        runner(task_desc, tasks, task_func, process_func)
        _LOG.info("Runner finished normally, triggering shutdown.")
    except Exception as err:
        # Deliberately best-effort: failures are logged and the function still
        # returns 0. The membership test must run on the message text, because
        # exception objects do not support ``in``.
        if "Error 104" in str(err):
            _LOG.info(
                "Processing completed and shutdown was initiated. Exception: %s",
                str(err))
        else:
            _LOG.info("Exception during processing: %s", err)
    finally:
        runner.stop()

    return 0
def run(self, runner, task_file=None, task_slice=None):
    """
    Execute statistics tasks with the given runner and return its result.

    :param runner: callable invoked as ``runner(task_desc, tasks, task_runner)``;
        ``runner.stop()`` is called afterwards, including when the call raises
    :param task_file: if truthy, tasks are read from it with ``unpickle_stream``;
        otherwise they come from ``self.generate_tasks(self.configure_outputs())``
    :param task_slice: optional object with ``start``/``stop``/``step`` attributes
        used to restrict the tasks via ``islice``
    :return: whatever ``runner`` returns
    """
    if task_file:
        tasks = unpickle_stream(task_file)
    else:
        tasks = self.generate_tasks(self.configure_outputs())

    if task_slice is not None:
        tasks = islice(tasks, task_slice.start, task_slice.stop, task_slice.step)

    app_info = _get_app_metadata(self.config_file)

    output_driver = partial(self.output_driver,
                            output_path=self.location,
                            app_info=app_info,
                            storage=self.storage,
                            global_attributes=self.global_attributes,
                            var_attributes=self.var_attributes)
    task_runner = partial(execute_task,
                          output_driver=output_driver,
                          chunking=self.computation.get('chunking', {}))

    # does not need to be thorough for now
    task_desc = TaskDescription(
        type_='datacube_stats',
        # Aware UTC timestamp; same value as utcnow().replace(tzinfo=...) without
        # going through the deprecated naive utcnow().
        task_dt=datetime.now(tz.tzutc()),
        events_path=Path(self.location),
        logs_path=Path(self.location),
        parameters=DefaultJobParameters(query={},
                                        source_products=[],
                                        output_products=[]))

    try:
        result = runner(task_desc, tasks, task_runner)
    finally:
        # Stop the runner even when processing raised, so it is not left running.
        _LOG.debug('Stopping runner.')
        runner.stop()
        _LOG.debug('Runner stopped.')

    return result
def run_tasks(self, tasks, runner=None, task_slice=None):
    """
    Execute the given tasks with a runner and return the runner's result.

    Creates ``events``, ``logs`` and ``jobs`` directories under ``self.location``
    before the runner is invoked.

    :param tasks: iterable of tasks to execute
    :param runner: callable invoked as ``runner(task_desc, tasks, task_runner)``;
        a ``digitalearthau.qsub.TaskRunner()`` is created when ``None``.
        ``runner.stop()`` is called afterwards, including when the call raises
    :param task_slice: optional object with ``start``/``stop``/``step`` attributes
        used to restrict the tasks via ``islice``
    :return: whatever ``runner`` returns
    """
    from digitalearthau.qsub import TaskRunner
    from digitalearthau.runners.model import TaskDescription, DefaultJobParameters

    if task_slice is not None:
        tasks = islice(tasks, task_slice.start, task_slice.stop, task_slice.step)

    output_driver = self._partially_applied_output_driver()
    task_runner = partial(execute_task,
                          output_driver=output_driver,
                          chunking=self.computation.get('chunking', {}))

    # does not need to be thorough for now
    task_desc = TaskDescription(
        type_='datacube_stats',
        # Aware UTC timestamp; same value as utcnow().replace(tzinfo=...) without
        # going through the deprecated naive utcnow().
        task_dt=datetime.now(tz.tzutc()),
        events_path=Path(self.location) / 'events',
        logs_path=Path(self.location) / 'logs',
        jobs_path=Path(self.location) / 'jobs',
        parameters=DefaultJobParameters(query={},
                                        source_products=[],
                                        output_products=[]))

    task_desc.logs_path.mkdir(parents=True, exist_ok=True)
    task_desc.events_path.mkdir(parents=True, exist_ok=True)
    task_desc.jobs_path.mkdir(parents=True, exist_ok=True)

    if runner is None:
        runner = TaskRunner()

    try:
        result = runner(task_desc, tasks, task_runner)
    finally:
        # Stop the runner even when processing raised, so it is not left running.
        _LOG.debug('Stopping runner.')
        runner.stop()
        _LOG.debug('Runner stopped.')

    return result
def run(index,
        input_filename: str,
        runner: TaskRunner,
        skip_indexing: bool,
        redirect_outputs: str,
        **kwargs):
    """
    Process WOfS tasks from a task file.

    :param index: passed to ``_index_datasets`` when indexing is not skipped
    :param input_filename: task file loaded with ``task_app.load_tasks``; its parent
        directory is used as the events, logs and jobs path
    :param runner: callable invoked with the task description, tasks, task function
        and result-processing function; ``runner.stop()`` is always called afterwards
    :param skip_indexing: when true, ``_skip_indexing_and_only_log`` handles results
        instead of ``_index_datasets``
    :param redirect_outputs: when not ``None``, tasks are rewritten with
        ``_prepend_path_to_tasks(redirect_outputs, tasks)``
    :param kwargs: accepted and ignored

    Does not return normally: ends with ``sys.exit(0)`` unless the runner raised.
    """
    config, tasks = task_app.load_tasks(input_filename)
    work_dir = Path(input_filename).parent

    if redirect_outputs is not None:
        tasks = _prepend_path_to_tasks(redirect_outputs, tasks)

    # TODO: Get rid of this completely
    task_desc = TaskDescription(
        type_='wofs',
        # Build an aware UTC timestamp directly. Calling astimezone() on the naive
        # result of utcnow() would interpret it as local time and shift it by the
        # host's UTC offset.
        task_dt=datetime.now(timezone.utc),
        events_path=work_dir,
        logs_path=work_dir,
        jobs_path=work_dir,
        parameters=None,
        runtime_state=None,
    )

    _LOG.info('Starting WOfS processing...')
    task_func = partial(_do_wofs_task, config)
    if skip_indexing:
        process_func = _skip_indexing_and_only_log
    else:
        process_func = partial(_index_datasets, index)

    try:
        runner(task_desc, tasks, task_func, process_func)
        _LOG.info("Runner finished normally, triggering shutdown.")
    finally:
        runner.stop()

    # TODO: Check for failures and return error state
    sys.exit(0)