def dump_task_info_tree(task: TaskOnKart, task_info_dump_path: str, ignore_task_names: Optional[List[str]] = None, use_unique_id: bool = True):
    """Dump the task info tree object (TaskInfo) to a pickle file.

    Parameters
    ----------
    - task: TaskOnKart
        Root task.
    - task_info_dump_path: str
        Output target file path. Path destination can be `local`, `S3`, or `GCS`.
        File extension must be '.pkl'.
    - ignore_task_names: Optional[List[str]]
        List of task names to ignore.
    - use_unique_id: bool = True
        Whether to use unique id to dump target file. Default is True.

    Returns
    -------
    None

    Raises
    ------
    AssertionError
        If the extension of `task_info_dump_path` is not '.pkl'.
    """
    extension = os.path.splitext(task_info_dump_path)[1]
    if extension != '.pkl':
        # Raised explicitly instead of via an `assert` statement so that the
        # check is not stripped when Python runs with -O. The exception type
        # and message are kept as they were for existing callers.
        raise AssertionError(f'File extention must be `.pkl`, not `{extension}`.')

    task_info_tree = make_task_info_tree(task, ignore_task_names=ignore_task_names)

    # With use_unique_id=False the target is created with unique_id=None.
    unique_id = task.make_unique_id() if use_unique_id else None

    task_info_target = make_target(file_path=task_info_dump_path, unique_id=unique_id)
    task_info_target.dump(obj=task_info_tree, lock_at_dump=False)
def dump_task_info_table(task: TaskOnKart, task_info_dump_path: str, ignore_task_names: Optional[List[str]] = None):
    """Write a table describing the dependent tasks of `task` to a file.

    Parameters
    ----------
    - task: TaskOnKart
        Root task.
    - task_info_dump_path: str
        Output target file path. Path destination can be `local`, `S3`, or `GCS`.
        File extension can be any type that gokart file processor accepts, including `csv`, `pickle`, or `txt`.
        See `TaskOnKart.make_target module <https://gokart.readthedocs.io/en/latest/task_on_kart.html#taskonkart-make-target>` for details.
    - ignore_task_names: Optional[List[str]]
        List of task names to ignore.

    Returns
    -------
    None
    """
    table = make_task_info_as_table(task=task, ignore_task_names=ignore_task_names)

    # The root task's unique id is always passed to the target here.
    target = make_target(file_path=task_info_dump_path, unique_id=task.make_unique_id())
    target.dump(obj=table, lock_at_dump=False)
def _get_output(task: TaskOnKart) -> Any:
    """Load the data behind `task.output()`.

    A list or tuple of targets is loaded into a list (a tuple is not
    preserved as a tuple), a dict of targets into a dict with the same keys,
    and anything else is treated as a single target and loaded directly.
    """
    targets = task.output()

    if isinstance(targets, (list, tuple)):
        return [target.load() for target in targets]

    if isinstance(targets, dict):
        return {key: target.load() for key, target in targets.items()}

    return targets.load()
def make_task_info_tree(task: TaskOnKart, ignore_task_names: Optional[List[str]] = None) -> TaskInfo:
    """Recursively build a TaskInfo tree rooted at `task`.

    Parameters
    ----------
    - task: TaskOnKart
        Root task. The root itself is never filtered out.
    - ignore_task_names: Optional[List[str]]
        Class names of required tasks to leave out. A required task whose
        class name is listed is skipped together with its whole subtree.
        None disables filtering.

    Returns
    -------
    TaskInfo
        Info for `task`, with `children_task_infos` holding the info of each
        required task that was not ignored.
    """
    with warnings.catch_warnings():
        # Silence the warning matching this message while checking completeness.
        warnings.filterwarnings(action='ignore', message='Task .* without outputs has no custom complete() method')
        is_task_complete = task.complete()

    name = task.__class__.__name__
    unique_id = task.make_unique_id()
    output_paths = [t.path() for t in luigi.task.flatten(task.output())]
    params = task.get_info(only_significant=True)

    processing_time = task.get_processing_time()
    # isinstance (rather than an exact type comparison) so that float
    # subclasses also get the unit suffix; non-float values pass through as-is.
    if isinstance(processing_time, float):
        processing_time = str(processing_time) + 's'

    is_complete = 'COMPLETE' if is_task_complete else 'PENDING'
    task_log = dict(task.get_task_log())
    requires = _make_requires_info(task.requires())

    # Recurse into every required task that is not explicitly ignored.
    children_task_infos: List[TaskInfo] = [
        make_task_info_tree(child, ignore_task_names=ignore_task_names)
        for child in luigi.task.flatten(task.requires())
        if ignore_task_names is None or child.__class__.__name__ not in ignore_task_names
    ]

    return TaskInfo(
        name=name,
        unique_id=unique_id,
        output_paths=output_paths,
        params=params,
        processing_time=processing_time,
        is_complete=is_complete,
        task_log=task_log,
        requires=requires,
        children_task_infos=children_task_infos,
    )
def _get_output(task: TaskOnKart) -> Any:
    """Load the data behind `task.output()`.

    A list or tuple of targets is loaded into a list, a dict of targets into
    a dict with the same keys, and anything else is treated as a single
    target and loaded directly.
    """
    output = task.output()

    # isinstance instead of an exact `type(...) == list` comparison, so that
    # tuples and list subclasses are handled rather than falling through to
    # `.load()` on the container itself.
    if isinstance(output, (list, tuple)):
        return [x.load() for x in output]

    # A dict of targets keeps its keys; each value is loaded.
    if isinstance(output, dict):
        return {k: x.load() for k, x in output.items()}

    return output.load()