Пример #1
0
def dump_task_info_tree(task: TaskOnKart,
                        task_info_dump_path: str,
                        ignore_task_names: Optional[List[str]] = None,
                        use_unique_id: bool = True):
    """Dump the task info tree object (TaskInfo) to a pickle file.

    Parameters
    ----------
    - task: TaskOnKart
        Root task.
    - task_info_dump_path: str
        Output target file path. Path destination can be `local`, `S3`, or `GCS`.
        File extension must be '.pkl'.
    - ignore_task_names: Optional[List[str]]
        List of task names to ignore.
    - use_unique_id: bool = True
        Whether to use unique id to dump target file. Default is True.
        When False, `unique_id=None` is passed to `make_target`.
    Returns
    -------
    None
    Raises
    ------
    AssertionError
        If the extension of `task_info_dump_path` is not '.pkl'.
    """
    extension = os.path.splitext(task_info_dump_path)[1]
    if extension != '.pkl':
        # Raised explicitly instead of through an `assert` statement so the check is
        # still enforced under `python -O`. AssertionError (and the message text) is
        # kept so callers that already catch it keep working.
        raise AssertionError(f'File extention must be `.pkl`, not `{extension}`.')

    task_info_tree = make_task_info_tree(task,
                                         ignore_task_names=ignore_task_names)

    unique_id = task.make_unique_id() if use_unique_id else None

    task_info_target = make_target(file_path=task_info_dump_path,
                                   unique_id=unique_id)
    # lock_at_dump=False is passed through to the target's dump() unchanged.
    task_info_target.dump(obj=task_info_tree, lock_at_dump=False)
Пример #2
0
def dump_task_info_table(task: TaskOnKart,
                         task_info_dump_path: str,
                         ignore_task_names: Optional[List[str]] = None):
    """Write a table describing `task` and its dependent tasks to a file.

    Parameters
    ----------
    - task: TaskOnKart
        Root task.
    - task_info_dump_path: str
        Output target file path. Path destination can be `local`, `S3`, or `GCS`.
        Any file extension accepted by a gokart file processor may be used, e.g. `csv`, `pickle`, or `txt`.
        See `TaskOnKart.make_target module <https://gokart.readthedocs.io/en/latest/task_on_kart.html#taskonkart-make-target>` for details.
    - ignore_task_names: Optional[List[str]]
        List of task names to ignore.
    Returns
    -------
    None
    """
    table = make_task_info_as_table(task=task,
                                    ignore_task_names=ignore_task_names)

    # The root task's unique id is always handed to make_target here.
    target = make_target(file_path=task_info_dump_path,
                         unique_id=task.make_unique_id())
    target.dump(obj=table, lock_at_dump=False)
Пример #3
0
def _get_output(task: TaskOnKart) -> Any:
    """Load and return the contents of the task's output target(s).

    A list or tuple of targets yields a list of loaded values, a dict of
    targets yields a dict with the same keys, and any other output is
    treated as a single target whose `load()` result is returned.
    """
    targets = task.output()
    if isinstance(targets, dict):
        return {key: target.load() for key, target in targets.items()}
    if isinstance(targets, (list, tuple)):
        # Tuples are returned as lists as well.
        return [target.load() for target in targets]
    return targets.load()
Пример #4
0
def make_task_info_tree(
        task: TaskOnKart,
        ignore_task_names: Optional[List[str]] = None) -> TaskInfo:
    """Recursively build a TaskInfo tree rooted at `task`.

    Parameters
    ----------
    - task: TaskOnKart
        Task to describe; its required tasks become the children of the node.
    - ignore_task_names: Optional[List[str]]
        Class names of required tasks to leave out. A skipped task is not
        recursed into, so its own requirements are omitted as well.
    Returns
    -------
    TaskInfo
        Node holding the task's class name, unique id, output paths,
        significant params, processing time, completion state
        ('COMPLETE' or 'PENDING'), task log, requires info, and child nodes.
    """
    with warnings.catch_warnings():
        # Silence the "without outputs has no custom complete() method" warning
        # only for the duration of the completeness check.
        warnings.filterwarnings(
            action='ignore',
            message='Task .* without outputs has no custom complete() method')
        is_task_complete = task.complete()

    name = task.__class__.__name__
    unique_id = task.make_unique_id()
    output_paths = [t.path() for t in luigi.task.flatten(task.output())]
    params = task.get_info(only_significant=True)
    processing_time = task.get_processing_time()
    # isinstance (not an exact type comparison) so float subclasses such as
    # numpy.float64 also get the seconds suffix.
    if isinstance(processing_time, float):
        processing_time = str(processing_time) + 's'
    is_complete = 'COMPLETE' if is_task_complete else 'PENDING'
    task_log = dict(task.get_task_log())
    requires = _make_requires_info(task.requires())

    # requires() is intentionally called a second time instead of being shared
    # with the call above, in case it returns a one-shot iterable.
    children_task_infos: List[TaskInfo] = [
        make_task_info_tree(child, ignore_task_names=ignore_task_names)
        for child in luigi.task.flatten(task.requires())
        if ignore_task_names is None or child.__class__.__name__ not in ignore_task_names
    ]
    return TaskInfo(name=name,
                    unique_id=unique_id,
                    output_paths=output_paths,
                    params=params,
                    processing_time=processing_time,
                    is_complete=is_complete,
                    task_log=task_log,
                    requires=requires,
                    children_task_infos=children_task_infos)
Пример #5
0
def _get_output(task: TaskOnKart) -> Any:
    """Load and return the contents of the task's output target(s).

    A list or tuple of targets (including subclasses) yields a list of loaded
    values, a dict of targets yields a dict with the same keys, and any other
    output is treated as a single target whose `load()` result is returned.
    """
    output = task.output()
    # isinstance instead of an exact `type(...) == list` comparison, so list
    # subclasses and tuples are loaded element-wise rather than failing on a
    # missing `.load` attribute.
    if isinstance(output, (list, tuple)):
        return [x.load() for x in output]
    if isinstance(output, dict):
        return {k: x.load() for k, x in output.items()}
    return output.load()