def test_copy(copy):
    """The upstream object is shared with downstream tasks iff copy is off."""

    def _assign_upstream(upstream):
        _assign_upstream.obj = upstream
        return 42

    pipeline = DAG()
    root = PythonCallable(_root,
                          File('root.parquet'),
                          pipeline,
                          name='root',
                          serializer=serializer,
                          params={'input_data': {'x': [0, 0, 0]}})
    task = PythonCallable(_assign_upstream,
                          File('task.parquet'),
                          pipeline,
                          name='task',
                          unserializer=unserializer,
                          serializer=serializer)
    root >> task

    in_memory = InMemoryDAG(pipeline)
    out = in_memory.build({'root': {'x': [1]}}, copy=copy)

    # if copy is disabled, _assign_upstream must have received the very same
    # object the root task returned; if copying, it must be a different one
    shared = _assign_upstream.obj['root'] is out['root']
    assert shared is (not copy)
def test_in_memory_dag(dag):
    """Building an in-memory DAG returns each task's output by name."""
    in_memory = InMemoryDAG(dag)
    result = in_memory.build({'root': {'x': [1, 2, 3]}})

    # root forwards the input, task adds one to each element
    assert result['root']['x'].tolist() == [1, 2, 3]
    assert result['task']['x'].tolist() == [2, 3, 4]
def make_predict():
    """Build an in-memory prediction pipeline from a previously trained model.

    Re-uses the feature-engineering code from training and appends a final
    prediction task, returning the whole thing as an ``InMemoryDAG``.
    """
    # load the model generated by the training graph
    with open(Path('output', 'model.pickle'), 'rb') as f:
        model = pickle.load(f)

    prediction_dag = DAG()

    # "get" forwards whatever value is passed when calling .build(); the
    # "preprocessor" argument runs arbitrary logic (parsing, validation)
    # on that input before forwarding it
    input_data_passer(dag=prediction_dag,
                      name='get',
                      preprocessor=validate_input_data)

    # same feature-engineering code used for training
    add_features(prediction_dag)

    # final task: executes the "predict" function, extra arguments are
    # supplied via "params"
    predict_task = in_memory_callable(predict,
                                      dag=prediction_dag,
                                      name='predict',
                                      params=dict(model=model))

    # predict only after all features are joined
    prediction_dag['join'] >> predict_task

    # convert the batch-processing pipeline to an in-memory one
    return InMemoryDAG(prediction_dag)
def test_input_data_passer():
    """input_data_passer forwards its input value to downstream tasks."""
    pipeline = DAG()
    root = input_data_passer(pipeline, name='root')
    task = PythonCallable(_add_one,
                          File('task.parquet'),
                          pipeline,
                          name='task',
                          unserializer=unserializer,
                          serializer=serializer)
    root >> task

    result = InMemoryDAG(pipeline).build({'root': 1})
    assert result == {'root': 1, 'task': 2}
def test_in_memory_callable():
    """in_memory_callable executes a function with upstream + params."""
    pipeline = DAG()

    def add_some(upstream, to_add):
        return upstream['root'] + to_add

    root = input_data_passer(pipeline, name='root')
    task = in_memory_callable(add_some,
                              pipeline,
                              name='task',
                              params=dict(to_add=2))
    root >> task

    result = InMemoryDAG(pipeline).build({'root': 1})
    assert result == {'root': 1, 'task': 3}
def test_error_if_a_task_returns_none():
    """Building fails with a descriptive error when a callable returns None."""
    pipeline = DAG()
    PythonCallable(_return_none,
                   File('root.parquet'),
                   pipeline,
                   name='root',
                   params={'input_data': None},
                   serializer=serializer)
    in_memory = InMemoryDAG(pipeline)

    with pytest.raises(ValueError) as excinfo:
        in_memory.build({'root': None})

    expected = ('All callables in a InMemoryDAG must return a value. '
                'Callable "_return_none", from task "root" returned None')
    assert str(excinfo.value) == expected
def __init__(self): dag = self.init_dag_from_partial(self.get_partial()) # TODO: add support for manually specifying upstream dependencies upstream = { name: dag[name].source.extract_upstream() for name in dag._iter() } # names of all tasks used as upstream upstream_tasks = chain(*upstream.values()) # find tasks that are declared as upstream but do not exist in the dag missing = set(upstream_tasks) - set(dag) for name in missing: input_data_passer(dag, name=name) # TODO: maybe delete all upstream dependencies and set them again # (raise a warning if there are some upstream dependencies?) # this doesn't happen when we get a yaml file because we control # that using extract_upstream=False but might happen if we receive # a DAG object already # the dag is complete now, set all upstream dependencies for name in dag._iter(): for dependency in upstream.get(name, []): dag[name].set_upstream(dag[dependency]) # get all terminal nodes and make them a dependency of the node terminal_current = [ name for name, degree in dag._G.out_degree() if not degree ] # TODO: extract upstream and make sure they match with the ones in # terminal_current terminal = in_memory_callable(self.terminal_task, dag, name='terminal', params=self.terminal_params()) for dependency in terminal_current: terminal.set_upstream(dag[dependency]) self.in_memory = InMemoryDAG(dag)
def test_error_if_non_compatible_tasks():
    """InMemoryDAG rejects DAGs containing non-PythonCallable tasks."""
    pipeline = DAG()
    ShellScript('touch {{product}}', File('file.txt'), pipeline, name='task')

    with pytest.raises(TypeError) as excinfo:
        InMemoryDAG(pipeline)

    expected = ('All tasks in the DAG must be PythonCallable, '
                'got unallowed types: ShellScript')
    assert str(excinfo.value) == expected
class OnlineDAG(abc.ABC):
    """
    Execute partial DAGs in-memory. This is an abstract class; to use it,
    create a subclass and provide the required static methods. See here for
    a complete example:
    https://github.com/ploomber/projects/blob/master/ml-online/src/ml_online/infer.py
    """

    # FIXME: add a way to customize
    def __init__(self):
        """Assemble a complete in-memory DAG from the partial spec.

        Adds input-passer tasks for missing upstream dependencies and a
        final ``terminal_task`` fed by every terminal node.
        """
        dag = self.init_dag_from_partial(self.get_partial())

        # TODO: add support for manually specifying upstream dependencies
        # task name -> upstream names declared in each task's source
        upstream = {
            name: dag[name].source.extract_upstream()
            for name in dag._iter()
        }

        # names of all tasks used as upstream
        upstream_tasks = chain(*upstream.values())

        # find tasks that are declared as upstream but do not exist in the
        # dag; each becomes an input passer so callers supply its value
        missing = set(upstream_tasks) - set(dag)

        for name in missing:
            input_data_passer(dag, name=name)

        # TODO: maybe delete all upstream dependencies and set them again
        # (raise a warning if there are some upstream dependencies?)
        # this doesn't happen when we get a yaml file because we control
        # that using extract_upstream=False but might happen if we receive
        # a DAG object already

        # the dag is complete now, set all upstream dependencies
        for name in dag._iter():
            for dependency in upstream.get(name, []):
                dag[name].set_upstream(dag[dependency])

        # get all terminal nodes (out-degree 0) and make them a dependency
        # of the terminal task
        terminal_current = [
            name for name, degree in dag._G.out_degree() if not degree
        ]

        # TODO: extract upstream and make sure they match with the ones in
        # terminal_current
        terminal = in_memory_callable(self.terminal_task,
                                      dag,
                                      name='terminal',
                                      params=self.terminal_params())

        for dependency in terminal_current:
            terminal.set_upstream(dag[dependency])

        self.in_memory = InMemoryDAG(dag)

    @classmethod
    def init_dag_from_partial(cls, partial):
        """Initialize the partial returned by get_partial()

        Parameters
        ----------
        partial : str, pathlib.Path or ploomber.DAG
            Path to a partial spec (YAML) or an already-initialized DAG

        Returns
        -------
        ploomber.DAG
            The initialized DAG

        Raises
        ------
        TypeError
            If ``partial`` is not a str, pathlib.Path or DAG
        """
        if isinstance(partial, (str, Path)):
            with open(partial) as f:
                tasks = yaml.safe_load(f)

            # cannot extract upstream because this is an incomplete DAG
            meta = {'extract_product': False, 'extract_upstream': False}

            spec = DAGSpec(
                {
                    'tasks': tasks,
                    'meta': meta
                },
                parent_path=Path(partial).parent,
            )

            return spec.to_dag()
        elif isinstance(partial, DAG):
            return partial
        else:
            raise TypeError(f'Expected {cls.__name__}.get_partial() to '
                            'return a str, pathlib.Path or ploomber.DAG, '
                            f'got {type(partial).__name__}')

    def predict(self, **kwargs):
        """
        Run the DAG

        Parameters
        ----------
        **kwargs
            One parameter per root task (task with no upstream
            dependencies) in the partial DAG.

        Returns
        -------
        A dictionary with {task_name: returned_value}
        """
        return self.in_memory.build(kwargs)

    # NOTE: abc.abstractstaticmethod is deprecated since Python 3.3; stack
    # @staticmethod over @abc.abstractmethod instead (same semantics)
    @staticmethod
    @abc.abstractmethod
    def get_partial():
        """
        Must return the location of a partial dag (str or pathlib.Path)
        """
        pass

    @staticmethod
    @abc.abstractmethod
    def terminal_task(upstream, model):
        """
        Last function to execute. The ``upstream`` parameter contains the
        output of all tasks that have no downstream dependencies
        """
        pass

    @staticmethod
    @abc.abstractmethod
    def terminal_params():
        """
        Must return a dictionary with parameters passed to ``terminal_task``
        """
        pass
def test_error_input_data(input_data, dag):
    """Invalid input data makes build() raise KeyError."""
    in_memory = InMemoryDAG(dag)

    with pytest.raises(KeyError):
        in_memory.build(input_data)