Пример #1
0
def test_upload():
    """Calling upload on a MetaProduct calls upload once on each product."""
    first = Mock()
    second = Mock()
    meta = MetaProduct({'a': first, 'b': second})

    meta.upload()

    # every wrapped product must have been asked to upload, with no arguments
    for wrapped in (first, second):
        wrapped.upload.assert_called_once_with()
Пример #2
0
def test_unserialize_multipe_products(tmp_directory):
    """unserializer_txt maps each key of a MetaProduct to its file contents."""
    Path('a.txt').write_text('contents of a')
    Path('b.txt').write_text('contents of b')

    product = MetaProduct({'a': 'a.txt', 'b': 'b.txt'})
    loaded = unserializer_txt(product)

    expected = {'a': 'contents of a', 'b': 'contents of b'}
    assert loaded == expected
Пример #3
0
def test_delete_metadata(tmp_directory):
    """Deleting a MetaProduct's metadata removes every metadata file."""
    metadata_a = Path('.a.txt.metadata')
    metadata_b = Path('.b.txt.metadata')
    metadata_a.touch()
    metadata_b.touch()

    file_a = File('a.txt')
    file_b = File('b.txt')
    meta = MetaProduct({'a': file_a, 'b': file_b})
    meta.metadata.delete()

    assert not metadata_a.exists()
    assert not metadata_b.exists()
Пример #4
0
def test_serialize_multipe_products(tmp_directory):
    """serializer_txt writes each dict value to the matching product path."""
    contents = {'a': 'contents of a', 'b': 'contents of b'}
    product = MetaProduct({'a': 'a.txt', 'b': 'b.txt'})

    serializer_txt(contents, product)

    written_a = Path('a.txt').read_text()
    assert written_a == 'contents of a'
    written_b = Path('b.txt').read_text()
    assert written_b == 'contents of b'
Пример #5
0
    def __init__(self, product, dag, name=None, params=None):
        """
        Validate the inputs, register the task in the DAG and attach the
        product.

        ``self._source`` must already be set (and not be None) when this
        method is called.

        Parameters
        ----------
        product
            A Product instance, used as-is. Any other value is passed to
            MetaProduct to build the task's product
        dag : AbstractDAG
            DAG the task is added to (via ``dag._add_task``)
        name : str, optional
            Task name. If None, ``self._source.name`` is used
        params : optional
            Value passed to ``Params`` to build the task parameters

        Raises
        ------
        RuntimeError
            If ``self._source`` has not been set
        TypeError
            If ``self._source`` is None, if ``dag`` is not an AbstractDAG
            instance, or if the product (or any product inside the
            MetaProduct) is not an instance of PRODUCT_CLASSES_ALLOWED
        AttributeError
            If ``name`` is None and the source does not provide a name
        """
        # Validate the source before anything else: inferring the name below
        # reads self._source.name, so running these checks later would turn
        # a missing/None source into an unrelated AttributeError
        if not hasattr(self, '_source'):
            raise RuntimeError(
                'self._source must be initialized before calling '
                '__init__ in Task')

        if self._source is None:
            raise TypeError(
                '_init_source must return a source object, got None')

        self._params = Params(params)

        if name is None:
            # use name inferred from the source object
            self._name = self._source.name

            if self._name is None:
                raise AttributeError('Task name can only be None if it '
                                     'can be inferred from the source object. '
                                     'For example, when the task receives a '
                                     'pathlib.Path, when using SourceLoader '
                                     'or in PythonCallable. Pass a value '
                                     'explicitly')
        else:
            self._name = name

        if not isinstance(dag, AbstractDAG):
            raise TypeError(
                f"'dag' must be an instance of DAG, got {type(dag)!r}")

        # NOTE: we should get rid of this, maybe just add hooks that are
        # called back on the dag object to avoid having a reference here
        self.dag = dag
        dag._add_task(self)

        if isinstance(product, Product):
            self._product = product

            if self.PRODUCT_CLASSES_ALLOWED is not None:
                if not isinstance(self._product, self.PRODUCT_CLASSES_ALLOWED):
                    raise TypeError('{} only supports the following product '
                                    'classes: {}, got {}'.format(
                                        type(self).__name__,
                                        self.PRODUCT_CLASSES_ALLOWED,
                                        type(self._product).__name__))
        else:
            # if assigned a tuple/list of products, create a MetaProduct
            self._product = MetaProduct(product)

            if self.PRODUCT_CLASSES_ALLOWED is not None:
                # every product inside the MetaProduct must be allowed
                if not all(
                        isinstance(p, self.PRODUCT_CLASSES_ALLOWED)
                        for p in self._product):
                    raise TypeError('{} only supports the following product '
                                    'classes: {}, got {}'.format(
                                        type(self).__name__,
                                        self.PRODUCT_CLASSES_ALLOWED,
                                        type(self._product).__name__))

        # one logger per concrete task class, namespaced under this module
        self._logger = logging.getLogger('{}.{}'.format(
            __name__,
            type(self).__name__))

        # give the product a reference back to the task that owns it
        self.product.task = self
        self._client = None

        self.exec_status = TaskStatus.WaitingRender

        # hooks start unset
        self._on_finish = None
        self._on_failure = None
        self._on_render = None
Пример #6
0
def test_can_iterate_over_products():
    """Iterating a list-built MetaProduct yields the products in order."""
    first, second = File('1.txt'), File('2.txt')
    meta = MetaProduct([first, second])

    iterated = list(meta)

    assert iterated == [first, second]
Пример #7
0
def test_can_iterate_when_initialized_with_dictionary():
    """Iterating a dict-built MetaProduct yields the values, not the keys."""
    first, second = File('1.txt'), File('2.txt')
    meta = MetaProduct({'a': first, 'b': second})

    iterated = list(meta)

    assert iterated == [first, second]
Пример #8
0
def test_get():
    """MetaProduct.get returns the product for a key, None if missing."""
    product = File('1.txt')
    meta = MetaProduct({'a': product})

    found = meta.get('a')
    assert found is product

    missing = meta.get('b')
    assert missing is None
Пример #9
0
def test_repr(arg, expected):
    """repr() of a MetaProduct built from ``arg`` must equal ``expected``."""
    meta = MetaProduct(arg)
    representation = repr(meta)
    assert representation == expected
Пример #10
0
 def product(self):
     """Return a new MetaProduct holding the product of every task."""
     # NOTE: this allows declaring a dag as a dependency for one task,
     # maybe create a metaclass that applies to DAGs and Task
     # Built from scratch on each call since tasks might have been added
     products = [task.product for task in self.values()]
     return MetaProduct(products)
Пример #11
0
 def product(self):
     """Return a new MetaProduct holding the product of every task."""
     # Built from scratch on each call since tasks might have been added
     products = [task.product for task in self.values()]
     return MetaProduct(products)
Пример #12
0
def test_serialize_multiple_products_validates_obj(tmp_directory):
    """Serializing a list into a multi-product task raises TypeError."""
    with pytest.raises(TypeError) as excinfo:
        # the product is built inside the context manager on purpose, so any
        # TypeError raised while creating it is captured as well
        product = MetaProduct({'a': 'a.txt', 'b': 'b.txt'})
        serializer_txt(['a', 'b'], product)

    message = str(excinfo.value)
    assert 'Error serializing task: if task generates multiple' in message
Пример #13
0
def test_serialize_multiple_products_validates_obj_keys(tmp_directory, obj):
    """Serializing ``obj`` into a two-key product must raise ValueError."""
    with pytest.raises(ValueError) as excinfo:
        # the product is built inside the context manager on purpose, so any
        # ValueError raised while creating it is captured as well
        product = MetaProduct({'a': 'a.txt', 'b': 'b.txt'})
        serializer_txt(obj, product)

    message = str(excinfo.value)
    assert 'Error serializing task' in message
    assert 'with valid keys' in message
Пример #14
0
    def __init__(self, source, product, dag, name, params=None):
        """
        Every subclass must expose this same constructor so the API stays
        consistent; extra optional parameters after "params" are allowed

        Parameters
        ----------
        source: str or pathlib.Path
            Source code for the task. Tasks that do not take source code as
            input (such as PostgresCopyFrom) may receive something else. The
            source may be a template referencing any parameter in "params",
            "upstream" parameters or its own "product"; not every Task has a
            templated source (templating is mostly used by Tasks that take
            SQL source code as input)
        product: Product
            The product this task creates upon completion. Values that are
            not Product instances are wrapped in a MetaProduct
        dag: DAG
            The DAG holding this task, cannot be None
        name: str
            A name for this task
        params: dict
            Extra parameters passed to the task on rendering (if templated
            source) or during execution (if not templated source)
        """
        self._params = params or {}
        self._name = name
        self._source = self._init_source(source)

        if dag is None:
            raise TypeError('DAG cannot be None')

        self.dag = dag
        dag._add_task(self)

        if self._source is None:
            raise TypeError('_init_source must return a value, got None')

        if not isinstance(self._source, Source):
            raise TypeError('_init_source must return a subclass of Source')

        # a single Product is stored as-is, anything else (e.g. a tuple/list
        # of products) becomes a MetaProduct
        is_single = isinstance(product, Product)
        self._product = product if is_single else MetaProduct(product)

        allowed = self.PRODUCT_CLASSES_ALLOWED

        if allowed is not None:
            # check the product itself or each product in the MetaProduct
            to_check = [self._product] if is_single else self._product

            if not all(isinstance(p, allowed) for p in to_check):
                raise TypeError('{} only supports the following product '
                                'classes: {}, got {}'
                                .format(type(self).__name__,
                                        allowed,
                                        type(self._product).__name__))

        logger_name = '{}.{}'.format(__name__, type(self).__name__)
        self._logger = logging.getLogger(logger_name)

        # give the product a reference back to the task that owns it
        self.product.task = self
        self.client = None

        self._status = TaskStatus.WaitingRender
        self.build_report = None
        self._on_finish = None
        self._on_failure = None