Exemplo n.º 1
0
def test_env_dict_initialized_with_replaced_env_dict():
    """Values replaced via flattened keys must survive re-wrapping."""
    initial = EnvDict({'a': {'b': 1}})
    replaced = initial._replace_flatten_keys({'env__a__b': 2})
    rebuilt = EnvDict(replaced)

    # the rebuilt object must carry over the replaced nested value
    assert rebuilt['a']['b'] == 2
Exemplo n.º 2
0
    def decorator(fn):
        # Decorates fn so it runs with an initialized environment passed as
        # its first argument; `source` comes from the enclosing scope
        # (presumably the @with_env argument — confirm against caller).
        _validate_and_modify_signature(fn)

        try:
            # FIXME: we should deprecate initializing from a decorator
            # with a dictionary, it isn't useful. leaving it for now
            if isinstance(source, Mapping):
                env_dict = EnvDict(source)
            else:
                # when the decorator is called without args, look for
                # 'env.yaml'
                env_dict = EnvDict.find(source or 'env.yaml')
        except Exception as e:
            # chain the original exception so the user sees the decorator
            # context along with the root cause
            raise RuntimeError('Failed to resolve environment using '
                               '@with_env decorator in function "{}". '
                               'Tried to call Env with argument: {}'.format(
                                   _get_function_name_w_module(fn),
                                   source)) from e

        # expose the resolved environment on the function for introspection
        fn._env_dict = env_dict

        @wraps(fn)
        def wrapper(*args, **kwargs):
            # kwargs prefixed with 'env__' are environment overrides using
            # flattened keys (e.g. env__a__b targets env['a']['b'])
            to_replace = {
                k: v
                for k, v in kwargs.items() if k.startswith('env__')
            }

            # remove the overrides so they are not forwarded to fn
            for key in to_replace.keys():
                kwargs.pop(key)

            env_dict_new = env_dict._replace_flatten_keys(to_replace)

            try:
                Env._init_from_decorator(env_dict_new,
                                         _get_function_name_w_module(fn))
            except Exception as e:
                # report the currently-active environment to help debug
                # "already started" style failures
                current = Env.load()
                raise RuntimeError('Failed to initialize environment using '
                                   '@with_env decorator in function "{}". '
                                   'Current environment: {}'.format(
                                       _get_function_name_w_module(fn),
                                       repr(current))) from e

            Env._ref = _get_function_name_w_module(fn)

            try:
                # the decorated function receives the active Env first
                res = fn(Env.load(), *args, **kwargs)
            except Exception as e:
                # always tear down the environment, even when fn fails
                Env.end()
                raise e

            Env.end()

            return res

        return wrapper
Exemplo n.º 3
0
def _process_file_dir_or_glob(parser, dagspec_arg=None):
    """
    Process a file entry point file, directory or glob-like pattern,
    the initialized dag and parsed args

    Parameters
    ----------
    parser : CustomParser
        CLI arg parser

    dagspec_arg : str, optional
        Entry point value; when None, it is read from the CLI arguments

    Returns
    -------
    tuple
        (dag, args): the initialized DAG and the parsed CLI namespace
    """
    # NOTE: we must use parser.parse_entry_point_value() instead of
    # args.parse_args because calling the latter won't allow us to add more
    # cli parameters, but we want that to expose params from env
    entry_point_value = dagspec_arg or parser.parse_entry_point_value()
    entry = EntryPoint(entry_point_value)

    if entry.type in {EntryPoint.Directory, EntryPoint.Pattern}:
        # pipelines initialized from directories or patterns cannot be
        # parametrized
        path_to_env = None
    # file
    else:
        path_to_env = default.path_to_env_from_spec(entry_point_value)

    if path_to_env:
        # expose env keys as cli flags so the user can override values
        env_dict = EnvDict(path_to_env,
                           path_to_here=Path(entry_point_value).parent
                           if entry.type == EntryPoint.File else None)
        _add_cli_args_from_env_dict_keys(parser, env_dict)

    args = parser.parse_args()
    dagspec_arg = dagspec_arg or args.entry_point

    # configure logging if the user passed a --log level
    if hasattr(args, 'log'):
        if args.log is not None:
            logging.basicConfig(level=args.log.upper())

    entry_point = EntryPoint(dagspec_arg)

    # directory
    if entry_point.type == EntryPoint.Directory:
        dag = DAGSpec.from_directory(dagspec_arg).to_dag()
    # pattern
    elif entry_point.type == EntryPoint.Pattern:
        dag = DAGSpec.from_files(dagspec_arg).to_dag()
    # file
    else:
        if path_to_env:
            # and replace keys depending on passed cli args
            replaced = _env_keys_to_override(args, parser.static_args)
            env = env_dict._replace_flatten_keys(replaced)
            dag = DAGSpec(dagspec_arg, env=env).to_dag()
        else:
            dag = DAGSpec(dagspec_arg).to_dag()

    return dag, args
Exemplo n.º 4
0
def test_replace_value_casts_if_possible():
    """String replacements must be cast to the existing value's type."""
    env_dict = EnvDict({'a': False, 'b': 1, 'c': 1.1})

    # each raw string should be coerced to bool/int/float respectively
    for raw, key in (('True', 'a'), ('2', 'b'), ('2.2', 'c')):
        env_dict._replace_value(raw, [key])

    assert env_dict.a is True
    assert env_dict.b == 2
    assert env_dict.c == 2.2
Exemplo n.º 5
0
def test_env_dict_initialized_with_env_dict(data, keys):
    """An EnvDict built from another EnvDict must mirror the source."""
    source = EnvDict(data)
    copy = EnvDict(source)

    # the copy must be representable
    assert repr(copy)
    assert str(copy)

    # default keys travel with the copy
    assert source._default_keys == copy._default_keys

    # nested lookup still resolves to the stored value
    current = copy
    for k in keys:
        current = current[k]
    assert current == 1
Exemplo n.º 6
0
    def __init__(self, source='env.yaml'):
        """Start the environment

        Parameters
        ----------
        source: dict, pathlib.Path, str, optional
            If dict, loads it directly, if pathlib.Path or path, reads the file
            (assumes yaml format).

        Raises
        ------
        FileNotFoundError
            If source is None and an environment file cannot be found
            automatically
        RuntimeError
            If one environment has already started

        Returns
        -------
        ploomber.Env
            An environment object
        """
        if not isinstance(source, EnvDict):
            # try to initialize an EnvDict to perform validation, if any
            # errors occur, discard object
            try:
                source = EnvDict(source)
            except Exception:
                # reset the singleton reference so a later attempt can
                # start from a clean state
                Env.__instance = None
                raise

        # store the validated environment and clear the decorator reference
        self._data = source
        self._fn_name = None
Exemplo n.º 7
0
def test_dagspec_initialization_from_yaml_and_env(tmp_nbs, monkeypatch):
    """
    The CLI must hand DAGSpec the *path* to the spec rather than a loaded
    dictionary, since the two initialization modes behave slightly
    differently.
    """
    spec_mock = Mock(wraps=parsers.DAGSpec)
    path_to_env_mock = Mock(wraps=parsers.default.path_to_env_from_spec)
    env_dict_mock = Mock(wraps=parsers.EnvDict)

    monkeypatch.setattr(sys, 'argv', ['python'])
    monkeypatch.setattr(parsers, 'DAGSpec', spec_mock)
    monkeypatch.setattr(parsers.default, 'path_to_env_from_spec',
                        path_to_env_mock)
    monkeypatch.setattr(parsers, 'EnvDict', env_dict_mock)

    parser = CustomParser()

    with parser:
        pass

    dag, args = _custom_command(parser)

    # DAGSpec received the spec path, not a dictionary
    spec_mock.assert_called_once_with('pipeline.yaml',
                                      env=EnvDict({'sample': False},
                                                  path_to_here='.'))

    # EnvDict was built from the resolved env.yaml path
    env_dict_mock.assert_called_once_with(str(Path('env.yaml').resolve()),
                                          path_to_here=Path('.'))
Exemplo n.º 8
0
def test_default_with_root(monkeypatch):
    """The 'root' default key comes from project-root discovery."""
    # patch root discovery so no real filesystem lookup happens
    root_mock = Mock(return_value='some_value')
    monkeypatch.setattr(default, 'find_root_recursively', root_mock)

    assert EnvDict(dict()).root == 'some_value'
Exemplo n.º 9
0
def test_default(monkeypatch):
    """'user' and 'cwd' default keys reflect the current session."""
    # fake the current user and working directory
    monkeypatch.setattr(getpass, 'getuser', Mock(return_value='User'))
    monkeypatch.setattr(os, 'getcwd', Mock(return_value='/some_path'))

    env_dict = EnvDict(dict())

    assert env_dict.user == 'User'
    assert env_dict.cwd == str(Path('/some_path').resolve())
Exemplo n.º 10
0
def test_attribute_error_message():
    """Missing names raise with helpful messages on both access paths."""
    env = EnvDict({'user': '******', 'cwd': 'cwd', 'root': 'root'})

    # missing attribute -> AttributeError
    with pytest.raises(AttributeError) as excinfo_attr:
        env.aa

    # missing key -> KeyError
    with pytest.raises(KeyError) as excinfo_key:
        env['aa']

    # note: 'atttribute' matches the implementation's (typo'd) message
    expected_attr = f"{env!r} object has no atttribute 'aa'"
    expected_key = f'"{env!r} object has no key \'aa\'"'
    assert str(excinfo_attr.value) == expected_attr
    assert str(excinfo_key.value) == expected_key
Exemplo n.º 11
0
def test_error_when_loaded_obj_is_not_dict(content, type_, tmp_directory):
    """Loading a YAML whose top-level object is not a mapping must fail."""
    Path(tmp_directory, 'file.yaml').write_text(content)

    with pytest.raises(ValueError) as excinfo:
        EnvDict('file.yaml')

    expected = ("Expected object loaded from 'file.yaml' to be "
                "a dict but got '{}' instead, "
                "verify the content").format(type_)
    assert expected == str(excinfo.value)
Exemplo n.º 12
0
def test_find(tmp_directory):
    """EnvDict.find must locate env.yaml by walking up from the cwd."""
    # layout: some/dir (future cwd) and some/env.yaml (file to discover)
    nested = Path('some', 'dir')
    nested.mkdir(parents=True)
    Path('some', 'env.yaml').write_text('key: value')
    # resolve before chdir so the expectation is anchored correctly
    here_expected = str(Path('some').resolve())

    os.chdir(nested)

    found = EnvDict.find('env.yaml')

    # cwd is where find() ran; here is where the file lives
    assert found.cwd == str(Path('.').resolve())
    assert found.here == here_expected
Exemplo n.º 13
0
def test_serialize_env_dict():
    """Pickle round-trip must not trigger the __getattr__ recursion bug."""
    # EnvDict customizes __getattr__ to expose dict values as attributes.
    # During unpickling, Python probes for __getstate__ before instance
    # attributes exist; with a naive __getattr__ that falls back to
    # __getitem__ (which reads self.preprocessed), this caused an infinite
    # recursive lookup. The round-trip below proves special-method lookups
    # are guarded. Env/EnvDict aren't expected to be serialized, but the
    # edge case is fixed regardless.
    env = EnvDict({'a': 1})
    assert pickle.loads(pickle.dumps(env))
Exemplo n.º 14
0
def test_expand_raw_dict_nested():
    """Placeholders must expand inside nested dicts and lists."""
    mapping = EnvDict({'key': 'value'})
    raw = {
        'section': {
            'some_settting': '{{key}}'
        },
        'list': ['{{key}}', '{{key}}']
    }

    expected = {
        'section': {
            'some_settting': 'value'
        },
        'list': ['value', 'value']
    }
    assert expand_raw_dictionary(raw, mapping) == expected
Exemplo n.º 15
0
def test_expand_raw_dictionaries_and_extract_tags():
    """Expanding a list of dicts also reports which placeholders were used."""
    mapping = EnvDict({'key': 'value'})
    dicts = [{'some_setting': '{{key}}'}, {'another_setting': '{{key}}'}]

    expanded, tags = expand_raw_dictionaries_and_extract_tags(dicts, mapping)

    # order preserved; result comes back as a tuple of dicts
    assert expanded == ({'some_setting': 'value'},
                        {'another_setting': 'value'})
    # every placeholder used is reported as a tag
    assert tags == {'key'}
Exemplo n.º 16
0
def test_adds_default_keys_if_they_dont_exist(monkeypatch):
    """EnvDict injects cwd/here/user/root/now when absent from the data."""
    # fake user, cwd and project root so defaults are deterministic
    monkeypatch.setattr(getpass, 'getuser', Mock(return_value='User'))
    monkeypatch.setattr(os, 'getcwd', Mock(return_value='/some_path'))
    root_mock = Mock(return_value='some_value')
    monkeypatch.setattr(default, 'find_root_recursively', root_mock)
    monkeypatch.setattr(expand.default, 'find_root_recursively', root_mock)

    env = EnvDict({'a': 1}, path_to_here='/dir')

    assert env.default_keys == {'cwd', 'here', 'user', 'root', 'now'}
    assert env.user == 'User'
    assert env.root == 'some_value'
    assert env.cwd == str(Path('/some_path').resolve())
    assert env.here == str(Path('/dir').resolve())
Exemplo n.º 17
0
def test_dagspec_initialization_from_yaml_and_env(tmp_nbs, monkeypatch):
    """
    The CLI must hand DAGSpec the *path* to the spec rather than a loaded
    dictionary, since the two initialization modes behave slightly
    differently.
    """
    spec_mock = Mock(wraps=parsers.DAGSpec)
    path_to_env_mock = Mock(wraps=parsers.default.path_to_env_from_spec)
    env_dict_mock = Mock(wraps=parsers.EnvDict)

    monkeypatch.setattr(sys, 'argv', ['python'])
    monkeypatch.setattr(parsers, 'DAGSpec', spec_mock)
    monkeypatch.setattr(parsers.default, 'path_to_env_from_spec',
                        path_to_env_mock)
    monkeypatch.setattr(parsers, 'EnvDict', env_dict_mock)

    # freeze the clock so the 'now' default key stays stable
    datetime_mock = Mock()
    datetime_mock.datetime.now().isoformat.return_value = 'current-timestamp'
    monkeypatch.setattr(expand, "datetime", datetime_mock)

    parser = CustomParser()

    with parser:
        pass

    dag, args = parser.load_from_entry_point_arg()

    # DAGSpec received the spec path, not a dictionary
    spec_mock.assert_called_once_with('pipeline.yaml',
                                      env=EnvDict({'sample': False},
                                                  path_to_here='.'))

    # EnvDict was built from the resolved env.yaml path
    env_dict_mock.assert_called_once_with(str(Path('env.yaml').resolve()),
                                          path_to_here=Path('.'))
Exemplo n.º 18
0
def test_replace_nested_flatten_key_env_dict():
    """Replacing a nested flattened key returns an updated copy."""
    original = EnvDict({'a': {'b': 1}})
    replaced = original._replace_flatten_key(2, 'env__a__b')

    assert replaced.a.b == 2
    assert replaced is not original  # must return a copy
Exemplo n.º 19
0
def test_error_when_flatten_key_doesnt_exist():
    """Replacing a key absent from the env must fail loudly."""
    env = EnvDict({'a': 1})

    with pytest.raises(KeyError):
        env._replace_flatten_key(2, 'env__b')
Exemplo n.º 20
0
def test_default_keys(kwargs, expected):
    """default_keys depends on how the EnvDict is constructed."""
    env = EnvDict(**kwargs)
    assert env.default_keys == expected
Exemplo n.º 21
0
def test_default_with_here_absolute(tmp_directory):
    """An absolute path_to_here is kept as-is (already resolved)."""
    absolute = str(Path(tmp_directory, 'dir').resolve())

    assert EnvDict(dict(), path_to_here=absolute).here == absolute
Exemplo n.º 22
0
def test_replace_flatten_key_env_dict():
    """Replacing a top-level flattened key returns an updated copy."""
    original = EnvDict({'a': 1})
    replaced = original._replace_flatten_key(2, 'env__a')

    assert replaced.a == 2
    assert replaced is not original  # must return a copy
Exemplo n.º 23
0
def test_default_with_here_relative(tmp_directory):
    """A relative path_to_here resolves against the working directory."""
    Path('dir').mkdir()

    env = EnvDict(dict(), path_to_here='dir')

    assert env.here == str(Path(tmp_directory, 'dir').resolve())
Exemplo n.º 24
0
def test_expand_raw_dictionary():
    """A single {{placeholder}} expands to its mapped value."""
    mapping = EnvDict({'key': 'value'})
    raw = {'some_setting': '{{key}}'}

    expanded = expand_raw_dictionary(raw, mapping)

    assert expanded == {'some_setting': 'value'}
Exemplo n.º 25
0
def test_replace_nested_flatten_keys_env_dict():
    """Replacing several flattened keys updates all of them in a copy."""
    original = EnvDict({'a': {'b': 1, 'c': 1}})
    replaced = original._replace_flatten_keys({'env__a__b': 2, 'env__a__c': 2})

    assert replaced.a.b == 2
    assert replaced.a.c == 2
    assert replaced is not original  # must return a copy
Exemplo n.º 26
0
    def _init(self, data, env, lazy_import, reload, parent_path,
              look_up_project_root_recursively):
        """Load, validate and expand the spec.

        Parameters
        ----------
        data : str, pathlib.Path or dict/list
            Path to a YAML spec, or the already-loaded spec content
        env : dict or None
            Environment used to expand {{placeholders}} in the spec
        lazy_import : bool
            Forwarded to TaskSpec; stored on the instance
        reload : bool
            Forwarded to TaskSpec
        parent_path : str or None
            Base path for relative paths; must be None when `data` is a
            path (it is derived from the file's location instead)
        look_up_project_root_recursively : bool
            Whether to search upwards for the project root
        """
        self._lazy_import = lazy_import

        # initialized with a path to a yaml file...
        if isinstance(data, (str, Path)):
            # TODO: test this
            if parent_path is not None:
                raise ValueError('parent_path must be None when '
                                 f'initializing {type(self).__name__} with '
                                 'a path to a YAML spec')
            # resolve the parent path to make sources and products unambiguous
            # even if the current working directory changes
            self._path = Path(data).resolve()
            self._parent_path = str(self._path.parent)

            if not Path(data).is_file():
                raise FileNotFoundError(
                    'Error initializing DAGSpec with argument '
                    f'{data!r}: Expected it to be a path to a YAML file, but '
                    'such file does not exist')

            content = Path(data).read_text()

            try:
                data = yaml.safe_load(content)
            except (yaml.parser.ParserError,
                    yaml.constructor.ConstructorError) as e:
                # defer raising so we can give a placeholder-specific hint
                error = e
            else:
                error = None

            if error:
                # a common cause of parse errors is an unquoted
                # {{placeholder}}, which YAML reads as a nested mapping
                if '{{' in content or '}}' in content:
                    raise DAGSpecInitializationError(
                        'Failed to initialize spec. It looks like '
                        'you\'re using placeholders (i.e. {{placeholder}}). '
                        'Make sure values are enclosed in parentheses '
                        '(e.g. key: "{{placeholder}}"). Original '
                        'parser error:\n\n'
                        f'{error}')
                else:
                    raise error

        # initialized with a dictionary...
        else:
            self._path = None
            # FIXME: add test cases, some of those features wont work if
            # _parent_path is None. We should make sure that we either raise
            # an error if _parent_path is needed or use the current working
            # directory if it's appropriate - this is mostly to make relative
            # paths consistent: they should be relative to the file that
            # contains them
            self._parent_path = (None if not parent_path else str(
                Path(parent_path).resolve()))

        self.data = data

        # a top-level list is shorthand for {'tasks': [...]}
        if isinstance(self.data, list):
            self.data = {'tasks': self.data}

        # validate keys defined at the top (nested keys are not validated here)
        self._validate_top_keys(self.data, self._path)

        logger.debug('DAGSpec enviroment:\n%s', pp.pformat(env))

        env = env or dict()
        # defaults come from an env.yaml located next to the spec (if any)
        path_to_defaults = default.path_to_env_from_spec(
            path_to_spec=self._path)

        if path_to_defaults:
            defaults = yaml.safe_load(Path(path_to_defaults).read_text())
            self.env = EnvDict(env,
                               path_to_here=self._parent_path,
                               defaults=defaults)
        else:
            self.env = EnvDict(env, path_to_here=self._parent_path)

        # expand {{placeholders}} and record which tags the spec used
        self.data, tags = expand_raw_dictionary_and_extract_tags(
            self.data, self.env)

        # NOTE(review): this logs the *pre-expansion* `data` variable, not
        # self.data — looks like it should log self.data; confirm intent
        logger.debug('Expanded DAGSpec:\n%s', pp.pformat(data))

        # if there is a "location" top key, we don't have to do anything else
        # as we will just load the dotted path when .to_dag() is called
        if 'location' not in self.data:

            Meta.initialize_inplace(self.data)

            import_tasks_from = self.data['meta']['import_tasks_from']

            if import_tasks_from is not None:
                # when using a relative path in "import_tasks_from", we must
                # make it absolute...
                if not Path(import_tasks_from).is_absolute():
                    # use _parent_path if there is one
                    if self._parent_path:
                        self.data['meta']['import_tasks_from'] = str(
                            Path(self._parent_path, import_tasks_from))
                    # otherwise just make it absolute
                    else:
                        self.data['meta']['import_tasks_from'] = str(
                            Path(import_tasks_from).resolve())

                imported = yaml.safe_load(
                    Path(self.data['meta']['import_tasks_from']).read_text())

                # imported tasks also go through placeholder expansion
                if self.env is not None:
                    (imported,
                     tags_other) = expand_raw_dictionaries_and_extract_tags(
                         imported, self.env)
                    tags = tags | tags_other

                # relative paths here are relative to the file where they
                # are declared
                base_path = Path(self.data['meta']['import_tasks_from']).parent

                for task in imported:
                    add_base_path_to_source_if_relative(task,
                                                        base_path=base_path)

                self.data['tasks'].extend(imported)

            # check if there are any params declared in env, not used in
            # in the pipeline
            extra = set(self.env) - self.env.default_keys - tags

            if extra:
                warnings.warn('The following placeholders are declared in the '
                              'environment but '
                              f'unused in the spec: {extra}')

            self.data['tasks'] = [
                normalize_task(task) for task in self.data['tasks']
            ]

            # NOTE: for simple projects, project root is the parent folder
            # of pipeline.yaml, for package projects is the parent folder
            # of setup.py
            if look_up_project_root_recursively:
                project_root = (
                    None if not self._parent_path else
                    default.find_root_recursively(
                        starting_dir=self._parent_path,
                        filename=None if not self._path else self._path.name))
            else:
                project_root = self._parent_path

            # make sure the folder where the pipeline is located is in sys.path
            # otherwise dynamic imports needed by TaskSpec will fail
            with add_to_sys_path(self._parent_path, chdir=False):
                self.data['tasks'] = [
                    TaskSpec(t,
                             self.data['meta'],
                             project_root=project_root,
                             lazy_import=lazy_import,
                             reload=reload) for t in self.data['tasks']
                ]
        else:
            self.data['meta'] = Meta.empty()
Exemplo n.º 27
0
def test_add_cli_args_from_env_dict_keys():
    """Each env key becomes a cli flag (plus argparse's built-in help)."""
    parser = ArgumentParser()

    _add_cli_args_from_env_dict_keys(parser, EnvDict({'a': 1}))

    registered = {action.dest for action in parser._actions}
    assert registered == {'env__a', 'help'}