Example No. 1
0
def _to_target(value, from_string_kwargs=None):
    """Coerce *value* into a Target (or collection of targets).

    Accepted inputs: ``None`` (passed through), an existing ``Target``,
    a dbnd ``Task`` (mapped to its output(s)), an airflow operator that
    wraps a task, a string path, or a ``Path`` object.

    :param value: the object to convert.
    :param from_string_kwargs: extra keyword args forwarded to ``target()``
        when converting string/Path inputs.
    :raises: ``friendly_error.failed_to_convert_value_to_target`` for
        unsupported types.
    """
    if value is None:
        return value

    # Imported lazily to avoid a circular import at module load time.
    from dbnd._core.task import Task

    if isinstance(value, Target):
        # Already a target -- nothing to convert.
        return value

    if isinstance(value, Task):
        # A task maps to its single result, or to the full outputs mapping.
        if value.task_definition.single_result_output:
            return value.result
        return value.task_outputs

    op_task = _try_get_task_from_airflow_op(value)
    if op_task:
        return op_task.task_outputs

    if isinstance(value, six.string_types):
        return target(value, **(from_string_kwargs or {}))

    if isinstance(value, Path):
        return target(str(value), **(from_string_kwargs or {}))

    raise friendly_error.failed_to_convert_value_to_target(value)
Example No. 2
0
    def calc_init_value(self, value):
        """Normalize a parameter's initial value into its runtime form.

        ``None`` and ``Target`` values pass through unchanged; ``Path``
        values are wrapped as targets; everything else is parsed by this
        parameter's value type (optionally after environment-variable
        expansion for strings).
        """
        if value is None:
            # Nothing to normalize.
            # TODO: maybe we still can "normalize" the value.
            return value

        if isinstance(value, Path):
            # Filesystem path -> wrap as a target with this parameter's config.
            return target(str(value), config=self.target_config)

        if isinstance(value, Target):
            # Deferred result -> it will be loaded later, keep it as-is.
            return value

        # The value is processed regardless of parse!
        # cf_value.require_parse:
        if self.env_interpolation and isinstance(value, six.string_types):
            try:
                value = expand_env_var(value)
            except Exception as ex:
                # Best effort: keep the raw string if expansion fails.
                logger.warning(
                    "failed to expand variable '%s' : %s", safe_string(value), str(ex)
                )

        # Outputs keep their value as a target/str to convert;
        # only inputs are actually loaded at build time.
        should_load = self.load_on_build and not self.is_output()

        return self.value_type.parse_value(
            value, load_value=should_load, target_config=self.target_config
        )
Example No. 3
0
 def list_partitions(self):
     """Return a file target for every non-meta file under this directory.

     Meta files and the directory's flag file (when one is configured)
     are excluded; results follow the sorted listing order.
     """
     entries = sorted(self.fs.listdir(self.path))
     # Hoist the flag-file path; it is invariant across the loop.
     flag_path = str(self.flag_target) if self.flag_target else None

     partitions = []
     for entry in entries:
         if self.is_meta_file(entry):
             continue
         if flag_path is not None and entry == flag_path:
             continue
         partitions.append(
             target(
                 entry,
                 fs=self._fs,
                 config=self.config.as_file(),
                 properties=self.properties,
                 source=self.source,
             )
         )
     return partitions
Example No. 4
0
    def partition(self, name=NOTHING, extension=NOTHING, config=NOTHING, **kwargs):
        """
        Build a FileTarget for a single partition of this directory.

        :param name: file name of the partition. if not provided - "part-%04d" % ID
        :param extension: extension. if not provided -> default extension will be used
        :param config: target config. if not provided -> this target's config as a file
        :return: FileTarget that represents the partition.
        """
        if is_not_defined(name):
            # Auto-number partitions created without an explicit name.
            name = "part-%04d" % self._auto_partition_count
            self._auto_partition_count += 1

        if is_not_defined(config):
            # Only if it's a call not from file/folder - we set it as file.
            config = self.config.as_file()

        if is_not_defined(extension):
            extension = config.get_ext()

        if extension:
            name = name + extension

        return target(self.path, name, config=config, fs=self._fs, **kwargs)
Example No. 5
0
    def __init__(self, path, fs, config=None, io_pipe=None, source=None):
        """
        A target directory, optionally guarded by a flag file.

        When ``config.flag`` is set (``True`` means the default flag file,
        typically ``_SUCCESS``; a string overrides the name), success is
        signified by two things, Hadoop-style:

        * the path exists (just like the S3Target)
        * the flag file exists within the directory.

        Because Hadoop outputs into a directory and not a single file,
        the path is assumed to be a directory and must end with a slash.

        :param path: directory path; must end with ``/`` or ``\\``.
        :param fs: filesystem implementation used to access the path.
        :param config: target config; ``config.flag`` and
            ``config.meta_files`` are read unconditionally.
            NOTE(review): the default is ``None``, which would raise an
            AttributeError below -- callers presumably always pass a
            config; confirm.
        :param io_pipe:
        :param source:
        """
        super(DirTarget, self).__init__(
            path=path, fs=fs, config=config, io_pipe=io_pipe, source=source
        )

        if path[-1] not in ("/", "\\"):
            raise ValueError(
                "%s requires the path to be to a "
                "directory.  It must end with a slash ( / )."
                % self.__class__.__name__
            )

        flag = config.flag
        if flag is True:
            # True selects the default flag-file name; a string overrides it.
            flag = DEFAULT_FLAG_FILE_NAME

        self.flag_target = target(path, flag, fs=fs) if flag else None

        # Counter used by partition() to auto-name "part-%04d" files.
        self._auto_partition_count = 0
        self._write_target = self.partition("part-0000")

        self.meta_files = config.meta_files