Пример #1
0
    def candidates():
        """Yield candidate paths for the mrjob config file, in this order:
        ``$MRJOB_CONF`` (only if set), ``~/.mrjob.conf``, then
        ``/etc/mrjob.conf``."""
        env_path = os.environ.get('MRJOB_CONF')
        if env_path is not None:
            yield expand_path(env_path)

        # "~" works even on Windows, where $HOME may be unset; building the
        # path with os.path.join() avoids mixing \ and /
        home_conf = os.path.join('~', '.mrjob.conf')
        yield expand_path(home_conf)

        # a Unix-only location, so a literal path is fine here
        yield '/etc/mrjob.conf'
Пример #2
0
    def candidates():
        """Generate the locations where an mrjob config file is looked for:
        the value of ``$MRJOB_CONF`` when that variable is set, followed by
        ``~/.mrjob.conf`` and finally ``/etc/mrjob.conf``."""
        try:
            from_env = os.environ['MRJOB_CONF']
        except KeyError:
            pass
        else:
            yield expand_path(from_env)

        # rely on "~" rather than $HOME (not always set on Windows), and let
        # os.path.join() pick the separator so \ and / are never mixed
        per_user_conf = os.path.join('~', '.mrjob.conf')
        yield expand_path(per_user_conf)

        # only meaningful on Unix, hence the hard-coded separators
        yield '/etc/mrjob.conf'
Пример #3
0
Файл: job.py Проект: Yelp/mrjob
    def _upload_attr(self, attr_name):
        """Helper for :py:meth:`archives`, :py:meth:`dirs`, and
        :py:meth:`files`.

        Read the sequence of paths stored in the attribute named
        *attr_name* and return them as a list. A path that is absolute
        after :py:func:`expand_path` is kept exactly as given (unexpanded);
        any other path is joined onto the directory of the job script.

        Raises :py:class:`TypeError` if the attribute holds a single string
        rather than a sequence of paths.
        """
        raw_paths = getattr(self, attr_name)

        # a lone string would otherwise be iterated one character at a time
        if isinstance(raw_paths, string_types):
            raise TypeError('%s must be a list or other sequence.' % attr_name)

        base_dir = os.path.dirname(self.mr_job_script())

        def _resolve(path):
            # expansion is used only to decide absolute vs. relative
            if os.path.isabs(expand_path(path)):
                return path

            # people expect a relative subdir to show up as a subdir, but
            # Hadoop puts it next to the script, so warn about it
            in_subdir = os.sep in path.rstrip(os.sep)
            if in_subdir and '#' not in path:
                log.warning(
                    '%s: %s will appear in same directory as job script,'
                    ' not a subdirectory' % (attr_name, path))

            return os.path.join(base_dir, path)

        return [_resolve(path) for path in raw_paths]
Пример #4
0
def real_mrjob_conf_path(conf_path=None):
    """Resolve *conf_path* to the path of a single config file.

    ``None`` (the default) defers to :py:func:`find_mrjob_conf`, ``False``
    yields ``None``, and any other value is passed through
    :py:func:`expand_path`.
    """
    if conf_path is None:
        return find_mrjob_conf()

    if conf_path is False:
        return None

    return expand_path(conf_path)
Пример #5
0
    def _upload_attr(self, attr_name):
        """Shared implementation behind :py:meth:`archives`,
        :py:meth:`dirs`, and :py:meth:`files`.

        Returns a list built from the paths in the attribute *attr_name*:
        paths that expand to something absolute are returned untouched,
        the rest are made relative to the job script's directory.

        Raises :py:class:`TypeError` when the attribute is a string instead
        of a list (or other sequence) of paths.
        """
        configured = getattr(self, attr_name)

        # guard against a single path being passed instead of a list
        if isinstance(configured, string_types):
            raise TypeError('%s must be a list or other sequence.' % attr_name)

        job_dir = os.path.dirname(self.mr_job_script())
        resolved = []

        for entry in configured:
            if os.path.isabs(expand_path(entry)):
                # keep the original spelling; only the check uses expansion
                resolved.append(entry)
                continue

            # a relative path with a subdirectory lands beside the script
            # under Hadoop, which surprises people, so say so up front
            names_subdir = os.sep in entry.rstrip(os.sep)
            if names_subdir and '#' not in entry:
                log.warning(
                    '%s: %s will appear in same directory as job script,'
                    ' not a subdirectory' % (attr_name, entry))

            resolved.append(os.path.join(job_dir, entry))

        return resolved
Пример #6
0
    def libjars(self):
        """Optional list of paths of jar files to run our job with using
        Hadoop's ``-libjars`` option. Normally setting :py:attr:`LIBJARS`
        is sufficient.

        By default, this combines :py:attr:`LIBJARS` with the
        :option:`libjars` options from the command line, with command line
        arguments taking precedence. Paths from :py:attr:`LIBJARS` are
        interpreted as relative to the directory containing the script
        (paths from the command line are relative to the current working
        directory).

        Note that ``~`` and environment variables in paths will always be
        expanded by the job runner (see :mrjob-opt:`libjars`).

        .. versionadded:: 0.5.3
        """
        base_dir = os.path.dirname(self.mr_job_script())

        def _anchor(jar_path):
            # Expand only to test for absoluteness: $MY_DIR/some.jar may
            # well be absolute, but the stored path must stay unexpanded
            # because the path lists are expanded later on anyway.
            if os.path.isabs(expand_path(jar_path)):
                return jar_path
            return os.path.join(base_dir, jar_path)

        class_jars = [_anchor(jar_path) for jar_path in self.LIBJARS or []]

        return combine_lists(class_jars, self.options.libjars)
Пример #7
0
    def candidates():
        """Yield candidate config paths: ``~/.mrjob``, then ``mrjob.conf``
        inside each ``$PYTHONPATH`` entry (if the variable is non-empty),
        then ``/etc/mrjob.conf``."""
        # "~" is usable even on Windows, where $HOME may not be set
        yield expand_path('~/.mrjob')

        python_path = os.environ.get('PYTHONPATH')
        if python_path:
            for entry in python_path.split(os.pathsep):
                yield os.path.join(entry, 'mrjob.conf')

        yield '/etc/mrjob.conf'
Пример #8
0
    def candidates():
        """Generate the places to look for a config file, in order: the
        expanded ``~/.mrjob``, a ``mrjob.conf`` in every directory listed in
        ``$PYTHONPATH``, and ``/etc/mrjob.conf``."""
        # prefer "~" over $HOME, which Windows does not necessarily define
        yield expand_path('~/.mrjob')

        # an unset or empty $PYTHONPATH contributes no candidates
        python_path = os.environ.get('PYTHONPATH')
        dirnames = python_path.split(os.pathsep) if python_path else []
        for dirname in dirnames:
            yield os.path.join(dirname, 'mrjob.conf')

        yield '/etc/mrjob.conf'
Пример #9
0
    def candidates():
        """Yield ``(path, deprecation_warning)`` pairs for each place a
        config file may live. The warning is ``None`` for supported
        locations and a message string for deprecated ones."""
        env_path = os.environ.get('MRJOB_CONF')
        if env_path is not None:
            yield (expand_path(env_path), None)

        # "~" works on Windows even without $HOME; os.path.join() keeps us
        # from mixing \ and /
        current_conf = os.path.join('~', '.mrjob.conf')
        yield (expand_path(current_conf), None)

        # DEPRECATED locations: ~/.mrjob and mrjob.conf on $PYTHONPATH
        legacy_conf = os.path.join('~', '.mrjob')
        yield (expand_path(legacy_conf), 'use ~/.mrjob.conf instead.')

        python_path = os.environ.get('PYTHONPATH')
        if python_path:
            pythonpath_warning = (
                'Use $MRJOB_CONF to explicitly specify the path'
                ' instead.')
            for dirname in python_path.split(os.pathsep):
                yield (os.path.join(dirname, 'mrjob.conf'),
                       pythonpath_warning)

        # Unix-only location, so no os.path.join()
        yield ('/etc/mrjob.conf', None)
Пример #10
0
def combine_path_lists(*path_seqs):
    """Merge the given path sequences (via :py:func:`combine_lists`) into
    one list, resolving ``~`` (home dir) and environment variables in each
    path and expanding globs that match the local filesystem.

    A path whose glob matches nothing locally is kept in its expanded form.
    """
    combined = []

    for raw_path in combine_lists(*path_seqs):
        pattern = expand_path(raw_path)
        matches = glob.glob(pattern)

        if matches:
            combined += sorted(matches)
        else:
            # nothing matched locally; it may refer to S3 or HDFS instead
            combined.append(pattern)

    return combined
Пример #11
0
def _expanded_mrjob_conf_path(conf_path=None):
    """Return the path of a single conf file. If *conf_path* is ``False``,
    return ``None``, and if it's ``None``, return :py:func:`find_mrjob_conf`.
    Otherwise, expand environment variables and ``~`` in *conf_path* and
    return it.

    Confusingly, this function doesn't actually return a "real" path according
    to ``os.path.realpath()``; it just resolves environment variables and
    ``~``.
    """
    if conf_path is False:
        return None
    elif conf_path is None:
        return find_mrjob_conf()
    else:
        return expand_path(conf_path)
Пример #12
0
def _expanded_mrjob_conf_path(conf_path=None):
    """Return the path of a single conf file. If *conf_path* is ``False``,
    return ``None``, and if it's ``None``, return :py:func:`find_mrjob_conf`.
    Otherwise, expand environment variables and ``~`` in *conf_path* and
    return it.

    Confusingly, this function doesn't actually return a "real" path according
    to ``os.path.realpath()``; it just resolves environment variables and
    ``~``.
    """
    if conf_path is False:
        return None
    elif conf_path is None:
        return find_mrjob_conf()
    else:
        return expand_path(conf_path)
Пример #13
0
def _resolve_path(path):
    """Helper for :py:func:`parse_setup_cmd`.

    Walk *path* match by match using ``ESCAPE_RE``: text captured as
    ``escaped`` is copied through as-is, text captured as ``unescaped``
    has ``~`` (home dir) and environment variables resolved. The pieces are
    concatenated in order.

    Raises :py:class:`ValueError` if a match captured neither group.
    """
    pieces = []

    for match in ESCAPE_RE.finditer(path):
        escaped = match.group('escaped')
        if escaped:
            pieces.append(escaped)
            continue

        unescaped = match.group('unescaped')
        if not unescaped:
            raise ValueError('No escaped character')

        pieces.append(expand_path(unescaped))

    return ''.join(pieces)
Пример #14
0
def _resolve_path(path):
    """Helper for :py:func:`parse_setup_cmd`.

    Split *path* with ``_ESCAPE_RE`` and rebuild it: ``escaped`` captures
    are kept literally, while ``unescaped`` captures get ``~`` (home dir)
    and environment variables resolved.

    Raises :py:class:`ValueError` if a match has neither capture.
    """
    def _convert(match):
        # an escaped capture is emitted as-is, without expansion
        literal = match.group('escaped')
        if literal:
            return literal

        expandable = match.group('unescaped')
        if expandable:
            return expand_path(expandable)

        raise ValueError('No escaped character')

    return ''.join([_convert(m) for m in _ESCAPE_RE.finditer(path)])
Пример #15
0
def _load_opts_from_mrjob_conf(runner_alias, conf_path, already_loaded):
    """Recursive helper for :py:func:`load_opts_from_mrjob_conf`.

    *conf_path* is used as given; it is neither expanded nor defaulted.

    Returns a list of ``(path, values)`` pairs: the pairs from included
    files first, then the pair for *conf_path* itself. If no conf object
    can be read from *conf_path* the result is ``[(None, {})]``, and if the
    file's real path is already in *already_loaded* the result is ``[]``.

    *already_loaded* is a list of real paths; it is appended to in place.
    """
    conf = _conf_object_at_path(conf_path)
    if conf is None:
        return [(None, {})]

    # never load the same conf file twice
    real_conf_path = os.path.realpath(conf_path)
    if real_conf_path in already_loaded:
        return []
    already_loaded.append(real_conf_path)

    # pull out the section for our runner, tolerating a missing or
    # malformed one
    try:
        values = conf['runners'][runner_alias] or {}
    except (KeyError, TypeError, ValueError):
        values = {}

    # "include" may be absent, a single path, or a list of paths
    includes = conf.get('include', None) or []
    if isinstance(includes, string_types):
        includes = [includes]

    include_dir = os.path.dirname(real_conf_path)
    inherited = []

    # walk includes in reverse order so that include order takes
    # precedence over inheritance
    for include in reversed(includes):
        # includes are relative to the (real) including file (see #1166);
        # ~ is expanded *before* joining to that directory (see #1308)
        include_path = os.path.join(include_dir, expand_path(include))

        inherited = _load_opts_from_mrjob_conf(
            runner_alias, include_path, already_loaded) + inherited

    return inherited + [(conf_path, values)]
Пример #16
0
def _load_opts_from_mrjob_conf(runner_alias, conf_path, already_loaded):
    """Helper for :py:func:`load_opts_from_mrjob_conf` for recursive use.
    This doesn't expand or default *conf_path*.

    Produces a list of ``(path, values)`` pairs, with pairs contributed by
    included files placed before the pair for *conf_path*. Yields
    ``[(None, {})]`` when there is no conf object at *conf_path*, and
    ``[]`` when the file's real path was already recorded in
    *already_loaded* (a list that this function appends to).
    """
    conf = _conf_object_at_path(conf_path)

    if conf is None:
        return [(None, {})]

    # skip files we have already visited
    real_conf_path = os.path.realpath(conf_path)

    if real_conf_path in already_loaded:
        return []

    already_loaded.append(real_conf_path)

    # options for our runner; fall back to {} if the section is missing
    # or the conf isn't shaped the way we expect
    try:
        values = conf['runners'][runner_alias] or {}
    except (KeyError, TypeError, ValueError):
        values = {}

    includes = conf.get('include', None) or []
    if isinstance(includes, string_types):
        includes = [includes]

    # Load includes last-to-first so that include order takes precedence
    # over inheritance; each include's results are collected separately
    # and stitched back together in the original include order below.
    per_include = []
    for include in reversed(includes):
        # make include relative to (real) conf_path (see #1166)
        # expand ~ *before* joining to dir of including file (see #1308)
        include_path = os.path.join(os.path.dirname(real_conf_path),
                                    expand_path(include))

        per_include.append(_load_opts_from_mrjob_conf(
            runner_alias, include_path, already_loaded))

    inherited = []
    for loaded in reversed(per_include):
        inherited.extend(loaded)

    return inherited + [(conf_path, values)]
Пример #17
0
def combine_paths(*paths):
    """Pick one of *paths* with :py:func:`combine_values` (the last value
    that is not ``None``) and return it with ``~`` (home dir) and
    environment variables resolved."""
    chosen = combine_values(*paths)
    return expand_path(chosen)