def candidates():
    """Yield the places an mrjob config file may live, in lookup order.

    The order is: the path named by ``$MRJOB_CONF`` (only when that
    variable is set), then ``.mrjob.conf`` in the user's home directory,
    then the system-wide ``/etc/mrjob.conf``.
    """
    try:
        env_conf = os.environ['MRJOB_CONF']
    except KeyError:
        pass
    else:
        yield expand_path(env_conf)

    # rely on "~" rather than $HOME, which isn't necessarily set on
    # Windows; os.path.join() keeps us from mixing "\" and "/"
    home_conf = os.path.join('~', '.mrjob.conf')
    yield expand_path(home_conf)

    # only really meaningful on Unix, so a literal "/" path is fine here
    yield '/etc/mrjob.conf'
def _upload_attr(self, attr_name):
    """Return the paths stored in attribute *attr_name*, with relative
    paths joined to the directory containing the job script.

    Helper for :py:meth:`archives`, :py:meth:`dirs`, and
    :py:meth:`files`.

    Paths that are absolute once expanded with ``expand_path()`` are
    returned exactly as given (unexpanded). Everything else is joined
    onto the script's directory.

    Raises :py:class:`TypeError` if the attribute is a single string
    rather than a sequence of paths.
    """
    raw_paths = getattr(self, attr_name)

    # a lone string almost certainly means "one path", not "a sequence of
    # one-character paths", so refuse it outright
    if isinstance(raw_paths, string_types):
        raise TypeError('%s must be a list or other sequence.' % attr_name)

    base_dir = os.path.dirname(self.mr_job_script())

    def _anchor(path):
        # absolute (after expansion): pass through untouched
        if os.path.isabs(expand_path(path)):
            return path

        # relative subdirs are confusing: people expect them to land in a
        # subdirectory, but Hadoop puts them next to the script. Paths
        # containing "#" are exempt from the warning.
        has_subdir = os.sep in path.rstrip(os.sep)
        if has_subdir and '#' not in path:
            log.warning(
                '%s: %s will appear in same directory as job script,'
                ' not a subdirectory' % (attr_name, path))

        return os.path.join(base_dir, path)

    return [_anchor(path) for path in raw_paths]
def real_mrjob_conf_path(conf_path=None):
    """Work out which single conf file path to use.

    * ``False`` means "no conf file": return ``None``.
    * ``None`` means "use the default": return whatever
      ``find_mrjob_conf()`` returns.
    * anything else is passed through ``expand_path()`` and returned.
    """
    if conf_path is None:
        return find_mrjob_conf()

    if conf_path is False:
        return None

    return expand_path(conf_path)
def libjars(self):
    """Optional list of paths of jar files to run our job with, using
    Hadoop's ``-libjars`` option. Normally setting :py:attr:`LIBJARS` is
    sufficient.

    By default, this combines :option:`libjars` options from the command
    line with :py:attr:`LIBJARS`, with command line arguments taking
    precedence.

    Paths from :py:attr:`LIBJARS` are interpreted as relative to the
    directory containing the script (paths from the command line are
    relative to the current working directory).

    Note that ``~`` and environment variables in paths will always be
    expanded by the job runner (see :mrjob-opt:`libjars`).

    .. versionadded:: 0.5.3
    """
    base_dir = os.path.dirname(self.mr_job_script())
    declared = self.LIBJARS or []

    # These paths are later merged with combine_path_lists, which expands
    # environment variables. We expand only to *test* for absoluteness
    # (something like $MY_DIR/some.jar may well start with "/"), and keep
    # the unexpanded text in the result so nothing is expanded early.
    script_relative = [
        path if os.path.isabs(expand_path(path))
        else os.path.join(base_dir, path)
        for path in declared
    ]

    return combine_lists(script_relative, self.options.libjars)
def candidates():
    """Yield the places an mrjob config file may live, in lookup order.

    The order is: ``~/.mrjob``, then ``mrjob.conf`` inside each directory
    listed in ``$PYTHONPATH`` (skipped when that variable is unset or
    empty), then ``/etc/mrjob.conf``.
    """
    # rely on "~" rather than $HOME, which isn't necessarily set on Windows
    yield expand_path('~/.mrjob')

    python_path = os.environ.get('PYTHONPATH')
    if python_path:
        for entry in python_path.split(os.pathsep):
            yield os.path.join(entry, 'mrjob.conf')

    yield '/etc/mrjob.conf'
def candidates():
    """Yield ``(path, deprecation_warning)`` pairs for possible conf files.

    *deprecation_warning* is ``None`` for supported locations and a
    message string for locations that are deprecated. Pairs come out in
    lookup order: ``$MRJOB_CONF`` (if set), ``~/.mrjob.conf``,
    ``~/.mrjob`` (deprecated), ``mrjob.conf`` in each ``$PYTHONPATH``
    directory (deprecated), and finally ``/etc/mrjob.conf``.
    """
    env_conf = os.environ.get('MRJOB_CONF')
    if env_conf is not None:
        yield (expand_path(env_conf), None)

    # rely on "~" rather than $HOME, which isn't necessarily set on
    # Windows; os.path.join() keeps us from mixing "\" and "/"
    home_conf = os.path.join('~', '.mrjob.conf')
    yield (expand_path(home_conf), None)

    # DEPRECATED: the older name for the per-user file
    legacy_conf = os.path.join('~', '.mrjob')
    yield (expand_path(legacy_conf), 'use ~/.mrjob.conf instead.')

    # DEPRECATED: looking for mrjob.conf along $PYTHONPATH
    python_path = os.environ.get('PYTHONPATH')
    if python_path:
        pythonpath_warning = ('Use $MRJOB_CONF to explicitly specify the path'
                              ' instead.')
        for entry in python_path.split(os.pathsep):
            yield (os.path.join(entry, 'mrjob.conf'), pythonpath_warning)

    # only really meaningful on Unix, so a literal "/" path is fine here
    yield ('/etc/mrjob.conf', None)
def combine_path_lists(*path_seqs):
    """Merge the given sequences of paths into one flat list.

    The sequences are first merged with ``combine_lists()``. Each
    resulting path is then run through ``expand_path()`` and treated as a
    glob against the local filesystem: matches are added in sorted order,
    and a pattern with no matches is kept as its expanded text.
    """
    resolved = []

    for raw_path in combine_lists(*path_seqs):
        pattern = expand_path(raw_path)
        matches = glob.glob(pattern)

        if matches:
            resolved.extend(sorted(matches))
        else:
            # nothing matched locally; leave as-is (it may refer to S3
            # or HDFS)
            resolved.append(pattern)

    return resolved
def _expanded_mrjob_conf_path(conf_path=None):
    """Return the path of a single conf file.

    * If *conf_path* is ``False``, return ``None``.
    * If *conf_path* is ``None``, return :py:func:`find_mrjob_conf`.
    * Otherwise, return *conf_path* after passing it through
      ``expand_path()``.

    Despite what one might expect, the result is not a "real" path in the
    ``os.path.realpath()`` sense; no symlinks are resolved here.
    """
    if conf_path is None:
        return find_mrjob_conf()

    if conf_path is False:
        return None

    return expand_path(conf_path)
def _resolve_path(path):
    """Helper for :py:func:`parse_setup_cmd`.

    Walk *path* match by match using ``ESCAPE_RE``. Text captured by the
    ``escaped`` group is copied through literally (this is how
    backslashes get unescaped); text captured by the ``unescaped`` group
    is run through ``expand_path()``. The pieces are concatenated in
    order.

    Raises :py:class:`ValueError` if a match captured neither group.
    """
    pieces = []

    for match in ESCAPE_RE.finditer(path):
        literal = match.group('escaped')
        if literal:
            pieces.append(literal)
            continue

        expandable = match.group('unescaped')
        if not expandable:
            raise ValueError('No escaped character')

        pieces.append(expand_path(expandable))

    return ''.join(pieces)
def _resolve_path(path):
    """Helper for :py:func:`parse_setup_cmd`.

    Split *path* into chunks with ``_ESCAPE_RE``. Chunks from the
    ``escaped`` group are kept verbatim (unescaping backslashes), chunks
    from the ``unescaped`` group go through ``expand_path()``, and the
    results are glued back together in order.

    Raises :py:class:`ValueError` if a match captured neither group.
    """
    def _chunks():
        for match in _ESCAPE_RE.finditer(path):
            if match.group('escaped'):
                yield match.group('escaped')
            elif match.group('unescaped'):
                yield expand_path(match.group('unescaped'))
            else:
                raise ValueError('No escaped character')

    return ''.join(_chunks())
def _load_opts_from_mrjob_conf(runner_alias, conf_path, already_loaded):
    """Helper for :py:func:`load_opts_from_mrjob_conf` for recursive use.

    *conf_path* is used as given; it is neither expanded nor defaulted.

    Returns a list of ``(path, values)`` pairs: the pairs from included
    conf files first, followed by this file's own pair. If no conf object
    can be loaded from *conf_path*, returns ``[(None, {})]``. If the file
    was already visited, returns ``[]``.

    *already_loaded* is a list of real paths visited so far; this file's
    real path is appended to it.
    """
    conf = _conf_object_at_path(conf_path)
    if conf is None:
        return [(None, {})]

    # never load the same conf file twice (compare by real path)
    real_conf_path = os.path.realpath(conf_path)
    if real_conf_path in already_loaded:
        return []
    already_loaded.append(real_conf_path)

    # pull out the section for our runner; a missing or malformed section
    # just means "no options"
    try:
        values = conf['runners'][runner_alias] or {}
    except (KeyError, TypeError, ValueError):
        values = {}

    from_includes = []

    includes = conf.get('include', None)
    if includes:
        # a single include may be given as a bare string
        if isinstance(includes, string_types):
            includes = [includes]

        # includes are relative to the (real) path of the including file
        # (see #1166)
        conf_dir = os.path.dirname(real_conf_path)

        # walk includes in reverse so that include order takes precedence
        # over inheritance
        for include in reversed(includes):
            # expand ~ *before* joining to the including file's directory
            # (see #1308)
            include_path = os.path.join(conf_dir, expand_path(include))
            loaded = _load_opts_from_mrjob_conf(
                runner_alias, include_path, already_loaded)
            from_includes = loaded + from_includes

    return from_includes + [(conf_path, values)]
def _load_opts_from_mrjob_conf(runner_alias, conf_path, already_loaded):
    """Helper for :py:func:`load_opts_from_mrjob_conf` for recursive use.

    Does not expand or default *conf_path*.

    The result is a list of ``(path, values)`` pairs, with pairs gathered
    from ``include``-d files placed before the pair for *conf_path*
    itself. ``[(None, {})]`` is returned when no conf object is found at
    *conf_path*, and ``[]`` when its real path is already in
    *already_loaded* (which this function appends to).
    """
    conf = _conf_object_at_path(conf_path)
    if conf is None:
        return [(None, {})]

    # don't load the same conf file twice
    real_conf_path = os.path.realpath(conf_path)
    if real_conf_path in already_loaded:
        return []
    already_loaded.append(real_conf_path)

    # options for our runner, or nothing if the section is absent/broken
    try:
        values = conf['runners'][runner_alias] or {}
    except (KeyError, TypeError, ValueError):
        values = {}

    own_opts = [(conf_path, values)]

    if not conf.get('include', None):
        return [] + own_opts

    includes = conf['include']
    if isinstance(includes, string_types):
        includes = [includes]

    inherited = []

    # reverse order, so that include order wins over inheritance
    for include in reversed(includes):
        # resolve relative to the (real) including file (see #1166), but
        # expand ~ *before* joining (see #1308)
        target = os.path.join(os.path.dirname(real_conf_path),
                              expand_path(include))
        inherited = _load_opts_from_mrjob_conf(
            runner_alias, target, already_loaded) + inherited

    return inherited + own_opts
def combine_paths(*paths):
    """Pick the last value in *paths* that is not ``None`` and return it
    with ``~`` (home dir) and environment variables resolved.

    Selection is delegated to ``combine_values()`` and resolution to
    ``expand_path()``.
    """
    chosen = combine_values(*paths)
    return expand_path(chosen)