示例#1
0
    def _interpret_history_log(self, log_interpretation):
        """Locate the job's history log and interpret it.

        The result is cached in ``log_interpretation['history']``; if that
        key is already present, it is returned as-is. When the step
        interpretation carries no job ID, a warning is logged and an empty
        dict is returned (nothing is stored in that case).
        """
        # already interpreted; reuse the stored result
        if 'history' in log_interpretation:
            return log_interpretation['history']

        # the job ID comes from the output of the hadoop command
        step = log_interpretation.get('step') or {}
        job_id = step.get('job_id')

        if not job_id:
            log.warning("Can't fetch history without job ID")
            return {}

        def existing_log_dirs():
            # yield each candidate dir (as a one-item list) that exists
            candidates = self._hadoop_log_dirs(
                output_dir=step.get('output_dir'))

            for candidate in unique(candidates):
                if not self.fs.exists(candidate):
                    continue

                log.info('Looking for history log in %s' % candidate)
                yield [candidate]

        def logged_matches():
            # same as _ls_history_logs(), but logs each match;
            # there should be at most one history log
            found = _ls_history_logs(
                self.fs, existing_log_dirs(), job_id=job_id)

            for entry in found:
                log.info('Found history log: %s' % entry['path'])
                yield entry

        log_interpretation['history'] = _interpret_history_log(
            self.fs, logged_matches())

        return log_interpretation['history']
示例#2
0
    def _interpret_history_log(self, step_info):
        """Locate the job's history log and interpret it.

        The result is cached in ``step_info['history']``; if that key is
        already present, it is returned as-is. When *step_info* has no
        job ID, a warning is logged and ``None`` is returned (nothing is
        stored in that case).
        """
        # already interpreted; reuse the stored result
        if 'history' in step_info:
            return step_info['history']

        job_id = step_info.get('job_id')

        if not job_id:
            log.warning("Can't fetch history without job ID")
            return None

        def existing_log_dirs():
            # yield each candidate dir (as a one-item list) that exists
            candidates = self._hadoop_log_dirs(
                output_dir=step_info.get('output_dir'))

            for candidate in unique(candidates):
                if not self.fs.exists(candidate):
                    continue

                log.info('Looking for history log in %s' % candidate)
                yield [candidate]

        def logged_matches():
            # same as _ls_history_logs(), but logs each match;
            # there should be at most one history log
            found = _ls_history_logs(
                self.fs, existing_log_dirs(), job_id=job_id)

            for entry in found:
                log.info('Found history log: %s' % entry['path'])
                yield entry

        step_info['history'] = _interpret_history_log(
            self.fs, logged_matches())

        return step_info['history']
示例#3
0
文件: hadoop.py 项目: imtiaz39/mrjob
    def _interpret_history_log(self, log_interpretation):
        """Interpret the history log for the job described by
        *log_interpretation*, caching the result under its ``'history'`` key.

        Returns the cached value if ``'history'`` is already set. If the
        ``'step'`` entry has no job ID, logs a warning and returns ``{}``
        without storing anything.
        """
        if 'history' in log_interpretation:
            # nothing to do; interpretation was stored by an earlier call
            return log_interpretation['history']

        # get job ID from output of hadoop command
        step = log_interpretation.get('step') or {}
        job_id = step.get('job_id')

        if not job_id:
            log.warning("Can't fetch history without job ID")
            return {}

        def dirs_to_search():
            # only directories that exist on self.fs are yielded,
            # each wrapped in its own single-element list
            all_dirs = unique(
                self._hadoop_log_dirs(output_dir=step.get('output_dir')))

            for path in all_dirs:
                if not self.fs.exists(path):
                    continue

                log.info('Looking for history log in %s' % path)
                yield [path]

        def history_logs_with_logging():
            # wraps _ls_history_logs() to log every match it produces
            # (there should be at most one history log)
            matches = _ls_history_logs(
                self.fs, dirs_to_search(), job_id=job_id)

            for match in matches:
                log.info('Found history log: %s' % match['path'])
                yield match

        log_interpretation['history'] = _interpret_history_log(
            self.fs, history_logs_with_logging())

        return log_interpretation['history']
示例#4
0
文件: hadoop.py 项目: BeeswaxIO/mrjob
    def _interpret_history_log(self, step_info):
        """Interpret the history log for the job described by *step_info*,
        caching the result under its ``'history'`` key.

        Returns the cached value if ``'history'`` is already set. If
        *step_info* has no job ID, logs a warning and returns ``None``
        without storing anything.
        """
        if 'history' in step_info:
            # nothing to do; interpretation was stored by an earlier call
            return step_info['history']

        job_id = step_info.get('job_id')

        if not job_id:
            log.warning("Can't fetch history without job ID")
            return None

        def dirs_to_search():
            # only directories that exist on self.fs are yielded,
            # each wrapped in its own single-element list
            all_dirs = unique(
                self._hadoop_log_dirs(
                    output_dir=step_info.get('output_dir')))

            for path in all_dirs:
                if not self.fs.exists(path):
                    continue

                log.info('Looking for history log in %s' % path)
                yield [path]

        def history_logs_with_logging():
            # wraps _ls_history_logs() to log every match it produces
            # (there should be at most one history log)
            matches = _ls_history_logs(
                self.fs, dirs_to_search(), job_id=job_id)

            for match in matches:
                log.info('Found history log: %s' % match['path'])
                yield match

        step_info['history'] = _interpret_history_log(
            self.fs, history_logs_with_logging())

        return step_info['history']
示例#5
0
文件: mixin.py 项目: Dean838/mrjob
    def _interpret_history_log(self, log_interpretation):
        """Interpret the history log and store the result in
        ``log_interpretation['history']``.

        Does nothing if ``'history'`` is already present. If the ``'step'``
        entry has no job ID, logs a warning and stores nothing. Always
        returns ``None``.
        """
        if 'history' not in log_interpretation:
            step = log_interpretation.get('step') or {}
            job_id = step.get('job_id')

            if job_id:
                output_dir = step.get('output_dir')

                # read self.fs before listing logs, then interpret them
                fs = self.fs
                history_logs = self._ls_history_logs(
                    job_id=job_id, output_dir=output_dir)

                log_interpretation['history'] = _interpret_history_log(
                    fs, history_logs)
            else:
                log.warning("Can't fetch history log; missing job ID")
示例#6
0
文件: mixin.py 项目: mtai/mrjob
    def _interpret_history_log(self, log_interpretation):
        """Populate ``log_interpretation['history']`` from the history log.

        Returns ``None`` in every case. The dict is left untouched when it
        already has a ``'history'`` key, or when no job ID can be read from
        its ``'step'`` entry (a warning is logged in the latter case).
        """
        already_interpreted = 'history' in log_interpretation
        if already_interpreted:
            return

        step_data = log_interpretation.get('step') or {}
        job_id = step_data.get('job_id')

        if not job_id:
            log.warning("Can't fetch history log; missing job ID")
            return

        lookup = dict(job_id=job_id, output_dir=step_data.get('output_dir'))

        # read self.fs before listing logs, then interpret them
        fs = self.fs
        log_interpretation['history'] = _interpret_history_log(
            fs, self._ls_history_logs(**lookup))
示例#7
0
 def interpret_history_log(self, matches):
     """Call _interpret_history_log() on *matches*.

     ``self.mock_fs`` is supplied as the filesystem argument, since the
     fs doesn't matter here.
     """
     fs = self.mock_fs
     return _interpret_history_log(fs, matches)
示例#8
0
 def interpret_history_log(self, matches):
     """Return the interpretation of *matches*.

     Convenience wrapper for _interpret_history_log(): the fs doesn't
     matter, so ``self.mock_fs`` is always passed for it.
     """
     interpretation = _interpret_history_log(self.mock_fs, matches)
     return interpretation