Example #1
    def run_mapred_job(self, key_prefix='', raise_on_error=False):
        rec = self.test_config

        # Build environment for command.
        env = None
        hadoop_command_env = rec.get('%shadoop_command_env' % key_prefix)
        if hadoop_command_env:
            env = dict(os.environ)
            env.update(hadoop_command_env)

        t0 = datetime.datetime.utcnow()

        return_code, output, errors = system_command(
            rec['%shadoop_command' % key_prefix],
            print_command=True,
            print_output=True,
            timeout=rec.get('%scommand_timeout_sec' % key_prefix),
            raise_on_error=False,
            shell=False,
            noop=rec.get('%snoop' % key_prefix, False),
            env=env)

        t1 = datetime.datetime.utcnow()
        td = t1 - t0

        rec['%sutc_begin' % key_prefix] = t0.isoformat()
        rec['%sutc_end' % key_prefix] = t1.isoformat()
        rec['%selapsed_sec' % key_prefix] = time_duration_to_seconds(td)
        rec['%serror' % key_prefix] = (return_code != 0)
        rec['%scommand_timed_out' % key_prefix] = (return_code == -1)
        rec['%sexit_code' % key_prefix] = return_code
        rec['%soutput' % key_prefix] = output
        rec['%serrors' % key_prefix] = errors
        rec['%sbytes_read_hdfs' % key_prefix] = float(
            regex_first_group('Bytes Read=(.*)',
                              errors,
                              return_on_no_match='nan',
                              search=True))
        rec['%sbytes_written_hdfs' % key_prefix] = float(
            regex_first_group('Bytes Written=(.*)',
                              errors,
                              return_on_no_match='nan',
                              search=True))
        rec['%shadoop_job_id' % key_prefix] = regex_first_group(
            'Running job: (job_[0-9_]+)', errors, search=True)

        if raise_on_error and rec['%serror' % key_prefix]:
            raise Exception('Hadoop job failed')
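These examples lean on helpers from the surrounding benchmark framework, most heavily regex_first_group(). Its real implementation is not shown on this page; the version below is only a sketch inferred from the call sites (the search, flags, return_on_no_match, and match_last arguments), not the framework's actual code.

import re

def regex_first_group(pattern, text, return_on_no_match=None, search=False,
                      flags=0, match_last=False):
    # Sketch only, inferred from the call sites above: return group(1) of the
    # first match of pattern in text (or the last match when match_last=True),
    # falling back to return_on_no_match when nothing matches.
    if match_last:
        matches = list(re.finditer(pattern, text, flags))
        m = matches[-1] if matches else None
    elif search:
        m = re.search(pattern, text, flags)
    else:
        m = re.match(pattern, text, flags)
    return m.group(1) if m else return_on_no_match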
Example #2
    def get_isilon_version_tuple(self, isi_version_output):
        def try_int(x):
            # Convert numeric version components to int; leave the rest as strings.
            try:
                return int(x)
            except ValueError:
                return x

        s = regex_first_group('.*Isilon OneFS v(.*?) ', isi_version_output)
        return tuple(try_int(d) for d in s.split('.'))
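For illustration, a minimal sketch of how this parser behaves on typical 'isi version' output. The sample string and the direct use of re below are assumptions made for the sketch; the real code goes through the framework's regex_first_group() helper.

import re

# Hypothetical sample output line; real 'isi version' output may differ.
sample = 'Isilon OneFS v8.0.0.4 B_8_0_0_011(RELEASE)'
m = re.match('.*Isilon OneFS v(.*?) ', sample)
version = tuple(int(d) if d.isdigit() else d for d in m.group(1).split('.'))
print(version)  # (8, 0, 0, 4) -- tuples compare naturally, e.g. version >= (8, 0)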
Example #3
    def run_test(self):
        config = self.test_config

        self.hadoop_authenticate()
        self.configure_environment()

        base_directory = config['base_directory'] % config
        data_size_MB = config['data_size_MB']
        config['requested_data_size_MB'] = data_size_MB

        data_directory = '%s/TestDFSIO' % base_directory

        if 'jar' not in config:
            config['jar'] = config['job_client_jar']

        generic_options = self.get_hadoop_parameters()
        generic_options.append('-Dtest.build.data=%s' % data_directory)

        options = []
        options.extend(['-nrFiles', '%s' % config['map_tasks']])
        options.extend(['-bufferSize', '%s' % config['buffer_size']])
        config['job_name'] = 'TestDFSIO,%(test)s,%(map_tasks)d,%(data_size_MB)dMB' % config
        file_size_MB = int(data_size_MB / config['map_tasks'])
        options.extend(['-size', '%dMB' % file_size_MB])
        options.append('-%s' % config['test'])

        generic_options.append('-Dmapreduce.job.name=%s' % config['job_name'])

        # TestDFSIO doesn't use the correct file system. We may need to set fs.defaultFS.
        # Note that this will break the subsequent fetch of job info.
        # viprfs://hwxecs1bucket1.ns1.Site1/benchmarks/TestDFSIO
        # default_fs = regex_first_group('(.*://.*/).*', base_directory)
        # if default_fs:
        #     generic_options.append('-Dfs.defaultFS=%s' % default_fs)
        #     original_default_fs = 'hdfs://hwxecs2-master-0.solarch.local:8020'
        #     generic_options.append('-Dmapreduce.jobhistory.done-dir=%s/mr-history/done' % original_default_fs)
        #     generic_options.append('-Dmapreduce.jobhistory.intermediate-done-dir=%s/mr-history/tmp' % original_default_fs)
        #     generic_options.append('-Dyarn.app.mapreduce.am.staging-dir=%s/user' % original_default_fs)            

        cmd = []
        cmd.extend(['hadoop', 'jar', config['jar'], 'TestDFSIO'])
        cmd.extend(generic_options)
        cmd.extend(options)
        config['hadoop_command'] = cmd

        self.run_mapred_job()

        if not config['error']:
            config['data_size_MB'] = float(regex_first_group(
                'Total MBytes processed: (.*)', config['errors'],
                return_on_no_match='nan', search=True))
            config['total_io_rate_MB_per_sec'] = config['data_size_MB'] / config['elapsed_sec']
        self.record_result()
        if config['error']:
            raise Exception('Hadoop job failed')
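For context, roughly the hadoop_command that run_test() assembles for a write test. The jar name, directory, and sizes below are hypothetical; only the option names and their ordering come from the code above.

# Hypothetical values; the real ones come from test_config and get_hadoop_parameters().
jar = 'hadoop-mapreduce-client-jobclient-tests.jar'
data_directory = '/benchmarks/TestDFSIO'
cmd = ['hadoop', 'jar', jar, 'TestDFSIO',
       '-Dtest.build.data=%s' % data_directory,
       '-Dmapreduce.job.name=TestDFSIO,write,8,8192MB',
       '-nrFiles', '8',
       '-bufferSize', '1000000',
       '-size', '1024MB',  # 8192 MB requested / 8 map tasks
       '-write']
print(' '.join(cmd))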
Example #4
    def run_mapred_job(self):
        config = self.test_config

        with self.metrics_collector_context():
            self.start_metrics()

            # Build environment for command.
            env = None
            hadoop_command_env = config.get('hadoop_command_env')
            if hadoop_command_env:
                env = dict(os.environ)
                env.update(hadoop_command_env)

            logging.info('*****************************************************************')
            logging.info(config['test_desc'])
            
            t0 = datetime.datetime.utcnow()

            exit_code, output, errors = system_command(
                config['hadoop_command'], print_command=True, print_output=True,
                raise_on_error=False, shell=False, noop=config['noop'], env=env,
                timeout=config.get('command_timeout_sec'))
        
            t1 = datetime.datetime.utcnow()
            td = t1 - t0

            config['utc_begin'] = t0.isoformat()
            config['utc_end'] = t1.isoformat()
            config['elapsed_sec'] = time_duration_to_seconds(td)
            config['error'] = (exit_code != 0)
            config['command_timed_out'] = (exit_code == -1)
            config['exit_code'] = exit_code
            config['output'] = output
            config['errors'] = errors

            config['bytes_read_hdfs'] = float(regex_first_group('Bytes Read=(.*)', errors, return_on_no_match='nan', search=True))
            config['bytes_written_hdfs'] = float(regex_first_group('Bytes Written=(.*)', errors, return_on_no_match='nan', search=True))
            config['hadoop_job_id'] = regex_first_group('Running job: (job_[0-9_]+)', errors, search=True)

            self.get_completed_job_info()
Example #5
def run_query(query_config):
    rec = query_config
    print_output = rec.get('print_output', True)
    stream_id = rec.get('stream_id', 0)

    rec['db_name'] = rec['db_name'] % rec

    if rec.get('kill_all_yarn_jobs_before_each_query', False):
        kill_all_yarn_jobs()

    rec['query_filename_contents'] = read_file_to_string(rec['query_filename'])

    shell = False
    db_type = rec['db_type']

    # Build query command.

    if db_type == 'hawq':
        cmd = []
        cmd.extend(['psql'])
        cmd.extend(['-v', 'ON_ERROR_STOP=1'])
        cmd.extend(['-d', rec['db_name']])
        cmd.extend(['-tAf', rec['query_filename']])

    elif db_type == 'hive':
        if ('hiveconf:hive.tez.java.opts' not in rec
                and 'java_opts_xmx_ratio' in rec
                and 'hiveconf:hive.tez.container.size' in rec):
            # Derive the Tez JVM heap size from the container size unless it was set explicitly.
            rec['hiveconf:hive.tez.java.opts'] = '-Xmx%dm' % (
                rec['hiveconf:hive.tez.container.size'] *
                rec['java_opts_xmx_ratio'])
        hiveconf = []
        for k, v in rec.items():
            prop = regex_first_group('^hiveconf:(.*)', k)
            if prop:
                # No shell quoting is needed: the command runs with shell=False.
                hiveconf.extend(['--hiveconf', '%s=%s' % (prop, v)])
        cmd = []
        cmd.extend(['hive'])
        cmd.extend(['--database', rec['db_name']])
        cmd.extend(['-f', rec['query_filename']])
        if 'hive_init_file' in rec:
            cmd.extend(['-i', rec['hive_init_file']])
            # Record contents of file in result.
            rec['hive_init_file_contents'] = read_file_to_string(
                rec['hive_init_file'])
        cmd.extend(hiveconf)

    elif db_type == 'impala':
        cmd = []
        cmd.extend(['impala-shell'])
        cmd.extend([
            '--impalad',
            '%s:%d' % (rec.get('impalad_host',
                               'localhost'), rec.get('impalad_port', 21000))
        ])
        cmd.extend(['--database', rec['db_name']])
        cmd.extend(['-f', rec['query_filename']])
        cmd.extend(['-B'])  # turn off pretty printing
        cmd.extend(['-o', '/dev/null'])
        if rec.get('profile_query'):
            cmd.extend(['--show_profiles'])

    else:
        raise ValueError('Unknown db_type: %s' % db_type)

    logging.info('%d: # %s' % (stream_id, ' '.join(cmd)))
    rec['query_command'] = cmd

    t0 = datetime.datetime.utcnow()

    # Run query.

    return_code, output, errors = system_command(cmd,
                                                 print_command=False,
                                                 print_output=print_output,
                                                 timeout=rec.get(
                                                     'command_timeout_sec',
                                                     None),
                                                 raise_on_error=False,
                                                 shell=shell)

    t1 = datetime.datetime.utcnow()
    td = t1 - t0

    rec['utc_begin'] = t0.isoformat()
    rec['utc_end'] = t1.isoformat()
    rec['elapsed_sec'] = time_duration_to_seconds(td)
    rec['error'] = (return_code != 0)
    rec['exit_code'] = return_code
    rec['command_timed_out'] = (return_code == -1)
    rec['output'] = output
    rec['errors'] = errors
    rec['record_type'] = 'query_result'

    # Parse query output to determine elapsed time and rows returned.

    if db_type == 'hive':
        rec['application_id'] = regex_first_group(
            '\\(Executing on YARN cluster with App id (application_.*)\\)$',
            errors,
            return_on_no_match=None,
            search=True,
            flags=re.MULTILINE)

        # Extract the actual query duration from the stderr text. Note that we must find the last occurrence of 'Time taken'.
        query_elapsed_sec = regex_first_group('Time taken: ([0-9.]+) seconds',
                                              errors,
                                              return_on_no_match='nan',
                                              search=True,
                                              flags=re.MULTILINE,
                                              match_last=True)
        if query_elapsed_sec == 'nan':
            logging.warning('Time taken not returned by command.')
            rec['error'] = True
        rec['query_elapsed_sec'] = float(query_elapsed_sec)
        rec['non_query_elapsed_sec'] = rec['elapsed_sec'] - rec[
            'query_elapsed_sec']

        # Extract row count from stderr text. Note that some queries will not report fetched rows.
        query_rows_returned = regex_first_group('Fetched: ([0-9]+) row',
                                                errors,
                                                return_on_no_match='0',
                                                search=True,
                                                flags=re.MULTILINE)
        rec['query_rows_returned'] = int(query_rows_returned)

        logging.info(
            'error=%d, query_elapsed_sec=%f, non_query_elapsed_sec=%f, query_rows_returned=%d'
            % (rec['error'], rec['query_elapsed_sec'],
               rec['non_query_elapsed_sec'], rec['query_rows_returned']))

    elif db_type == 'impala':
        # Extract actual query duration from stderr text.
        # Fetched 100 row(s) in 0.98s
        query_elapsed_sec = regex_first_group(
            'Fetched [0-9]+ row\\(s\\) in ([0-9.]+)s',
            errors,
            return_on_no_match='nan',
            search=True,
            flags=re.MULTILINE,
            match_last=True)
        if query_elapsed_sec == 'nan':
            logging.warning('Time taken not returned by command.')
            rec['error'] = True
        rec['query_elapsed_sec'] = float(query_elapsed_sec)
        rec['non_query_elapsed_sec'] = rec['elapsed_sec'] - rec[
            'query_elapsed_sec']

        # Extract row count from stderr text. Note that some queries will not report fetched rows.
        query_rows_returned = regex_first_group('Fetched ([0-9]+) row\\(s\\)',
                                                errors,
                                                return_on_no_match='0',
                                                search=True,
                                                flags=re.MULTILINE)
        rec['query_rows_returned'] = int(query_rows_returned)

        logging.info(
            'error=%d, query_elapsed_sec=%f, non_query_elapsed_sec=%f, query_rows_returned=%d'
            % (rec['error'], rec['query_elapsed_sec'],
               rec['non_query_elapsed_sec'], rec['query_rows_returned']))

    else:
        rec['query_elapsed_sec'] = rec['elapsed_sec']
        rec['non_query_elapsed_sec'] = 0.0
        rec['query_rows_returned'] = float('nan')  # row count not reported for this db_type

    # Handle errors.

    if rec['error']:
        logging.info('%d: return_code=%d' % (stream_id, return_code))
        if not print_output:
            logging.info('%d: %s' % (stream_id, output))

        if db_type == 'hive':
            # Kill YARN application
            if rec['application_id']:
                kill_yarn_job(rec['application_id'])

    if errors != '':
        if not print_output:
            logging.info('%d: %s' % (stream_id, errors))

    if not rec['error']:
        logging.info('%d: %s: %0.3f seconds' %
                     (stream_id, rec['query_filename'], rec['elapsed_sec']))

    return rec
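For context, a minimal sketch of a query_config that would exercise the Hive branch above. Only the key names are taken from run_query() itself; every value, path, and property setting below is a hypothetical example.

# All values are hypothetical; the query file must exist for the call to succeed.
query_config = {
    'db_type': 'hive',
    'db_name': 'tpcds_%(scale_factor)d',        # expanded against the record itself
    'scale_factor': 1000,
    'query_filename': 'queries/query01.sql',    # hypothetical path
    'command_timeout_sec': 3600,
    'hiveconf:hive.tez.container.size': 4096,   # passed as --hiveconf hive.tez.container.size=4096
    'java_opts_xmx_ratio': 0.8,                 # derives hive.tez.java.opts=-Xmx3276m
    'stream_id': 1,
}
result = run_query(query_config)
print(result['query_elapsed_sec'], result['query_rows_returned'])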