Example #1
    def test_customize_command(self):
        job = ShellJob(name='some_job',
                       inputs=['some_input', 'some_other_input'])
        some_event = Event(attributes={'some_attr': 'some_value'})
        some_other_event = Event(attributes={
            'some_attr': 'some_other_value',
            'yet_another_attr': 'yet_another_value'})
        execution_record = ExecutionRecord(instance=123, start_time=10)
        execution_record.events = [some_event, some_other_event]
        job.history = [execution_record]
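
        # customize_command() renders %(attr)s placeholders from the event
        # attributes of the most recent execution record: unknown attributes
        # become empty strings, and values seen in multiple events are
        # comma-joined.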

        # Empty command.
        job.command = ''
        self.assertEqual('', job.customize_command())

        # Command with no attributes.
        job.command = 'some_command'
        self.assertEqual('some_command', job.customize_command())

        # Command with attributes.
        job.command = ('%(non_existent_attr)s %(some_attr)s '
                       '%(yet_another_attr)s')
        self.assertEqual(' some_value,some_other_value yet_another_value',
                         job.customize_command())

        # Command with percentage marks.
        job.command = '%% some_command'
        self.assertEqual('% some_command', job.customize_command())
Example #2
    def test_execute_long_line(self, open_mock, exists_mock, get_s3_key_mock):
        file_mock = mock.MagicMock()
        open_mock.return_value = file_mock
        file_mock.__enter__.return_value = file_mock

        s3_key_mock = mock.MagicMock()
        get_s3_key_mock.return_value = s3_key_mock
        s3_key_mock.__enter__.return_value = s3_key_mock

        job = ShellJob(name='some_job',
                       command="printf \"%s\"" % ('a' * 20000),
                       emails=['*****@*****.**'],
                       warn_timeout_sec=10,
                       abort_timeout_sec=20)
        executor = ShellJobExecutor('some_workflow', '123', 'some_job',
                                    job, self._data_builder,
                                    self._emailer)

        self.assertTrue(executor.prepare())
        self.assertTrue(executor.execute())
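
        # The 20000-character output line is flushed to the log file in a
        # 16384-byte chunk followed by the 3616-byte remainder.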

        file_mock.write.assert_has_calls(
            [mock.call('a' * 16384), mock.call('a' * 3616)])

        exists_mock.assert_called_once_with(
            '/tmp/pinball_job_logs/some_workflow/123')

        self.assertEqual(1, len(executor.job.history))
        execution_record = executor.job.history[0]
        self.assertEqual(0, execution_record.exit_code)

        self.assertEqual(2, get_s3_key_mock.call_count)
Example #3
    def test_process_log_line(self):
        job = ShellJob(name='some_job',
                       command="echo ok",
                       emails=['*****@*****.**'],
                       warn_timeout_sec=10,
                       abort_timeout_sec=20)
        executor = ShellJobExecutor('some_workflow', '123', 'some_job', job,
                                    self._data_builder, self._emailer)
        import time
        execution_record = ExecutionRecord(instance=123456,
                                           start_time=time.time())
        executor.job.history.append(execution_record)

        executor._process_log_line("PINBALL:kv_job_url=j_id1|j_url1\n")
        executor._process_log_line("PINBALL:kv_job_url=j_id2|j_url2\n")
        executor._process_log_line("PINBALL:kv_job_url=j_id2|j_url2\n")
        executor._process_log_line("PINBALL:kill_id=qubole1/123\n")
        executor._process_log_line("PINBALL:kill_id=qubole2/456\n")
        executor._process_log_line("PINBALL:kill_id=qubole1/123\n")

        erp = executor._get_last_execution_record().properties
        self.assertEqual(len(erp), 2)

        self.assertIn('kv_job_url', erp.keys())
        self.assertIsInstance(erp['kv_job_url'], list)
        self.assertEqual(len(erp['kv_job_url']), 2)
        self.assertEqual(erp['kv_job_url'], ['j_id1|j_url1', 'j_id2|j_url2'])

        self.assertIn('kill_id', erp.keys())
        self.assertIsInstance(erp['kill_id'], list)
        self.assertEqual(len(erp['kill_id']), 2)
        self.assertEqual(erp['kill_id'], ['qubole1/123', 'qubole2/456'])
Example #4
    def test_retry(self):
        job = ShellJob(name='some_job')

        # Empty history.
        self.assertFalse(job.retry())

        # History with a successful execution.
        record = ExecutionRecord(instance=123, exit_code=0)
        job.history.append(record)
        self.assertRaises(AssertionError, job.retry)
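        # Retrying after a success is a programming error, hence the
        # AssertionError above.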

        # History with too many failures.
        record = ExecutionRecord(instance=1234, exit_code=1)
        job.history.append(record)
        self.assertFalse(job.retry())

        # History without too many failures.
        job.max_attempts = 2
        self.assertTrue(job.retry())

        # History with too many failures in a different instance.
        job.history.append(record)
        record = ExecutionRecord(instance=12345, exit_code=1)
        job.history.append(record)
        self.assertTrue(job.retry())
Example #5
    def get_pinball_job(self, inputs, outputs, params=None):
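        """Materialize the ShellJob this config describes, %-formatting the
        command with params and defaulting max_attempts to 1."""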
        params = params if params else {}
        command = self._command % params
        max_attempts = (self._max_attempts if self._max_attempts is not None
                        else 1)
        return ShellJob(name=self.name, inputs=inputs, outputs=outputs,
                        emails=self._emails, max_attempts=max_attempts,
                        warn_timeout_sec=self._warn_timeout_sec,
                        abort_timeout_sec=self._abort_timeout_sec,
                        command=command)
Example #6
    def _get_child_job_token(self):
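        """Build a WAITING-state job token for a child job that consumes the
        output of parent_job."""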
        name = Name(workflow='some_workflow',
                    instance='12345',
                    job_state=Name.WAITING_STATE,
                    job='child_job')
        job = ShellJob(name=name.job,
                       inputs=['parent_job'],
                       outputs=[],
                       command='echo child',
                       emails=['*****@*****.**'])
        return Token(name=name.get_job_token_name(), data=pickle.dumps(job))
Example #7
    def _add_active_workflow_tokens(self):
        """Add some active workflow tokens.

        The job dependencies form a complete binary tree turned upside down.
        I.e., each job has two parents.
        """
        self._store = EphemeralStore()
        version = 1
        for level in range(AnalyzerTestCase._NUM_LEVELS):
            jobs_at_level = 2**(AnalyzerTestCase._NUM_LEVELS - level - 1)
            for job_index in range(jobs_at_level):
                job_name = 'job_%d_%d' % (level, job_index)
                event_name = Name(workflow='some_workflow',
                                  instance='123',
                                  job=job_name,
                                  event='some_event')
                if level == 0:
                    inputs = [
                        Name.WORKFLOW_START_INPUT,
                        Name.WORKFLOW_START_INPUT + '_prime'
                    ]
                    event_name.input = Name.WORKFLOW_START_INPUT
                else:
                    inputs = [
                        'job_%d_%d' % (level - 1, 2 * job_index),
                        'job_%d_%d' % (level - 1, 2 * job_index + 1)
                    ]
                    event_name.input = 'job_%d_%d' % (level - 1, 2 * job_index)
                if level == AnalyzerTestCase._NUM_LEVELS - 1:
                    outputs = []
                else:
                    outputs = ['job_%d_%d' % (level + 1, job_index // 2)]
                job = ShellJob(name=job_name,
                               inputs=inputs,
                               outputs=outputs,
                               command='some_command')
                job.history.append(ExecutionRecord())
                name = Name(workflow='some_workflow',
                            instance='123',
                            job_state=Name.WAITING_STATE,
                            job=job_name)
                job_token = Token(version=version,
                                  name=name.get_job_token_name(),
                                  priority=10,
                                  data=pickle.dumps(job))
                version += 1
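                # Post one triggering event token on the job's first input.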
                event = Event('some_event')
                event_token = Token(version=version,
                                    name=event_name.get_event_token_name(),
                                    priority=10,
                                    data=pickle.dumps(event))
                self._store.commit_tokens([job_token, event_token])
Example #8
    def get_pinball_job(self, inputs, outputs, params):
        assert 'job_repo_dir' in params and 'job_import_dirs_config' in params

        # Configure job_params.
        job_params = params.get('job_params', {})
        # TODO(csliu): end_date should always be passed from params
        if 'end_date' not in params:
            yesterday = str(datetime.utcnow().date() - timedelta(days=1))
            job_params['end_date'] = yesterday
        else:
            job_params['end_date'] = params['end_date']
        assert job_params, 'job_params should not be empty!'
        job_params_text = ','.join(
            ['%s=%s' % (k, v) for k, v in job_params.items()])

        # Configure the executor for the job runner (only ClusterJob needs one).
        executor_params = ''
        if self._executor:
            executor_params = '--executor=%s ' % self._executor

        # Executor config.
        executor_config = ''
        if self._executor_config:
            executor_config_text = ','.join(
                ['%s=%s' % (k, v) for k, v in self._executor_config.items()])
            executor_config = '--executor_config="%s" ' % executor_config_text

        runner_extra_params = ''
        if self.write_lock:
            runner_extra_params += '--write_lock=%s ' % self.write_lock

        # Construct job runner command line.
        job_runner_command = self.command_template % {
            'job_repo_dir': params['job_repo_dir'],
            'job_import_dirs_config': params['job_import_dirs_config'],
            'job_class_name': self._job_class_name,
            'job_params': job_params_text,
            'executor_params': executor_params,
            'executor_config': executor_config,
            'runner_extra_params': runner_extra_params
        }
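        # The %-template may use any subset of these keys, but a placeholder
        # missing from this dict would raise KeyError.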

        return ShellJob(name=self.name, inputs=inputs, outputs=outputs,
                        command=job_runner_command,
                        emails=self._emails, max_attempts=self._max_attempts,
                        warn_timeout_sec=self._warn_timeout_sec,
                        abort_timeout_sec=self._abort_timeout_sec)
Example #9
    def setUp(self):
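        # Build an executor whose job writes to both stdout and stderr.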
        self._data_builder = mock.Mock()
        self._emailer = mock.Mock()
        job = ShellJob(name='some_job',
                       command='printf "line1\\nline2\\nline3";'
                               'printf "line1\\nline2" >&2',
                       emails=['*****@*****.**'],
                       warn_timeout_sec=10,
                       abort_timeout_sec=20)
        self._executor = ShellJobExecutor('some_workflow', '123', 'some_job',
                                          job, self._data_builder,
                                          self._emailer)
        # Set PinballConfig to enable the S3 log saver.
        PinballConfig.S3_LOGS_DIR_PREFIX = 's3n://pinball/tmp/'
        PinballConfig.S3_LOGS_DIR = \
            PinballConfig.S3_LOGS_DIR_PREFIX \
            + PinballConfig.JOB_LOG_PATH_PREFIX
Example #10
def _generate_job_token(workflow, instance, job, executions, max_jobs):
    if job == 0:
        inputs = [Name.WORKFLOW_START_INPUT]
    else:
        inputs = ['job_%d' % (job - 1)]
    if job == max_jobs - 1:
        outputs = []
    else:
        outputs = ['job_%d' % (job + 1)]
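    # Jobs form a linear chain: each job consumes its predecessor's output
    # and feeds its successor.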
    shell_job = ShellJob(name='job_%d' % job,
                         inputs=inputs,
                         outputs=outputs,
                         command='some command %d' % job)
    for e in range(executions):
        start_time = 1000000 * workflow + 10000 * instance + 100 * job + e + 1
        end_time = start_time + 10 * e + 1
        DIR = '/tmp/pinball/logs'
        if not os.path.exists(DIR):
            os.makedirs(DIR)
        LOG_PATTERN = '%s/%%s.%%d.%%s' % DIR
        info_log_file = LOG_PATTERN % (job, start_time, 'info')
        with open(info_log_file, 'w') as f:
            f.write('some info log of execution %d' % e)
        error_log_file = LOG_PATTERN % (job, start_time, 'error')
        with open(error_log_file, 'w') as f:
            f.write('some error log of execution %d' % e)
        record = ExecutionRecord(info='some_command %d some_args %d' % (e, e),
                                 instance='instance_%d' % instance,
                                 start_time=start_time,
                                 end_time=end_time,
                                 exit_code=(workflow + instance + e) % 2,
                                 logs={
                                     'info': info_log_file,
                                     'error': error_log_file
                                 })
        shell_job.history.append(record)
    name = Name(workflow='workflow_%d' % workflow,
                instance='instance_%d' % instance,
                job_state=Name.WAITING_STATE,
                job='job_%d' % job)
    return Token(name=name.get_job_token_name(),
                 version=1000000 * workflow + 10000 * instance + 100 * job,
                 priority=job,
                 data=pickle.dumps(shell_job))
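
A minimal standalone sketch tying the examples together. The import path is
an assumption based on the Pinball source layout, and 'hello_job' with its
command is hypothetical; the keyword arguments mirror the examples above.

    import pickle

    from pinball.workflow.job import ShellJob  # assumed module path

    # Hypothetical one-off job, configured like the jobs in the examples.
    job = ShellJob(name='hello_job',
                   inputs=[],
                   outputs=[],
                   command='echo hello',
                   emails=[],
                   max_attempts=1,
                   warn_timeout_sec=10,
                   abort_timeout_sec=20)

    # As in the token-building examples, jobs travel pickled inside Tokens.
    payload = pickle.dumps(job)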