Exemplo n.º 1
0
    def test_status_command_with_sched(self):
        """Run the status command, with and without '-j', against a test
        whose state is forced to 'SCHEDULED'."""

        raw_cfg = {
            'scheduler': 'raw',
            'run': {
                'env': {
                    'foo': 'bar',
                },
                'cmds': ['sleep 1'],
            },
        }
        cfg = file_format.TestConfigLoader().validate(raw_cfg)
        cfg['name'] = 'testytest'

        test = self._quick_test(cfg, build=False, finalize=False)
        test.build()

        sched = schedulers.get_plugin(test.scheduler)
        sched.schedule_test(self.pav_cfg, test)

        status_cmd = commands.get_command('status')
        status_cmd.outfile = io.StringIO()

        # Same check twice: once with no flags, once with '-j'.
        for flags in ([], ['-j']):
            parser = argparse.ArgumentParser()
            status_cmd._setup_arguments(parser)
            args = parser.parse_args(flags + [str(test.id)])
            # Put the test back in the SCHEDULED state right before asking
            # for its status.
            test.status.set(status_file.STATES.SCHEDULED, "faker")
            self.assertEqual(status_cmd.run(self.pav_cfg, args), 0)
Exemplo n.º 2
0
    def test_node_range(self):
        """Make sure node ranges work properly.

        For each node range a test is scheduled through slurm, polled until
        it reaches the COMPLETE state (or ``self.TEST_TIMEOUT`` seconds pass),
        and its results are then checked for a passing result.
        """

        slurm = schedulers.get_plugin('slurm')

        cfg = self._quick_test_cfg()
        cfg['scheduler'] = 'slurm'

        for num_nodes in '1-10000000', '1-all':
            # We're testing that everything works when we ask for a max number
            # of nodes and don't get them all.
            cfg['slurm']['num_nodes'] = num_nodes

            test = self._quick_test(cfg=cfg, name='slurm_test')
            test.build()

            slurm.schedule_test(self.pav_cfg, test)
            timeout = time.time() + self.TEST_TIMEOUT

            while time.time() < timeout:
                status = slurm.job_status(self.pav_cfg, test)
                if status.state == STATES.COMPLETE:
                    break
                time.sleep(.5)
            else:
                # We timed out.
                slurm.cancel_job(test)
                self.fail("Test {} at {} did not complete within {} secs with "
                          "num_nodes of {}.".format(test.id, test.path,
                                                    self.TEST_TIMEOUT,
                                                    num_nodes))

            # Check the results of every node range, not just the last one
            # that happened to run.
            results = test.load_results()
            self.assertEqual(results['result'], result_parsers.PASS)
Exemplo n.º 3
0
    def is_done(self):
        """Report whether every test in the set has finished.

        Returns True once all tests have a 'RUN_COMPLETE' file; at that point
        ``self.done`` is set and ``self.all_pass`` records whether every test
        had a 'PASS' result. Returns False as soon as a test is missing or
        unfinished.
        """

        # A finished set stays finished; skip the checks.
        if self.done:
            return True

        every_test_passed = True

        for _name, test_run in self.series_obj.tests.items():
            # The test object may not exist (yet).
            if test_run is None:
                return False

            # Ask the scheduler about the job; the returned status is not
            # used here.
            if test_run.job_id:
                plugin = schedulers.get_plugin(test_run.scheduler)
                plugin.job_status(self.pav_cfg, test_run)

            # No RUN_COMPLETE file means the test is still going.
            complete_marker = test_run.path/'RUN_COMPLETE'
            if not complete_marker.exists():
                return False

            # A missing result counts the same as a non-passing one.
            try:
                outcome_is_pass = test_run.results['result'] == 'PASS'
            except KeyError:
                outcome_is_pass = False

            if not outcome_is_pass:
                every_test_passed = False

        self.all_pass = every_test_passed
        self.done = True

        return True
Exemplo n.º 4
0
    def test_job_status(self):
        """Check the slurm job status reported for jobs in several slurm
        job states."""

        cfg = self._quick_test_cfg()
        cfg['scheduler'] = 'slurm'
        test = self._quick_test(cfg, name='slurm_job_status', finalize=False)

        slurm = schedulers.get_plugin('slurm')

        def status_with_stolen_job(job_filter):
            """Reset the test to SCHEDULED, borrow the id of a job matching
            the filter, and return what slurm says about it."""
            test.status.set(STATES.SCHEDULED, "not really though.")
            test.job_id = self._get_job(job_filter)
            return slurm.job_status(self.pav_cfg, test)

        # A running job: still SCHEDULED, with the slurm state in the note.
        running = status_with_stolen_job('JobState=RUNNING')
        self.assertEqual(running.state, STATES.SCHEDULED)
        self.assertIn('RUNNING', running.note)

        # A cancelled job: both the returned status and the test's own
        # current status must show the cancellation.
        cancelled = status_with_stolen_job('JobState=CANCELLED')
        self.assertEqual(cancelled.state, STATES.SCHED_CANCELLED)
        current = test.status.current()
        self.assertEqual(current.state, STATES.SCHED_CANCELLED)

        # Any other slurm state: the test is simply considered to still be
        # scheduled.
        completed = status_with_stolen_job('JobState=COMPLETED')
        self.assertEqual(completed.state, STATES.SCHEDULED)
        self.assertIn('COMPLETED', completed.note)
Exemplo n.º 5
0
    def test_status_history(self):
        """Test that the status command works with the '--history' flag.

        Each line of output after the first four must mention the state of
        the matching entry in the test's status history.
        """

        status_cmd = commands.get_command('status')
        out = io.StringIO()
        status_cmd.outfile = out

        parser = argparse.ArgumentParser()
        status_cmd._setup_arguments(parser)

        test = self._quick_test()
        raw = schedulers.get_plugin('raw')
        raw.schedule_test(self.pav_cfg, test)

        # Give the test up to five seconds to finish running.
        end = time.time() + 5
        while test.check_run_complete() is None and time.time() < end:
            time.sleep(.1)

        args = parser.parse_args(['--history', str(test.id)])
        self.assertEqual(status_cmd.run(self.pav_cfg, args), 0)

        out.seek(0)
        # The first four lines are skipped (presumably the table heading --
        # confirm against the status command's output format).
        output = out.readlines()[4:]
        statuses = test.status.history()
        self.assertEqual(len(output), len(statuses))

        # The lengths match, so the lines and statuses pair up one to one.
        for line, status in zip(output, statuses):
            self.assertIn(status.state, line)
Exemplo n.º 6
0
    def test_cancel_job(self):
        """Start a long-sleeping test under the raw scheduler and cancel it
        while it is running."""

        # 'sleep 100' keeps the test alive long enough to be cancelled.
        cfg = self._quick_test_cfg()
        cfg['run']['cmds'] = ['sleep 100']

        test = self._quick_test(cfg=cfg)
        test.build()

        raw = schedulers.get_plugin('raw')
        raw.schedule_test(self.pav_cfg, test)

        # Poll for up to a second until the job leaves the SCHEDULED state.
        deadline = time.time() + 1
        while True:
            if raw.job_status(self.pav_cfg, test).state != STATES.SCHEDULED:
                break
            if not time.time() < deadline:
                break
            time.sleep(.1)

        # By now the test should be running.
        current = test.status.current()
        self.assertEqual(current.state, STATES.RUNNING)

        # The job id must split into exactly two '_' separated parts.
        _, pid = test.job_id.split('_')

        cancel_status = raw.cancel_job(test)
        self.assertEqual(cancel_status.state, STATES.SCHED_CANCELLED)
Exemplo n.º 7
0
def status_from_test_obj(pav_cfg, test_obj):
    """Build the list of status dictionaries expected by the print_status
    function from a test object or a list of test objects.

    Tests whose current state is SCHEDULED have their status taken from
    their scheduler plugin instead of the status file.

:param dict pav_cfg: Pavilion base configuration.
:param Union[TestRun,list[TestRun]] test_obj: Pavilion test object(s).
:return: List of dictionary objects containing the test ID, name,
         state, time of state update, and note associated with that state,
         sorted by test ID.
:rtype: list(dict)
    """
    tests = test_obj if isinstance(test_obj, list) else [test_obj]

    def describe(test):
        """Return the status dictionary for a single test."""
        status = test.status.current()

        if status.state == STATES.SCHEDULED:
            plugin = schedulers.get_plugin(test.scheduler)
            status = plugin.job_status(pav_cfg, test)

        return {
            'test_id': test.id,
            'name': test.name,
            'state': status.state,
            'time': status.when,
            'note': status.note,
        }

    return sorted((describe(test) for test in tests),
                  key=lambda entry: entry['test_id'])
Exemplo n.º 8
0
    def test_log_arguments(self):
        """Check the 'log' command for the run, build and kickoff logs of a
        simple test."""

        log_cmd = commands.get_command('log')

        parser = argparse.ArgumentParser()
        log_cmd._setup_arguments(parser)

        # run a simple test
        test = self._quick_test(finalize=False)
        raw = schedulers.get_plugin('raw')

        raw.schedule_test(self.pav_cfg, test)

        # Wait up to a second for the test to reach an end state. The state
        # has to be re-read on every pass; otherwise the loop could never
        # finish early.
        end = time.time() + 1
        state = test.status.current().state
        while ('ERROR' not in state and 'FAIL' not in state
               and state != STATES.COMPLETE and time.time() < end):
            time.sleep(.1)
            state = test.status.current().state

        # test `pav log run test`
        args = parser.parse_args(['run', str(test.id)])
        self.assertEqual(args.test, test.id)

        out = io.StringIO()
        err = io.StringIO()

        def reset_streams():
            """Empty both capture buffers. The position is rewound first,
            since truncating alone leaves it where it was."""
            for stream in (out, err):
                stream.seek(0)
                stream.truncate()

        log_cmd.outfile = out
        log_cmd.errfile = err

        try:
            result = log_cmd.run(self.pav_cfg, args)
            self.assertEqual(err.getvalue(), '')
            self.assertEqual(out.getvalue(), 'Hello World.\n')
            self.assertEqual(result, 0)

            # test `pav log build test`
            # note: echo-ing hello world should not require anything to be
            # built
            reset_streams()
            args = parser.parse_args(['build', str(test.id)])
            log_cmd.run(self.pav_cfg, args)
            self.assertEqual(out.getvalue(), '')

            # test `pav log kickoff test`
            # note: in general, kickoff.log should be an empty file
            reset_streams()
            args = parser.parse_args(['kickoff', str(test.id)])
            result = log_cmd.run(self.pav_cfg, args)
            self.assertEqual(out.getvalue(), '')
            self.assertEqual(err.getvalue(), '')
            self.assertEqual(result, 0)
        finally:
            # Always hand the command back its normal streams, even when an
            # assertion above fails.
            log_cmd.outfile = sys.stdout
            log_cmd.errfile = sys.stderr
Exemplo n.º 9
0
    def _cancel_all(tests_by_sched):
        """Cancel every given test through its own scheduler plugin.

        :param dict tests_by_sched: Lists of tests, keyed by scheduler name.
        """
        for plugin_name, sched_tests in tests_by_sched.items():
            plugin = schedulers.get_plugin(plugin_name)

            for test_run in sched_tests:
                plugin.cancel_job(test_run)
Exemplo n.º 10
0
    def cancel_series(self):
        """Cancel every test in the series that has no 'RUN_COMPLETE' file
        yet, then mark it as complete."""

        for _test_id, test_run in self.tests.items():
            complete_marker = test_run.path/'RUN_COMPLETE'
            if complete_marker.exists():
                # Already finished; nothing to cancel.
                continue

            plugin = schedulers.get_plugin(test_run.scheduler)
            plugin.cancel_job(test_run)
            test_run.status.set(STATES.COMPLETE, "Killed by SIGTERM.")
            test_run.set_run_complete()
Exemplo n.º 11
0
    def test_sched_vars(self):
        """Make sure every raw scheduler variable converts to a non-zero
        integer."""

        raw_plugin = schedulers.get_plugin('raw')

        sched_vars = raw_plugin.get_vars(self._quick_test())

        for _name, var_value in sched_vars.items():
            as_int = int(var_value)
            self.assertNotEqual(as_int, 0)
Exemplo n.º 12
0
    def test_sched_vars(self):
        """Make sure every raw scheduler variable can be looked up without
        raising an error."""

        test = self._quick_test()

        plugin = schedulers.get_plugin('raw')

        sched_vars = plugin.get_vars(test.config['raw'])

        # Only the lookup matters; the value itself is discarded.
        for var_name in sched_vars.keys():
            sched_vars[var_name]
Exemplo n.º 13
0
    def _get_sched(test):
        """Look up the scheduler plugin named by the test.

        If the lookup raises, the test's status is set to BUILD_ERROR and
        the exception is re-raised.

        :param TestRun test: The test.
        """

        try:
            sched_plugin = schedulers.get_plugin(test.scheduler)
        except Exception:
            # Record the failure on the test before passing the error on.
            failure_note = ("Unknown error getting the scheduler. Refer to "
                            "the kickoff log.")
            test.status.set(STATES.BUILD_ERROR, failure_note)
            raise
        else:
            return sched_plugin
Exemplo n.º 14
0
    def test_kickoff_env(self):
        """Make sure the 'env_setup' lines from the pavilion config appear in
        the kickoff script, and that the script ends with the 'pav _run'
        line."""

        pav_cfg = self.pav_cfg
        pav_cfg['env_setup'] = ['test1', 'test2', 'test3']

        test = self._quick_test({'name': 'sched-vars', 'scheduler': 'dummy'})

        dummy_plugin = schedulers.get_plugin('dummy')
        script_path = dummy_plugin._create_kickoff_script(pav_cfg, test)

        with script_path.open() as script:
            lines = [line.strip() for line in script.readlines()]

        # Every env_setup entry must be present as a whole script line.
        expected_lines = set(pav_cfg['env_setup'])
        self.assertTrue(expected_lines.issubset(lines))

        final_line = lines[-1]
        self.assertTrue(re.match(r'pav _run.*', final_line))
Exemplo n.º 15
0
    def test_schedule_test(self):
        """Schedule a test with slurm, check that it is reported as
        scheduled, then cancel it."""

        slurm = schedulers.get_plugin('slurm')

        cfg = self._quick_test_cfg()
        cfg['scheduler'] = 'slurm'
        test = self._quick_test(cfg=cfg, name='slurm_test')

        slurm.schedule_test(self.pav_cfg, test)

        # Right after scheduling, the job should be in the SCHEDULED state.
        scheduled = slurm.job_status(self.pav_cfg, test)
        self.assertEqual(scheduled.state, STATES.SCHEDULED)

        # Cancelling reports the cancelled state back.
        cancelled = slurm.cancel_job(test)
        self.assertEqual(cancelled.state, STATES.SCHED_CANCELLED)
Exemplo n.º 16
0
    def test_schedule_test(self):
        """Make sure the scheduler can run a test.

        The test is built, scheduled with the raw scheduler, and must reach
        the COMPLETE state after waiting on it.
        """

        raw = schedulers.get_plugin('raw')

        test = self._quick_test(build=False, finalize=False)

        self.assertTrue(test.build(), msg=test)

        raw.schedule_tests(self.pav_cfg, [test])

        try:
            test.wait(2)
        except Exception as err:  # pylint: disable=broad-except
            # Report what went wrong instead of failing with no message.
            self.fail("Waiting on test {} failed: {!r}".format(test.id, err))

        self.assertEqual(test.status.current().state, STATES.COMPLETE)
Exemplo n.º 17
0
    def test_sched_vars(self):
        """Make sure the scheduler vars are reasonable when not on a node.

        Every slurm variable must have a non-empty value outside of an
        allocation. A second test that echoes each variable is then
        scheduled and must complete within ``self.TEST_TIMEOUT`` seconds.
        """

        slurm = schedulers.get_plugin('slurm')

        cfg = self._quick_test_cfg()
        cfg['scheduler'] = 'slurm'
        test = self._quick_test(cfg, name='slurm_vars', finalize=False)

        sched_conf = test.config['slurm']

        # Check all the variables to make sure they work outside an allocation,
        # or at least return a DeferredVariable
        var_list = list()
        for k, v in slurm.get_vars(sched_conf).items():
            # Make sure everything has a value of some sort.
            self.assertNotIn(v, ['None', ''])
            var_list.append(k)

        # Now check all the vars for real, when a test is running.
        cfg = self._quick_test_cfg()
        cfg['scheduler'] = 'slurm'
        # Ask for each var in our test commands.
        cfg['run']['cmds'] = [
            'echo "{var}={{{{sched.{var}}}}}"'.format(var=var)
            for var in var_list
        ]
        sched_vars = slurm.get_vars(sched_conf)
        test = self._quick_test(cfg,
                                name='slurm_vars2',
                                finalize=False,
                                sched_vars=sched_vars)

        slurm.schedule_test(self.pav_cfg, test)

        timeout = time.time() + self.TEST_TIMEOUT
        state = test.status.current()
        while time.time() < timeout:
            state = test.status.current()
            if state.state == STATES.COMPLETE:
                # Done. Leave the loop rather than returning a value, since
                # test methods are expected to return None.
                break
            # Pause between polls instead of spinning on the status file.
            time.sleep(.5)
        else:
            self.fail("Test never completed. Has state: {}".format(state))
Exemplo n.º 18
0
    def test_check_job(self):
        """Check the scheduler status the raw scheduler reports for a variety
        of job ids."""

        cfg = self._quick_test_cfg()
        cfg['run']['cmds'] = ['sleep 2']
        test = self._quick_test(cfg=cfg)

        test.status.set('SCHEDULED', 'but not really')

        # The largest pid the kernel will hand out; one past it can't exist.
        pid_max_path = Path('/proc/sys/kernel/pid_max')
        with pid_max_path.open() as pid_max_file:
            max_pid = int(pid_max_file.read())

        hostname = socket.gethostname()

        raw = schedulers.get_plugin('raw')

        def sched_state():
            """Return the state raw currently reports for the test."""
            return raw.job_status(self.pav_cfg, test).state

        # A job id that claims to be from another host.
        test.job_id = 'garbledhostnameasldfkjasd_{}'.format(os.getpid())
        self.assertEqual(sched_state(), STATES.SCHEDULED)

        # A job id on this host with a pid that can't exist.
        test.job_id = '{}_{}'.format(hostname, max_pid + 1)
        self.assertEqual(sched_state(), STATES.SCHED_ERROR)

        # The 'race condition' case of check_job: the test has already moved
        # on to COMPLETE.
        test.status.set(STATES.COMPLETE, 'not really this either.')
        self.assertEqual(sched_state(), STATES.COMPLETE)
        test.status.set(STATES.SCHEDULED, "reseting.")

        # A job id on this host with a re-used pid (our own).
        test.job_id = '{}_{}'.format(hostname, os.getpid())
        self.assertEqual(sched_state(), STATES.SCHED_ERROR)

        # Finally, a job that really was scheduled.
        raw.schedule_test(self.pav_cfg, test)
        self.assertEqual(sched_state(), STATES.SCHEDULED)
Exemplo n.º 19
0
    def build_variable_manager(self, raw_test_cfg):
        """Get all of the different kinds of Pavilion variables into a single
        variable set manager for this test.

        A deep copy of ``self.base_var_man`` is extended with the test's own
        'variables' section (as the 'var' set) and with the variables of the
        test's scheduler (as the 'sched' set).

        :param raw_test_cfg: A raw test configuration. It should be from before
            any variables are resolved.
        :rtype: variables.VariableSetManager
        :raises TestConfigError: When the test variables can't be added, the
            scheduler can't be found, or the scheduler variables can't be
            retrieved or added. The original error is attached as the cause.
        """

        user_vars = raw_test_cfg.get('variables', {})
        # Work on a copy so the shared base manager is never modified.
        var_man = copy.deepcopy(self.base_var_man)

        # NOTE(review): the original comment here talked about adding 'per'
        # vars later for speed, which doesn't match the code below -- it
        # simply adds the user variables as the 'var' set. Confirm and drop.
        try:
            var_man.add_var_set('var', user_vars)
        except variables.VariableError as err:
            raise TestConfigError(
                "Error in variables section: {}".format(err)) from err

        scheduler = raw_test_cfg.get('scheduler', '<undefined>')
        try:
            sched = schedulers.get_plugin(scheduler)
        except schedulers.SchedulerPluginError as err:
            raise TestConfigError(
                "Could not find scheduler '{}'"
                .format(scheduler)) from err

        try:
            # The scheduler's config section is named after the scheduler.
            sched_vars = sched.get_vars(raw_test_cfg.get(scheduler, {}))
            var_man.add_var_set('sched', sched_vars)
        except schedulers.SchedulerPluginError as err:
            raise TestConfigError(
                "Could not get variables for scheduler {}: {}"
                .format(scheduler, err)
            ) from err
        except variables.VariableError as err:
            raise TestConfigError("Error in scheduler variables: {}"
                                  .format(err)) from err

        return var_man
Exemplo n.º 20
0
    def test_log_tail(self):
        log_cmd = commands.get_command('log')

        parser = argparse.ArgumentParser()
        log_cmd._setup_arguments(parser)

        out = io.StringIO()
        err = io.StringIO()

        log_cmd.outfile = out
        log_cmd.errfile = err

        # test 'pav log --tail X run test'
        test_cfg = self._quick_test_cfg()
        test_cfg['run']['cmds'] = [
            'echo "this"', 'echo "is"', 'echo "some"', 'echo "crazy"',
            'echo "long"', 'echo "output"'
        ]
        test = self._quick_test(cfg=test_cfg)

        raw = schedulers.get_plugin('raw')
        raw.schedule_test(self.pav_cfg, test)

        end = time.time() + 5
        while test.check_run_complete() is None and time.time() < end:
            time.sleep(.1)

        args = parser.parse_args(['--tail', '2', 'run', str(test.id)])
        out.truncate(0)
        err.truncate(0)
        result = log_cmd.run(self.pav_cfg, args)
        self.assertEqual(result, 0)
        out.seek(0)
        err.seek(0)
        self.assertEqual(err.read(), '')
        self.assertEqual(out.read(), 'long\noutput\n')

        log_cmd.outfile = sys.stdout
        log_cmd.outfile = sys.stderr
Exemplo n.º 21
0
    def test_node_list_parsing(self):
        """Check slurm node list parsing against lists it must accept and
        lists it must reject."""

        slurm = schedulers.get_plugin('slurm')  # type: Slurm

        # (node list, expected parse result) pairs.
        good_cases = (
            (None, []),
            ('', []),
            ('ab03', ['ab03']),
            ('ab-bc[3-004]', ['ab-bc3', 'ab-bc4']),
            ('ab_bc[03-10]',
             ['ab_bc{:02d}'.format(num) for num in range(3, 11)]),
            ('n[003-143]', ['n{:03d}'.format(num) for num in range(3, 144)]),
            # Duplicates are accepted
            ('nid03,nid[03-04]', ['nid03', 'nid03', 'nid04']),
            ('nid03,nid[04-06],nid[12-33]',
             ['nid03', 'nid04', 'nid05', 'nid06'] +
             ['nid{:02d}'.format(num) for num in range(12, 34)]),
        )

        for node_list, expected in good_cases:
            parsed = slurm.parse_node_list(node_list)
            self.assertEqual(parsed, expected)

        # (node list, reason it is invalid) pairs; each must raise ValueError.
        bad_cases = (
            ('n03d',  "Trailing characters"),
            ('nid03!@#', "Trailing junk (whole string match)."),
            ('n03.n04', "Not comma separated"),
            ('n[03', "No closing bracket"),
            ('n03]', "No open bracket"),
            ('nid[12-03]', "Out of order range"),
        )

        for node_list, reason in bad_cases:
            failure_msg = "Did not throw error for {}".format(reason)
            with self.assertRaises(ValueError, msg=failure_msg):
                slurm.parse_node_list(node_list)
Exemplo n.º 22
0
    def test_include_exclude(self):
        """Schedule a slurm test that requires one node and excludes
        another."""

        slurm = schedulers.get_plugin('slurm')

        # A throwaway test, used only to get at the slurm variables.
        probe_test = self._quick_test(build=False, finalize=False)
        slurm_vars = slurm.get_vars(probe_test.config['slurm'])
        up_nodes = slurm_vars['node_up_list'].split()

        cfg = self._quick_test_cfg()
        cfg['scheduler'] = 'slurm'
        slurm_cfg = cfg['slurm']
        slurm_cfg['num_nodes'] = '2'
        slurm_cfg['include_nodes'] = up_nodes[1]
        slurm_cfg['exclude_nodes'] = up_nodes[2]

        test = self._quick_test(cfg, finalize=False)

        # Scheduling without an error is the main thing being checked.
        slurm.schedule_test(self.pav_cfg, test)

        # A test that doesn't finish in time is cancelled, not failed.
        try:
            test.wait(timeout=5)
        except TimeoutError:
            slurm.cancel_job(test)
Exemplo n.º 23
0
def status_from_test_obj(pav_cfg: dict, test: TestRun):
    """Build the status dictionary for a single test object.

    A SCHEDULED test gets its status from its scheduler plugin. A BUILDING
    or RUNNING test has a 'Last updated' entry appended to its status note.

:param pav_cfg: Pavilion base configuration.
:param test: Pavilion test object.
:return: Dictionary containing the test ID, name, state, time of state
         update, and note associated with that state.
:rtype: dict
    """

    status_f = test.status.current()
    state = status_f.state

    # (label, last update time) when the note needs a 'Last updated' entry.
    update_info = None

    if state == STATES.SCHEDULED:
        plugin = schedulers.get_plugin(test.scheduler)
        status_f = plugin.job_status(pav_cfg, test)
    elif state == STATES.BUILDING:
        update_info = ('\nLast updated: ', test.builder.log_updated())
    elif state == STATES.RUNNING:
        update_info = ('\nLast updated:',
                       get_last_ctime(test.path / 'run.log'))

    if update_info is not None:
        label, last_update = update_info
        if last_update is None:
            updated_text = '<unknown>'
        else:
            updated_text = str(last_update)
        status_f.note = ' '.join([status_f.note, label, updated_text])

    summary = {
        'test_id': test.id,
        'name': test.name,
        'state': status_f.state,
        'time': status_f.when,
        'note': status_f.note,
    }
    return summary
Exemplo n.º 24
0
    def _get_tests(self, pav_cfg, host, test_files, tests, modes, overrides,
                   sys_vars):
        """Translate a general set of pavilion test configs into the final,
        resolved configurations, organized in a dictionary by scheduler name.

        The steps are: gather test names (from 'tests' and the test files),
        load the raw configs, apply the overrides, resolve permutations,
        group by the resolved scheduler name, add the scheduler variables to
        each variable manager, and resolve each config.

        :param pav_cfg: The pavilion config
        :param str host: The host config to target these tests with. When
            None, the 'sys_name' system variable is used.
        :param list(str) modes: The mode configs to use.
        :param list(Path) test_files: Files containing a newline separated
            list of tests. Blank lines and lines starting with '#' are
            skipped.
        :param list(str) tests: The tests to run.
        :param list(str) overrides: Overrides to apply to the configurations.
        :param system_variables.SysVarDict sys_vars: The system variables dict.
        :returns: A dictionary (by scheduler type name) of lists of tuples
            test configs and their variable managers.
        :raises commands.CommandError: When a test file can't be read, the
            configs can't be loaded, overrides or permutations fail, a
            scheduler can't be found, or variable resolution fails.
        """
        self.logger.debug("Finding Configs")

        # Use the sys_host if a host isn't specified.
        if host is None:
            host = sys_vars.get('sys_name')

        # Copy the list so the caller's list isn't extended by the file
        # contents below.
        tests = list(tests)
        for file in test_files:
            try:
                with pathlib.PosixPath(file).open() as test_file:
                    for line in test_file.readlines():
                        line = line.strip()
                        # Skip blank lines and '#' comment lines.
                        if line and not line.startswith('#'):
                            tests.append(line)
            except (OSError, IOError) as err:
                msg = "Could not read test file {}: {}".format(file, err)
                self.logger.error(msg)
                raise commands.CommandError(msg)

        try:
            raw_tests = test_config.load_test_configs(pav_cfg, host, modes,
                                                      tests)
        except test_config.TestConfigError as err:
            self.logger.error(str(err))
            raise commands.CommandError(str(err))

        # Both map a scheduler name to a list of (config, var manager)
        # tuples; the first holds unresolved configs, the second resolved.
        raw_tests_by_sched = defaultdict(lambda: [])
        tests_by_scheduler = defaultdict(lambda: [])

        # Apply config overrides.
        for test_cfg in raw_tests:
            # Apply the overrides to each of the config values.
            try:
                test_config.apply_overrides(test_cfg, overrides)
            except test_config.TestConfigError as err:
                msg = 'Error applying overrides to test {} from {}: {}' \
                    .format(test_cfg['name'], test_cfg['suite_path'], err)
                self.logger.error(msg)
                raise commands.CommandError(msg)

            # Resolve all configuration permutations.
            try:
                p_cfg, permutes = test_config.resolve_permutations(
                    test_cfg, pav_cfg.pav_vars, sys_vars)
                for p_var_man in permutes:
                    # Get the scheduler from the config.
                    sched = p_cfg['scheduler']
                    # The scheduler name may itself contain variables, so it
                    # is resolved per permutation (deferred values are not
                    # allowed here).
                    sched = test_config.resolve_section_vars(
                        component=sched,
                        var_man=p_var_man,
                        allow_deferred=False,
                        deferred_only=False,
                    )
                    raw_tests_by_sched[sched].append((p_cfg, p_var_man))
            except test_config.TestConfigError as err:
                msg = 'Error resolving permutations for test {} from {}: {}' \
                    .format(test_cfg['name'], test_cfg['suite_path'], err)
                self.logger.error(msg)
                raise commands.CommandError(msg)

        # Get the schedulers for the tests, and the scheduler variables.
        # The scheduler variables come from each test's resolved scheduler
        # config section.
        for sched_name in raw_tests_by_sched.keys():
            try:
                sched = schedulers.get_plugin(sched_name)
            # NOTE(review): only KeyError is handled here -- confirm that is
            # what get_plugin raises for an unknown scheduler.
            except KeyError:
                msg = "Could not find scheduler '{}'.".format(sched_name)
                self.logger.error(msg)
                raise commands.CommandError(msg)

            # The config sections that may not contain deferred variables:
            # one per scheduler plugin, plus 'build'.
            nondeferred_cfg_sctns = schedulers.list_plugins()

            # Builds must have the values of all their variables now.
            nondeferred_cfg_sctns.append('build')

            # Set the scheduler variables for each test.
            for test_cfg, test_var_man in raw_tests_by_sched[sched_name]:

                # Resolve this scheduler's own config section first; the
                # scheduler variables are derived from it.
                sched_config = test_config.resolve_section_vars(
                    component=test_cfg[sched_name],
                    var_man=test_var_man,
                    allow_deferred=False,
                    deferred_only=False,
                )

                test_var_man.add_var_set('sched', sched.get_vars(sched_config))

                # Resolve all variables for the test (that aren't deferred).
                try:
                    resolved_config = test_config.resolve_config(
                        test_cfg,
                        test_var_man,
                        no_deferred_allowed=nondeferred_cfg_sctns)

                except (ResolveError, KeyError) as err:
                    msg = "Error resolving variables in config at '{}': {}" \
                        .format(test_cfg['suite_path'].resolve(test_var_man),
                                err)
                    self.logger.error(msg)
                    raise commands.CommandError(msg)

                # Results are keyed by the plugin's own name attribute.
                tests_by_scheduler[sched.name].append(
                    (resolved_config, test_var_man))
        return tests_by_scheduler
Exemplo n.º 25
0
    def run(self, pav_cfg, args):
        """Resolve the test configurations into individual tests and assign to
        schedulers. Have those schedulers kick off jobs to run the individual
        tests themselves.
        :param pav_cfg: The pavilion configuration.
        :param args: The parsed command line argument object.
        """

        # 1. Resolve the test configs
        #   - Get sched vars from scheduler.
        #   - Compile variables.
        #

        overrides = {}
        for ovr in args.overrides:
            if '=' not in ovr:
                fprint(
                    "Invalid override value. Must be in the form: "
                    "<key>=<value>. Ex. -c run.modules=['gcc'] ",
                    file=self.errfile)
                return errno.EINVAL

            key, value = ovr.split('=', 1)
            overrides[key] = value

        sys_vars = system_variables.get_vars(True)

        try:
            configs_by_sched = self._get_tests(
                pav_cfg=pav_cfg,
                host=args.host,
                test_files=args.files,
                tests=args.tests,
                modes=args.modes,
                overrides=overrides,
                sys_vars=sys_vars,
            )

            tests_by_sched = self._configs_to_tests(
                pav_cfg=pav_cfg,
                configs_by_sched=configs_by_sched,
            )

        except commands.CommandError as err:
            # Our error messages get escaped to a silly degree
            err = codecs.decode(str(err), 'unicode-escape')
            fprint(err, file=self.errfile)
            return errno.EINVAL

        all_tests = sum(tests_by_sched.values(), [])

        if not all_tests:
            fprint("You must specify at least one test.", file=self.errfile)
            return errno.EINVAL

        series = TestSeries(pav_cfg, all_tests)

        rp_errors = []
        for test in all_tests:

            # Make sure the result parsers have reasonable arguments.
            try:
                result_parsers.check_args(test.config['results'])
            except TestRunError as err:
                rp_errors.append(str(err))

        if rp_errors:
            fprint("Result Parser configurations had errors:",
                   file=self.errfile,
                   color=output.RED)
            for msg in rp_errors:
                fprint(msg, bullet=' - ', file=self.errfile)
            return errno.EINVAL

        failed_build = None
        # Building any tests that specify that they should be built before
        for test in all_tests:
            if test.config['build']['on_nodes'] not in ['true', 'True']:
                if not test.build():
                    fprint("Error building test: ",
                           file=self.errfile,
                           color=output.RED)
                    fprint("status {status.state} - {status.note}".format(
                        status=test.status.current()),
                           file=self.errfile)
                    fprint(
                        "For more information, run 'pav log build {}'".format(
                            test.id),
                        file=self.errfile)
                    failed_build = test
                    break

        if failed_build is not None:
            for test in all_tests:
                if test is not failed_build:
                    test.status.set(
                        STATES.ABORTED,
                        "Canceled due to problems with other tests in run")
            return errno.EINVAL

        for sched_name, tests in tests_by_sched.items():
            sched = schedulers.get_plugin(sched_name)

            try:
                sched.schedule_tests(pav_cfg, tests)
            except schedulers.SchedulerPluginError as err:
                fprint('Error scheduling tests:',
                       file=self.errfile,
                       color=output.RED)
                fprint(err, bullet='  ', file=self.errfile)
                fprint('Cancelling already kicked off tests.',
                       file=self.errfile)
                self._cancel_all(tests_by_sched)

        # Tests should all be scheduled now, and have the SCHEDULED state
        # (at some point, at least). Wait until something isn't scheduled
        # anymore (either running or dead), or our timeout expires.
        wait_result = None
        if args.wait is not None:
            end_time = time.time() + args.wait
            while time.time() < end_time and wait_result is None:
                last_time = time.time()
                for sched_name, tests in tests_by_sched.items():
                    sched = schedulers.get_plugin(sched_name)
                    for test in tests:
                        status = test.status.current()
                        if status == STATES.SCHEDULED:
                            status = sched.job_status(pav_cfg, test)

                        if status != STATES.SCHEDULED:
                            # The test has moved past the scheduled state.
                            wait_result = None
                            break

                        break

                if wait_result is None:
                    # Sleep at most SLEEP INTERVAL seconds, minus the time
                    # we spent checking our jobs.
                    time.sleep(self.SLEEP_INTERVAL - (time.time() - last_time))

        fprint("{} test{} started as test series {}.".format(
            len(all_tests), 's' if len(all_tests) > 1 else '', series.id),
               file=self.outfile,
               color=output.GREEN)

        if args.status:
            tests = list(series.tests.keys())
            tests, _ = test_obj_from_id(pav_cfg, tests)
            return print_from_test_obj(pav_cfg, tests, self.outfile, args.json)

        return 0
Exemplo n.º 26
0
    def _scheduler_cmd(self, _, args):
        """Print information about scheduler plugins.

        Exactly one of three reports is written to ``self.outfile``:

        - ``args.vars`` names a scheduler: a table of the variables that
          scheduler plugin provides.
        - ``args.config`` names a scheduler: that scheduler's default
          config section.
        - otherwise: a table of every available scheduler plugin (with the
          plugin path when ``args.verbose`` is set).

        :param _: Unused.
        :param argparse.Namespace args:
        :return: errno.EINVAL when the named scheduler plugin can't be
            loaded, None otherwise.
        """

        # args.vars takes precedence over args.config when naming the plugin.
        plugin_name = args.vars if args.vars is not None else args.config
        plugin = None

        if plugin_name is not None:
            try:
                plugin = schedulers.get_plugin(plugin_name)
            except schedulers.SchedulerPluginError:
                output.fprint(
                    "Invalid scheduler plugin '{}'.".format(plugin_name),
                    color=output.RED,
                )
                return errno.EINVAL

        if args.vars is not None:
            # The plugin's variables are generated from its section of an
            # empty test config.
            blank_cfg = file_format.TestConfigLoader().load_empty()
            plugin_vars = plugin.get_vars(blank_cfg[plugin_name])
            var_rows = [plugin_vars.info(key)
                        for key in sorted(plugin_vars.keys())]

            output.draw_table(
                self.outfile,
                fields=['name', 'deferred', 'example', 'help'],
                rows=var_rows,
                title="Variables for the {} scheduler plugin.".format(
                    args.vars))
            return None

        if args.config is not None:
            section = plugin.get_conf()

            class Loader(yaml_config.YamlConfigLoader):
                """Loader for just a scheduler's config."""
                ELEMENTS = [section]

            defaults = Loader().load_empty()
            Loader().dump(self.outfile, values=defaults)
            return None

        # Neither a vars nor a config request, so assume --list was given.
        loaded = ((name, schedulers.get_plugin(name))
                  for name in schedulers.list_plugins())
        plugin_rows = [
            {
                'name': name,
                'description': found.description,
                'path': found.path,
            }
            for name, found in loaded
        ]

        if args.verbose:
            columns = ['name', 'description', 'path']
        else:
            columns = ['name', 'description']

        output.draw_table(self.outfile,
                          fields=columns,
                          rows=plugin_rows,
                          title="Available Scheduler Plugins")
Exemplo n.º 27
0
    def run(self, pav_cfg, args):
        """Cancel the given tests.

        When no tests are named and ``args.all`` is set, every test run
        directory under ``<working_dir>/test_runs`` that is owned by the
        current user and has no ``RUN_COMPLETE`` file is selected. When no
        tests are named and ``args.all`` is not set, the user's last series
        is selected instead.

        :param pav_cfg: The pavilion configuration; ``working_dir`` is read
            from it and it is handed to the series and test loaders.
        :param args: Parsed command arguments. The ``tests``, ``all``,
            ``status`` and ``json`` attributes are read. ``args.tests`` is
            not modified.
        :return: 0 once every selected test has been processed, or
            errno.EINVAL as soon as an id is malformed or a series/test
            can't be loaded.
        :rtype: int
        """

        # Work on a copy so the caller's argument list is left untouched.
        test_ids = list(args.tests)

        if not test_ids:
            if args.all:
                # The user wants to cancel all of their unfinished tests.
                user_id = os.geteuid()
                tests_dir = pav_cfg.working_dir / 'test_runs'
                for test_dir in tests_dir.iterdir():
                    if (test_dir.stat().st_uid == user_id and
                            not (test_dir / 'RUN_COMPLETE').exists()):
                        test_ids.append(test_dir.name)
            else:
                # Get the last series ran by this user.
                series_id = series.TestSeries.load_user_series_id(pav_cfg)
                if series_id is not None:
                    test_ids.append(series_id)

        # Expand series ids (those starting with 's') into their test ids.
        test_list = []
        for test_id in test_ids:
            if test_id.startswith('s'):
                try:
                    test_list.extend(
                        series.TestSeries.from_id(pav_cfg, test_id).tests)
                except series.TestSeriesError as err:
                    output.fprint("Series {} could not be found.\n{}".format(
                        test_id, err),
                                  file=self.errfile,
                                  color=output.RED)
                    return errno.EINVAL
                except ValueError as err:
                    output.fprint(
                        "Series {} is not a valid series.\n{}".format(
                            test_id, err),
                        color=output.RED,
                        file=self.errfile)
                    return errno.EINVAL
            else:
                try:
                    test_list.append(int(test_id))
                except ValueError as err:
                    output.fprint("Test {} is not a valid test.\n{}".format(
                        test_id, err),
                                  file=self.errfile,
                                  color=output.RED)
                    return errno.EINVAL

        test_object_list = []
        for test_id in test_list:
            try:
                test = TestRun.load(pav_cfg, test_id)
                sched = schedulers.get_plugin(test.scheduler)
                test_object_list.append(test)

                status = test.status.current()
                # Won't try to cancel a completed job or a job that was
                # previously cancelled.
                if status.state not in (STATES.COMPLETE,
                                        STATES.SCHED_CANCELLED):
                    # Sets status based on the result of sched.cancel_job.
                    # Ran into trouble when 'cancelling' jobs that never
                    # actually started, ie. build errors/created job states.
                    cancel_status = sched.cancel_job(test)
                    test.status.set(cancel_status.state, cancel_status.note)
                    test.set_run_complete()
                    output.fprint("Test {} cancelled.".format(test_id),
                                  file=self.outfile,
                                  color=output.GREEN)

                else:
                    output.fprint(
                        "Test {} could not be cancelled has state: {}.".format(
                            test_id, status.state),
                        file=self.outfile,
                        color=output.RED)

            except TestRunError as err:
                output.fprint(
                    "Test {} could not be cancelled, cannot be found. \n{}".
                    format(test_id, err),
                    file=self.errfile,
                    color=output.RED)
                return errno.EINVAL

        # Only print the statuses when asked to, and when there is at least
        # one loaded test to report on.
        if args.status and test_object_list:
            print_from_test_obj(pav_cfg, test_object_list, self.outfile,
                                args.json)

        # Every failure above returns early, so reaching here is success.
        return 0
Exemplo n.º 28
0
    def _run(self, pav_cfg, test):
        """Run an already prepped test in the current environment.

        The steps, in order: look up the test's scheduler plugin, re-add the
        'sys' and 'sched' variable sets and finalize the test with them,
        build the test here if its config asks for an on-node build, run the
        test (under the scheduler's concurrency lock), then check the result
        parser arguments, gather the results and save them.

        :param pav_cfg: The pavilion configuration.
        :param test: The test run to execute.
        :return: 1 when running the test, checking the result parser
            arguments or gathering results fails in an expected way.
            Falls off the end (returning None) on success. Unexpected
            exceptions are re-raised after the test status is updated.
        """

        try:
            sched = schedulers.get_plugin(test.scheduler)
        except Exception:
            test.status.set(STATES.BUILD_ERROR,
                            "Unknown error getting the scheduler. Refer to "
                            "the kickoff log.")
            raise

        # Re-add var sets that may have had deferred variables.
        try:
            var_man = VariableSetManager()
            var_man.add_var_set('sys', system_variables.get_vars(defer=False))
            sched_config = test.config[test.scheduler]
            var_man.add_var_set('sched', sched.get_vars(sched_config))
        except Exception:
            test.status.set(STATES.RUN_ERROR,
                            "Unknown error getting pavilion variables at "
                            "run time.")
            raise

        try:
            test.finalize(var_man)
        except Exception:
            test.status.set(STATES.RUN_ERROR,
                            "Unknown error finalizing test.")
            raise

        try:
            if test.config['build']['on_nodes'] in ['true', 'True']:
                if not test.build():
                    # NOTE(review): a failed build is only logged; the test
                    # is still run below. Confirm whether this should
                    # return early instead.
                    self.logger.warning("Test %s failed to build.", test.id)
        except Exception:
            test.status.set(STATES.BUILD_ERROR,
                            "Unknown build error. Refer to the kickoff log.")
            raise

        # Optionally wait on other tests running under the same scheduler.
        # This depends on the scheduler and the test configuration.
        lock = sched.lock_concurrency(pav_cfg, test)

        try:
            run_result = test.run()
        except TestRunError as err:
            # Store the message text rather than the exception object.
            test.status.set(STATES.RUN_ERROR, str(err))
            return 1
        except TimeoutError:
            # No status is set here; presumably test.run() records the
            # timeout itself - TODO confirm.
            return 1
        except Exception:
            test.status.set(
                STATES.RUN_ERROR,
                "Unknown error while running test. Refer to the kickoff log.")
            raise
        finally:
            # Always release the lock, whether or not the run succeeded.
            sched.unlock_concurrency(lock)

        try:
            # Make sure the result parsers have reasonable arguments.
            # We check here because the parser code itself will likely assume
            # the args are valid from check_args, but those might not be
            # checkable before kickoff due to deferred variables.
            try:
                result_parsers.check_args(test.config['results'])
            except TestRunError as err:
                test.status.set(STATES.RESULTS_ERROR, str(err))
                test.set_run_complete()
                return 1

            results = test.gather_results(run_result)
        except result_parsers.ResultParserError as err:
            self.logger.error("Unexpected error gathering results: %s", err)
            test.status.set(STATES.RESULTS_ERROR,
                            "Error parsing results: {}".format(err))
            return 1

        try:
            test.save_results(results)

            # The results are also written to the 'results' logger as json.
            result_logger = logging.getLogger('results')
            result_logger.info(output.json_dumps(results))
        except Exception:
            test.status.set(
                STATES.RESULTS_ERROR,
                "Unknown error while saving results. Refer to the kickoff log.")
            raise

        try:
            test.status.set(STATES.COMPLETE,
                            "The test completed with result: {}"
                            .format(results.get('result', '<unknown>')))
        except Exception:
            test.status.set(
                STATES.UNKNOWN,
                "Unknown error while setting test completion. Refer to the "
                "kickoff log.")
            raise
Exemplo n.º 29
0
    def run_tests(self, pav_cfg, tests_by_sched, series, wait, report_status):
        """Schedule the given tests, optionally waiting for one to start.

        :param pav_cfg: The pavilion configuration.
        :param dict[str,[TestRun]] tests_by_sched: A dict by scheduler name
            of the tests (in a list).
        :param series: The test series.
        :param int wait: Wait up to this many seconds for a scheduled test
            to leave the SCHEDULED state before continuing. None disables
            waiting.
        :param bool report_status: Do a 'pav status' after tests have started.
        :return: errno.EINVAL if a scheduler isn't available or scheduling
            fails, the result of the status report if report_status is set,
            0 otherwise.
        """

        all_tests = sum(tests_by_sched.values(), [])

        for sched_name, tests in tests_by_sched.items():
            sched = schedulers.get_plugin(sched_name)

            if not sched.available():
                fprint("{} tests started with the {} scheduler, but "
                       "that scheduler isn't available on this system."
                       .format(len(tests), sched_name),
                       file=self.errfile, color=output.RED)
                return errno.EINVAL

        # (scheduler plugin, test) pairs actually handed to a scheduler.
        # Only these are polled when waiting; skipped or unscheduled tests
        # never had the SCHEDULED state and would end the wait immediately.
        scheduled = []

        for sched_name, tests in tests_by_sched.items():
            tests = [test for test in tests if not test.skipped]
            sched = schedulers.get_plugin(sched_name)

            # Filter out any 'build_only' tests (it should be all or none)
            # that shouldn't be scheduled.
            tests = [test for test in tests if
                     # The non-build only tests
                     (not test.build_only) or
                     # The build only tests that are built on nodes
                     (not test.build_local and
                      # As long they need to be built.
                      (test.rebuild or not test.builder.exists()))]

            # Skip this scheduler if it doesn't have tests that need to run.
            if not tests:
                continue

            try:
                sched.schedule_tests(pav_cfg, tests)
            except schedulers.SchedulerPluginError as err:
                fprint('Error scheduling tests:', file=self.errfile,
                       color=output.RED)
                fprint(err, bullet='  ', file=self.errfile)
                fprint('Cancelling already kicked off tests.',
                       file=self.errfile)
                self._cancel_all(tests_by_sched)
                # return so the rest of the tests don't actually run
                return errno.EINVAL

            scheduled.extend((sched, test) for test in tests)

        # Tests should all be scheduled now, and have the SCHEDULED state
        # (at some point, at least). Wait until something isn't scheduled
        # anymore (either running or dead), or our timeout expires.
        if wait is not None and scheduled:

            def left_queue(sched, test):
                """Whether the test has moved past the SCHEDULED state."""
                status = test.status.current()
                if status.state == STATES.SCHEDULED:
                    # Ask the scheduler itself for a fresher answer.
                    status = sched.job_status(pav_cfg, test)
                return status.state != STATES.SCHEDULED

            end_time = time.time() + wait
            while time.time() < end_time:
                last_time = time.time()

                if any(left_queue(sched, test) for sched, test in scheduled):
                    break

                # Sleep at most SLEEP_INTERVAL seconds, minus the time we
                # spent checking our jobs, and never past the deadline.
                # time.sleep() rejects negative values, so skip the sleep
                # when no time is left.
                now = time.time()
                delay = min(self.SLEEP_INTERVAL - (now - last_time),
                            end_time - now)
                if delay > 0:
                    time.sleep(delay)

        fprint("{} test{} started as test series {}."
               .format(len(all_tests),
                       's' if len(all_tests) > 1 else '',
                       series.id),
               file=self.outfile,
               color=output.GREEN)

        if report_status:
            tests = list(series.tests.keys())
            tests, _ = test_obj_from_id(pav_cfg, tests)
            return print_from_test_obj(
                pav_cfg=pav_cfg,
                test_obj=tests,
                outfile=self.outfile,
                json=False)

        return 0
Exemplo n.º 30
0
    def run_tests(self, wait: Union[None, int] = None,
                  tests: Union[None, List[TestRun]] = None) -> int:
        """Run the tests for this test series.

        :param int wait: Wait up to this many seconds for a scheduled test
            to leave the SCHEDULED state before returning. None disables
            waiting.
        :param tests: Manually specified list of tests to run. Defaults to
            the series' test list.
        :return: errno.EINVAL if a test's scheduler isn't available or a
            test can't be scheduled, 0 otherwise.
        """

        if tests is None:
            tests = list(self.tests.values())

        # Each distinct scheduler only needs its availability checked once.
        checked = set()
        for test in tests:
            sched_name = test.scheduler
            if sched_name in checked:
                continue
            checked.add(sched_name)

            sched = schedulers.get_plugin(sched_name)

            if not sched.available():
                fprint("1 test started with the {} scheduler, but "
                       "that scheduler isn't available on this system."
                       .format(sched_name),
                       file=self.errfile, color=output.RED)
                return errno.EINVAL

        # (scheduler plugin, test) pairs actually handed to a scheduler.
        # Only these are polled when waiting; skipped or completed tests
        # are never in the SCHEDULED state.
        scheduled = []

        for test in tests:

            # don't run this test if it was meant to be skipped
            if test.skipped:
                continue

            # tests that are build-only or build-local should
            # already be completed, therefore don't run these
            if test.complete:
                continue

            sched = schedulers.get_plugin(test.scheduler)
            try:
                sched.schedule_tests(self.pav_cfg, [test])
            except schedulers.SchedulerPluginError as err:
                fprint('Error scheduling test: ', file=self.errfile,
                       color=output.RED)
                fprint(err, bullet='  ', file=self.errfile)
                fprint('Cancelling already kicked off tests.',
                       file=self.errfile)
                # NOTE(review): only the test that failed to schedule is
                # cancelled here; tests scheduled on earlier iterations are
                # left alone despite the message above. Confirm the intent.
                sched.cancel_job(test)
                return errno.EINVAL

            scheduled.append((sched, test))

        # Tests should all be scheduled now, and have the SCHEDULED state
        # (at some point, at least). Wait until something isn't scheduled
        # anymore (either running or dead), or our timeout expires.
        if wait is not None and scheduled:

            def left_queue(sched, test):
                """Whether the test has moved past the SCHEDULED state."""
                status = test.status.current()
                if status.state == STATES.SCHEDULED:
                    # Ask the scheduler itself for a fresher answer.
                    status = sched.job_status(self.pav_cfg, test)
                return status.state != STATES.SCHEDULED

            end_time = time.time() + wait
            while time.time() < end_time:
                last_time = time.time()

                if any(left_queue(sched, test) for sched, test in scheduled):
                    break

                # Sleep at most WAIT_INTERVAL seconds, minus the time we
                # spent checking our jobs, and never past the deadline.
                # time.sleep() rejects negative values, so skip the sleep
                # when no time is left.
                now = time.time()
                delay = min(self.WAIT_INTERVAL - (now - last_time),
                            end_time - now)
                if delay > 0:
                    time.sleep(delay)

        fprint("{} test{} started as test series {}."
               .format(len(tests),
                       's' if len(tests) > 1 else '', self.sid),
               file=self.outfile,
               color=output.GREEN)

        return 0