Example No. 1
    def __checkServiceDir(self, wd):
        """Return the service address stored in the aux dir's 'address' file, or None."""
        wdir = find_single_aux_dir(wd)
        if exists(wdir) and exists(join(wdir, 'address')):
            with open(join(wdir, 'address'), 'r') as f:
                return f.read()

        return None
Example No. 2
def test_resume_tracker_files(tmpdir):
    # initialize before the try block so the finally clause never sees an unbound name
    m = None
    try:
        m = LocalManager(['--log', 'debug', '--wd', str(tmpdir), '--report-format', 'json',
                          '--nodes', '4'], {'wdir': str(tmpdir)})

        job_req = {
            'name': 'host',
            'execution': {
                'exec': '/bin/date',
                'stdout': 'out',
            },
            'resources': {'numCores': {'exact': 1}}
        }
        jobs = Jobs().add_std(job_req)
        submit_2_manager_and_wait_4_info(m, jobs, 'SUCCEED')

        time.sleep(1)
        aux_dir = find_single_aux_dir(str(tmpdir))

        print(f'aux_dir content: {str(listdir(aux_dir))}')

        assert all(exists(join(aux_dir, fname)) for fname in ['track.reqs', 'track.states']), \
            f"missing tracker files in {aux_dir}: {str(listdir(aux_dir))}"

    finally:
        if m:
            m.finish()
            m.cleanup()

    rmtree(str(tmpdir))
Example No. 3
def check_service_log_string(resubstr, workdir='.'):
    """Return True if any line of the aux dir's service.log matches the given regex."""
    service_log_file = join(find_single_aux_dir(workdir), 'service.log')

    regex = re.compile(resubstr)

    with open(service_log_file, 'r') as log_f:
        for line in log_f:
            if regex.search(line):
                return True

    return False
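A minimal usage sketch: once a pilot-job run has populated the working directory, the helper can assert that a pattern appears in the service log. Both the run step and the regex below are illustrative assumptions, not behavior guaranteed by QCG-PilotJob.

def test_service_log_contains_version(tmpdir):
    # hypothetical run step: any LocalManager or QCGPMService scenario
    # that writes service.log into the aux dir would do
    run_pilot_job_scenario(str(tmpdir))  # placeholder, not a real QCG-PilotJob call
    assert check_service_log_string(r'version\s+\d+', workdir=str(tmpdir))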
Example No. 4
def test_local_manager_submit_simple(tmpdir):
    cores = 4

    # switch on debugging (by default in api.log file)
    m = LocalManager(['--wd', str(tmpdir), '--nodes',
                      str(cores)], {'wdir': str(tmpdir)})

    try:
        res = m.resources()

        assert all(('total_nodes' in res,
                    'total_cores' in res,
                    res['total_nodes'] == 1,
                    res['total_cores'] == cores))

        ids = m.submit(Jobs()
                       .add(name='host', exec='/bin/hostname', args=['--fqdn'],
                            stdout='host.stdout')
                       .add(name='date', exec='/bin/date', stdout='date.stdout',
                            numCores={'exact': 2}))

        assert len(m.list()) == 2

        m.wait4(ids)

        jinfos = m.info(ids)

        assert all(('jobs' in jinfos,
                    len(jinfos['jobs'].keys()) == 2,
                    'host' in jinfos['jobs'],
                    'date' in jinfos['jobs'],
                    jinfos['jobs']['host'].get('data', {}).get('status', '') == 'SUCCEED',
                    jinfos['jobs']['date'].get('data', {}).get('status', '') == 'SUCCEED'))

        aux_dir = find_single_aux_dir(str(tmpdir))

        assert all((exists(tmpdir.join('.qcgpjm-client', 'api.log')),
                    exists(join(aux_dir, 'service.log')),
                    exists(tmpdir.join('host.stdout')),
                    exists(tmpdir.join('date.stdout'))))
    finally:
        m.finish()
        # m.stopManager()
        m.cleanup()
Example No. 5
def check_job_status_in_json(jobs, workdir='.', dest_state='SUCCEED'):
    """Verify in the aux dir's jobs.report that every given job reached dest_state."""
    to_check = set(jobs)

    entries = {}

    report_path = join(find_single_aux_dir(workdir), 'jobs.report')
    with open(report_path, 'r') as report_f:
        for line, entry in enumerate(report_f, 1):
            try:
                job_entry = json.loads(entry)
            except JSONDecodeError as e:
                raise ValueError(
                    'wrong jobs.report {} file format: error in {} line: {}'.
                    format(report_path, line, str(e)))

            for attr in ['name', 'state']:
                if attr not in job_entry:
                    raise ValueError(
                        'wrong jobs.report {} file format: missing \'{}\' attribute'
                        .format(report_path, attr))

            jName = job_entry['name']
            state = job_entry['state']

            if jName in to_check:
                msg = job_entry.get('message', None)
                if state != dest_state:
                    raise ValueError(
                        'job\'s {} incorrect status \'{}\' vs expected \'{}\' {}'
                        .format(jName, state, dest_state,
                                '(' + msg + ')' if msg else ''))

                to_check.remove(jName)
                entries[jName] = job_entry

            if len(to_check) == 0:
                break

    if len(to_check) > 0:
        raise ValueError('missing {} jobs in report'.format(
            ','.join(to_check)))

    return entries
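A short usage sketch (the job names and the tmpdir working directory are illustrative): the helper returns the matched report entries on success and raises ValueError for a job that is missing from the report or not in the expected state.

import pytest

# after a run that wrote jobs.report into tmpdir's aux dir:
entries = check_job_status_in_json(['first', 'second'], workdir=str(tmpdir))
assert entries['first']['state'] == 'SUCCEED'

# a job absent from the report raises
with pytest.raises(ValueError):
    check_job_status_in_json(['no-such-job'], workdir=str(tmpdir))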
Example No. 6
def test_local_workflows(tmpdir):
    file_path = tmpdir.join('jobs.json')

    print('tmpdir: {}'.format(str(tmpdir)))

    jobs = [
        job.to_dict() for job in [
            Job(
                'first',
                JobExecution('sleep',
                             args=['2s'],
                             wd=abspath(tmpdir.join('first.sandbox')),
                             stdout='out',
                             stderr='err'),
                JobResources(numCores=ResourceSize(1))),
            Job('second',
                JobExecution('sleep',
                             args=['1s'],
                             wd=abspath(tmpdir.join('second.sandbox')),
                             stdout='out',
                             stderr='err'),
                JobResources(numCores=ResourceSize(1)),
                dependencies=JobDependencies(after=['first'])),
            Job('third',
                JobExecution('date',
                             wd=abspath(tmpdir.join('third.sandbox')),
                             stdout='out',
                             stderr='err'),
                JobResources(numCores=ResourceSize(1)),
                dependencies=JobDependencies(after=['first', 'second']))
        ]
    ]
    reqs = [{
        'request': 'submit',
        'jobs': jobs
    }, {
        'request': 'control',
        'command': 'finishAfterAllTasksDone'
    }]
    save_reqs_to_file(reqs, file_path)
    print('jobs saved to file_path: {}'.format(str(file_path)))

    # the amount of resources should be enough to start all three jobs at once
    sys.argv = [
        'QCG-PilotJob', '--file', '--file-path',
        str(file_path), '--nodes', '4', '--wd',
        str(tmpdir), '--report-format', 'json'
    ]
    QCGPMService().start()

    jnames = ['first', 'second', 'third']
    check_job_status_in_json(jnames, workdir=str(tmpdir), dest_state='SUCCEED')
    for jname in jnames:
        sandbox = abspath(tmpdir.join('{}.sandbox'.format(jname)))
        assert all((isdir(sandbox),
                    exists(join(sandbox, 'out')),
                    exists(join(sandbox, 'err'))))

    with open(join(find_single_aux_dir(str(tmpdir)), 'jobs.report'), 'r') as f:
        job_stats = [json.loads(line) for line in f.readlines()]

    assert len(job_stats) == len(jnames)

    jstats = {}
    for job in job_stats:
        print('read job stats: {}'.format(str(job)))
        t = datetime.strptime(job['runtime']['rtime'], "%H:%M:%S.%f")
        rtime = timedelta(hours=t.hour,
                          minutes=t.minute,
                          seconds=t.second,
                          microseconds=t.microsecond)

        # find start executing time
        exec_state = list(
            filter(lambda st_en: st_en['state'] == 'EXECUTING',
                   job['history']))
        assert len(exec_state) == 1

        # find finish executing time
        finish_state = list(
            filter(lambda st_en: st_en['state'] == 'SUCCEED', job['history']))
        assert len(finish_state) == 1

        start_time = datetime.strptime(exec_state[0]['date'],
                                       '%Y-%m-%dT%H:%M:%S.%f')
        finish_time = datetime.strptime(finish_state[0]['date'],
                                        '%Y-%m-%dT%H:%M:%S.%f')

        jstats[job['name']] = {
            'r_time': rtime,
            's_time': start_time,
            'f_time': finish_time
        }

    # assert second job started after the first one
    assert jstats['second']['s_time'] > jstats['first']['f_time']

    # assert third job started after the first and second ones
    assert all((jstats['third']['s_time'] > jstats['first']['f_time'],
                jstats['third']['s_time'] > jstats['second']['f_time']))

    rmtree(str(tmpdir))
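Both this test and the next one convert the report's rtime field (formatted as %H:%M:%S.%f) into a timedelta by way of datetime.strptime. A small helper capturing the idiom could look like the sketch below; parse_rtime is a name introduced here for illustration, not part of QCG-PilotJob.

from datetime import datetime, timedelta

def parse_rtime(rtime_str):
    # strptime has no duration type, so parse into a datetime and
    # rebuild a timedelta from its time-of-day fields
    # (only valid for runtimes under 24 hours)
    t = datetime.strptime(rtime_str, '%H:%M:%S.%f')
    return timedelta(hours=t.hour, minutes=t.minute,
                     seconds=t.second, microseconds=t.microsecond)

assert parse_rtime('00:00:02.500000') == timedelta(seconds=2, microseconds=500000)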
Example No. 7
def test_local_iter_scheduling_job_large(tmpdir):
    file_path = tmpdir.join('jobs.json')

    print('tmpdir: {}'.format(str(tmpdir)))

    jobName = "sleep-iter"
    nits = 20
    jobSleepTime = 2
    jobCores = 2
    availCores = 10
    # 20 iterations needing 2 cores each on 10 available cores take
    # 20 * 2 / 10 = 4 scheduling rounds, about 4 * 2 = 8 seconds in total
    rounds = nits * jobCores / availCores
    totalExecTime = rounds * jobSleepTime
    jobs = [{
        "name": jobName,
        "iteration": {
            "stop": nits
        },
        "execution": {
            "exec": "/bin/sleep",
            "args": ["{}s".format(str(jobSleepTime))],
            "wd": abspath(tmpdir.join("{}_$${{it}}".format(jobName))),
            "stdout": "sleep-iter.stdout",
            "stderr": "sleep-iter.stderr"
        },
        "resources": {
            "numCores": {
                "exact": jobCores,
            }
        }
    }]
    reqs = [{
        'request': 'submit',
        'jobs': jobs
    }, {
        'request': 'control',
        'command': 'finishAfterAllTasksDone'
    }]
    save_reqs_to_file(reqs, file_path)
    print('jobs saved to file_path: {}'.format(str(file_path)))

    sys.argv = [
        'QCG-PilotJob', '--log', 'debug', '--file', '--file-path',
        str(file_path), '--nodes',
        str(availCores), '--wd',
        str(tmpdir), '--report-format', 'json'
    ]
    QCGPMService().start()

    check_job_status_in_json(
        [jobName] + ["{}:{}".format(jobName, i) for i in range(0, nits)],
        workdir=str(tmpdir),
        dest_state='SUCCEED')

    for i in range(0, nits):
        wd_path = abspath(tmpdir.join("{}_{}".format(jobName, i)))
        stdout_path = join(wd_path, 'sleep-iter.stdout')
        stderr_path = join(wd_path, 'sleep-iter.stderr')
        assert all((isdir(wd_path), exists(stdout_path), exists(stderr_path)
                    )), "stdout({}) and/or stderr({}) doesn't exist".format(
                        stdout_path, stderr_path)

    with open(join(find_single_aux_dir(str(tmpdir)), 'jobs.report'), 'r') as f:
        job_stats = [json.loads(line) for line in f.readlines()]

    assert len(job_stats) == nits + 1

    min_start, max_finish = None, None

    for i in range(0, nits):
        job = job_stats[i]

        print('read job stats: {}'.format(str(job)))
        t = datetime.strptime(job['runtime']['rtime'], "%H:%M:%S.%f")
        rtime = timedelta(hours=t.hour,
                          minutes=t.minute,
                          seconds=t.second,
                          microseconds=t.microsecond)

        assert all((rtime.total_seconds() > jobSleepTime,
                    rtime.total_seconds() < jobSleepTime + 2)), \
            "job {} runtime {}s outside the expected range ({}s, {}s)".format(
                i, rtime.total_seconds(), jobSleepTime, jobSleepTime + 2)

        # find start executing time
        exec_state = list(
            filter(lambda st_en: st_en['state'] == 'EXECUTING',
                   job['history']))
        assert len(exec_state) == 1

        # find finish executing time
        finish_state = list(
            filter(lambda st_en: st_en['state'] == 'SUCCEED', job['history']))
        assert len(finish_state) == 1

        start_time = datetime.strptime(exec_state[0]['date'],
                                       '%Y-%m-%dT%H:%M:%S.%f')
        finish_time = datetime.strptime(finish_state[0]['date'],
                                        '%Y-%m-%dT%H:%M:%S.%f')

        if not min_start or start_time < min_start:
            min_start = start_time

        if not max_finish or finish_time > max_finish:
            max_finish = finish_time

    assert all((min_start, max_finish))

    # the span from the first EXECUTING event to the last SUCCEED event should
    # cover about `rounds` scheduling rounds of jobSleepTime seconds each
    scenario_duration = max_finish - min_start
    assert all((scenario_duration.total_seconds() > totalExecTime,
                scenario_duration.total_seconds() < totalExecTime + 4)), \
        "scenario duration {}s outside the expected range ({}s, {}s)".format(
            scenario_duration.total_seconds(), totalExecTime, totalExecTime + 4)

    rmtree(str(tmpdir))
Example No. 8
def test_aux_dir_find(tmpdir):
    # a fresh directory should contain no aux dirs
    assert len(find_aux_dirs(tmpdir)) == 0

    # a directory whose name contains, but does not start with, '.qcgpjm' is ignored
    dname = 'somepath.qcgpjm'
    os.makedirs(tmpdir.join(dname))
    assert len(find_aux_dirs(tmpdir)) == 0
    shutil.rmtree(tmpdir.join(dname))

    # a valid aux directory, even without a postfix
    dname = '.qcgpjm-service-'
    os.makedirs(tmpdir.join(dname))
    assert len(find_aux_dirs(tmpdir)) == 1 and find_aux_dirs(
        tmpdir)[0] == os.path.abspath(tmpdir.join(dname))
    shutil.rmtree(tmpdir.join(dname))

    # file, not directory
    dname = '.qcgpjm-service-'
    open(str(tmpdir.join(dname)), 'w+').close()
    assert os.path.isfile(str(tmpdir.join(dname)))
    assert len(find_aux_dirs(tmpdir)) == 0
    os.remove(tmpdir.join(dname))

    # an aux directory below the top level is ignored
    subdir = 'subdirectory'
    dname = '{}/.qcgpjm-service'.format(subdir)
    os.makedirs(tmpdir.join(dname))
    assert len(find_aux_dirs(tmpdir)) == 0
    shutil.rmtree(tmpdir.join(subdir))

    # many directories with postfixes
    dnames = [
        '.qcgpjm-service-governor-0', '.qcgpjm-service-manager-0',
        '.qcgpjm-service-manager-1'
    ]
    for dname in dnames:
        os.makedirs(tmpdir.join(dname))
    aux_dirs = find_aux_dirs(tmpdir)
    assert len(aux_dirs) == len(dnames)
    assert all((os.path.abspath(str(tmpdir.join(dname))) in aux_dirs
                for dname in dnames))
    for dname in dnames:
        shutil.rmtree(str(tmpdir.join(dname)))

    # no auxiliary directories
    with pytest.raises(Exception):
        find_single_aux_dir(str(tmpdir))

    # too many directories
    dnames = [
        '.qcgpjm-service-governor-0', '.qcgpjm-service-manager-0',
        '.qcgpjm-service-manager-1'
    ]
    for dname in dnames:
        os.makedirs(tmpdir.join(dname))
    with pytest.raises(Exception):
        find_single_aux_dir(str(tmpdir))
    for dname in dnames:
        shutil.rmtree(str(tmpdir.join(dname)))

    # just one auxiliary dir
    dname = '.qcgpjm-service-0'
    os.makedirs(tmpdir.join(dname))
    assert find_single_aux_dir(str(tmpdir)) == os.path.abspath(
        str(tmpdir.join(dname)))
    shutil.rmtree(tmpdir.join(dname))

    # last modified (last created) auxiliary dir
    dnames = [
        '.qcgpjm-service-manager-1', '.qcgpjm-service-manager-3',
        '.qcgpjm-service-manager-2'
    ]
    for dname in dnames:
        os.makedirs(tmpdir.join(dname))
        time.sleep(1)
    with pytest.raises(Exception):
        find_single_aux_dir(str(tmpdir))
    assert find_latest_aux_dir(tmpdir) == os.path.abspath(
        str(tmpdir.join(dnames[-1])))
    for dname in dnames:
        shutil.rmtree(str(tmpdir.join(dname)))
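The assertions above pin down the contract: only top-level directories whose names carry the aux prefix qualify, find_single_aux_dir raises unless exactly one such directory exists, and find_latest_aux_dir returns the most recently modified one. A sketch reconstructed from that behavior (not QCG-PilotJob's actual source; the '.qcgpjm-service' prefix is an inference from the directory names used here and from Example No. 4, where the sibling '.qcgpjm-client' directory must not match):

import os

AUX_DIR_PREFIX = '.qcgpjm-service'  # assumed prefix, inferred from the tests above

def find_aux_dirs(path):
    # only top-level entries count, and they must be directories, not files
    base = os.path.abspath(str(path))
    return [os.path.join(base, entry) for entry in os.listdir(base)
            if entry.startswith(AUX_DIR_PREFIX) and os.path.isdir(os.path.join(base, entry))]

def find_single_aux_dir(path):
    # exactly one aux dir is expected; zero or many is an error
    aux_dirs = find_aux_dirs(path)
    if len(aux_dirs) != 1:
        raise Exception('expected a single aux directory in {}, found {}'.format(path, len(aux_dirs)))
    return aux_dirs[0]

def find_latest_aux_dir(path):
    # among many aux dirs, pick the most recently modified one
    aux_dirs = find_aux_dirs(path)
    if not aux_dirs:
        raise Exception('no aux directory in {}'.format(path))
    return max(aux_dirs, key=lambda d: os.stat(d).st_mtime)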