Example No. 1
def run(args):
    it_config = get_yml_content(args.config)

    for test_case_config in it_config['testCases']:
        name = test_case_config['name']
        if case_excluded(name, args.exclude):
            print('{} excluded'.format(name))
            continue
        if args.cases and not case_included(name, args.cases):
            continue

        # fill in defaults for any config keys the test case does not set
        for k in it_config['defaultTestCaseConfig']:
            if k not in test_case_config:
                test_case_config[k] = it_config['defaultTestCaseConfig'][k]
        print(json.dumps(test_case_config, indent=4))

        if not match_platform(test_case_config):
            print('skipped {}, platform {} not match [{}]'.format(
                name, sys.platform, test_case_config['platform']))
            continue

        if not match_training_service(test_case_config, args.ts):
            print('skipped {}, training service {} not match [{}]'.format(
                name, args.ts, test_case_config['trainingService']))
            continue

        wait_for_port_available(8080, 30)
        print('{}Testing: {}{}'.format(GREEN, name, CLEAR))
        begin_time = time.time()

        run_test_case(test_case_config, it_config, args)
        print('{}Test {}: TEST PASS IN {} SECONDS{}'.format(
            GREEN, name, int(time.time() - begin_time), CLEAR),
              flush=True)
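A minimal driver for the run() function above, assuming the CLI flag names simply mirror the attributes the function reads (args.config, args.exclude, args.cases, args.ts); the project's real argument parser may define them differently, so treat this as an illustrative sketch only.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # hypothetical defaults and help texts, inferred from usage in run()
    parser.add_argument('--config', default='integration_test.yml',
                        help='path to the integration test YAML')
    parser.add_argument('--cases', default='',
                        help='comma-separated test case names to run')
    parser.add_argument('--exclude', default='',
                        help='comma-separated test case names to skip')
    parser.add_argument('--ts', default='local',
                        help='training service under test')
    run(parser.parse_args())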
Example No. 2
def run(args):
    it_config = get_yml_content(args.config)
    test_cases = it_config['testCases']

    for test_case_id, test_case_config in enumerate(test_cases, start=1):
        name = test_case_config['name']
        print(GREEN + '=' * 80 + CLEAR)
        print('## {}Testing: {}{} ##'.format(GREEN, name, CLEAR))

        # Report progress to Azure DevOps via its logging command
        print(
            f'##vso[task.setprogress value={int(test_case_id / len(test_cases) * 100)};]{name}'
        )

        if case_excluded(name, args.exclude):
            print('{} excluded'.format(name))
            continue
        if args.cases and not case_included(name, args.cases):
            continue

        # fill in defaults for any config keys the test case does not set
        for k in it_config['defaultTestCaseConfig']:
            if k not in test_case_config:
                test_case_config[k] = it_config['defaultTestCaseConfig'][k]
        print(json.dumps(test_case_config, indent=4))

        if not match_platform(test_case_config):
            print('skipped {}, platform {} not match [{}]'.format(
                name, sys.platform, test_case_config['platform']))
            continue

        if not match_training_service(test_case_config, args.ts):
            print('skipped {}, training service {} not match [{}]'.format(
                name, args.ts, test_case_config['trainingService']))
            continue

        if args.ts == 'remote':
            if not match_remoteConfig(test_case_config, args.nni_source_dir):
                print('skipped {}, remoteConfig not match.'.format(name))
                continue

        # remote and hybrid modes need more time to clean up
        if args.ts == 'remote' or args.ts == 'hybrid':
            wait_for_port_available(8080, 240)
            # some training services need one more port to listen for metrics
            wait_for_port_available(8081, 240)

        # adl mode needs more time to clean up the PVC
        if args.ts == 'adl' and name == 'nnictl-resume-2':
            time.sleep(30)

        begin_time = time.time()

        run_test_case(test_case_config, it_config, args)
        print('{}Test {}: TEST PASS IN {} SECONDS{}\n\n'.format(
            GREEN, name, int(time.time() - begin_time), CLEAR),
              flush=True)
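The examples rely on a wait_for_port_available(port, timeout) helper from the shared test utilities. Its implementation is not shown here; a minimal sketch consistent with the calls above could look like this (the real helper may behave differently, e.g. report failure another way):

import socket
import time

def wait_for_port_available(port, timeout):
    '''Poll until nothing listens on `port`, or fail after `timeout` seconds (sketch).'''
    deadline = time.time() + timeout
    while time.time() < deadline:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            # connect_ex() returns a non-zero error code when nothing accepts the connection
            if sock.connect_ex(('localhost', port)) != 0:
                return
        time.sleep(1)
    raise RuntimeError('port {} is still in use after {} seconds'.format(port, timeout))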
Example No. 3
def naive_test(args):
    '''Run the naive integration test.'''
    to_remove = ['tuner_search_space.json', 'tuner_result.txt', 'assessor_result.txt']
    to_remove = list(map(lambda file: osp.join(NAIVE_TEST_CONFIG_DIR, file), to_remove))
    remove_files(to_remove)

    proc = subprocess.run(['nnictl', 'create', '--config', args.config])
    assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

    print('Spawning trials...')

    nnimanager_log_path = get_nni_log_path(EXPERIMENT_URL)
    current_trial = 0

    for _ in range(120):
        time.sleep(1)

        tuner_status = read_last_line(osp.join(NAIVE_TEST_CONFIG_DIR, 'tuner_result.txt'))
        assessor_status = read_last_line(osp.join(NAIVE_TEST_CONFIG_DIR, 'assessor_result.txt'))
        experiment_status = is_experiment_done(nnimanager_log_path)

        assert tuner_status != 'ERROR', 'Tuner exited with error'
        assert assessor_status != 'ERROR', 'Assessor exited with error'

        if experiment_status:
            break

        if tuner_status is not None:
            for line in open(osp.join(NAIVE_TEST_CONFIG_DIR, 'tuner_result.txt')):
                if line.strip() == 'ERROR':
                    break
                trial = int(line.split(' ')[0])
                if trial > current_trial:
                    current_trial = trial
                    print('Trial #%d done' % trial)

    assert experiment_status, 'Failed to finish in 2 min'

    ss1 = json.load(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'search_space.json')))
    ss2 = json.load(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'tuner_search_space.json')))
    assert ss1 == ss2, 'Tuner got wrong search space'

    tuner_result = set(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'tuner_result.txt')))
    expected = set(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'expected_tuner_result.txt')))
    # Trials may complete before NNI gets the assessor's result,
    # so it is possible to have more final results than expected
    print('Tuner result:', tuner_result)
    print('Expected tuner result:', expected)
    assert tuner_result.issuperset(expected), 'Bad tuner result'

    assessor_result = set(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'assessor_result.txt')))
    expected = set(open(osp.join(NAIVE_TEST_CONFIG_DIR, 'expected_assessor_result.txt')))
    assert assessor_result == expected, 'Bad assessor result'

    subprocess.run(['nnictl', 'stop'])
    wait_for_port_available(8080, 10)
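naive_test() polls the tuner and assessor result files through a read_last_line() helper that is not part of the snippet. A sketch that matches its usage (returning None while the file is missing or empty) might be:

import os.path as osp

def read_last_line(path):
    '''Return the last non-blank line of `path`, or None if it does not exist yet (sketch).'''
    if not osp.exists(path):
        return None
    with open(path) as f:
        lines = [line.strip() for line in f if line.strip()]
    return lines[-1] if lines else None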
Example No. 4
def stop_experiment_test(args):
    '''Test the `nnictl stop` command, including `nnictl stop <experiment_id>` and `nnictl stop --all`.
    A plain `nnictl stop` is not tested here since it is used throughout the other test code.'''
    config_file = args.config
    subprocess.run(['nnictl', 'create', '--config', config_file, '--port', '8080'], check=True)
    subprocess.run(['nnictl', 'create', '--config', config_file, '--port', '8888'], check=True)
    subprocess.run(['nnictl', 'create', '--config', config_file, '--port', '8989'], check=True)
    subprocess.run(['nnictl', 'create', '--config', config_file, '--port', '8990'], check=True)

    # test cmd `nnictl stop <experiment_id>`
    experiment_id = get_experiment_id(EXPERIMENT_URL)
    proc = subprocess.run(['nnictl', 'stop', experiment_id])
    assert proc.returncode == 0, '`nnictl stop %s` failed with code %d' % (experiment_id, proc.returncode)
    wait_for_port_available(8080, 10)
    assert not detect_port(8080), '`nnictl stop %s` failed to stop experiments' % experiment_id

    # test cmd `nnictl stop --port`
    proc = subprocess.run(['nnictl', 'stop', '--port', '8990'])
    assert proc.returncode == 0, '`nnictl stop --port 8990` failed with code %d' % proc.returncode
    wait_for_port_available(8990, 10)
    assert not detect_port(8990), '`nnictl stop --port 8990` failed to stop the experiment'

    # test cmd `nnictl stop --all`
    proc = subprocess.run(['nnictl', 'stop', '--all'])
    assert proc.returncode == 0, '`nnictl stop --all` failed with code %d' % proc.returncode
    wait_for_port_available(8888, 10)
    wait_for_port_available(8989, 10)
    assert not detect_port(8888) and not detect_port(8989), '`nnictl stop --all` failed to stop experiments'
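stop_experiment_test() also asserts on detect_port(), another helper from the test utilities. An illustrative version consistent with how it is used above (True when something is listening on the port) could be:

import socket

def detect_port(port):
    '''Return True if a process is currently listening on `port` (illustrative sketch).'''
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        return sock.connect_ex(('localhost', port)) == 0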
Example No. 5
def run(args):
    it_config = get_yml_content(args.config)

    for test_case_config in it_config['testCases']:
        name = test_case_config['name']
        if case_excluded(name, args.exclude):
            print('{} excluded'.format(name))
            continue
        if args.cases and not case_included(name, args.cases):
            continue

        # fill in defaults for any config keys the test case does not set
        for k in it_config['defaultTestCaseConfig']:
            if k not in test_case_config:
                test_case_config[k] = it_config['defaultTestCaseConfig'][k]
        print(json.dumps(test_case_config, indent=4))

        if not match_platform(test_case_config):
            print('skipped {}, platform {} not match [{}]'.format(
                name, sys.platform, test_case_config['platform']))
            continue

        if not match_training_service(test_case_config, args.ts):
            print('skipped {}, training service {} not match [{}]'.format(
                name, args.ts, test_case_config['trainingService']))
            continue

        # remote and hybrid modes need more time to clean up
        if args.ts == 'remote' or args.ts == 'hybrid':
            if args.ts == 'remote':
                if not match_remoteConfig(test_case_config,
                                          args.nni_source_dir):
                    print('skipped {}, remoteConfig not match.'.format(name))
                    continue
            wait_for_port_available(8080, 240)
        else:
            wait_for_port_available(8080, 60)

        # adl mode needs more time to clean up the PVC
        if args.ts == 'adl' and name == 'nnictl-resume-2':
            time.sleep(30)
        print('## {}Testing: {}{} ##'.format(GREEN, name, CLEAR))
        begin_time = time.time()

        run_test_case(test_case_config, it_config, args)
        print('{}Test {}: TEST PASS IN {} SECONDS{}'.format(
            GREEN, name, int(time.time() - begin_time), CLEAR),
              flush=True)
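The "fill in defaults" loop repeated in every run() variant can also be expressed with dict.setdefault. This is only an equivalent sketch built around a hypothetical apply_defaults helper, not a change to the tests themselves:

def apply_defaults(test_case_config, defaults):
    '''Copy every default key that the test case does not override (existing keys win).'''
    for key, value in defaults.items():
        test_case_config.setdefault(key, value)
    return test_case_config

# Equivalent to the inline loop in the examples above:
# apply_defaults(test_case_config, it_config['defaultTestCaseConfig'])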