示例#1
0
def test_commit_put_file_bytes(adapter: PachydermAdapter, repo):
    """Write, list and delete files in commits, then remove a commit and a branch.

    The first commit (no explicit branch) receives four files, given as both
    ``str`` and ``bytes`` payloads and with and without a leading slash in
    the path; one file is deleted again before the commit is closed. A second
    commit on branch ``'test'`` adds a single file. Finally the second commit
    and the ``'test'`` branch are deleted through the adapter.
    """
    uploads = (
        ('test_1', '/test_file_1'),
        ('test_2', '/folder/test_file_2'),
        (b'test_3', 'test_file_3'),
        (b'test_4', 'test_file_4'),
    )
    with PachydermCommitAdapter(adapter, repo) as first_commit:
        for payload, path in uploads:
            first_commit.put_file_bytes(payload, path)
        # The 'test*' glob is expected to match only the three top-level files.
        top_level = set(first_commit._list_file_paths('test*'))
        assert top_level == {'/test_file_1', '/test_file_3', '/test_file_4'}
        first_commit.delete_file('test_file_4')
        top_level = set(first_commit._list_file_paths('test*'))
        assert top_level == {'/test_file_1', '/test_file_3'}
        assert len(first_commit._list_file_paths('**')) == 4

    # Same content must be visible through the adapter once the commit is closed.
    files = adapter.list_files(repo, commit=first_commit.commit)
    assert len(files) == 4
    listed_paths = set(files.path)
    for expected_path in ('/test_file_1', '/folder/test_file_2', '/test_file_3'):
        assert expected_path in listed_paths
    assert '/test_file_4' not in listed_paths

    with PachydermCommitAdapter(adapter, repo, branch='test') as branch_commit:
        branch_commit.put_file_bytes('test_5', '/test_file_5')
    files = adapter.list_files(repo, branch='test')
    assert len(files) == 1
    assert '/test_file_5' in set(files.path)
    assert adapter.list_branch_heads(repo)['test'] == branch_commit.commit

    commits = adapter.list_commits(repo)
    # Each row's list of branch names is flattened into one comma-separated string.
    commits_branches = set(commits.branches.apply(', '.join))
    assert len(commits) == 2
    assert {'master', 'test'} <= commits_branches
    assert branch_commit.commit in set(commits.commit)

    adapter.delete_commit(repo, branch_commit.commit)
    remaining_commits = set(adapter.list_commits(repo).commit)
    assert branch_commit.commit not in remaining_commits

    adapter.delete_branch(repo, 'test')
    assert 'test' not in adapter.list_branch_heads(repo)
示例#2
0
def test_retry(adapter: PachydermAdapter):
    """Check how ``retry`` reacts to a gRPC error depending on connectivity.

    ``PachydermAdapter.list_repos`` is replaced by a mock that raises a
    ``_Rendezvous`` error whose status code value is ``(None, 'unavailable')``.
    Two scenarios are asserted:

    * ``check_connectivity`` returns True: ``PachydermError`` is raised, the
      mock has been called ``_max_retries + 1`` times and ``_retries`` equals
      ``_max_retries``.
    * ``check_connectivity`` returns False: ``PachydermError`` is raised after
      a single call and ``_retries`` stays at 0.
    """
    from grpc._channel import _Rendezvous
    from pachypy.adapter import retry
    # Fake gRPC state such that ``state.code.value == (None, 'unavailable')``.
    # PropertyMock objects are attached to the mock's type so they behave as properties.
    state = mock.MagicMock()
    code = mock.MagicMock()
    type(code).value = mock.PropertyMock(return_value=(None, 'unavailable'))
    type(state).code = mock.PropertyMock(return_value=code)
    # An exception instance as side_effect makes every call to ``func`` raise it.
    func = mock.MagicMock(side_effect=_Rendezvous(
        state=state,
        call=mock.MagicMock(),
        response_deserializer=mock.MagicMock(),
        deadline=mock.MagicMock(),
    ))
    adapter._max_retries = 3
    with mock.patch('pachypy.adapter.PachydermAdapter.list_repos', func):
        # Scenario 1: connectivity check succeeds.
        with mock.patch('pachypy.adapter.PachydermAdapter.check_connectivity', mock.MagicMock(return_value=True)):
            with pytest.raises(PachydermError):
                retry(adapter.list_repos)(adapter)  # type: ignore
            assert adapter._retries == adapter._max_retries
            # One initial attempt plus ``_max_retries`` further attempts.
            assert func.call_count == adapter._max_retries + 1
        # Reset call bookkeeping and the retry counter before the second scenario.
        func.reset_mock()
        adapter._retries = 0
        # Scenario 2: connectivity check fails.
        with mock.patch('pachypy.adapter.PachydermAdapter.check_connectivity', mock.MagicMock(return_value=False)):
            with pytest.raises(PachydermError):
                retry(adapter.list_repos)(adapter)  # type: ignore
            assert adapter._retries == 0
            assert func.call_count == 1
示例#3
0
def test_init():
    """Check host/port resolution from an explicit host and from ``PACHD_ADDRESS``.

    With ``host='test_host'`` the adapter is expected to report port 30650.
    With ``PACHD_ADDRESS`` set to ``'test_host:12345'`` and no arguments, host
    and port are expected to be taken from that variable.
    """
    from pachypy.adapter import PachydermAdapter

    explicit = PachydermAdapter(host='test_host')
    assert explicit.host == 'test_host'
    assert explicit.port == 30650

    patched_env = {'PACHD_ADDRESS': 'test_host:12345'}
    with mock.patch.dict(os.environ, patched_env):
        from_env = PachydermAdapter()
        assert from_env.host == 'test_host'
        assert from_env.port == 12345
示例#4
0
def test_commit_create_branch(adapter: PachydermAdapter, repo):
    """Create branches from inside a commit and via the adapter.

    ``test_branch_1`` is created through the open commit,
    ``test_branch_2`` through ``adapter.create_branch`` using the commit's
    identifier. Both branch heads must equal the head of ``master``.
    """
    new_branches = ('test_branch_1', 'test_branch_2')

    with PachydermCommitAdapter(adapter, repo) as commit:
        commit.create_branch(new_branches[0])
    adapter.create_branch(repo, commit.commit, new_branches[1])

    heads = adapter.list_branch_heads(repo)
    for branch in new_branches:
        assert branch in heads
    for branch in new_branches:
        assert heads[branch] == heads['master']
示例#5
0
def test_get_file(adapter: PachydermAdapter, repo):
    """Read files back by default branch, by named branch and by commit.

    ``adapter.get_file`` is consumed with ``next``, so only the first item it
    yields is compared against the bytes that were written.
    """
    with PachydermCommitAdapter(adapter, repo) as default_commit:
        default_commit.put_file_bytes(b'123', '/test_file_1')
    with PachydermCommitAdapter(adapter, repo, branch='test') as branch_commit:
        branch_commit.put_file_bytes(b'321', '/test_file_2')

    # No branch/commit given.
    first_chunk = next(adapter.get_file(repo, '/test_file_1'))
    assert first_chunk == b'123'

    # Addressed through the branch name.
    first_chunk = next(adapter.get_file(repo, '/test_file_2', branch='test'))
    assert first_chunk == b'321'

    # Addressed through the identifier of the second commit.
    first_chunk = next(adapter.get_file(repo, '/test_file_2', commit=branch_commit.commit))
    assert first_chunk == b'321'
示例#6
0
def test_stop_start_pipeline(adapter: PachydermAdapter, pipeline_1):
    """Stop and restart a pipeline, checking the state reached after each step.

    Expected transitions: ``starting`` -> ``running`` after creation,
    ``running`` -> ``paused`` after ``stop_pipeline`` and
    ``paused`` -> ``running`` after ``start_pipeline``.
    """
    name = pipeline_1['pipeline']['name']

    reached = await_pipeline_new_state(adapter, name, initial_state='starting')
    assert reached == 'running'

    adapter.stop_pipeline(name)
    reached = await_pipeline_new_state(adapter, name, initial_state='running')
    assert reached == 'paused'

    adapter.start_pipeline(name)
    reached = await_pipeline_new_state(adapter, name, initial_state='paused')
    assert reached == 'running'
示例#7
0
def test_commit_put_file_url(adapter: PachydermAdapter, repo):
    """Add a file from a URL in a commit opened with ``branch=None``.

    Afterwards the repo must contain exactly one file and one commit, and
    ``list_branch_heads`` must be empty.
    """
    source_url = 'https://raw.githubusercontent.com/itssimon/pachypy/master/tests/mock/get_logs.csv'

    with PachydermCommitAdapter(adapter, repo, branch=None) as commit:
        commit.put_file_url(source_url, 'get_logs.csv')

    files = adapter.list_files(repo, commit=commit.commit)
    commits = adapter.list_commits(repo)

    assert len(files) == 1
    assert len(commits) == 1
    assert '/get_logs.csv' in set(files.path)
    assert commit.commit in set(commits.commit)
    # The commit was made without a branch, so no branch head is expected.
    branch_heads = adapter.list_branch_heads(repo)
    assert len(branch_heads) == 0
示例#8
0
def test_init(monkeypatch):
    """Check host/port resolution with and without ``PACHD_ADDRESS``.

    ``PACHD_ADDRESS`` is removed first so the no-argument and explicit-host
    cases are not influenced by the surrounding environment. The variable is
    then patched in with and without a port component.
    """
    monkeypatch.delenv('PACHD_ADDRESS', raising=False)

    no_args = PachydermAdapter()
    assert no_args.host == 'localhost'
    assert no_args.port == 30650

    explicit = PachydermAdapter(host='test_host')
    assert explicit.host == 'test_host'
    assert explicit.port == 30650

    # (PACHD_ADDRESS value, expected host, expected port)
    env_cases = (
        ('test_host:12345', 'test_host', 12345),
        ('another_test_host', 'another_test_host', 30650),
    )
    for address, expected_host, expected_port in env_cases:
        with mock.patch.dict(os.environ, {'PACHD_ADDRESS': address}):
            from_env = PachydermAdapter()
            assert from_env.host == expected_host
            assert from_env.port == expected_port
示例#9
0
def test_run_pipeline(adapter: PachydermAdapter, pipeline_2):
    """Trigger a cron pipeline by commit and by ``run_pipeline``.

    A file is written to the repo named ``<pipeline>_<cron input name>`` and
    the resulting job must finish with ``'success'``. ``run_pipeline`` is then
    called and must also end in ``'success'``, leaving two jobs in total.
    """
    name = pipeline_2['pipeline']['name']
    cron_input_name = pipeline_2['input']['cron']['name']
    tick_repo = '_'.join((name, cron_input_name))

    started = await_pipeline_new_state(adapter, name, initial_state='starting')
    assert started == 'running'

    # First job: triggered by a commit into the tick repo.
    with PachydermCommitAdapter(adapter, tick_repo) as tick_commit:
        tick_commit.put_file_bytes(b'0', 'time')
    assert await_job_completed_state(adapter, name) == 'success'

    # Second job: triggered explicitly.
    adapter.run_pipeline(name)
    assert await_job_completed_state(adapter, name) == 'success'

    pipeline_jobs = adapter.list_jobs(pipeline=name)
    assert len(pipeline_jobs) == 2
示例#10
0
def test_list_commits_files(adapter: PachydermAdapter, repo):
    """Check ``list_branch_heads``, ``list_commits`` and ``list_files`` output.

    Starting from a repo with no branch heads and no files, three rounds of
    commits are made, each round writing the same file once without an
    explicit branch and once on branch ``'test'``. The three most recent
    commits and the file listings per branch are then inspected.
    """
    assert len(adapter.list_branch_heads(repo)) == 0
    assert len(adapter.list_files(repo)) == 0

    # One commit without branch argument, one on 'test', repeated three times.
    commit_kwargs_per_round = ({}, {'branch': 'test'})
    for _ in range(3):
        for commit_kwargs in commit_kwargs_per_round:
            with PachydermCommitAdapter(adapter, repo, **commit_kwargs) as commit:
                commit.put_file_bytes(b'test', 'test')

    heads = adapter.list_branch_heads(repo)
    for branch in ('master', 'test'):
        assert branch in heads

    commits = adapter.list_commits(repo, n=3)
    assert len(commits) == 3
    assert commits['repo'].iloc[0] == repo
    assert commits['size_bytes'].iloc[0] == 12
    # Expected branch lists for the first three rows, in row order.
    branch_column = commits['branches']
    for position, expected_branches in enumerate((['test'], ['master'], [])):
        assert branch_column.iloc[position] == expected_branches

    master_files = adapter.list_files(repo, branch='master')
    assert len(master_files) == 1
    assert master_files['repo'].iloc[0] == repo
    assert master_files['type'].iloc[0] == 'file'
    assert master_files['size_bytes'].iloc[0] == 12

    test_files = adapter.list_files(repo, branch='test')
    assert len(test_files) == 1

    # Passing neither a branch nor a commit is expected to be rejected.
    with pytest.raises(ValueError):
        adapter.list_files(repo, branch=None, commit=None)
示例#11
0
def test_list_jobs_get_logs(adapter: PachydermAdapter, pipeline_2):
    """Run one job and verify the pipeline, job, datum and log views of it.

    After triggering the pipeline through a commit into its tick repo, the
    test inspects ``inspect_pipeline``, ``list_jobs``, ``inspect_job``,
    ``list_datums``, ``inspect_datum`` and ``get_logs``, and finally deletes
    the job.
    """
    name = pipeline_2['pipeline']['name']
    tick_repo = '_'.join((name, pipeline_2['input']['cron']['name']))
    assert await_pipeline_new_state(adapter, name, initial_state='starting') == 'running'

    # Trigger a single job by committing into the tick repo.
    with PachydermCommitAdapter(adapter, tick_repo) as tick_commit:
        tick_commit.put_file_bytes(b'0', 'time')
    assert await_job_completed_state(adapter, name) == 'success'

    # --- pipeline level -------------------------------------------------
    info = adapter.inspect_pipeline(name)
    assert info['pipeline']['name'] == name
    assert info['state'] == 'running'
    assert info['jobCounts']['success'] > 0
    assert info['version'] == 1
    assert info['createdAt'].tzinfo is not None

    # --- job level ------------------------------------------------------
    assert len(adapter.list_jobs()) >= 1
    jobs = adapter.list_jobs(pipeline=name)
    assert len(jobs) == 1
    job_id = jobs['job'].iloc[0]
    duration = jobs['finished'] - jobs['started']
    assert duration.dt.total_seconds().round().iloc[0] > -10
    processed = jobs['data_processed'].iloc[0]
    total = jobs['data_total'].iloc[0]
    assert processed == total
    assert total == 1
    assert jobs['data_skipped'].iloc[0] == 0

    job_details = adapter.inspect_job(job_id)
    assert job_details['job']['id'] == job_id
    assert job_details['pipeline']['name'] == name
    assert job_details['state'] == 'success'
    assert job_details['dataTotal'] > 0

    # --- datum level ----------------------------------------------------
    datums = adapter.list_datums(job=job_id)
    assert len(datums) == 1
    assert datums['job'].iloc[0] == job_id
    assert datums['repo'].iloc[0] == tick_repo
    assert datums['size_bytes'].iloc[0] > 0

    datum_details = adapter.inspect_datum(job=job_id, datum=datums['datum'].iloc[0])
    assert datum_details['datum']['job']['id'] == job_id
    assert datum_details['state'] == 'success'
    assert datum_details['stats']['processTime'] > 0

    # --- logs -----------------------------------------------------------
    all_logs = adapter.get_logs(pipeline=name)
    # Keep only the rows flagged by the boolean 'user' column.
    user_logs = all_logs[all_logs['user']]
    assert user_logs.shape == (1, 7)
    assert user_logs['message'].iloc[0] == 'test'

    adapter.delete_job(job_id)
    remaining_jobs = adapter.list_jobs(pipeline=name)
    assert len(remaining_jobs) == 0
示例#12
0
def await_pipeline_new_state(adapter: PachydermAdapter, pipeline_name, initial_state='starting', timeout=60):
    """Poll until a pipeline leaves ``initial_state`` or the timeout expires.

    The pipeline list is fetched once per second. If the pipeline is not
    (yet) present in the listing, polling simply continues instead of
    failing with ``IndexError``; this mirrors how ``await_job_completed_state``
    tolerates an empty job list.

    Args:
        adapter: Adapter whose ``list_pipelines()`` result has ``pipeline``
            and ``state`` columns.
        pipeline_name: Name of the pipeline to watch.
        initial_state: State the pipeline is expected to move away from.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The first observed state different from ``initial_state``, or
        ``initial_state`` itself if the timeout expired first.
    """
    # A monotonic clock keeps the timeout immune to system clock adjustments.
    deadline = time.monotonic() + timeout
    state = initial_state
    while state == initial_state and time.monotonic() < deadline:
        time.sleep(1)
        pipelines = adapter.list_pipelines()
        matching_states = pipelines.loc[pipelines.pipeline == pipeline_name, 'state']
        if len(matching_states):
            state = matching_states.iloc[0]
    return state
示例#13
0
def test_commit_context_manager(adapter: PachydermAdapter, repo):
    """Check the life cycle of ``PachydermCommitAdapter`` as a context manager.

    Covered expectations:

    * before entering: no commit identifier, not finished, no commits in repo;
    * after a clean exit: commit identifier set, finished, one commit that is
      the head of ``master``;
    * re-entering the same object raises ``PachydermError`` and adds no commit;
    * an exception raised inside the block propagates and the commit count
      stays at one.
    """
    commit = PachydermCommitAdapter(adapter, repo)
    assert commit.commit is None
    assert commit.finished is False
    assert len(adapter.list_commits(repo)) == 0

    with commit:
        pass
    assert commit.commit is not None
    assert commit.finished is True
    assert len(adapter.list_commits(repo)) == 1
    assert adapter.list_branch_heads(repo)['master'] == commit.commit

    # The same adapter object cannot be entered a second time.
    with pytest.raises(PachydermError), commit:
        pass
    assert len(adapter.list_commits(repo)) == 1

    # An error inside the block must surface unchanged.
    with pytest.raises(OSError), PachydermCommitAdapter(adapter, repo):
        raise OSError
    assert len(adapter.list_commits(repo)) == 1
示例#14
0
def await_job_completed_state(adapter: PachydermAdapter, pipeline_name, timeout=300):
    """Poll the most recent job of a pipeline until it reaches a final state.

    The job list (limited to one entry) is fetched every three seconds. States
    ``'unknown'``, ``'starting'``, ``'running'`` and ``'merging'`` are treated
    as not yet final. An empty job list leaves the current state unchanged and
    polling continues.

    Args:
        adapter: Adapter whose ``list_jobs(pipeline=..., n=1)`` result has a
            ``state`` column.
        pipeline_name: Name of the pipeline whose job is awaited.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The first observed state outside the pending set, or the last pending
        state seen (initially ``'starting'``) if the timeout expired first.
    """
    pending_states = {'unknown', 'starting', 'running', 'merging'}
    # A monotonic clock keeps the timeout immune to system clock adjustments.
    deadline = time.monotonic() + timeout
    state = 'starting'
    while state in pending_states and time.monotonic() < deadline:
        time.sleep(3)
        jobs = adapter.list_jobs(pipeline=pipeline_name, n=1)
        if len(jobs):
            state = jobs['state'].iloc[0]
    return state
示例#15
0
def test_commit_flush(adapter: PachydermAdapter, pipeline_5):
    """Commit with ``flush=True`` and read the pipeline output afterwards.

    Two files of ten space-separated tokens each are written to the
    pipeline's input repo. Leaving the ``with`` block must take more than
    five seconds (presumably because ``flush=True`` waits for downstream
    jobs; confirm against ``PachydermCommitAdapter``), and the pipeline's
    ``cnt`` file must then contain the number 20.
    """
    pipeline_name = pipeline_5['pipeline']['name']
    input_repo = pipeline_5['input']['pfs']['repo']
    assert await_pipeline_new_state(adapter, pipeline_name) == 'running'

    uploads = (
        (b'a b c d e f g h i j\n', 'file1'),
        (b'k l m n o p q r s t\n', 'file2'),
    )
    with PachydermCommitAdapter(adapter, input_repo, flush=True) as commit:
        for payload, path in uploads:
            commit.put_file_bytes(payload, path)
        # Timestamp taken as the last statement inside the block, so the
        # difference below covers the context manager's exit.
        before_exit = time.time()
    exit_duration = time.time() - before_exit
    assert exit_duration > 5

    raw_output = b''.join(adapter.get_file(pipeline_name, 'cnt'))
    assert int(raw_output.decode('utf-8')) == 20
示例#16
0
def test_create_delete_repo(adapter: PachydermAdapter):
    """Create a repo, find it in both listing calls, then delete it again."""
    name = 'test_repo_a1b2c'
    # Start from a clean slate in case an earlier run left the repo behind.
    delete_repo_if_exists(adapter, name)

    adapter.create_repo(name)
    names_after_create = set(adapter.list_repo_names())
    assert name in names_after_create
    listed_repos = set(adapter.list_repos().repo)
    assert name in listed_repos

    adapter.delete_repo(name)
    names_after_delete = set(adapter.list_repo_names())
    assert name not in names_after_delete
示例#17
0
def test_list_pipelines(adapter: PachydermAdapter, pipeline_1, pipeline_2, pipeline_3, pipeline_4):
    """Check the shape, column names and selected cells of ``list_pipelines``.

    At least four rows and exactly fifteen columns are expected. For
    ``test_pipeline_3`` and ``test_pipeline_4`` the input repos and the
    rendered input expression are compared against fixed values.
    """
    df = adapter.list_pipelines()
    row_count, column_count = df.shape
    assert row_count >= 4
    assert column_count == 15

    expected_columns = (
        'pipeline', 'image', 'cron_spec', 'input', 'input_repos', 'output_branch',
        'parallelism_constant', 'parallelism_coefficient', 'datum_tries', 'max_queue_size',
        'jobs_running', 'jobs_success', 'jobs_failure',
        'created', 'state',
    )
    missing_columns = [column for column in expected_columns if column not in df.columns]
    assert not missing_columns

    def cell(pipeline_name, column):
        # First value of `column` among the rows belonging to `pipeline_name`.
        return df.loc[df['pipeline'] == pipeline_name, column].iloc[0]

    upstream = {'test_pipeline_1', 'test_pipeline_2'}

    assert set(cell('test_pipeline_3', 'input_repos')) == upstream
    assert cell('test_pipeline_3', 'input') == '(tick ∪ test_pipeline_1:* ∪ test_pipeline_2:*)'
    assert cell('test_pipeline_3', 'cron_spec') == '0 * * * *'

    assert set(cell('test_pipeline_4', 'input_repos')) == upstream
    assert cell('test_pipeline_4', 'input') == '(test_pipeline_1/test:* ⨯ test_pipeline_2/test:*)'
示例#18
0
def delete_pipeline_if_exists(adapter: PachydermAdapter, pipeline_name):
    """Delete ``pipeline_name`` through the adapter, ignoring ``PachydermError``.

    Any ``PachydermError`` raised by ``adapter.delete_pipeline`` is swallowed
    on purpose so the helper can be used for best-effort cleanup.
    """
    try:
        adapter.delete_pipeline(pipeline_name)
    except PachydermError:
        # Deliberately ignored: nothing to clean up or deletion was refused.
        return None
    return None
示例#19
0
def test_check_connectivity():
    """``check_connectivity`` must return exactly ``False`` for an unknown host."""
    unreachable = PachydermAdapter(host='host_that_does_not_exist')
    connected = unreachable.check_connectivity()
    assert connected is False
示例#20
0
def test_pipeline_input_cron_specs(adapter: PachydermAdapter, pipeline_5, pipeline_6):
    """Check the number of cron specs reported for two pipelines.

    ``pipeline_5`` is expected to yield none, ``pipeline_6`` exactly two.
    """
    expectations = ((pipeline_5, 0), (pipeline_6, 2))
    for spec, expected_count in expectations:
        cron_specs = adapter.get_pipeline_cron_specs(spec['pipeline']['name'])
        assert len(cron_specs) == expected_count
示例#21
0
def pipeline(adapter: PachydermAdapter, pipeline_spec):
    """Create a pipeline from ``pipeline_spec``, yield the spec, then delete it.

    Written as a generator with setup before and teardown after the single
    ``yield`` (presumably registered as a pytest fixture; the decorator is
    not visible here). Any pipeline with the same name is removed first.
    """
    name = pipeline_spec['pipeline']['name']

    # Setup: make sure no stale pipeline exists, then create a fresh one.
    delete_pipeline_if_exists(adapter, name)
    adapter.create_pipeline(pipeline_spec)

    yield pipeline_spec

    # Teardown.
    delete_pipeline_if_exists(adapter, name)
示例#22
0
def test_get_version(adapter: PachydermAdapter):
    """The version string must consist of three dot-separated components."""
    components = adapter.get_version().split('.')
    assert len(components) == 3
示例#23
0
def test_check_connectivity():
    """``check_connectivity`` must return exactly ``False`` for both hosts.

    One host name does not exist; the other (``google.com``) exists but is
    expected to fail the connectivity check as well.
    """
    from pachypy.adapter import PachydermAdapter

    for host in ('host_that_does_not_exist', 'google.com'):
        candidate = PachydermAdapter(host=host)
        assert candidate.check_connectivity() is False
示例#24
0
def delete_repo_if_exists(adapter: PachydermAdapter, repo_name):
    """Delete ``repo_name`` through the adapter, ignoring ``PachydermError``.

    Any ``PachydermError`` raised by ``adapter.delete_repo`` is swallowed on
    purpose so the helper can be used for best-effort cleanup.
    """
    try:
        adapter.delete_repo(repo_name)
    except PachydermError:
        # Deliberately ignored: nothing to clean up or deletion was refused.
        return None
    return None
示例#25
0
def repo(adapter: PachydermAdapter):
    """Create a randomly named repo, yield its name, then delete it.

    The name is ``test_repo_`` followed by five zero-padded lowercase hex
    digits. Written as a generator with setup before and teardown after the
    single ``yield`` (presumably registered as a pytest fixture; the
    decorator is not visible here).
    """
    suffix = format(random.randrange(16 ** 5), '05x')
    name = 'test_repo_' + suffix

    # Setup: remove a leftover repo of the same name before creating it.
    delete_repo_if_exists(adapter, name)
    adapter.create_repo(name)

    yield name

    # Teardown.
    delete_repo_if_exists(adapter, name)
示例#26
0
def adapter():
    """Return a ``PachydermAdapter`` constructed without arguments.

    The class is imported inside the function rather than at module level.
    """
    from pachypy.adapter import PachydermAdapter as adapter_class

    instance = adapter_class()
    return instance
示例#27
0
def adapter() -> PachydermAdapter:
    """Return a ``PachydermAdapter`` built with ``'localhost'`` and ``30650``."""
    connection_args = ('localhost', 30650)
    return PachydermAdapter(*connection_args)