Пример #1
0
def test_list_cli_http_format(cli_tester, random_metadata):
    """CLI ``list --format=http`` emits one ``http_url`` per line, in order."""
    work_id = 'foo1234'

    first = copy(random_metadata)
    first['id'] = '1'
    first['work_id'] = work_id

    second = copy(random_metadata)
    second['id'] = '2'
    second['work_id'] = work_id

    records = [
        {
            'url': 's3://url1',
            'http_url': 'https://foo.com/url1',
            'metadata': first,
        },
        {
            'url': 's3://url2',
            'http_url': 'https://foo.com/url2',
            'metadata': second,
        },
    ]
    response = {'records': records, 'next': None}
    prepare_response(response, what=first['what'], work_id=first['work_id'])

    command = 'list {what} --work-id={work_id} --format=http'.format(**first)
    printed = cli_tester(command).rstrip('\n')
    # Only the HTTP URLs are expected in the output, not the s3:// ones.
    assert printed.split('\n') == [
        'https://foo.com/url1',
        'https://foo.com/url2',
    ]
Пример #2
0
def test_list_two_pages(archive, random_metadata):
    """Listing follows the ``next`` URL and yields records from both pages."""
    next_url = 'http://the-next-url/'

    page_one_meta = copy(random_metadata)
    page_one_meta['id'] = '1'
    page_one = {
        'records': [{'url': 's3://bucket/file1', 'metadata': page_one_meta}],
        'next': next_url,
    }
    prepare_response(
        page_one,
        what=random_metadata['what'],
        start=page_one_meta['start'],
        end=page_one_meta['end'],
    )

    page_two_meta = copy(random_metadata)
    page_two_meta['id'] = '2'
    page_two = {
        'records': [{'url': 's3://bucket/file2', 'metadata': page_two_meta}],
        'next': None,
    }
    # The second page is registered under the URL advertised by the first.
    prepare_response(page_two, url=next_url)

    found = list(
        archive.list(
            page_one_meta['what'],
            start=random_metadata['start'],
            end=random_metadata['end'],
        )
    )
    assert len(found) == 2
    first, second = found
    assert first['url'] == 's3://bucket/file1'
    assert first['metadata'] == page_one_meta
    assert second['url'] == 's3://bucket/file2'
    assert second['metadata'] == page_two_meta
Пример #3
0
def test_list_cli_json_format(cli_tester, random_metadata):
    """CLI ``list --format=json`` emits one JSON document per record."""
    meta_a = copy(random_metadata)
    meta_a['id'] = '1'
    meta_a['work_id'] = 'foo1234'
    meta_b = copy(random_metadata)
    meta_b['id'] = '2'
    meta_b['work_id'] = 'foo1234'

    expected_records = [
        {'url': 's3://url1', 'metadata': meta_a},
        {'url': 's3://url2', 'metadata': meta_b},
    ]
    payload = {'records': expected_records, 'next': None}
    prepare_response(payload, what=meta_a['what'], work_id=meta_a['work_id'])

    template = 'list {what} --work-id={work_id} --format=json'
    raw = cli_tester(template.format(**meta_a))
    output_lines = raw.rstrip('\n').split('\n')
    assert len(output_lines) == 2

    # Each output line is parsed on its own; order is not asserted.
    decoded = list(map(json.loads, output_lines))
    for expected in expected_records:
        assert expected in decoded
Пример #4
0
def test_list_two_pages(archive, random_metadata):
    """A two-page listing is returned as one flat sequence of records."""
    meta_first = copy(random_metadata)
    meta_first['id'] = '1'
    first_page = {
        'records': [{'url': 's3://bucket/file1', 'metadata': meta_first}],
        'next': 'http://the-next-url/',
    }
    prepare_response(
        first_page,
        what=random_metadata['what'],
        start=meta_first['start'],
        end=meta_first['end'],
    )

    meta_second = copy(random_metadata)
    meta_second['id'] = '2'
    second_page = {
        'records': [{'url': 's3://bucket/file2', 'metadata': meta_second}],
        'next': None,
    }
    prepare_response(second_page, url='http://the-next-url/')

    results = list(
        archive.list(
            meta_first['what'],
            start=random_metadata['start'],
            end=random_metadata['end'],
        )
    )
    assert len(results) == 2

    # Compare each returned record against the page it should have come from.
    expected = [
        ('s3://bucket/file1', meta_first),
        ('s3://bucket/file2', meta_second),
    ]
    for record, (url, metadata) in zip(results, expected):
        assert record['url'] == url
        assert record['metadata'] == metadata
Пример #5
0
def test_internal_server_error(archive):
    """A 500 reply to a list request surfaces as DatalakeHttpError."""
    body = 'INTERNAL SERVER ERROR'
    prepare_response(body, status=500, what='syslog')

    # The listing is consumed inside the context so the error is caught
    # whether it is raised on the call or during iteration.
    with pytest.raises(DatalakeHttpError):
        list(archive.list('syslog'))
Пример #6
0
def test_list_cli_json_format(cli_tester, random_metadata):
    """Every canned record appears among the JSON lines printed by the CLI."""
    records = []
    for ident, url in (('1', 's3://url1'), ('2', 's3://url2')):
        metadata = copy(random_metadata)
        metadata['id'] = ident
        metadata['work_id'] = 'foo1234'
        records.append({'url': url, 'metadata': metadata})

    first_meta = records[0]['metadata']
    response = {'records': records, 'next': None}
    prepare_response(
        response,
        what=first_meta['what'],
        work_id=first_meta['work_id'],
    )

    command = 'list {what} --work-id={work_id} --format=json'
    command = command.format(**first_meta)
    stdout = cli_tester(command)
    output_lines = stdout.rstrip('\n').split('\n')
    assert len(output_lines) == 2

    parsed = [json.loads(line) for line in output_lines]
    for record in records:
        assert record in parsed
Пример #7
0
def test_internal_server_error(archive):
    """Listing against a canned HTTP 500 must raise DatalakeHttpError."""
    prepare_response('INTERNAL SERVER ERROR', status=500, what='syslog')

    with pytest.raises(DatalakeHttpError):
        # Drain the listing inside the context manager.
        for _ in archive.list('syslog'):
            pass
Пример #8
0
def test_list_cli_human_format_no_end_time(cli_tester, random_metadata):
    """Human format prints ``end: null`` for records that have no end time."""
    start_ms = 1612548642000

    first = copy(random_metadata)
    first['id'] = '1'
    first['work_id'] = 'foo1234'
    first['start'] = start_ms
    first['end'] = None

    second = copy(random_metadata)
    second['id'] = '2'
    second['work_id'] = 'foo1234'
    second['start'] = start_ms
    second['end'] = None

    response = {
        'records': [
            {'url': 's3://url1', 'metadata': first},
            {'url': 's3://url2', 'metadata': second},
        ],
        'next': None,
    }
    prepare_response(response, what=first['what'], work_id=first['work_id'])

    command = 'list {what} --work-id={work_id} --format=human'.format(**first)
    # Stanzas are separated by blank lines; empty pieces are discarded.
    stanzas = list(filter(None, cli_tester(command).split('\n\n')))
    for stanza in stanzas:
        lines = stanza.split('\n')
        # Only the start/end lines are checked here.
        assert 'start: 2021-02-05T18:10:42+00:00' in lines
        assert 'end: null' in lines
    assert len(stanzas) == 2
Пример #9
0
def test_no_such_latest(archive):
    """A 404 from the latest endpoint raises DatalakeHttpError."""
    not_found = {'message': 'not found', 'code': 'NoSuchDatalakeFile'}
    prepare_response(
        not_found,
        status=404,
        url='http://datalake.example.com/v0/archive/latest/not/here',
    )

    with pytest.raises(DatalakeHttpError):
        archive.latest('not', 'here')
Пример #10
0
def test_bad_request(archive):
    """A 400 reply to a list request raises DatalakeHttpError."""
    error_body = {
        "code": "NoWorkInterval",
        "message": "You must provide either work_id or start/end"
    }
    prepare_response(error_body, status=400, what='syslog')

    # Consume the listing inside the context so the error is caught
    # whether it is raised on the call or during iteration.
    with pytest.raises(DatalakeHttpError):
        list(archive.list('syslog'))
Пример #11
0
def test_bad_request(archive):
    """Listing against a canned HTTP 400 must raise DatalakeHttpError."""
    prepare_response(
        {
            "code": "NoWorkInterval",
            "message": "You must provide either work_id or start/end"
        },
        status=400,
        what='syslog',
    )

    with pytest.raises(DatalakeHttpError):
        # Drain the listing inside the context manager.
        for _ in archive.list('syslog'):
            pass
Пример #12
0
def test_latest(archive, random_metadata):
    """``archive.latest`` returns the canned record's url and metadata."""
    what = random_metadata['what']
    where = random_metadata['where']

    endpoint = 'http://datalake.example.com/v0/archive/latest/{}/{}'.format(
        what, where)
    canned = {'url': 's3://bucket/file', 'metadata': random_metadata}
    prepare_response(canned, url=endpoint)

    latest = archive.latest(random_metadata['what'], random_metadata['where'])
    assert latest['url'] == 's3://bucket/file'
    assert latest['metadata'] == random_metadata
Пример #13
0
def test_latest_with_lookback_cli(cli_tester, random_metadata):
    """CLI ``latest --lookback 42`` prints the record URL and a newline."""
    canned = {'url': 's3://bucket/file', 'metadata': random_metadata}
    # The canned reply is registered under a URL carrying ``lookback=42``.
    endpoint = (
        'http://datalake.example.com/v0/archive/latest/{}/{}?lookback=42'
    ).format(random_metadata['what'], random_metadata['where'])
    prepare_response(canned, url=endpoint)

    command = 'latest {what} {where} --lookback 42'.format(**random_metadata)
    assert cli_tester(command) == 's3://bucket/file\n'
Пример #14
0
def test_with_work_id(archive, random_metadata):
    """Listing by ``work_id`` returns the single canned record."""
    random_metadata['work_id'] = 'foo123'

    record = {'url': 's3://bucket/file', 'metadata': random_metadata}
    response = {'records': [record], 'next': None}
    prepare_response(
        response,
        what=random_metadata['what'],
        work_id=random_metadata['work_id'],
    )

    listing = archive.list(random_metadata['what'], work_id='foo123')
    results = list(listing)
    assert len(results) == 1
    only = results[0]
    assert only['url'] == 's3://bucket/file'
    assert only['metadata'] == random_metadata
Пример #15
0
def test_with_work_id(archive, random_metadata):
    """A work_id-scoped listing yields exactly the one prepared record."""
    # Note: this sets work_id on the fixture object itself, not on a copy.
    random_metadata['work_id'] = 'foo123'

    prepare_response(
        {
            'records': [
                {'url': 's3://bucket/file', 'metadata': random_metadata},
            ],
            'next': None,
        },
        what=random_metadata['what'],
        work_id=random_metadata['work_id'],
    )

    found = list(archive.list(random_metadata['what'], work_id='foo123'))
    assert len(found) == 1
    assert found[0]['url'] == 's3://bucket/file'
    assert found[0]['metadata'] == random_metadata
Пример #16
0
def test_list_one_page(archive, random_metadata):
    """A single-page listing (``next`` is None) yields its one record."""
    record = {'url': 's3://bucket/file', 'metadata': random_metadata}
    page = {'records': [record], 'next': None}
    prepare_response(
        page,
        what=random_metadata['what'],
        start=random_metadata['start'],
        end=random_metadata['end'],
    )

    listing = archive.list(
        random_metadata['what'],
        start=random_metadata['start'],
        end=random_metadata['end'],
    )
    results = list(listing)
    assert len(results) == 1
    assert results[0]['url'] == 's3://bucket/file'
    assert results[0]['metadata'] == random_metadata
Пример #17
0
    def tester(start, end):
        """List with the given bounds and expect the one canned record."""
        # The canned reply is keyed on the normalized dates, while the
        # raw ``start``/``end`` values are what get passed to ``list``.
        random_metadata['start'] = Metadata.normalize_date(start)
        random_metadata['end'] = Metadata.normalize_date(end)

        record = {'url': 's3://bucket/file', 'metadata': random_metadata}
        prepare_response(
            {'records': [record], 'next': None},
            what=random_metadata['what'],
            start=random_metadata['start'],
            end=random_metadata['end'],
        )

        listing = archive.list(random_metadata['what'], start=start, end=end)
        results = list(listing)
        assert len(results) == 1
        assert results[0]['url'] == 's3://bucket/file'
        assert results[0]['metadata'] == random_metadata
Пример #18
0
def test_list_one_page(archive, random_metadata):
    """Listing a start/end window returns the single prepared record."""
    what = random_metadata['what']
    window = {
        'start': random_metadata['start'],
        'end': random_metadata['end'],
    }

    prepare_response(
        {
            'records': [
                {'url': 's3://bucket/file', 'metadata': random_metadata},
            ],
            'next': None,
        },
        what=what,
        **window
    )

    found = list(archive.list(what, **window))
    assert len(found) == 1
    only = found[0]
    assert only['url'] == 's3://bucket/file'
    assert only['metadata'] == random_metadata
Пример #19
0
    def tester(start, end):
        """Check that listing with ``start``/``end`` finds the canned record."""
        normalized_start = Metadata.normalize_date(start)
        random_metadata['start'] = normalized_start
        normalized_end = Metadata.normalize_date(end)
        random_metadata['end'] = normalized_end

        response = {
            'records': [
                {'url': 's3://bucket/file', 'metadata': random_metadata},
            ],
            'next': None,
        }
        # Register the reply under the normalized dates; the caller's raw
        # values are passed to ``archive.list`` below.
        prepare_response(
            response,
            what=random_metadata['what'],
            start=random_metadata['start'],
            end=random_metadata['end'],
        )

        found = list(
            archive.list(random_metadata['what'], start=start, end=end)
        )
        assert len(found) == 1
        assert found[0]['url'] == 's3://bucket/file'
        assert found[0]['metadata'] == random_metadata
Пример #20
0
def test_list_cli_url_format(cli_tester, random_metadata):
    """CLI ``list`` without ``--format`` prints one record URL per line."""
    records = [
        {'url': 's3://thisistheurl', 'metadata': random_metadata},
        {'url': 's3://thisistheotherurl', 'metadata': random_metadata},
    ]
    prepare_response(
        {'records': records, 'next': None},
        what=random_metadata['what'],
        start=random_metadata['start'],
        end=random_metadata['end'],
    )

    template = 'list {what} --start={start} --end={end}'
    printed = cli_tester(template.format(**random_metadata))
    assert printed == 's3://thisistheurl\ns3://thisistheotherurl\n'
Пример #21
0
def test_list_cli_url_format(cli_tester, random_metadata):
    """With no ``--format`` flag the CLI prints each record's ``url``."""
    first = {'url': 's3://thisistheurl', 'metadata': random_metadata}
    second = {'url': 's3://thisistheotherurl', 'metadata': random_metadata}
    response = {'records': [first, second], 'next': None}
    prepare_response(
        response,
        what=random_metadata['what'],
        start=random_metadata['start'],
        end=random_metadata['end'],
    )

    command = 'list {what} --start={start} --end={end}'.format(
        **random_metadata)
    stdout = cli_tester(command)
    # Exact match, including the trailing newline after the last URL.
    assert stdout == 's3://thisistheurl\ns3://thisistheotherurl\n'
Пример #22
0
def test_list_with_session_class(monkeypatch, archive_maker, random_metadata):
    """Listing works when DATALAKE_SESSION_CLASS names the session class.

    NOTE(review): ``TEST_REQUESTS`` is presumably filled by
    ``SessionWrapper``; both are defined outside this block -- confirm.
    """
    record = {'url': 's3://bucket/file', 'metadata': random_metadata}
    prepare_response(
        {'records': [record], 'next': None},
        what=random_metadata['what'],
        start=random_metadata['start'],
        end=random_metadata['end'],
    )

    # The environment variable is set before the archive is built.
    monkeypatch.setenv('DATALAKE_SESSION_CLASS', 'test_list.SessionWrapper')
    configured_archive = archive_maker()

    listing = configured_archive.list(
        random_metadata['what'],
        start=random_metadata['start'],
        end=random_metadata['end'],
    )
    results = list(listing)
    assert len(TEST_REQUESTS) > 0
    assert len(results) > 0
    assert results[0]['url'] == 's3://bucket/file'
    assert results[0]['metadata'] == random_metadata
Пример #23
0
def test_list_with_injected_session(archive_maker, random_metadata):
    """Listing works when a session object is passed to the archive maker.

    NOTE(review): ``TEST_REQUESTS`` is presumably filled by
    ``SessionWrapper``; both are defined outside this block -- confirm.
    """
    response = {
        'records': [
            {'url': 's3://bucket/file', 'metadata': random_metadata},
        ],
        'next': None,
    }
    prepare_response(
        response,
        what=random_metadata['what'],
        start=random_metadata['start'],
        end=random_metadata['end'],
    )

    session = SessionWrapper()
    injected_archive = archive_maker(session=session)
    found = list(
        injected_archive.list(
            random_metadata['what'],
            start=random_metadata['start'],
            end=random_metadata['end'],
        )
    )
    assert len(TEST_REQUESTS) > 0
    assert len(found) > 0
    head = found[0]
    assert head['url'] == 's3://bucket/file'
    assert head['metadata'] == random_metadata