Exemplo n.º 1
0
def test_timespan_too_big(random_metadata):
    """Check that an oversized time span is refused.

    The metadata is given ``start = 0`` and an ``end`` equal to
    ``(MAXIMUM_BUCKET_SPAN + 1) * TIME_BUCKET_SIZE_IN_MS``; building
    records from it is expected to raise ``UnsupportedTimeRange``.
    """
    buckets = DatalakeRecord.MAXIMUM_BUCKET_SPAN + 1
    random_metadata['start'] = 0
    random_metadata['end'] = buckets * DatalakeRecord.TIME_BUCKET_SIZE_IN_MS

    location = 's3://foo/blapp'
    with pytest.raises(UnsupportedTimeRange):
        DatalakeRecord.list_from_metadata(location, random_metadata)
Exemplo n.º 2
0
def test_timespan_too_big(s3_file_from_metadata, random_metadata):
    """Check that an oversized time span is refused.

    The URL and the unmodified metadata are first handed to the
    ``s3_file_from_metadata`` fixture. Afterwards the metadata gets
    ``start = 0`` and an ``end`` equal to
    ``(MAXIMUM_BUCKET_SPAN + 1) * TIME_BUCKET_SIZE_IN_MS``; building
    records from it is expected to raise ``UnsupportedTimeRange``.
    """
    location = 's3://foo/blapp'
    # The fixture is invoked before the time range is overwritten.
    s3_file_from_metadata(location, random_metadata)

    buckets = DatalakeRecord.MAXIMUM_BUCKET_SPAN + 1
    random_metadata['start'] = 0
    random_metadata['end'] = buckets * DatalakeRecord.TIME_BUCKET_SIZE_IN_MS

    with pytest.raises(UnsupportedTimeRange):
        DatalakeRecord.list_from_metadata(location, random_metadata)
Exemplo n.º 3
0
def test_no_end(random_metadata):
    """Records can be built from metadata that has no ``end`` key.

    At least one record must come back, and each record's ``metadata``
    entry must equal the metadata that was passed in.
    """
    del random_metadata['end']
    location = 's3://foo/baz'

    produced = DatalakeRecord.list_from_metadata(location, random_metadata)

    assert len(produced) >= 1
    for record in produced:
        assert record['metadata'] == random_metadata
Exemplo n.º 4
0
 def maker(**kwargs):
     """Return records for fresh random metadata overridden by *kwargs*.

     The URL is ``s3://datalake-test/`` followed by the stringified
     keyword values joined with ``/``. The URL and metadata are passed
     to ``s3_file_from_metadata`` before the records are built.
     """
     metadata = random_metadata()
     metadata.update(**kwargs)
     suffix = '/'.join(map(str, kwargs.values()))
     location = 's3://datalake-test/' + suffix
     s3_file_from_metadata(location, metadata)
     return DatalakeRecord.list_from_metadata(location, metadata)
Exemplo n.º 5
0
def test_list_from_metadata(s3_file_from_metadata, random_metadata):
    """Every record built from metadata carries that metadata.

    The URL and metadata are first handed to the
    ``s3_file_from_metadata`` fixture; at least one record must then be
    produced, each with a ``metadata`` entry equal to the input.
    """
    location = 's3://foo/baz'
    s3_file_from_metadata(location, random_metadata)

    produced = DatalakeRecord.list_from_metadata(location, random_metadata)

    assert len(produced) >= 1
    for record in produced:
        assert record['metadata'] == random_metadata
def test_no_end_exclusion(table_maker, querier):
    """A file without ``end`` is not matched by a window after its start.

    Records are built from random metadata with ``end`` removed and
    passed to ``table_maker``. Querying ``[start + 1, start + 2]`` for
    the same ``what`` must return nothing.
    """
    metadata = random_metadata()
    del metadata['end']
    location = 's3://datalake-test/' + metadata['id']
    table_maker(DatalakeRecord.list_from_metadata(location, metadata))

    window_start = metadata['start'] + 1
    window_end = metadata['start'] + 2
    found = querier.query_by_time(window_start, window_end, metadata['what'])
    assert len(found) == 0
Exemplo n.º 7
0
def test_no_end_exclusion(table_maker, querier, s3_file_from_metadata):
    """A file without ``end`` is not matched by a window after its start.

    Random metadata with ``end`` removed is handed to
    ``s3_file_from_metadata`` together with its URL, and the records
    built from it are passed to ``table_maker``. Querying
    ``[start + 1, start + 2]`` for the same ``what`` must return nothing.
    """
    metadata = random_metadata()
    del metadata['end']
    location = 's3://datalake-test/' + metadata['id']
    s3_file_from_metadata(location, metadata)
    table_maker(DatalakeRecord.list_from_metadata(location, metadata))

    window_start = metadata['start'] + 1
    window_end = metadata['start'] + 2
    found = querier.query_by_time(window_start, window_end, metadata['what'])
    assert len(found) == 0
Exemplo n.º 8
0
def test_no_end(random_metadata, s3_file_from_metadata):
    """Metadata lacking ``end`` yields records whose ``end`` is ``None``.

    The expected metadata is a copy of the input (after ``end`` was
    removed) with ``end`` set to ``None``. Each produced record's
    ``metadata`` entry must equal that copy.
    """
    del random_metadata['end']

    # Snapshot taken after the deletion, then given an explicit null end.
    expected_metadata = random_metadata.copy()
    expected_metadata['end'] = None

    location = 's3://foo/baz'
    s3_file_from_metadata(location, random_metadata)
    produced = DatalakeRecord.list_from_metadata(location, random_metadata)

    assert len(produced) >= 1
    for record in produced:
        assert record['metadata'] == expected_metadata
def test_null_end(table_maker, querier):
    """A file whose ``end`` is ``None`` is found by a window around its start.

    Records built from the fixed metadata below are passed to
    ``table_maker``. A query spanning 10000 ms on either side of
    ``start`` for ``what == 'file'`` must return exactly one result.
    """
    start_ms = 1461023640000
    metadata = {
        "start": start_ms,
        "what": "file",
        "version": 0,
        "end": None,
        "work_id": None,
        "path": "/home/foo/file",
        "where": "somehost",
        "id": "fedcba09876543210",
        "hash": "0123456789abcdef"
    }
    location = 's3://datalake-test/' + metadata['id']
    table_maker(DatalakeRecord.list_from_metadata(location, metadata))

    # Window is 1461023630000 .. 1461023650000, which brackets start_ms.
    found = querier.query_by_time(start_ms - 10000, start_ms + 10000, 'file')
    assert len(found) == 1
Exemplo n.º 10
0
def create_test_records(bucket='datalake-test', **kwargs):
    """Build records from random metadata overridden by *kwargs*.

    The URL is ``s3://<bucket>/`` followed by the stringified keyword
    values joined with ``/``.

    Returns whatever ``DatalakeRecord.list_from_metadata`` produces for
    that URL and metadata.
    """
    metadata = random_metadata()
    metadata.update(**kwargs)
    suffix = '/'.join(map(str, kwargs.values()))
    location = 's3://' + bucket + '/' + suffix
    return DatalakeRecord.list_from_metadata(location, metadata)
Exemplo n.º 11
0
 def maker(content, metadata):
     """Create a file for *metadata* and load its records into the table.

     The path is ``<metadata['id']>/data`` in the ``datalake-test``
     bucket. ``s3_file_maker`` is called with the content and metadata,
     then the records built for the matching URL are passed to
     ``table_maker``.
     """
     bucket = 'datalake-test'
     key = metadata['id'] + '/data'
     s3_file_maker(bucket, key, content, metadata)
     location = 's3://datalake-test/' + key
     table_maker(DatalakeRecord.list_from_metadata(location, metadata))