import pytest

# Imports are assumed to come from the datalake_common package; adjust to
# match the actual project layout.
from datalake_common import DatalakeRecord
from datalake_common.errors import UnsupportedTimeRange


def test_timespan_too_big(random_metadata):
    url = 's3://foo/blapp'
    random_metadata['start'] = 0
    random_metadata['end'] = (DatalakeRecord.MAXIMUM_BUCKET_SPAN + 1) * \
        DatalakeRecord.TIME_BUCKET_SIZE_IN_MS
    with pytest.raises(UnsupportedTimeRange):
        DatalakeRecord.list_from_metadata(url, random_metadata)

def test_timespan_too_big(s3_file_from_metadata, random_metadata):
    url = 's3://foo/blapp'
    s3_file_from_metadata(url, random_metadata)
    random_metadata['start'] = 0
    random_metadata['end'] = (DatalakeRecord.MAXIMUM_BUCKET_SPAN + 1) * \
        DatalakeRecord.TIME_BUCKET_SIZE_IN_MS
    with pytest.raises(UnsupportedTimeRange):
        DatalakeRecord.list_from_metadata(url, random_metadata)

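# The tests here depend on an `s3_file_from_metadata` fixture that is not
# shown. The sketch below is an assumption, not the project's implementation:
# it presumes a moto-mocked S3 (moto < 5 exposes `mock_s3`; moto 5 renames it
# `mock_aws`) and stores the metadata as a JSON blob in the object's user
# metadata.
import json

import boto3
from moto import mock_s3


@pytest.fixture
def s3_file_from_metadata():
    with mock_s3():
        s3 = boto3.resource('s3', region_name='us-east-1')

        def maker(url, metadata):
            # Split 's3://bucket/key' into its bucket and key parts.
            bucket, key = url[len('s3://'):].split('/', 1)
            s3.create_bucket(Bucket=bucket)
            s3.Object(bucket, key).put(
                Body=b'',
                Metadata={'datalake': json.dumps(metadata)})

        yield maker
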
def test_no_end(random_metadata):
    url = 's3://foo/baz'
    del random_metadata['end']
    records = DatalakeRecord.list_from_metadata(url, random_metadata)
    assert len(records) >= 1
    for r in records:
        assert r['metadata'] == random_metadata

def maker(**kwargs):
    m = random_metadata()
    m.update(**kwargs)
    key = '/'.join([str(v) for v in kwargs.values()])
    url = 's3://datalake-test/' + key
    s3_file_from_metadata(url, m)
    return DatalakeRecord.list_from_metadata(url, m)

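# If `maker` above is the inner function of a records-factory fixture, usage
# might look like this; the keyword values are illustrative only. The kwargs
# both extend the metadata and, joined in order, form the S3 key, so this
# writes to s3://datalake-test/syslog/webserver01.
records = maker(what='syslog', where='webserver01')
assert all(r['metadata']['what'] == 'syslog' for r in records)
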
def test_list_from_metadata(s3_file_from_metadata, random_metadata):
    url = 's3://foo/baz'
    s3_file_from_metadata(url, random_metadata)
    records = DatalakeRecord.list_from_metadata(url, random_metadata)
    assert len(records) >= 1
    for r in records:
        assert r['metadata'] == random_metadata

def test_no_end_exclusion(table_maker, querier):
    m = random_metadata()
    del m['end']
    url = 's3://datalake-test/' + m['id']
    records = DatalakeRecord.list_from_metadata(url, m)
    table_maker(records)
    # The query window opens after the file's start; with no end, the
    # record should fall outside it.
    results = querier.query_by_time(m['start'] + 1, m['start'] + 2, m['what'])
    assert len(results) == 0

def test_no_end_exclusion(table_maker, querier, s3_file_from_metadata):
    m = random_metadata()
    del m['end']
    url = 's3://datalake-test/' + m['id']
    s3_file_from_metadata(url, m)
    records = DatalakeRecord.list_from_metadata(url, m)
    table_maker(records)
    results = querier.query_by_time(m['start'] + 1, m['start'] + 2, m['what'])
    assert len(results) == 0

def test_no_end(random_metadata, s3_file_from_metadata):
    url = 's3://foo/baz'
    del random_metadata['end']
    expected_metadata = random_metadata.copy()
    expected_metadata['end'] = None
    s3_file_from_metadata(url, random_metadata)
    records = DatalakeRecord.list_from_metadata(url, random_metadata)
    assert len(records) >= 1
    for r in records:
        assert r['metadata'] == expected_metadata

def test_null_end(table_maker, querier):
    m = {
        "start": 1461023640000,
        "what": "file",
        "version": 0,
        "end": None,
        "work_id": None,
        "path": "/home/foo/file",
        "where": "somehost",
        "id": "fedcba09876543210",
        "hash": "0123456789abcdef",
    }
    url = 's3://datalake-test/' + m['id']
    records = DatalakeRecord.list_from_metadata(url, m)
    table_maker(records)
    # An explicit end of None should not keep the record from being found
    # in a window that covers its start time.
    results = querier.query_by_time(1461023630000, 1461023650000, 'file')
    assert len(results) == 1

def create_test_records(bucket='datalake-test', **kwargs):
    m = random_metadata()
    m.update(**kwargs)
    url = 's3://' + bucket + '/' + '/'.join([str(v) for v in kwargs.values()])
    return DatalakeRecord.list_from_metadata(url, m)

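# Hypothetical usage of create_test_records; unlike the maker above, it
# builds records without writing an S3 file. The key here is 'nginx/host7'.
records = create_test_records(bucket='my-bucket', what='nginx', where='host7')
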
def maker(content, metadata):
    # Write the file to S3, then index its records in the table.
    path = metadata['id'] + '/data'
    s3_file_maker('datalake-test', path, content, metadata)
    url = 's3://datalake-test/' + path
    records = DatalakeRecord.list_from_metadata(url, metadata)
    table_maker(records)
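

# Assuming the maker above is exposed to tests through a fixture (called
# `record_maker` here purely for illustration), a round-trip test might
# look like this:
def test_query_round_trip(record_maker, querier, random_metadata):
    record_maker(b'sample log line\n', random_metadata)
    results = querier.query_by_time(random_metadata['start'],
                                    random_metadata['end'],
                                    random_metadata['what'])
    assert len(results) >= 1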