Exemplo n.º 1
0
def test_list_from_s3_url(s3_file_from_metadata, random_metadata):
    """Records listed from an S3 URL carry the metadata given to the fixture.

    The ``s3_file_from_metadata`` fixture is called with the URL and the
    random metadata; every record subsequently listed for that URL must
    report exactly that metadata, and at least one record must exist.
    """
    s3_url = 's3://foo/bar'
    s3_file_from_metadata(s3_url, random_metadata)

    listed = DatalakeRecord.list_from_url(s3_url)

    assert len(listed) >= 1
    assert all(record['metadata'] == random_metadata for record in listed)
Exemplo n.º 2
0
def test_list_from_s3_url(s3_file_from_metadata, random_metadata):
    """Listing an S3 URL yields one or more records with matching metadata.

    The URL and metadata are handed to the ``s3_file_from_metadata`` fixture
    first; each record returned by ``DatalakeRecord.list_from_url`` is then
    compared against that same metadata.
    """
    target = 's3://foo/bar'
    s3_file_from_metadata(target, random_metadata)

    found = DatalakeRecord.list_from_url(target)

    assert len(found) >= 1
    mismatched = [rec for rec in found if not rec['metadata'] == random_metadata]
    assert not mismatched
Exemplo n.º 3
0
def test_record_size_and_create_time(s3_file_maker, random_metadata):
    """Check the metadata, create_time and size of every listed record.

    A file is created through the ``s3_file_maker`` fixture and the records
    listed for ``s3://foo/bar`` are expected to report the given metadata, a
    ``create_time`` close to the moment the test started (in milliseconds),
    and a ``size`` of 25.
    """
    started_ms = int(time.time() * 1000.0)

    # S3 creation times only have 1s resolution, so accept twice that (in
    # milliseconds) to keep the comparison from failing spuriously.
    tolerance_ms = 2000

    # Despite what the payload claims, it is 25 characters long.
    s3_file_maker('foo', 'bar', 'thissongisjust23byteslong', random_metadata)
    listed = DatalakeRecord.list_from_url('s3://foo/bar')

    assert len(listed) >= 1
    for record in listed:
        assert record['metadata'] == random_metadata
        drift_ms = abs(record['create_time'] - started_ms)
        assert drift_ms <= tolerance_ms
        assert record['size'] == 25
Exemplo n.º 4
0
def test_no_such_bucket(s3_connection):
    """Listing a URL when no bucket was created raises NoSuchDatalakeFile.

    Only the ``s3_connection`` fixture is requested; nothing is created
    before the lookup.
    """
    missing_url = 's3://no/such/file'
    with pytest.raises(NoSuchDatalakeFile):
        DatalakeRecord.list_from_url(missing_url)
Exemplo n.º 5
0
def test_no_such_datalake_file_in_bucket(s3_bucket_maker):
    """Listing a URL in a bucket that holds no such file raises NoSuchDatalakeFile.

    The bucket ``test-bucket`` is created through the fixture, but no file
    is stored in it before the lookup.
    """
    bucket_name = 'test-bucket'
    s3_bucket_maker(bucket_name)
    with pytest.raises(NoSuchDatalakeFile):
        DatalakeRecord.list_from_url('s3://test-bucket/such/file')
Exemplo n.º 6
0
def test_from_url_fails_without_boto():
    """Listing an S3 URL is expected to raise InsufficientConfiguration.

    NOTE(review): per the test name this relies on boto being unavailable or
    unconfigured in the test environment -- confirm against the test setup.
    """
    s3_url = 's3://foo/bar'
    with pytest.raises(InsufficientConfiguration):
        DatalakeRecord.list_from_url(s3_url)
Exemplo n.º 7
0
 def ingest(self, url):
     '''Store every datalake record listed for the given url.

     Each record returned by ``DatalakeRecord.list_from_url(url)`` is passed,
     in order, to ``self.storage.store``.
     '''
     save = self.storage.store
     for record in DatalakeRecord.list_from_url(url):
         save(record)
Exemplo n.º 8
0
def test_no_metadata(s3_file_maker):
    """Listing a file created with ``None`` metadata raises InvalidDatalakeMetadata.

    The file is created through the ``s3_file_maker`` fixture with ``None``
    as its last argument (presumably the metadata -- confirm against the
    fixture definition).
    """
    s3_file_maker('foo', 'bar', 'the content', None)
    with pytest.raises(InvalidDatalakeMetadata):
        DatalakeRecord.list_from_url('s3://foo/bar')
Exemplo n.º 9
0
def test_no_such_bucket(s3_connection):
    """Expect NoSuchDatalakeFile when the URL's bucket was never created.

    The test requests the ``s3_connection`` fixture and creates nothing
    before calling ``DatalakeRecord.list_from_url``.
    """
    absent = 's3://no/such/file'
    with pytest.raises(NoSuchDatalakeFile):
        DatalakeRecord.list_from_url(absent)
Exemplo n.º 10
0
def test_no_such_datalake_file_in_bucket(s3_bucket_maker):
    """Expect NoSuchDatalakeFile for a missing file in an existing bucket.

    ``test-bucket`` is created via the fixture; the file referenced by the
    URL is never stored.
    """
    s3_bucket_maker('test-bucket')
    absent_file_url = 's3://test-bucket/such/file'
    with pytest.raises(NoSuchDatalakeFile):
        DatalakeRecord.list_from_url(absent_file_url)
Exemplo n.º 11
0
def test_from_url_fails_without_boto():
    """Expect InsufficientConfiguration when listing records for an S3 URL.

    NOTE(review): the test name implies boto is missing or unconfigured in
    this environment -- verify against the surrounding test configuration.
    """
    target = 's3://foo/bar'
    with pytest.raises(InsufficientConfiguration):
        DatalakeRecord.list_from_url(target)
Exemplo n.º 12
0
 def ingest(self, url):
     '''Ingest the metadata records associated with the given url.

     Lists the datalake records for ``url`` and hands each one, in order,
     to ``self.storage.store``.
     '''
     listed = DatalakeRecord.list_from_url(url)
     storage = self.storage
     for entry in listed:
         storage.store(entry)
Exemplo n.º 13
0
 def datalake_records(self):
     '''Return the datalake records for this event as a list.

     Returns an empty list when ``self['eventName']`` is not one of
     ``self.EVENTS_WITH_RECORDS``; otherwise returns a new list holding
     everything ``DatalakeRecord.list_from_url(self.s3_url)`` yields.
     '''
     if self['eventName'] not in self.EVENTS_WITH_RECORDS:
         return []
     # list() makes the same shallow copy as the identity comprehension did,
     # without the redundant per-item loop.
     return list(DatalakeRecord.list_from_url(self.s3_url))
Exemplo n.º 14
0
 def datalake_records(self):
     '''List the datalake records that correspond to this event.

     Events whose ``eventName`` is not in ``self.EVENTS_WITH_RECORDS``
     produce an empty list. For all others the result is a new list built
     from ``DatalakeRecord.list_from_url(self.s3_url)``.
     '''
     if self['eventName'] not in self.EVENTS_WITH_RECORDS:
         return []
     # An identity comprehension is just a copy; list() states that directly.
     return list(DatalakeRecord.list_from_url(self.s3_url))