def test_list_from_s3_url(s3_file_from_metadata, random_metadata):
    """Listing records from an s3 url returns the stored metadata.

    After placing a file with known metadata at the url, every record
    listed for that url must carry that same metadata.
    """
    url = 's3://foo/bar'
    s3_file_from_metadata(url, random_metadata)
    listed = DatalakeRecord.list_from_url(url)
    assert len(listed) >= 1
    for record in listed:
        assert record['metadata'] == random_metadata
def test_list_from_s3_url(s3_file_from_metadata, random_metadata):
    """Records listed from an s3 url carry the metadata stored there."""
    url = 's3://foo/bar'
    s3_file_from_metadata(url, random_metadata)
    results = DatalakeRecord.list_from_url(url)
    assert results
    assert all(r['metadata'] == random_metadata for r in results)
def test_record_size_and_create_time(s3_file_maker, random_metadata):
    """Listed records report the object's size and an accurate create time."""
    url = 's3://foo/bar'
    start_ms = int(time.time() * 1000.0)
    # s3 create times have a 1s resolution. So we just tolerate 2x that to
    # ensure the test passes reasonably.
    tolerance_ms = 2000
    s3_file_maker('foo', 'bar', 'thissongisjust23byteslong', random_metadata)
    listed = DatalakeRecord.list_from_url(url)
    assert len(listed) >= 1
    for record in listed:
        assert record['metadata'] == random_metadata
        # create_time is in epoch milliseconds, like start_ms above
        assert abs(record['create_time'] - start_ms) <= tolerance_ms
        # the content string above is actually 25 bytes long
        assert record['size'] == 25
def test_no_such_bucket(s3_connection):
    """A url whose bucket does not exist raises NoSuchDatalakeFile."""
    with pytest.raises(NoSuchDatalakeFile):
        DatalakeRecord.list_from_url('s3://no/such/file')
def test_no_such_datalake_file_in_bucket(s3_bucket_maker):
    """A missing key in an existing bucket raises NoSuchDatalakeFile."""
    s3_bucket_maker('test-bucket')
    url = 's3://test-bucket/such/file'
    with pytest.raises(NoSuchDatalakeFile):
        DatalakeRecord.list_from_url(url)
def test_from_url_fails_without_boto():
    """Without boto configured, listing from an s3 url must raise
    InsufficientConfiguration rather than failing obscurely."""
    with pytest.raises(InsufficientConfiguration):
        DatalakeRecord.list_from_url('s3://foo/bar')
def ingest(self, url):
    '''ingest the metadata associated with the given url'''
    # store each record derived from the url directly; no need to
    # hold the full list in a local first
    for record in DatalakeRecord.list_from_url(url):
        self.storage.store(record)
def test_no_metadata(s3_file_maker):
    """A file stored without metadata raises InvalidDatalakeMetadata."""
    url = 's3://foo/bar'
    s3_file_maker('foo', 'bar', 'the content', None)
    with pytest.raises(InvalidDatalakeMetadata):
        DatalakeRecord.list_from_url(url)
def test_no_such_bucket(s3_connection):
    """Listing from a nonexistent bucket raises NoSuchDatalakeFile."""
    url = 's3://no/such/file'
    with pytest.raises(NoSuchDatalakeFile):
        DatalakeRecord.list_from_url(url)
def test_no_such_datalake_file_in_bucket(s3_bucket_maker):
    """An existing bucket with no such key raises NoSuchDatalakeFile."""
    s3_bucket_maker('test-bucket')
    with pytest.raises(NoSuchDatalakeFile):
        DatalakeRecord.list_from_url('s3://test-bucket/such/file')
def test_from_url_fails_without_boto():
    """Listing an s3 url with no boto available raises
    InsufficientConfiguration."""
    url = 's3://foo/bar'
    with pytest.raises(InsufficientConfiguration):
        DatalakeRecord.list_from_url(url)
def ingest(self, url):
    '''ingest the metadata associated with the given url'''
    records = DatalakeRecord.list_from_url(url)
    # persist every record derived from the file at this url
    for record in records:
        self.storage.store(record)
def datalake_records(self):
    """Return the DatalakeRecords for this event.

    Events whose 'eventName' is not in EVENTS_WITH_RECORDS produce no
    records, so an empty list is returned for them.
    """
    if self['eventName'] not in self.EVENTS_WITH_RECORDS:
        return []
    # list() instead of an identity comprehension: same result,
    # clearer intent, and no per-element Python-level loop.
    return list(DatalakeRecord.list_from_url(self.s3_url))
def datalake_records(self):
    """Return the DatalakeRecords for this event.

    Only events listed in EVENTS_WITH_RECORDS yield records; all other
    event types return an empty list.
    """
    if self['eventName'] not in self.EVENTS_WITH_RECORDS:
        return []
    # Replace the pass-through comprehension [dlr for dlr in ...] with
    # list(): identical behavior, idiomatic, and faster.
    return list(DatalakeRecord.list_from_url(self.s3_url))