def test_absolute_windows_path_single_slash(basic_metadata):
    """A drive-letter path with a single backslash after the colon is kept as-is."""
    # Some cygwin environments apparently emit only one slash after the
    # drive letter, so this form has to be accepted too.
    single_slash_path = r'Z:\foo\bar.txt'
    basic_metadata['path'] = single_slash_path
    assert Metadata(basic_metadata)['path'] == single_slash_path
def test_id_gets_assigned(basic_metadata):
    """Building Metadata from the basic fixture yields a non-None 'id' entry."""
    record = Metadata(basic_metadata)
    assert 'id' in record
    assert record['id'] is not None
def test_invalid_date(basic_metadata):
    """An 'end' value that is not a parseable date must be rejected."""
    basic_metadata['end'] = 'bxfl230'
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
def test_normalize_date(basic_metadata):
    """A 'YYYY-MM-DD' start string is normalized to an integer timestamp."""
    basic_metadata['start'] = '2015-03-20'
    normalized = Metadata(basic_metadata)
    # 1426809600000 is 2015-03-20T00:00:00Z expressed in epoch milliseconds.
    assert normalized['start'] == 1426809600000
def test_work_id_null_string_unallowed(basic_metadata):
    """The literal string 'null' is not an acceptable work_id."""
    basic_metadata['work_id'] = 'null'
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
def test_unallowed_spaces(basic_metadata):
    """A 'where' value containing a space must be rejected."""
    # NOTE(review): this value is also upper-case, so the rejection may be
    # triggered by the capitals rather than the space -- confirm against
    # the validation rules for 'where'.
    basic_metadata['where'] = 'SAN FRANCISCO'
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
def test_unallowed_characters(basic_metadata):
    """A 'what' value containing '#' and '$' must be rejected."""
    basic_metadata['what'] = '123#$'
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
def test_id_not_overwritten(basic_metadata):
    """An 'id' supplied by the caller survives Metadata construction unchanged."""
    basic_metadata['id'] = '123'
    record = Metadata(basic_metadata)
    assert 'id' in record
    assert record['id'] == '123'
def test_absolute_windows_path(basic_metadata):
    """A drive-letter path with a doubled backslash after the colon is kept as-is."""
    # Raw string: the two backslashes after 'Z:' are both literal characters.
    windows_path = r'Z:\\foo\bar.txt'
    basic_metadata['path'] = windows_path
    assert Metadata(basic_metadata)['path'] == windows_path
def test_path_with_leading_dot_not_allowed(basic_metadata):
    """A path beginning with './' must be rejected."""
    basic_metadata['path'] = './abc.txt'
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
def test_normalize_int_date(basic_metadata):
    """An integer timestamp given as a string is normalized to the same int."""
    basic_metadata['end'] = '1426809600123'
    normalized = Metadata(basic_metadata)
    assert normalized['end'] == 1426809600123
def test_normalize_float_date(basic_metadata):
    """A fractional-seconds string is normalized to an integer timestamp."""
    basic_metadata['start'] = '1426809600.123'
    normalized = Metadata(basic_metadata)
    # The expected value is the input scaled by 1000 (seconds -> milliseconds).
    assert normalized['start'] == 1426809600123
def test_random_metadata(random_metadata):
    """The random_metadata fixture must construct a Metadata without raising."""
    # Other code depends on datalake-common's random_metadata being valid,
    # so a plain construction acts as the guard: any exception fails the test.
    Metadata(random_metadata)
def test_end_before_start(basic_metadata):
    """Swapping 'start' and 'end' in the basic fixture must be rejected."""
    # Exchange the two timestamps so that 'end' takes the fixture's 'start'
    # value and vice versa.
    basic_metadata['end'], basic_metadata['start'] = (
        basic_metadata['start'],
        basic_metadata['end'],
    )
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
def test_work_id_with_unallowed_characters(basic_metadata):
    """A work_id containing '#' must be rejected."""
    basic_metadata['work_id'] = 'foojob#123'
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
def test_none_for_required_field(basic_metadata):
    """Setting 'where' to None must be rejected."""
    basic_metadata['where'] = None
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
def test_work_id_gets_assigned(basic_metadata):
    """Metadata built from the basic fixture has a 'work_id' key set to None."""
    record = Metadata(basic_metadata)
    assert 'work_id' in record
    assert record['work_id'] is None
def test_relative_windows_path_not_allowed(basic_metadata):
    """A backslash-separated path without a drive prefix must be rejected."""
    basic_metadata['path'] = r'foo\abc.txt'
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
def test_no_end_allowed(basic_metadata):
    """Omitting 'end' is permitted; the resulting Metadata reports it as None."""
    del basic_metadata['end']
    record = Metadata(basic_metadata)
    assert record['end'] is None
def test_version_default(basic_metadata):
    """When 'version' is omitted, the resulting Metadata carries version 0."""
    del basic_metadata['version']
    record = Metadata(basic_metadata)
    assert 'version' in record
    assert record['version'] == 0
def test_unallowed_capitals(basic_metadata):
    """An upper-case 'what' value must be rejected."""
    basic_metadata['what'] = 'MYFILE'
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
def test_unsupported_version(basic_metadata):
    """A 'version' of '100' raises UnsupportedDatalakeMetadataVersion."""
    basic_metadata['version'] = '100'
    with pytest.raises(UnsupportedDatalakeMetadataVersion):
        Metadata(basic_metadata)
def test_unallowed_dots(basic_metadata):
    """A 'where' value containing dots must be rejected."""
    basic_metadata['where'] = 'this.that.com'
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
def get_file(self, file_id):
    """Return an ArchiveFile for the stored object identified by file_id.

    The object is looked up through self._get_s3_key. Its 'Body' entry
    becomes the file handle, and the JSON string stored under
    ['Metadata']['datalake'] is decoded and wrapped in Metadata.
    """
    s3_object = self._get_s3_key(file_id)
    body = s3_object['Body']
    # The datalake metadata is stored on the object as a JSON-encoded string.
    raw_metadata = s3_object['Metadata']['datalake']
    return ArchiveFile(body, Metadata(json.loads(raw_metadata)))