Пример #1
0
def test_absolute_windows_path_single_slash(basic_metadata):
    """A Windows path with one separator after the drive is kept as-is."""
    # Some cygwin environments seem to emit only a single slash after the
    # drive letter, so that form has to be accepted as well.
    single_slash_path = r'Z:\foo\bar.txt'
    basic_metadata['path'] = single_slash_path
    metadata = Metadata(basic_metadata)
    assert metadata['path'] == single_slash_path
Пример #2
0
 def list_from_metadata(cls, url, metadata):
     '''Build the list of DatalakeRecords for a url and its metadata.

     The key is looked up from the url, a Metadata is built from the
     given fields, and one record is created for every time bucket
     returned by get_time_buckets_from_metadata. Each record also gets
     the key's create time and size.
     '''
     key = cls._get_key(url)
     metadata = Metadata(**metadata)
     create_time = cls._get_create_time(key)
     records = []
     for bucket in cls.get_time_buckets_from_metadata(metadata):
         records.append(cls(url, metadata, bucket, create_time, key.size))
     return records
Пример #3
0
    def __init__(self, fd, **metadata_fields):
        '''Create a File.

        Args:

            fd: file-like object that the file data is read from.

            metadata_fields: the metadata fields already known for this
            file. Fields that are missing are added when they can be
            determined; otherwise InvalidDatalakeMetadata is raised.

        '''
        self._fd = fd
        self._initialize_methods_from_fd()
        # Inference runs first, then the Metadata is built from the same
        # metadata_fields dict (presumably updated in place -- confirm
        # against _infer_metadata_fields).
        self._infer_metadata_fields(metadata_fields)
        metadata = Metadata(metadata_fields)
        self.metadata = metadata
Пример #4
0
    def __init__(self, stream, **metadata_fields):
        '''Create a StreamingFile.

        A StreamingFile is never loaded into memory as a whole.

        Args:

            stream: a generator that the file data is read from.

            metadata_fields: the metadata fields already known for this
            file. Fields that are missing are added when they can be
            determined; otherwise InvalidDatalakeMetadata is raised.

        '''
        self._stream = stream
        # Starts out with an empty byte buffer and the content-generator
        # flag switched off.
        self._buffer = b''
        self._content_gen = False
        metadata = Metadata(metadata_fields)
        self.metadata = metadata
Пример #5
0
def test_normalize_int_date(basic_metadata):
    """An all-digit string for 'end' is normalized to the matching int."""
    end_as_text = '1426809600123'
    basic_metadata['end'] = end_as_text
    metadata = Metadata(basic_metadata)
    assert metadata['end'] == 1426809600123
Пример #6
0
def test_version_default(basic_metadata):
    """When 'version' is removed from the input, Metadata reports 0."""
    del basic_metadata['version']
    metadata = Metadata(basic_metadata)
    assert 'version' in metadata
    assert metadata['version'] == 0
Пример #7
0
def test_unallowed_spaces(basic_metadata):
    """A 'where' value containing a space is rejected as invalid."""
    where_with_space = 'SAN FRANCISCO'
    basic_metadata['where'] = where_with_space
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
Пример #8
0
def test_path_with_leading_dot_not_allowed(basic_metadata):
    """A 'path' that starts with './' is rejected as invalid."""
    dotted_path = './abc.txt'
    basic_metadata['path'] = dotted_path
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
Пример #9
0
def test_id_not_overwritten(basic_metadata):
    """An 'id' supplied by the caller is preserved by Metadata."""
    supplied_id = '123'
    basic_metadata['id'] = supplied_id
    metadata = Metadata(basic_metadata)
    assert 'id' in metadata
    assert metadata['id'] == '123'
Пример #10
0
def test_unallowed_characters(basic_metadata):
    """A 'what' value containing '#' and '$' is rejected as invalid."""
    what_with_symbols = '123#$'
    basic_metadata['what'] = what_with_symbols
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
Пример #11
0
def test_invalid_date(basic_metadata):
    """An 'end' value that is not a recognizable date is rejected."""
    nonsense_date = 'bxfl230'
    basic_metadata['end'] = nonsense_date
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
Пример #12
0
def test_none_for_required_field(basic_metadata):
    """Setting 'where' to None makes the metadata invalid."""
    missing_value = None
    basic_metadata['where'] = missing_value
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
Пример #13
0
def test_unsupported_version(basic_metadata):
    """Version '100' raises UnsupportedDatalakeMetadataVersion."""
    unknown_version = '100'
    basic_metadata['version'] = unknown_version
    with pytest.raises(UnsupportedDatalakeMetadataVersion):
        Metadata(basic_metadata)
Пример #14
0
def test_absolute_windows_path(basic_metadata):
    """An absolute Windows path is accepted and stored unchanged."""
    # Raw string: the drive letter is followed by two separator characters.
    windows_path = r'Z:\\foo\bar.txt'
    basic_metadata['path'] = windows_path
    metadata = Metadata(basic_metadata)
    assert metadata['path'] == windows_path
Пример #15
0
def test_no_end_allowed(basic_metadata):
    """Metadata without an 'end' is valid and reports 'end' as None."""
    del basic_metadata['end']
    metadata = Metadata(basic_metadata)
    assert metadata['end'] is None
Пример #16
0
def test_normalize_date(basic_metadata):
    """A date-only 'start' is expanded to a full UTC timestamp string."""
    date_only = '2015-03-20'
    basic_metadata['start'] = date_only
    metadata = Metadata(basic_metadata)
    assert metadata['start'] == '2015-03-20T00:00:00.000000Z'
Пример #17
0
def test_relative_windows_path_not_allowed(basic_metadata):
    """A Windows-style path without a drive prefix is rejected."""
    relative_path = r'foo\abc.txt'
    basic_metadata['path'] = relative_path
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
Пример #18
0
def test_normalize_date(basic_metadata):
    """A date-only 'start' is normalized to the integer 1426809600000."""
    date_only = '2015-03-20'
    basic_metadata['start'] = date_only
    metadata = Metadata(basic_metadata)
    assert metadata['start'] == 1426809600000
Пример #19
0
def test_work_id_null_string_unallowed(basic_metadata):
    """The literal string 'null' is not an acceptable 'work_id'."""
    null_text = 'null'
    basic_metadata['work_id'] = null_text
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
Пример #20
0
def test_id_gets_assigned(basic_metadata):
    """Metadata built from the unmodified fixture has a non-None 'id'."""
    # NOTE(review): presumably the fixture carries no 'id' of its own, so
    # this checks that one is generated -- confirm against the fixture.
    metadata = Metadata(basic_metadata)
    assert 'id' in metadata
    assert metadata['id'] is not None
Пример #21
0
def test_work_id_with_unallowed_characters(basic_metadata):
    """A 'work_id' containing '#' is rejected as invalid."""
    work_id_with_hash = 'foojob#123'
    basic_metadata['work_id'] = work_id_with_hash
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
Пример #22
0
def test_work_id_gets_assigned(basic_metadata):
    """Metadata built from the unmodified fixture has 'work_id' set to None."""
    # NOTE(review): presumably the fixture carries no 'work_id', so this
    # checks the default -- confirm against the fixture.
    metadata = Metadata(basic_metadata)
    assert 'work_id' in metadata
    assert metadata['work_id'] is None
Пример #23
0
def test_end_before_start(basic_metadata):
    """Swapping 'start' and 'end' makes the metadata invalid."""
    original_start = basic_metadata['start']
    original_end = basic_metadata['end']
    basic_metadata['end'] = original_start
    basic_metadata['start'] = original_end
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
Пример #24
0
def test_no_end_allowed(basic_metadata):
    """Metadata without an 'end' is valid and gains no 'end' key."""
    del basic_metadata['end']
    metadata = Metadata(basic_metadata)
    assert 'end' not in metadata
Пример #25
0
def test_random_metadata(random_metadata):
    """Building Metadata from the random_metadata fixture must not raise."""
    # Other code relies on datalake-common's random_metadata being valid,
    # so simply constructing a Metadata from it is the whole check.
    Metadata(random_metadata)
Пример #26
0
def test_unallowed_capitals(basic_metadata):
    """An upper-case 'what' value is rejected as invalid."""
    upper_case_what = 'MYFILE'
    basic_metadata['what'] = upper_case_what
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
Пример #27
0
def test_normalize_float_date(basic_metadata):
    """'1426809600.123' as 'start' is normalized to 1426809600123."""
    start_as_text = '1426809600.123'
    basic_metadata['start'] = start_as_text
    metadata = Metadata(basic_metadata)
    assert metadata['start'] == 1426809600123
Пример #28
0
def test_unallowed_dots(basic_metadata):
    """A 'where' value containing dots is rejected as invalid."""
    dotted_where = 'this.that.com'
    basic_metadata['where'] = dotted_where
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata(basic_metadata)
Пример #29
0
 def list_from_metadata(cls, url, metadata):
     '''Build the list of DatalakeRecords for a url and its metadata.

     A Metadata is built from the given fields and one record is created
     for every time bucket returned by get_time_buckets_from_metadata.
     '''
     metadata = Metadata(**metadata)
     records = []
     for bucket in cls.get_time_buckets_from_metadata(metadata):
         records.append(cls(url, metadata, bucket))
     return records