Exemplo n.º 1
0
    def enqueue(self, filename, compress=False, **metadata_fields):
        '''Bundle a file into the queue directory so it can be pushed later.

        Args:
            filename: path of the file to enqueue.

            compress: when true, File.from_filename_compressed is tried
            first. If it raises OverflowError, a warning is logged and the
            file is enqueued uncompressed instead.

            metadata_fields: keyword arguments passed through unchanged to
            the File constructor helper that is used.

        Returns the File that was written to the queue directory as
        "<metadata id>.tar".

        '''
        log.info('Enqueing ' + filename)
        if not compress:
            queued = File.from_filename(filename, **metadata_fields)
        else:
            try:
                queued = File.from_filename_compressed(filename,
                                                       **metadata_fields)
            except OverflowError:
                log.warning('Compression failed. Falling back to uncompressed '
                            'uploads')
                queued = File.from_filename(filename, **metadata_fields)
        # The bundle is named after the metadata id inside the queue dir.
        bundle_name = queued.metadata['id'] + '.tar'
        queued.to_bundle(os.path.join(self.queue_dir, bundle_name))
        return queued
Exemplo n.º 2
0
    def enqueue(self, filename, compress=False, **metadata_fields):
        '''Bundle a file into the queue directory so it can be pushed later.

        Args:
            filename: path of the file to enqueue.

            compress: when true, File.from_filename_compressed is tried
            first. If it raises OverflowError, a warning is logged and the
            file is enqueued uncompressed instead.

            metadata_fields: keyword arguments passed through unchanged to
            the File constructor helper that is used.

        Returns the File that was written to the queue directory as
        "<metadata id>.tar".

        '''
        log.info('Enqueing ' + filename)
        if not compress:
            queued = File.from_filename(filename, **metadata_fields)
        else:
            try:
                queued = File.from_filename_compressed(filename,
                                                       **metadata_fields)
            except OverflowError:
                log.warning('Compression failed. Falling back to uncompressed '
                            'uploads')
                queued = File.from_filename(filename, **metadata_fields)
        # The bundle is named after the metadata id inside the queue dir.
        bundle_name = queued.metadata['id'] + '.tar'
        queued.to_bundle(os.path.join(self.queue_dir, bundle_name))
        return queued
Exemplo n.º 3
0
def test_bundle_with_invalid_metadata(bundle_maker, random_metadata):
    '''Loading a bundle whose metadata lacks 'what' must raise
    InvalidDatalakeMetadata.
    '''
    # Drop one key so the serialized metadata is no longer complete.
    del random_metadata['what']
    encoded_metadata = json.dumps(random_metadata).encode('utf-8')
    bundle = bundle_maker(
        content=b'1234',
        metadata=encoded_metadata,
        version=b'0',
    )
    with pytest.raises(InvalidDatalakeMetadata):
        File.from_bundle(bundle)
Exemplo n.º 4
0
def test_bundle_with_invalid_metadata(bundle_maker, random_metadata):
    '''Loading a bundle whose metadata lacks 'what' must raise
    InvalidDatalakeMetadata.
    '''
    # Drop one key so the serialized metadata is no longer complete.
    del random_metadata['what']
    encoded_metadata = json.dumps(random_metadata).encode('utf-8')
    bundle = bundle_maker(
        content=b'1234',
        metadata=encoded_metadata,
        version=b'0',
    )
    with pytest.raises(InvalidDatalakeMetadata):
        File.from_bundle(bundle)
Exemplo n.º 5
0
def random_file(tmpdir, metadata=None):
    '''Write a randomly named file under tmpdir and wrap it in a File.

    The name comes from random_word(10) and the content from
    random_word(256). When metadata is None, random_metadata() supplies it.
    The metadata is passed to File.from_filename as keyword arguments.
    '''
    target = tmpdir.join(random_word(10))
    target.write(random_word(256))
    # NOTE(review): random_metadata is called as a plain function here. If it
    # is a pytest fixture in this module, pytest >= 4 rejects direct calls --
    # confirm.
    fields = random_metadata() if metadata is None else metadata
    return File.from_filename(target.strpath, **fields)
Exemplo n.º 6
0
def random_file(tmpdir, metadata=None):
    '''Write a randomly named file under tmpdir and wrap it in a File.

    The name comes from random_word(10) and the content from
    random_word(256). When metadata is None, generate_random_metadata()
    supplies it. The metadata is passed to File.from_filename as keyword
    arguments.
    '''
    target = tmpdir.join(random_word(10))
    target.write(random_word(256))
    fields = generate_random_metadata() if metadata is None else metadata
    return File.from_filename(target.strpath, **fields)
Exemplo n.º 7
0
 def _fetch_s3_url(self, url, stream=False):
     '''Return the file stored at an S3 url.

     The key for url and the metadata for that key are looked up first.
     With stream true, the key is handed to StreamingFile as-is. Otherwise
     the key's contents are copied into an in-memory buffer, which is
     rewound and wrapped in a File.
     '''
     key = self._get_key_from_url(url)
     metadata = self._get_metadata_from_key(key)
     if not stream:
         buf = BytesIO()
         key.get_contents_to_file(buf)
         # Rewind so the File reads from the start of the content.
         buf.seek(0)
         return File(buf, **metadata)
     return StreamingFile(key, **metadata)
Exemplo n.º 8
0
 def _fetch_http_url(self, url, stream=False):
     '''Return the file served at an HTTP url.

     Metadata is fetched first, then the content stream is opened. With
     stream true, the stream object is handed to StreamingHTTPFile as-is.
     Otherwise it is drained in 1024-byte blocks into an in-memory buffer,
     which is rewound and wrapped in a File.
     '''
     metadata = self._get_metadata_from_http_url(url)
     body = self._stream_http_url(url)
     if not stream:
         buf = BytesIO()
         for chunk in body.iter_content(1024):
             buf.write(chunk)
         # Rewind so the File reads from the start of the content.
         buf.seek(0)
         return File(buf, **metadata)
     return StreamingHTTPFile(body, **metadata)
Exemplo n.º 9
0
def test_valid_bundle(tmpdir, random_metadata):
    '''Round-trip a File through a bundle; metadata and content must match.'''
    bundle_path = os.path.join(str(tmpdir), 'foo.tar')
    original = random_file(tmpdir, metadata=random_metadata)
    original.to_bundle(bundle_path)
    restored = File.from_bundle(bundle_path)
    assert original.metadata == restored.metadata
    original_content = original.read()
    restored_content = restored.read()
    # Guard against a vacuous pass where both reads come back empty.
    assert original_content
    assert original_content == restored_content
Exemplo n.º 10
0
def test_valid_bundle(tmpdir, random_metadata):
    '''Round-trip a File through a bundle; metadata and content must match.'''
    bundle_path = os.path.join(str(tmpdir), 'foo.tar')
    original = random_file(tmpdir, metadata=random_metadata)
    original.to_bundle(bundle_path)
    restored = File.from_bundle(bundle_path)
    assert original.metadata == restored.metadata
    original_content = original.read()
    restored_content = restored.read()
    # Guard against a vacuous pass where both reads come back empty.
    assert original_content
    assert original_content == restored_content
Exemplo n.º 11
0
    def enqueue(self, filename, compress=False, **metadata_fields):
        '''Bundle a file into the queue directory so it can be pushed later.

        Args:
            filename: path of the file to enqueue.

            compress: when true, the File is built with
            File.from_filename_compressed; otherwise with File.from_filename.

            metadata_fields: keyword arguments passed through unchanged to
            the File constructor helper that is used.

        Returns the File that was written to the queue directory as
        "<metadata id>.tar".

        '''
        log.info('Enqueing ' + filename)
        make_file = (File.from_filename_compressed if compress
                     else File.from_filename)
        queued = make_file(filename, **metadata_fields)
        # The bundle is named after the metadata id inside the queue dir.
        bundle_name = queued.metadata['id'] + '.tar'
        queued.to_bundle(os.path.join(self.queue_dir, bundle_name))
        return queued
Exemplo n.º 12
0
 def _synchronous_push(self, filename):
     '''Push one bundle file to the archive and delete it afterwards.

     The bundle is loaded with File.from_bundle. If that raises
     InvalidDatalakeBundle, the problem is logged with its traceback and the
     method returns: the file is not pushed, not unlinked, and the callback
     is not invoked. Otherwise the File is pushed through self._archive, the
     push is logged, the bundle file is unlinked and, when self._callback is
     set, it is called with the bundle filename.

     Args:
         filename: path of the bundle to push.
     '''
     try:
         f = File.from_bundle(filename)
     except InvalidDatalakeBundle as e:
         # e.args is empty when the exception was raised without a message.
         # Fall back to the exception itself so the handler cannot fail with
         # IndexError and turn a skipped upload into a crash.
         reason = e.args[0] if e.args else e
         msg = '{}. Skipping upload.'.format(reason)
         log.exception(msg)
         return
     url = self._archive.push(f)
     msg = 'Pushed {}({}) to {}'.format(filename, f.metadata['path'], url)
     log.info(msg)
     # Only remove the bundle once the push has returned.
     os.unlink(filename)
     if self._callback is not None:
         self._callback(filename)
Exemplo n.º 13
0
 def _synchronous_push(self, filename):
     '''Push one bundle file to the archive and delete it afterwards.

     The bundle is loaded with File.from_bundle. If that raises
     InvalidDatalakeBundle, the problem is logged with its traceback and the
     method returns: the file is not pushed, not unlinked, and the callback
     is not invoked. Otherwise the File is pushed through self._archive, the
     push is logged, the bundle file is unlinked and, when self._callback is
     set, it is called with the bundle filename.

     Args:
         filename: path of the bundle to push.
     '''
     try:
         f = File.from_bundle(filename)
     except InvalidDatalakeBundle as e:
         # e.args is empty when the exception was raised without a message.
         # Fall back to the exception itself so the handler cannot fail with
         # IndexError and turn a skipped upload into a crash.
         reason = e.args[0] if e.args else e
         msg = '{}. Skipping upload.'.format(reason)
         log.exception(msg)
         return
     url = self._archive.push(f)
     msg = 'Pushed {}({}) to {}'.format(filename, f.metadata['path'], url)
     log.info(msg)
     # Only remove the bundle once the push has returned.
     os.unlink(filename)
     if self._callback is not None:
         self._callback(filename)
Exemplo n.º 14
0
    def prepare_metadata_and_push(self, filename, **metadata_fields):
        '''Build a File from a path plus metadata and push it to the archive.

        Args:
            filename: path of the file to push

            metadata_fields: metadata fields for the file, handed to
            File.from_filename. Missing fields will be added if they can be
            determined. Otherwise, InvalidDatalakeMetadata will be raised.

        Returns the result of self.push for that File, i.e. the url to which
        the file was pushed.
        '''
        return self.push(File.from_filename(filename, **metadata_fields))
Exemplo n.º 15
0
def test_pre_python_3_bundle():
    '''A legacy bundle must load with the expected metadata and content.'''
    # Before python 3 support, python picked the file encoding for us. It is
    # now chosen explicitly, so check that a bundle written the old way
    # still loads.
    bundle_id = '7c72f3ab092445a08aa6983c864c087c'
    bundle_path = os.path.join(legacy_bundles, bundle_id + '.tar')
    want_metadata = {
        'version': 0,
        'id': bundle_id,
        'what': 'chicken',
        'where': 'nomad',
        'work_id': None,
        'path': '/home/brian/src/datalake/chicken.log',
        'start': 1474308548000,
        'end': 1474308636507,
        'hash': '70373dec2de49d566fc1e34bacca7561',
    }
    want_content = b'Wake up.\nEat. Mmm.\nHappy hour.\nSleep.\n'
    loaded = File.from_bundle(bundle_path)
    assert loaded.metadata == want_metadata
    assert loaded.read() == want_content
Exemplo n.º 16
0
def test_pre_python_3_bundle():
    '''A legacy bundle must load with the expected metadata and content.'''
    # Before python 3 support, python picked the file encoding for us. It is
    # now chosen explicitly, so check that a bundle written the old way
    # still loads.
    bundle_id = '7c72f3ab092445a08aa6983c864c087c'
    bundle_path = os.path.join(legacy_bundles, bundle_id + '.tar')
    want_metadata = {
        'version': 0,
        'id': bundle_id,
        'what': 'chicken',
        'where': 'nomad',
        'work_id': None,
        'path': '/home/brian/src/datalake/chicken.log',
        'start': 1474308548000,
        'end': 1474308636507,
        'hash': '70373dec2de49d566fc1e34bacca7561',
    }
    want_content = b'Wake up.\nEat. Mmm.\nHappy hour.\nSleep.\n'
    loaded = File.from_bundle(bundle_path)
    assert loaded.metadata == want_metadata
    assert loaded.read() == want_content
Exemplo n.º 17
0
def test_bundle_not_tar(tmpfile):
    '''File.from_bundle must raise InvalidDatalakeBundle for the file that
    tmpfile('foobar') produces.
    '''
    # presumably 'foobar' is the file's content, not its name -- confirm
    # against the tmpfile fixture
    not_a_bundle = tmpfile('foobar')
    with pytest.raises(InvalidDatalakeBundle):
        File.from_bundle(not_a_bundle)
Exemplo n.º 18
0
def test_bundle_without_content(bundle_maker, random_metadata):
    '''A bundle made with metadata and version but no content must raise
    InvalidDatalakeBundle on load.
    '''
    encoded_metadata = json.dumps(random_metadata).encode('utf-8')
    bundle = bundle_maker(version=b'0', metadata=encoded_metadata)
    with pytest.raises(InvalidDatalakeBundle):
        File.from_bundle(bundle)
Exemplo n.º 19
0
def test_non_existent_file():
    '''File.from_filename must raise IOError for the hard-coded path, which
    is expected not to exist.
    '''
    missing_path = 'surelythisfiledoesnotexist.txt'
    with pytest.raises(IOError):
        File.from_filename(missing_path)
Exemplo n.º 20
0
def test_bundle_with_non_json_metadata(bundle_maker):
    '''A bundle whose metadata bytes are not JSON must raise
    InvalidDatalakeBundle on load.
    '''
    bundle = bundle_maker(
        content=b'1234',
        metadata=b'not:a%json#',
        version=b'0',
    )
    with pytest.raises(InvalidDatalakeBundle):
        File.from_bundle(bundle)
Exemplo n.º 21
0
def test_bundle_with_non_json_metadata(bundle_maker):
    '''A bundle whose metadata bytes are not JSON must raise
    InvalidDatalakeBundle on load.
    '''
    bundle = bundle_maker(
        content=b'1234',
        metadata=b'not:a%json#',
        version=b'0',
    )
    with pytest.raises(InvalidDatalakeBundle):
        File.from_bundle(bundle)
Exemplo n.º 22
0
def test_bundle_without_content(bundle_maker, random_metadata):
    '''A bundle made with metadata and version but no content must raise
    InvalidDatalakeBundle on load.
    '''
    encoded_metadata = json.dumps(random_metadata).encode('utf-8')
    bundle = bundle_maker(version=b'0', metadata=encoded_metadata)
    with pytest.raises(InvalidDatalakeBundle):
        File.from_bundle(bundle)
Exemplo n.º 23
0
def test_bundle_without_version(bundle_maker, random_metadata):
    '''A bundle made with content and metadata but no version must raise
    InvalidDatalakeBundle on load.
    '''
    encoded_metadata = json.dumps(random_metadata).encode('utf-8')
    bundle = bundle_maker(metadata=encoded_metadata, content=b'1234')
    with pytest.raises(InvalidDatalakeBundle):
        File.from_bundle(bundle)
Exemplo n.º 24
0
def test_non_existent_file():
    '''File.from_filename must raise IOError for the hard-coded path, which
    is expected not to exist.
    '''
    missing_path = 'surelythisfiledoesnotexist.txt'
    with pytest.raises(IOError):
        File.from_filename(missing_path)
Exemplo n.º 25
0
def test_bundle_not_tar(tmpfile):
    '''File.from_bundle must raise InvalidDatalakeBundle for the file that
    tmpfile('foobar') produces.
    '''
    # presumably 'foobar' is the file's content, not its name -- confirm
    # against the tmpfile fixture
    not_a_bundle = tmpfile('foobar')
    with pytest.raises(InvalidDatalakeBundle):
        File.from_bundle(not_a_bundle)
Exemplo n.º 26
0
def test_bundle_without_version(bundle_maker, random_metadata):
    '''A bundle made with content and metadata but no version must raise
    InvalidDatalakeBundle on load.
    '''
    encoded_metadata = json.dumps(random_metadata).encode('utf-8')
    bundle = bundle_maker(metadata=encoded_metadata, content=b'1234')
    with pytest.raises(InvalidDatalakeBundle):
        File.from_bundle(bundle)