def startDecompressMessage(self):
     if self._isServer:
         if self._decompressor is None or self.client_no_context_takeover:
             self._decompressor = snappy.StreamDecompressor()
     else:
         if self._decompressor is None or self.server_no_context_takeover:
             self._decompressor = snappy.StreamDecompressor()
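All of the examples on this page exercise python-snappy's framing-format stream API: a StreamCompressor emits self-delimiting framed chunks and a StreamDecompressor consumes them incrementally. As a minimal sketch of that round trip (not taken from any of the projects below):

import snappy

compressor = snappy.StreamCompressor()
decompressor = snappy.StreamDecompressor()

# compress() can be called repeatedly; the first call also emits the
# stream identifier header that the decompressor checks for.
framed = compressor.compress(b"hello ") + compressor.compress(b"world")
assert decompressor.decompress(framed) == b"hello world"
decompressor.flush()  # raises UncompressError if a frame was left incomplete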
Example #2
    def test_decompression(self):
        # test that we check for the initial stream identifier
        data = b"\x01" * 50
        self.assertRaises(snappy.UncompressError,
                snappy.StreamDecompressor().decompress,
                    b"\x01\x36\x00\x00" +
                    struct.pack("<L", snappy._masked_crc32c(data)) + data)
        self.assertEqual(
                snappy.StreamDecompressor().decompress(
                    b"\xff\x06\x00\x00sNaPpY"
                    b"\x01\x36\x00\x00" +
                    struct.pack("<L", snappy._masked_crc32c(data)) + data),
                data)
        decompressor = snappy.StreamDecompressor()
        decompressor.decompress(b"\xff\x06\x00\x00sNaPpY")
        self.assertEqual(
                decompressor.copy().decompress(
                    b"\x01\x36\x00\x00" +
                    struct.pack("<L", snappy._masked_crc32c(data)) + data),
                data)

        # test that we throw errors for unknown unskippable chunks
        self.assertRaises(snappy.UncompressError,
                decompressor.copy().decompress, b"\x03\x01\x00\x00")

        # test that we skip unknown skippable chunks
        self.assertEqual(b"",
                         decompressor.copy().decompress(b"\xfe\x01\x00\x00"))

        # test that we check CRCs
        compressed_data = snappy.compress(data)
        real_crc = struct.pack("<L", snappy._masked_crc32c(data))
        fake_crc = os.urandom(4)
        self.assertRaises(snappy.UncompressError,
                decompressor.copy().decompress,
                    b"\x00\x0a\x00\x00" + fake_crc + compressed_data)
        self.assertEqual(
                decompressor.copy().decompress(
                    b"\x00\x0a\x00\x00" + real_crc + compressed_data),
                data)

        # test that we buffer when we don't have enough
        uncompressed_data = os.urandom(100)
        compressor = snappy.StreamCompressor()
        compressed_data = (compressor.compress(uncompressed_data[:50]) +
                           compressor.compress(uncompressed_data[50:]))
        for split1 in range(len(compressed_data) - 1):
            for split2 in range(split1, len(compressed_data)):
                decompressor = snappy.StreamDecompressor()
                self.assertEqual(
                    (decompressor.decompress(compressed_data[:split1]) +
                     decompressor.decompress(compressed_data[split1:split2]) +
                     decompressor.decompress(compressed_data[split2:])),
                    uncompressed_data)
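The exhaustive split loop at the end is the key property the rest of these examples rely on: decompress() buffers any partial frame internally, so the reassembled output is byte-identical no matter where the compressed stream is cut.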
Example #3
    async def get_object(self, session: aiohttp.ClientSession) -> None:
        """Method to get the object from S3 after the pre-signed URL has been obtained

        Args:
            session: The current aiohttp session

        Returns:
            None
        """
        try:
            decompressor = snappy.StreamDecompressor()
            async with session.get(self.presigned_s3_url) as response:
                if response.status != 200:
                    # An error occurred
                    body = await response.text()
                    raise IOError(
                        f"Failed to get {self.object_details.dataset_path} from storage backend."
                        f" Status: {response.status}. Response: {body}")

                async with aiofiles.open(self.object_details.object_path,
                                         'wb') as fd:
                    while True:
                        chunk = await response.content.read(
                            self.download_chunk_size)
                        if not chunk:
                            await fd.write(decompressor.flush())
                            break
                        await fd.write(decompressor.decompress(chunk))
        except Exception as err:
            logger.exception(err)
            raise IOError(
                f"Failed to get {self.object_details.dataset_path} from storage backend. {err}"
            )
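The flush() call at end-of-stream is what turns a truncated download into a hard failure: if the response body ends mid-frame, flush() raises UncompressError, which the surrounding except block re-raises as an IOError.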
Example #4
 def _create_decompressor(self, alg):
     if alg == "snappy":
         return snappy.StreamDecompressor()
     elif alg == "lzma":
         return lzma.LZMADecompressor()
     raise InvalidConfigurationError(
         "invalid compression algorithm: {!r}".format(alg))
Example #5
    async def get_object(self, session: aiohttp.ClientSession, progress_update_fn: Callable) -> None:
        """Method to get the object from S3 after the pre-signed URL has been obtained

        Args:
            session: The current aiohttp session
            progress_update_fn: A callable with arg "completed_bytes" (int) indicating how many bytes have been
                                downloaded since the last call

        Returns:
            None
        """
        try:
            decompressor = snappy.StreamDecompressor()
            async with session.get(self.presigned_s3_url) as response:
                if response.status != 200:
                    # An error occurred
                    body = await response.text()
                    raise IOError(f"Failed to get {self.object_details.dataset_path} from storage backend."
                                  f" Status: {response.status}. Response: {body}")

                async with aiofiles.open(self.object_details.object_path, 'wb') as fd:
                    while True:
                        chunk = await response.content.read(self.download_chunk_size)
                        if not chunk:
                            await fd.write(decompressor.flush())
                            break

                        decompressed_chunk = decompressor.decompress(chunk)
                        await fd.write(decompressed_chunk)
                        progress_update_fn(completed_bytes=len(decompressed_chunk))
        except Exception as err:
            logger.exception(err)
            raise IOError(f"Failed to get {self.object_details.dataset_path} from storage backend. {err}")
Example #6
 def test_concatenation(self):
     data1 = os.urandom(snappy.snappy._CHUNK_MAX * 2)
     data2 = os.urandom(4096)
     decompressor = snappy.StreamDecompressor()
     self.assertEqual(
         decompressor.decompress(snappy.StreamCompressor().compress(data1) +
                                 snappy.StreamCompressor().compress(data2)),
         data1 + data2)
Example #7
    def __init__(self, buffer=None):
        if not snappy:
            raise RuntimeError('python-snappy required for compression')

        self._decompressor = snappy.StreamDecompressor()
        self._compressor = snappy.StreamCompressor()

        super().__init__(buffer)
Example #8
 def __init__(self, input):
     super(SnappyInputStream, self).__init__(input)
     try:
         import snappy
     except ImportError:
         raise errors.DependencyNotInstalledError(
             "python-snappy library is required for snappy support")
     self._decompressor = snappy.StreamDecompressor()
Example #9
    def __init__(self, infile, mode, **kwargs):
        import snappy

        self.details = {"size": 999999999}  # not true, but OK if we don't seek
        super().__init__(fs=None, path="snappy", mode=mode.strip("b") + "b", **kwargs)
        self.infile = infile
        if "r" in mode:
            self.codec = snappy.StreamDecompressor()
        else:
            self.codec = snappy.StreamCompressor()
Example #10
 def __init__(self, input):
     try:
         import snappy
     except ImportError:
         raise errors.DependencyNotInstalledError(
             "python-snappy library is required for snappy support")
     self._decompressor = snappy.StreamDecompressor()
     self._input = input
     self._internal_buffer = compat.BytesIO()
     self._cursor = 0
Example #11
    def __init__(self, infile, mode, **kwargs):
        import snappy

        super().__init__(
            fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
        )
        self.infile = infile
        if "r" in mode:
            self.codec = snappy.StreamDecompressor()
        else:
            self.codec = snappy.StreamCompressor()
Example #12
 def __init__(self, infile, mode, **kwargs):
     import snappy
     self.details = {'size': 999999999}  # not true, but OK if we don't seek
     super().__init__(fs=None,
                      path='snappy',
                      mode=mode.strip('b') + 'b',
                      **kwargs)
     self.infile = infile
     if 'r' in mode:
         self.codec = snappy.StreamDecompressor()
     else:
         self.codec = snappy.StreamCompressor()
Example #13
 def decompressor(self, algorithm):
     if algorithm is None:
         return None
     if algorithm == "lzma":
         return lzma.LZMADecompressor()
     elif algorithm == "snappy":
         if not snappy:
             raise MissingLibraryError(
                 "python-snappy is required when using snappy compression")
         return snappy.StreamDecompressor()
     else:
         raise InvalidConfigurationError(
             "invalid compression algorithm: {!r}".format(algorithm))
Example #14
 def codec(self, value):
     self._codec = value if six.PY3 else ord(value)
     if self.codec == 0:
         pass
     elif self.codec == 1:
         self._decompressor = \
             zlib.decompressobj(16 + zlib.MAX_WBITS)
     elif self.codec == 2:
         self._decompressor = snappy.StreamDecompressor()
     elif self.codec == 3:
         self._decompressor = lzma.LZMADecompressor()
     else:
         raise ValueError("Unknown compression type")
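In this codec setter, the 16 + zlib.MAX_WBITS window size tells zlib to expect a gzip wrapper, so codec 1 decodes gzip-framed data, while codecs 2 and 3 map to framed snappy and lzma respectively; codec 0 leaves the data uncompressed.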
Example #15
def restore(script):
    # Build a list of metrics to restore from our object store and globbing
    metrics = search(script)

    # For each metric, find the date we want
    for i in metrics.keys():
        objs = metrics[i]
        d = findBackup(script, objs, script.options.date)
        logger.info("Restoring %s from timestamp %s" % (i, d))

        blobgz  = script.store.get("%s%s/%s.wsp.%s" \
                % (script.options.storage_path, i, d, script.options.algorithm))
        blobSHA = script.store.get("%s%s/%s.sha1" \
                % (script.options.storage_path, i, d))

        if blobgz is None:
            logger.warning("Skipping missing file in object store: %s/%s.wsp.%s" \
                    % (i, d, script.options.algorithm))
            continue

        # Decompress
        blobgz = StringIO(blobgz)
        blob = None
        if script.options.algorithm == "gz":
            fd = gzip.GzipFile(fileobj=blobgz, mode="rb")
            blob = fd.read()
            fd.close()
        elif script.options.algorithm == "sz":
            decompressor = snappy.StreamDecompressor()
            blob = decompressor.decompress(blobgz.getvalue())
            try:
                decompressor.flush()
            except UncompressError as e:
                logger.error("Corrupt file in store: %s%s/%s.wsp.sz  Error %s" \
                        % (script.options.storage_path, i, d, str(e)))
                continue

        # Verify
        if blobSHA is None:
            logger.warning("Missing SHA1 checksum file...no verification")
        else:
            if hashlib.sha1(blob).hexdigest() != blobSHA:
                logger.warning("Backup does NOT verify, skipping metric %s" \
                               % i)
                continue

        heal(script, i, blob)

        # Clean up
        del blob
        blobgz.close()
Example #16
 def __init__(self,
              file_name_or_obj,
              file_mode,
              buffer_size=snappy._CHUNK_MAX):
     if isinstance(file_name_or_obj, str):
         self._file = open(file_name_or_obj, file_mode)
     else:
         self._file = file_name_or_obj
     self.buffer_pos = 0
     if file_mode == "wb":
         self.buffer = bytearray(buffer_size)
         self._compressor = snappy.StreamCompressor()
     else:
         self.buffer = None
         self._decompressor = snappy.StreamDecompressor()
Example #17
    def __init__(self, next_fp, mode):
        if snappy is None:
            raise io.UnsupportedOperation("Snappy is not available")

        if mode == "rb":
            self.decr = snappy.StreamDecompressor()
            self.encr = None
        elif mode == "wb":
            self.decr = None
            self.encr = snappy.StreamCompressor()
        else:
            raise io.UnsupportedOperation("unsupported mode for SnappyFile")

        super().__init__(next_fp)
        self.decr_done = False
Example #18
 def test_retrieve_snappy_file_obj(self, tmpdir):
     """
     Test the retrieve_file_obj method with a snappy file
     """
     # Setup the WAL
     source = tmpdir.join("wal_dir/000000080000ABFF000000C1")
     source.write("something".encode("utf-8"), ensure=True)
     # Create a simple CloudWalUploader obj
     uploader = CloudWalUploader(mock.MagicMock(),
                                 "test-server",
                                 compression="snappy")
     open_file = uploader.retrieve_file_obj(source.strpath)
     # Check the in memory file received
     assert open_file
     # Decompress on the fly to check content
     assert snappy.StreamDecompressor().decompress(
         open_file.read()) == "something".encode("utf-8")
Example #19
def restore(script):
    fetchArchiveFromHdfs(script)
    # Build a list of metrics to restore from our object store and globbing
    metrics = search(script)

    # For each metric, find the date we want
    for i in metrics.keys():
        objs = metrics[i]
        logger.info("Restoring %s from timestamp %s" %
                    (i, script.options.date))

        blobgz  = script.store.get("%s/%s.%s" \
                % (script.options.storage_path, i, script.options.algorithm))

        if blobgz is None:
            logger.warning("Skipping missing file in object store: %s/%s.%s" \
                    % (script.options.storage_path, i, script.options.algorithm))
            continue

        # Decompress
        blobgz = StringIO(blobgz)
        blob = None
        if script.options.algorithm == "gz":
            fd = gzip.GzipFile(fileobj=blobgz, mode="rb")
            blob = fd.read()
            fd.close()
        elif script.options.algorithm == "sz":
            decompressor = snappy.StreamDecompressor()
            blob = decompressor.decompress(blobgz.getvalue())
            try:
                decompressor.flush()
            except UncompressError as e:
                logger.error("Corrupt file in store: %s/%s.%s  Error %s" \
                        % (script.options.storage_path, i, script.options.algorithm, str(e)))
                continue

        heal(script, i, blob)

        # Clean up
        del blob
        blobgz.close()
Example #20
    def test_random(self):
        for _ in range(100):
            compressor = snappy.StreamCompressor()
            decompressor = snappy.StreamDecompressor()
            data = b""
            compressed = b""
            for _ in range(random.randint(0, 3)):
                chunk = os.urandom(random.randint(0, snappy.snappy._CHUNK_MAX * 2))
                data += chunk
                compressed += compressor.add_chunk(
                        chunk, compress=random.choice([True, False, None]))

            upper_bound = random.choice([256, snappy.snappy._CHUNK_MAX * 2])
            while compressed:
                size = random.randint(0, upper_bound)
                chunk, compressed = compressed[:size], compressed[size:]
                chunk = decompressor.decompress(chunk)
                self.assertEqual(data[:len(chunk)], chunk)
                data = data[len(chunk):]

            decompressor.flush()
            self.assertEqual(len(data), 0)
Example #21
    def load_stream(self, stream):
        off = 0
        length = len(stream)
        chunkLength = 0x00000000
        streamd = snappy.StreamDecompressor()
        dec = b""
        while (off + 4 < length):
            piece = stream[off:off + 4]
            chunkLength = unpack('<BBBB', piece)
            if (chunkLength[0] & 0xFF != 0):
                print("ooops!")
                return False

            chunkAsInt = unpack('<i', b'\0' + piece[1:])[0] >> 8
            if (off + chunkAsInt > length):
                print("Bad chunk (off: %d cai: %d len: %d)" % (
                    off, chunkAsInt, length))
                return False
            off += 4
            dec += snappy._uncompress(stream[off:off + chunkAsInt])
            off += chunkAsInt

        return dec
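The loop above decodes the framing layer by hand: each chunk starts with one type byte followed by a 3-byte little-endian length. A hypothetical helper (not part of the original source) that makes the header layout explicit:

import struct

def read_chunk_header(buf, off):
    # 1 type byte, then a 24-bit little-endian length padded out to 32 bits
    chunk_type = buf[off]
    (length,) = struct.unpack("<I", buf[off + 1:off + 4] + b"\x00")
    return chunk_type, length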
Example #22
def test_snappy_framed(benchmark, file, use_cramjam: bool):
    """
    Uses snappy compression framed
    """
    import snappy

    data = bytearray(file.read_bytes())
    if use_cramjam:
        benchmark(
            round_trip,
            compress=cramjam.snappy.compress,
            decompress=cramjam.snappy.decompress,
            data=data,
        )
    else:
        compressor = snappy.StreamCompressor()
        decompressor = snappy.StreamDecompressor()
        benchmark(
            round_trip,
            compress=compressor.compress,
            decompress=decompressor.decompress,
            data=data,
        )
Example #23
 def __init__(self, socket):
     self._decompressor = snappy.StreamDecompressor()
     self._compressor = snappy.StreamCompressor()
     self._socket = socket
     self._bootstrapped = None
Example #24
 def __init__(self, socket):
     self._decompressor = snappy.StreamDecompressor()
     self._compressor = snappy.StreamCompressor()
     super(SnappySocket, self).__init__(socket)
Example #25
 def __init__(self, buffer=None):
     self._parser = Reader()
     self._decompressor = snappy.StreamDecompressor()
     self._compressor = snappy.StreamCompressor()
     buffer and self.feed(buffer)
Example #26
    def test_pull_objects_all(self, mock_dataset_with_manifest):
        ds, manifest, working_dir = mock_dataset_with_manifest
        iom = IOManager(ds, manifest)

        revision = manifest.dataset_revision
        os.makedirs(
            os.path.join(manifest.cache_mgr.cache_root, revision, "other_dir"))
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test1.txt", "test content 1")
        helper_append_file(manifest.cache_mgr.cache_root, revision,
                           "test2.txt", "test content 2")
        manifest.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 2
        _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
        _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)
        obj1_target = obj_to_push[0].object_path
        obj2_target = obj_to_push[1].object_path

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
        obj2_source = os.path.join('/tmp', uuid.uuid4().hex)

        check_info = {obj1_target: obj1_source, obj2_target: obj2_source}

        assert os.path.exists(obj1_target) is True
        assert os.path.exists(obj2_target) is True

        helper_compress_file(obj1_target, obj1_source)
        helper_compress_file(obj2_target, obj2_source)

        assert os.path.isfile(obj1_target) is False
        assert os.path.isfile(obj2_target) is False
        assert os.path.isfile(obj1_source) is True
        assert os.path.isfile(obj2_source) is True

        # remove data from the local file cache
        os.remove(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "test1.txt"))
        os.remove(
            os.path.join(manifest.cache_mgr.cache_root,
                         manifest.dataset_revision, "test2.txt"))
        shutil.rmtree(os.path.join(manifest.cache_mgr.cache_root, 'objects'))
        os.makedirs(os.path.join(manifest.cache_mgr.cache_root, 'objects'))

        with aioresponses() as mocked_responses:
            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                payload={
                    "presigned_url":
                    f"https://dummyurl.com/{obj_id_1}?params=1",
                    "namespace": ds.namespace,
                    "obj_id": obj_id_1,
                    "dataset": ds.name
                },
                status=200)

            with open(obj1_source, 'rb') as data1:
                mocked_responses.get(
                    f"https://dummyurl.com/{obj_id_1}?params=1",
                    body=data1.read(),
                    status=200,
                    content_type='application/octet-stream')

            mocked_responses.get(
                f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_2}',
                payload={
                    "presigned_url":
                    f"https://dummyurl.com/{obj_id_2}?params=1",
                    "namespace": ds.namespace,
                    "obj_id": obj_id_2,
                    "dataset": ds.name
                },
                status=200)

            with open(obj2_source, 'rb') as data2:
                mocked_responses.get(
                    f"https://dummyurl.com/{obj_id_2}?params=1",
                    body=data2.read(),
                    status=200,
                    content_type='application/octet-stream')

            iom.dataset.backend.set_default_configuration(
                "test-user", "abcd", '1234')

            result = iom.pull_all()
            assert len(result.success) == 2
            assert len(result.failure) == 0
            assert result.success[0].object_path != result.success[
                1].object_path
            assert result.success[0].object_path in [
                obj_to_push[0].object_path, obj_to_push[1].object_path
            ]
            assert result.success[1].object_path in [
                obj_to_push[0].object_path, obj_to_push[1].object_path
            ]

            assert os.path.isfile(obj1_target) is True
            assert os.path.isfile(obj2_target) is True

            decompressor = snappy.StreamDecompressor()
            for r in result.success:
                with open(check_info[r.object_path], 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(r.object_path, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1
Example #27
    def test_pull_objects(self, mock_config_file, mock_dataset_head):
        im = InventoryManager(mock_config_file[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')
        iom = IOManager(ds, m)

        os.makedirs(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         "other_dir"))
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test2.txt", "fdsfgfd")
        m.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 2
        _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
        _, obj_id_2 = obj_to_push[1].object_path.rsplit('/', 1)
        obj1_target = obj_to_push[0].object_path
        obj2_target = obj_to_push[1].object_path

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)
        obj2_source = os.path.join('/tmp', uuid.uuid4().hex)

        assert os.path.exists(obj1_target) is True
        assert os.path.exists(obj2_target) is True
        helper_compress_file(obj1_target, obj1_source)
        helper_compress_file(obj2_target, obj2_source)
        assert os.path.isfile(obj1_target) is False
        assert os.path.isfile(obj2_target) is False
        assert os.path.isfile(obj1_source) is True
        assert os.path.isfile(obj2_source) is True

        # Clear out from linked dir
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test1.txt'))
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test2.txt'))

        with patch.object(Configuration, 'find_default_config',
                          lambda self: mock_config_file[0]):
            with aioresponses() as mocked_responses:
                mocked_responses.get(
                    f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                    payload={
                        "presigned_url":
                        f"https://dummyurl.com/{obj_id_1}?params=1",
                        "namespace": ds.namespace,
                        "obj_id": obj_id_1,
                        "dataset": ds.name
                    },
                    status=200)

                with open(obj1_source, 'rb') as data1:
                    mocked_responses.get(
                        f"https://dummyurl.com/{obj_id_1}?params=1",
                        body=data1.read(),
                        status=200,
                        content_type='application/octet-stream')

                mocked_responses.get(
                    f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_2}',
                    payload={
                        "presigned_url":
                        f"https://dummyurl.com/{obj_id_2}?params=1",
                        "namespace": ds.namespace,
                        "obj_id": obj_id_2,
                        "dataset": ds.name
                    },
                    status=200)

                with open(obj2_source, 'rb') as data2:
                    mocked_responses.get(
                        f"https://dummyurl.com/{obj_id_2}?params=1",
                        body=data2.read(),
                        status=200,
                        content_type='application/octet-stream')

                dl_kwargs = {
                    'logged_in_username': "******",
                    'access_token': "asdf",
                    'id_token': "1234",
                    'dataset_owner': "default",
                    'dataset_name': "dataset100",
                    'labbook_owner': None,
                    'labbook_name': None,
                    'keys': ["test1.txt"]
                }

                gtmcore.dispatcher.dataset_jobs.pull_objects(**dl_kwargs)

                # Manually link since this is disabled by default in the job (because in real use, multiple jobs run
                # in parallel and you only want to link once).
                m.link_revision()

                assert os.path.isfile(obj1_target) is True
                assert os.path.isfile(obj2_target) is False

                decompressor = snappy.StreamDecompressor()
                with open(obj1_source, 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(obj1_target, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1

                # Download other file
                dl_kwargs = {
                    'logged_in_username': "******",
                    'access_token': "asdf",
                    'id_token': "1234",
                    'dataset_owner': "default",
                    'dataset_name': "dataset100",
                    'labbook_owner': None,
                    'labbook_name': None,
                    'keys': ["test2.txt"]
                }

                gtmcore.dispatcher.dataset_jobs.pull_objects(**dl_kwargs)

                # Manually link since this is disabled by default in the job (because in real use, multiple jobs run
                # in parallel and you only want to link once).
                m.link_revision()

                assert os.path.isfile(obj1_target) is True
                assert os.path.isfile(obj2_target) is True

                with open(obj1_source, 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(obj1_target, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1

                with open(obj2_source, 'rb') as dd:
                    source1 = decompressor.decompress(dd.read())
                    source1 += decompressor.flush()
                with open(obj2_target, 'rt') as dd:
                    dest1 = dd.read()
                assert source1.decode("utf-8") == dest1
Example #28
    def test_pull_objects_fail_signing(self, mock_dataset_with_cache_dir, temp_directories):
        with aioresponses() as mocked_responses:
            sb = get_storage_backend("gigantum_object_v1")
            ds = mock_dataset_with_cache_dir[0]
            sb.set_default_configuration(ds.namespace, "abcd", '1234')

            object_dir, compressed_dir = temp_directories

            obj1_id = uuid.uuid4().hex
            obj2_id = uuid.uuid4().hex

            obj1_src_path = helper_write_object(object_dir, obj1_id, 'abcd')
            obj2_src_path = helper_write_object(object_dir, obj2_id, '1234')
            assert os.path.isfile(obj1_src_path) is True
            assert os.path.isfile(obj2_src_path) is True

            obj1_compressed_path = os.path.join(compressed_dir, obj1_id)
            obj2_compressed_path = os.path.join(compressed_dir, obj2_id)
            helper_compress_file(obj1_src_path, obj1_compressed_path)
            helper_compress_file(obj2_src_path, obj2_compressed_path)

            assert os.path.isfile(obj1_src_path) is False
            assert os.path.isfile(obj2_src_path) is False
            assert os.path.isfile(obj1_compressed_path) is True
            assert os.path.isfile(obj2_compressed_path) is True

            check_info = {obj1_src_path: obj1_compressed_path,
                          obj2_src_path: obj2_compressed_path}

            objects = [PullObject(object_path=obj1_src_path,
                                  revision=ds.git.repo.head.commit.hexsha,
                                  dataset_path='myfile1.txt'),
                       PullObject(object_path=obj2_src_path,
                                  revision=ds.git.repo.head.commit.hexsha,
                                  dataset_path='myfile2.txt')
                       ]

            mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj1_id}',
                                 payload={
                                         "presigned_url": f"https://dummyurl.com/{obj1_id}?params=1",
                                         "namespace": ds.namespace,
                                         "obj_id": obj1_id,
                                         "dataset": ds.name
                                 },
                                 status=400)

            mocked_responses.get(f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj2_id}',
                                 payload={
                                         "presigned_url": f"https://dummyurl.com/{obj2_id}?params=1",
                                         "namespace": ds.namespace,
                                         "obj_id": obj2_id,
                                         "dataset": ds.name
                                 },
                                 status=200)

            with open(obj2_compressed_path, 'rb') as data2:
                mocked_responses.get(f"https://dummyurl.com/{obj2_id}?params=1",
                                     body=data2.read(), status=200,
                                     content_type='application/octet-stream')

            result = sb.pull_objects(ds, objects, updater)
            assert len(result.success) == 1
            assert len(result.failure) == 1
            assert isinstance(result, PullResult) is True
            assert isinstance(result.success[0], PullObject) is True
            assert result.success[0].object_path == obj2_src_path
            assert result.failure[0].object_path == obj1_src_path

            assert os.path.isfile(result.success[0].object_path) is True
            assert os.path.isfile(result.failure[0].object_path) is False

            decompressor = snappy.StreamDecompressor()
            with open(check_info[result.success[0].object_path], 'rb') as dd:
                source1 = decompressor.decompress(dd.read())
                source1 += decompressor.flush()
            with open(result.success[0].object_path, 'rt') as dd:
                dest1 = dd.read()
            assert source1.decode("utf-8") == dest1
Example #29
    def test_download_dataset_files(self, mock_config_file_background_tests,
                                    mock_dataset_head):
        def dispatch_query_mock(self, job_key):
            JobStatus = namedtuple("JobStatus", ['status', 'meta'])
            return JobStatus(status='finished',
                             meta={'completed_bytes': '500'})

        def dispatch_mock(self, method_reference, kwargs, metadata, persist):
            with aioresponses() as mocked_responses:
                mocked_responses.get(
                    f'https://api.gigantum.com/object-v1/{ds.namespace}/{ds.name}/{obj_id_1}',
                    payload={
                        "presigned_url":
                        f"https://dummyurl.com/{obj_id_1}?params=1",
                        "namespace": ds.namespace,
                        "obj_id": obj_id_1,
                        "dataset": ds.name
                    },
                    status=200)

                with open(obj1_source, 'rb') as data1:
                    mocked_responses.get(
                        f"https://dummyurl.com/{obj_id_1}?params=1",
                        body=data1.read(),
                        status=200,
                        content_type='application/octet-stream')
                gtmcore.dispatcher.dataset_jobs.pull_objects(**kwargs)

                return "afakejobkey"

        im = InventoryManager(mock_config_file_background_tests[0])
        ds = im.create_dataset('default',
                               'default',
                               "dataset100",
                               storage_type="gigantum_object_v1",
                               description="100")
        m = Manifest(ds, 'default')
        iom = IOManager(ds, m)

        helper_append_file(m.cache_mgr.cache_root, m.dataset_revision,
                           "test1.txt", "asdfadfsdf")
        m.sweep_all_changes()

        obj_to_push = iom.objects_to_push()
        assert len(obj_to_push) == 1
        _, obj_id_1 = obj_to_push[0].object_path.rsplit('/', 1)
        obj1_target = obj_to_push[0].object_path

        obj1_source = os.path.join('/tmp', uuid.uuid4().hex)

        assert os.path.exists(obj1_target) is True
        helper_compress_file(obj1_target, obj1_source)
        assert os.path.isfile(obj1_target) is False
        assert os.path.isfile(obj1_source) is True

        # Clear out from linked dir
        os.remove(
            os.path.join(m.cache_mgr.cache_root, m.dataset_revision,
                         'test1.txt'))

        with patch.object(Configuration, 'find_default_config',
                          lambda self: mock_config_file_background_tests[0]):
            with patch.object(Dispatcher, 'dispatch_task', dispatch_mock):
                with patch.object(Dispatcher, 'query_task',
                                  dispatch_query_mock):
                    dl_kwargs = {
                        'logged_in_username': "******",
                        'access_token': "asdf",
                        'id_token': "1234",
                        'dataset_owner': "default",
                        'dataset_name': "dataset100",
                        'labbook_owner': None,
                        'labbook_name': None,
                        'keys': ["test1.txt"],
                        'config_file': mock_config_file_background_tests[0]
                    }

                    gtmcore.dispatcher.dataset_jobs.download_dataset_files(
                        **dl_kwargs)
                    assert os.path.isfile(obj1_target) is True

                    decompressor = snappy.StreamDecompressor()
                    with open(obj1_source, 'rb') as dd:
                        source1 = decompressor.decompress(dd.read())
                        source1 += decompressor.flush()
                    with open(obj1_target, 'rt') as dd:
                        dest1 = dd.read()
                    assert source1.decode("utf-8") == dest1
Example #30
 def __init__(self, fp):
     self._comp = snappy.StreamDecompressor()
     self._fp = fp
     self._done = False
     self._pos = 0
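This last example only shows the constructor of a decompressing file wrapper; a sketch of the read loop such a wrapper typically builds on top (function name and chunk size are illustrative, not from the original source):

import snappy

def read_all(fp):
    # Drain a framed snappy stream from a raw binary file object
    decompressor = snappy.StreamDecompressor()
    out = b""
    while True:
        chunk = fp.read(65536)
        if not chunk:
            decompressor.flush()  # raises UncompressError on a truncated stream
            return out
        out += decompressor.decompress(chunk)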