Exemplo n.º 1
0
class TestAccountClient(BaseTestCase):
    """Functional checks of container listing through the account client."""

    def setUp(self):
        """Create a uniquely named account and two containers in it."""
        super(TestAccountClient, self).setUp()
        # The timestamp suffix makes the account name differ between runs.
        self.account_id = "test_account_%f" % time.time()

        self.account_client = AccountClient(self.conf)
        self.container_client = ContainerClient(self.conf)

        self.account_client.account_create(self.account_id)
        self.container_client.container_create(acct=self.account_id,
                                               ref="container1")
        self.container_client.container_create(acct=self.account_id,
                                               ref="container2")
        time.sleep(0.5)  # ensure container events have been processed

    def test_containers_list(self):
        """Check the full listing, then paging with `limit` and `marker`.

        The reported container count is expected to stay at 2 whatever
        the `limit`/`marker` values are; only the listing itself is paged.
        """
        # `assertEqual` is used everywhere: `assertEquals` is a deprecated
        # alias of the same method.
        resp = self.account_client.containers_list(self.account_id)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"],
                         [["container1", 0, 0, 0], ["container2", 0, 0, 0]])

        resp = self.account_client.containers_list(self.account_id, limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0]])

        # The marker is exclusive: listing resumes after "container1".
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container1",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container2", 0, 0, 0]])

        # Nothing is left after the last container.
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container2",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [])
Exemplo n.º 2
0
class TestFilters(BaseTestCase):
    """Functional tests of the namespace "slave" and "worm" restrictions.

    Each test is skipped unless the matching environment variable
    (``SLAVE`` or ``WORM``) is set.
    """

    def setUp(self):
        """Create the test container while `gen_headers` is patched."""
        # NOTE(review): `gen_headers_mock` presumably injects admin-mode
        # headers so that setup works on a restricted namespace -- confirm.
        with mock.patch('oio.container.client.gen_headers',
                        gen_headers_mock):
            super(TestFilters, self).setUp()
            self.account = self.conf['account']
            self.namespace = self.conf['namespace']
            self.chunk_size = self.conf['chunk_size']
            self.gridconf = {'namespace': self.namespace}
            self.content_factory = ContentFactory(self.gridconf)
            self.container_name = 'TestFilter%f' % time.time()
            self.blob_client = BlobClient()
            self.container_client = ContainerClient(self.gridconf)
            self.container_client.container_create(acct=self.account,
                                                   ref=self.container_name)
            self.container_id = cid_from_name(self.account,
                                              self.container_name).upper()
            self.stgpol = "SINGLE"

    def _new_content(self, data, path):
        """Upload `data` at `path` and return the content reloaded by ID."""
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), self.stgpol)
        old_content.create(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_slave_and_admin(self):
        """Creation must fail with 'NS slave!' unless headers are patched."""
        if not os.getenv("SLAVE"):
            self.skipTest("must be in slave mode")
        data = random_data(10)
        path = 'test_slave'
        # Plain request: the creation must be refused. `assertRaises`
        # fails the test by itself when no ClientException is raised.
        with self.assertRaises(ClientException) as ctx:
            self._new_content(data, path)
        self.assertIn('NS slave!', str(ctx.exception))
        # With the patched headers, creation and deletion must succeed.
        with mock.patch('oio.container.client.gen_headers', gen_headers_mock):
            content = self._new_content(data, path)
            content.delete()

    def test_worm_and_admin(self):
        """Deletion must fail with 'NS wormed!' unless headers are patched."""
        if not os.getenv("WORM"):
            self.skipTest("must be in worm mode")
        data = random_data(10)
        path = 'test_worm'
        content = self._new_content(data, path)
        # Plain request: the deletion must be refused.
        with self.assertRaises(ClientException) as ctx:
            content.delete()
        self.assertIn('NS wormed!', str(ctx.exception))
        # The refused deletion must have left the data untouched.
        downloaded_data = ''.join(content.fetch())
        self.assertEqual(downloaded_data, data)
        # Cleanup: deletion is expected to work with the patched headers.
        with mock.patch('oio.container.client.gen_headers', gen_headers_mock):
            content.delete()
Exemplo n.º 3
0
class TestFilters(BaseTestCase):
    """Functional tests of the namespace "slave" and "worm" restrictions.

    Each test is skipped unless the matching environment variable
    (``SLAVE`` or ``WORM``) is set.
    """

    def setUp(self):
        """Create the test container while `gen_headers` is patched."""
        # NOTE(review): `gen_headers_mock` presumably injects admin-mode
        # headers so that setup works on a restricted namespace -- confirm.
        with mock.patch('oio.container.client.gen_headers', gen_headers_mock):
            super(TestFilters, self).setUp()
            self.account = self.conf['account']
            self.namespace = self.conf['namespace']
            self.chunk_size = self.conf['chunk_size']
            self.gridconf = {'namespace': self.namespace}
            self.content_factory = ContentFactory(self.gridconf)
            self.container_name = 'TestFilter%f' % time.time()
            self.blob_client = BlobClient()
            self.container_client = ContainerClient(self.gridconf)
            self.container_client.container_create(acct=self.account,
                                                   ref=self.container_name)
            self.container_id = cid_from_name(self.account,
                                              self.container_name).upper()
            self.stgpol = "SINGLE"

    def _new_content(self, data, path):
        """Upload `data` at `path` and return the content reloaded by ID."""
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), self.stgpol)
        old_content.create(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_slave_and_admin(self):
        """Creation must fail with 'NS slave!' unless headers are patched."""
        if not os.getenv("SLAVE"):
            self.skipTest("must be in slave mode")
        data = random_data(10)
        path = 'test_slave'
        # Plain request: the creation must be refused. `assertRaises`
        # fails the test by itself when no ClientException is raised.
        with self.assertRaises(ClientException) as ctx:
            self._new_content(data, path)
        self.assertIn('NS slave!', str(ctx.exception))
        # With the patched headers, creation and deletion must succeed.
        with mock.patch('oio.container.client.gen_headers', gen_headers_mock):
            content = self._new_content(data, path)
            content.delete()

    def test_worm_and_admin(self):
        """Deletion must fail with 'NS wormed!' unless headers are patched."""
        if not os.getenv("WORM"):
            self.skipTest("must be in worm mode")
        data = random_data(10)
        path = 'test_worm'
        content = self._new_content(data, path)
        # Plain request: the deletion must be refused.
        with self.assertRaises(ClientException) as ctx:
            content.delete()
        self.assertIn('NS wormed!', str(ctx.exception))
        # The refused deletion must have left the data untouched.
        downloaded_data = ''.join(content.fetch())
        self.assertEqual(downloaded_data, data)
        # Cleanup: deletion is expected to work with the patched headers.
        with mock.patch('oio.container.client.gen_headers', gen_headers_mock):
            content.delete()
Exemplo n.º 4
0
class TestAccountClient(BaseTestCase):
    """Functional checks of container listing through the account client."""

    def setUp(self):
        """Create a uniquely named account and two containers in it."""
        super(TestAccountClient, self).setUp()
        # The timestamp suffix makes the account name differ between runs.
        self.account_id = "test_account_%f" % time.time()

        self.account_client = AccountClient(self.conf)
        self.container_client = ContainerClient(self.conf)

        # Account creation is attempted up to `retry + 1` times, waiting
        # 2 seconds after each ClientException; the last failure is
        # re-raised so that the test errors out.
        retry = 3
        for i in range(retry + 1):
            try:
                self.account_client.account_create(self.account_id)
                break
            except ClientException:
                if i < retry:
                    time.sleep(2)
                else:
                    raise
        self.container_client.container_create(acct=self.account_id,
                                               ref="container1")
        self.container_client.container_create(acct=self.account_id,
                                               ref="container2")
        time.sleep(.5)  # ensure container events have been processed

    def test_containers_list(self):
        """Check the full listing, then paging with `limit` and `marker`.

        The reported container count is expected to stay at 2 whatever
        the `limit`/`marker` values are; only the listing itself is paged.
        """
        # `assertEqual` is used everywhere: `assertEquals` is a deprecated
        # alias of the same method.
        resp = self.account_client.containers_list(self.account_id)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [
            ["container1", 0, 0, 0],
            ["container2", 0, 0, 0]
        ])

        resp = self.account_client.containers_list(self.account_id, limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [
            ["container1", 0, 0, 0]
        ])

        # The marker is exclusive: listing resumes after "container1".
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container1",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [
            ["container2", 0, 0, 0]
        ])

        # Nothing is left after the last container.
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container2",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [])
Exemplo n.º 5
0
class TestAccountClient(BaseTestCase):
    """Functional checks of container listing through the account client."""

    def setUp(self):
        """Create a uniquely named account and two containers in it."""
        super(TestAccountClient, self).setUp()
        # The timestamp suffix makes the account name differ between runs.
        self.account_id = "test_account_%f" % time.time()

        self.account_client = AccountClient(self.conf)
        self.container_client = ContainerClient(self.conf)

        # Account creation is attempted up to `retry + 1` times, waiting
        # 2 seconds after each ClientException; the last failure is
        # re-raised so that the test errors out.
        retry = 3
        for i in range(retry + 1):
            try:
                self.account_client.account_create(self.account_id)
                break
            except ClientException:
                if i < retry:
                    time.sleep(2)
                else:
                    raise
        self.container_client.container_create(account=self.account_id,
                                               reference="container1")
        self.container_client.container_create(account=self.account_id,
                                               reference="container2")
        time.sleep(.5)  # ensure container events have been processed

    def test_container_list(self):
        """Check the full listing, then paging with `limit` and `marker`.

        The reported container count is expected to stay at 2 whatever
        the `limit`/`marker` values are; only the listing itself is paged.
        """
        # `assertEqual` is used everywhere: `assertEquals` is a deprecated
        # alias of the same method.
        resp = self.account_client.container_list(self.account_id)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"],
                         [["container1", 0, 0, 0], ["container2", 0, 0, 0]])

        resp = self.account_client.container_list(self.account_id, limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0]])

        # The marker is exclusive: listing resumes after "container1".
        resp = self.account_client.container_list(self.account_id,
                                                  marker="container1",
                                                  limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container2", 0, 0, 0]])

        # Nothing is left after the last container.
        resp = self.account_client.container_list(self.account_id,
                                                  marker="container2",
                                                  limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [])
Exemplo n.º 6
0
class TestAccountClient(BaseTestCase):
    """Functional checks of container listing through the account client."""

    def setUp(self):
        """Create a uniquely named account and two containers in it."""
        super(TestAccountClient, self).setUp()
        # The timestamp suffix makes the account name differ between runs.
        self.account_id = "test_account_%f" % time.time()

        self.account_client = AccountClient(self.conf)
        self.container_client = ContainerClient(self.conf)

        self.account_client.account_create(self.account_id)
        self.container_client.container_create(acct=self.account_id,
                                               ref="container1")
        self.container_client.container_create(acct=self.account_id,
                                               ref="container2")
        time.sleep(.5)  # ensure container events have been processed

    def test_containers_list(self):
        """Check the full listing, then paging with `limit` and `marker`.

        The reported container count is expected to stay at 2 whatever
        the `limit`/`marker` values are; only the listing itself is paged.
        """
        # `assertEqual` is used everywhere: `assertEquals` is a deprecated
        # alias of the same method.
        resp = self.account_client.containers_list(self.account_id)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"],
                         [["container1", 0, 0, 0], ["container2", 0, 0, 0]])

        resp = self.account_client.containers_list(self.account_id, limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0]])

        # The marker is exclusive: listing resumes after "container1".
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container1",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container2", 0, 0, 0]])

        # Nothing is left after the last container.
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container2",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [])
Exemplo n.º 7
0
class TestECContent(BaseTestCase):
    """Functional tests for contents stored with the "EC" storage policy.

    Covers content creation, chunk rebuild and fetch (including fetch
    after some chunks have been deleted).
    """

    def setUp(self):
        """Create a fresh container; skip when fewer than 12 rawx exist."""
        super(TestECContent, self).setUp()

        if len(self.conf['services']['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestECContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = random_str(64)
        self.stgpol = "EC"
        self.size = 1024*1024 + 320
        # random_chunks() draws chunk indexes from range(k + m).
        self.k = 6
        self.m = 3

    def tearDown(self):
        """Delegate to the base class teardown."""
        super(TestECContent, self).tearDown()

    def random_chunks(self, nb):
        """Return `nb` distinct random positions formatted as "0.<index>".

        Indexes are sampled without replacement from ``range(k + m)``.
        """
        positions = random.sample(range(self.k + self.m), nb)
        return ["0.%s" % i for i in positions]

    def _test_create(self, data_size):
        """Upload `data_size` random bytes and verify stored metadata.

        Checks the content metadata returned by the container client,
        then the metadata of each chunk as returned by the blob client.
        """
        # generate random test data
        data = random_data(data_size)
        # using factory create new EC content
        content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        # verify the factory gave us an ECContent
        self.assertEqual(type(content), ECContent)

        # perform the content creation
        content.create(StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        # verify metadata
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], self.stgpol)
        self.assertEqual(meta['name'], self.content)

        # an empty content is still checked as having one metachunk
        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
            if len(data) != 0 else 1

        # verify each metachunk
        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)

            for chunk in chunks_at_pos:
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(chunk_meta['metachunk_size'],
                                 str(chunk.size))
                self.assertEqual(chunk_meta['metachunk_hash'],
                                 chunk.checksum)
                self.assertEqual(chunk_meta['content_path'], self.content)
                self.assertEqual(chunk_meta['container_id'],
                                 self.container_id)
                # Compare against the uploaded content's ID (the previous
                # assertion compared the chunk metadata with itself and
                # could never fail).
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], chunk.pos)
                self.assertEqual(chunk_meta['chunk_hash'],
                                 md5_stream(stream))

    def test_create_0_byte(self):
        """Create an empty content."""
        self._test_create(0)

    def test_create_1_byte(self):
        """Create a one-byte content."""
        self._test_create(1)

    def test_create(self):
        """Create a content of DAT_LEGIT_SIZE bytes."""
        self._test_create(DAT_LEGIT_SIZE)

    def _test_rebuild(self, data_size, broken_pos_list):
        """Delete then rebuild the chunks at `broken_pos_list`.

        After the rebuilds, each rebuilt chunk must have the same data
        hash, checksum and metadata (chunk ID aside) as before, but must
        be stored at a different URL.
        """
        # generate test data
        data = os.urandom(data_size)
        # create initial content
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        # verify factory work as intended
        self.assertEqual(type(old_content), ECContent)

        # perform initial content creation
        old_content.create(StringIO(data))

        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        # break the content
        # NOTE(review): each chunk is rebuilt right after its deletion, so
        # at most one chunk is missing at any time -- confirm this is what
        # the multi-position (and "unrecoverable") tests intend.
        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.checksum
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

            # rebuild the broken chunks
            uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        # sanity check
        self.assertEqual(type(rebuilt_content), ECContent)

        # verify rebuild result
        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.checksum, old_info[pos]["hash"])
            self.assertNotEqual(c.url, old_info[pos]["url"])
            # chunk IDs are excluded from the metadata comparison
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild(self):
        """Rebuild one chunk of an empty content."""
        self._test_rebuild(0, self.random_chunks(1))

    def test_content_0_byte_rebuild_advanced(self):
        """Rebuild three chunks of an empty content."""
        self._test_rebuild(0, self.random_chunks(3))

    def test_content_1_byte_rebuild(self):
        """Rebuild one chunk of a one-byte content."""
        self._test_rebuild(1, self.random_chunks(1))

    def test_content_1_byte_rebuild_advanced(self):
        """Rebuild three chunks of a one-byte content."""
        self._test_rebuild(1, self.random_chunks(3))

    def test_content_rebuild(self):
        """Rebuild one chunk of a DAT_LEGIT_SIZE content."""
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1))

    def test_content_rebuild_advanced(self):
        """Rebuild three chunks of a DAT_LEGIT_SIZE content."""
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_content_rebuild_unrecoverable(self):
        """Expect UnrecoverableContent when four positions are broken."""
        self.assertRaises(
            UnrecoverableContent, self._test_rebuild, DAT_LEGIT_SIZE,
            self.random_chunks(4))

    def _new_content(self, data, broken_pos_list=None):
        """Upload `data`, delete chunks at `broken_pos_list`, reload it.

        :param data: payload to upload
        :param broken_pos_list: positions of the chunks to delete after
            the upload (none when omitted)
        :returns: the content as reloaded from the factory by ID
        """
        # `None` replaces the former mutable `[]` default.
        broken_pos_list = broken_pos_list or []
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), self.stgpol)
        self.assertEqual(type(old_content), ECContent)

        old_content.create(StringIO(data))

        # break content
        for pos in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        """Rebuilding an unknown chunk ID must raise OrphanChunk."""
        content = self._new_content(random_data(10))
        self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid")

    def _test_fetch(self, data_size, broken_pos_list=None):
        """Fetch a content (optionally broken) and compare it to the input.

        :param data_size: number of random bytes to upload
        :param broken_pos_list: positions of the chunks deleted before
            fetching (none when omitted)
        """
        broken_pos_list = broken_pos_list or []
        test_data = random_data(data_size)
        content = self._new_content(test_data, broken_pos_list)

        data = "".join(content.fetch())

        self.assertEqual(len(data), len(test_data))
        self.assertEqual(md5_data(data), md5_data(test_data))

        # verify that chunks are broken
        for pos in broken_pos_list:
            chunk = content.chunks.filter(pos=pos)[0]
            self.assertRaises(
                NotFound, self.blob_client.chunk_delete, chunk.url)

    def test_fetch_content_0_byte(self):
        """Fetch an empty content."""
        self._test_fetch(0)

    def test_fetch_content_1_byte(self):
        """Fetch a one-byte content."""
        self._test_fetch(1)

    def test_fetch_content(self):
        """Fetch a DAT_LEGIT_SIZE content."""
        self._test_fetch(DAT_LEGIT_SIZE)

    def test_fetch_content_0_byte_broken(self):
        """Fetch an empty content with three chunks deleted."""
        self._test_fetch(0, self.random_chunks(3))

    def test_fetch_content_1_byte_broken(self):
        """Fetch a one-byte content with three chunks deleted."""
        self._test_fetch(1, self.random_chunks(3))

    def test_fetch_content_broken(self):
        """Fetch a DAT_LEGIT_SIZE content with three chunks deleted."""
        self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_fetch_content_unrecoverable(self):
        """Expect OioException when four chunks have been deleted."""
        broken_chunks = self.random_chunks(4)
        self.assertRaises(
            OioException, self._test_fetch, DAT_LEGIT_SIZE, broken_chunks)
Exemplo n.º 8
0
class TestStorageTierer(BaseTestCase):
    """Functional tests of StorageTiererWorker container/content listing."""

    def setUp(self):
        """Build the worker configuration and populate a test account."""
        super(TestStorageTierer, self).setUp()
        self.namespace = self.conf['namespace']
        # The timestamp suffix makes the account name differ between runs.
        self.test_account = "test_storage_tiering_%f" % time.time()
        self.gridconf = {
            "namespace": self.namespace,
            "container_fetch_limit": 2,
            "content_fetch_limit": 2,
            "account": self.test_account,
            "outdated_threshold": 0,
            "new_policy": "RAIN"
        }
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self._populate()

    def _populate(self):
        """Create three containers holding 0, 1 and 2 contents."""
        self.container_0_name = "container_empty"
        self.container_0_id = cid_from_name(self.test_account,
                                            self.container_0_name)
        self.container_client.container_create(acct=self.test_account,
                                               ref=self.container_0_name)

        self.container_1_name = "container_with_1_content"
        self.container_1_id = cid_from_name(self.test_account,
                                            self.container_1_name)
        self.container_client.container_create(acct=self.test_account,
                                               ref=self.container_1_name)
        self.container_1_content_0_name = "container_1_content_0"
        self.container_1_content_0 = self._new_content(
            self.container_1_id, self.container_1_content_0_name, "SINGLE")

        self.container_2_name = "container_with_2_contents"
        self.container_2_id = cid_from_name(self.test_account,
                                            self.container_2_name)
        self.container_client.container_create(acct=self.test_account,
                                               ref=self.container_2_name)
        self.container_2_content_0_name = "container_2_content_0"
        self.container_2_content_0 = self._new_content(
            self.container_2_id, self.container_2_content_0_name, "SINGLE")
        self.container_2_content_1_name = "container_2_content_1"
        self.container_2_content_1 = self._new_content(
            self.container_2_id, self.container_2_content_1_name, "TWOCOPIES")

    def _new_content(self, container_id, content_name, stgpol):
        """Upload 10 random bytes as `content_name` and return the content."""
        data = random_data(10)
        content = self.content_factory.new(container_id, content_name,
                                           len(data), stgpol)

        content.upload(StringIO.StringIO(data))
        return content

    def tearDown(self):
        """Delegate to the base class teardown."""
        super(TestStorageTierer, self).tearDown()

    def test_iter_container_list(self):
        """All three containers are listed, in name order."""
        worker = StorageTiererWorker(self.gridconf, Mock())
        gen = worker._list_containers()
        # The built-in next() is used instead of the Python 2-only
        # `.next()` method.
        self.assertEqual(next(gen), self.container_0_name)
        self.assertEqual(next(gen), self.container_1_name)
        self.assertEqual(next(gen), self.container_2_name)
        self.assertRaises(StopIteration, next, gen)

    def test_iter_content_list_outdated_threshold_0(self):
        """With a threshold of 0, every content is listed."""
        self.gridconf["outdated_threshold"] = 0
        worker = StorageTiererWorker(self.gridconf, Mock())
        gen = worker._list_contents()
        self.assertEqual(
            next(gen),
            (self.container_1_id, self.container_1_content_0.content_id))
        self.assertEqual(
            next(gen),
            (self.container_2_id, self.container_2_content_0.content_id))
        self.assertEqual(
            next(gen),
            (self.container_2_id, self.container_2_content_1.content_id))
        self.assertRaises(StopIteration, next, gen)

    def test_iter_content_list_outdated_threshold_9999999999(self):
        """With a huge threshold, no content is listed."""
        self.gridconf["outdated_threshold"] = 9999999999
        worker = StorageTiererWorker(self.gridconf, Mock())
        gen = worker._list_contents()
        self.assertRaises(StopIteration, next, gen)

    def test_iter_content_list_outdated_threshold_2(self):
        """Only the contents created before the threshold are listed."""
        # add a new content created after the three previous contents
        now = int(time.time())
        time.sleep(2)
        self._new_content(self.container_2_id, "titi", "TWOCOPIES")

        self.gridconf["outdated_threshold"] = 2
        worker = StorageTiererWorker(self.gridconf, Mock())
        # The iterator is consumed while time.time() is still patched.
        # If `_list_contents` is a lazy generator (it is driven through
        # next()), its body only runs on next(); consuming it after the
        # `with` block would run it against the real clock.
        with mock.patch('oio.crawler.storage_tierer.time.time',
                        mock.MagicMock(return_value=now + 2)):
            gen = worker._list_contents()
            self.assertEqual(
                next(gen),
                (self.container_1_id, self.container_1_content_0.content_id))
            self.assertEqual(
                next(gen),
                (self.container_2_id, self.container_2_content_0.content_id))
            self.assertEqual(
                next(gen),
                (self.container_2_id, self.container_2_content_1.content_id))
            self.assertRaises(StopIteration, next, gen)

    def test_iter_content_list_skip_good_policy(self):
        """Contents already stored with `new_policy` are not listed."""
        self.gridconf["new_policy"] = "SINGLE"
        worker = StorageTiererWorker(self.gridconf, Mock())
        gen = worker._list_contents()
        self.assertEqual(
            next(gen),
            (self.container_2_id, self.container_2_content_1.content_id))
        self.assertRaises(StopIteration, next, gen)
Exemplo n.º 9
0
class TestECContent(BaseTestCase):
    """Functional tests for contents stored with the "EC" storage policy.

    Covers content creation, chunk rebuild and fetch (including fetch
    after some chunks have been deleted).
    """

    def setUp(self):
        """Create a fresh container; skip when fewer than 12 rawx exist."""
        super(TestECContent, self).setUp()

        if len(self.conf['services']['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient(self.conf)
        self.container_name = "TestECContent%f" % time.time()
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = "%s-%s" % (self.__class__.__name__, random_str(4))
        self.stgpol = "EC"
        self.size = 1024 * 1024 + 320
        # random_chunks() draws chunk indexes from range(k + m).
        self.k = 6
        self.m = 3

    def tearDown(self):
        """Delegate to the base class teardown."""
        super(TestECContent, self).tearDown()

    def random_chunks(self, nb):
        """Return `nb` distinct random positions formatted as "0.<index>".

        Indexes are sampled without replacement from ``range(k + m)``.
        """
        # `range` works on both Python 2 and 3, unlike `xrange`.
        pos = random.sample(range(self.k + self.m), nb)
        return ["0.%s" % i for i in pos]

    def _test_create(self, data_size):
        """Upload `data_size` random bytes and verify stored metadata.

        Checks the content metadata returned by the container client,
        then, for each metachunk, the metadata of each chunk as returned
        by the blob client and the hash of the matching slice of input.
        """
        # generate random test data
        data = random_data(data_size)
        # using factory create new EC content
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), self.stgpol)
        # verify the factory gave us an ECContent
        self.assertEqual(type(content), ECContent)

        # perform the content creation
        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        # verify metadata
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], self.stgpol)
        self.assertEqual(meta['name'], self.content)

        # an empty content is still checked as having one metachunk
        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size)) \
            if len(data) != 0 else 1

        offset = 0
        # verify each metachunk
        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            if len(chunks_at_pos) < 1:
                break
            metachunk_size = chunks_at_pos[0].size
            metachunk_hash = md5_data(data[offset:offset + metachunk_size])

            for chunk in chunks_at_pos:
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(chunk_meta['metachunk_size'],
                                 str(chunk.size))
                self.assertEqual(chunk_meta['metachunk_hash'],
                                 chunk.checksum)
                self.assertEqual(chunk_meta['content_path'], self.content)
                self.assertEqual(chunk_meta['container_id'],
                                 self.container_id)
                # Compare against the uploaded content's ID (the previous
                # assertion compared the chunk metadata with itself and
                # could never fail).
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], chunk.pos)
                self.assertEqual(chunk_meta['chunk_hash'],
                                 md5_stream(stream))
                full_path = encode_fullpath(self.account, self.container_name,
                                            self.content,
                                            chunk_meta['content_version'],
                                            chunk_meta['content_id'])
                self.assertEqual(chunk_meta['full_path'], full_path)
                self.assertEqual(chunk_meta['oio_version'], '4.2')
                self.assertEqual(metachunk_hash, chunk.checksum)

            offset += metachunk_size

    def test_create_0_byte(self):
        """Create an empty content."""
        self._test_create(0)

    def test_create_1_byte(self):
        """Create a one-byte content."""
        self._test_create(1)

    def test_create(self):
        """Create a content of DAT_LEGIT_SIZE bytes."""
        self._test_create(DAT_LEGIT_SIZE)

    def test_create_6294503_bytes(self):
        """Create a content of 6294503 bytes."""
        self._test_create(6294503)

    def _test_rebuild(self, data_size, broken_pos_list):
        """Delete then rebuild the chunks at `broken_pos_list`.

        After the rebuilds, each rebuilt chunk must have the same data
        hash, checksum and metadata (chunk ID and mtime aside) as before,
        a modification time not older than the original one, and must be
        stored at a different URL.
        """
        # generate test data
        data = os.urandom(data_size)
        # create initial content
        old_content = self.content_factory.new(self.container_id, self.content,
                                               len(data), self.stgpol)
        # verify factory work as intended
        self.assertEqual(type(old_content), ECContent)

        # perform initial content creation
        old_content.create(BytesIO(data))

        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        # break the content
        # NOTE(review): each chunk is rebuilt right after its deletion, so
        # at most one chunk is missing at any time -- confirm this is what
        # the multi-position (and "unrecoverable") tests intend.
        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.checksum
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

            # rebuild the broken chunks
            uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        # sanity check
        self.assertEqual(type(rebuilt_content), ECContent)

        # verify rebuild result
        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.checksum, old_info[pos]["hash"])
            self.assertNotEqual(c.url, old_info[pos]["url"])
            self.assertGreaterEqual(rebuilt_meta['chunk_mtime'],
                                    old_info[pos]['dl_meta']['chunk_mtime'])
            # mtimes and chunk IDs are excluded from the metadata comparison
            del old_info[pos]["dl_meta"]["chunk_mtime"]
            del rebuilt_meta["chunk_mtime"]
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild(self):
        """Rebuild one chunk of an empty content."""
        self._test_rebuild(0, self.random_chunks(1))

    def test_content_0_byte_rebuild_advanced(self):
        """Rebuild three chunks of an empty content."""
        self._test_rebuild(0, self.random_chunks(3))

    def test_content_1_byte_rebuild(self):
        """Rebuild one chunk of a one-byte content."""
        self._test_rebuild(1, self.random_chunks(1))

    def test_content_1_byte_rebuild_advanced(self):
        """Rebuild three chunks of a one-byte content."""
        self._test_rebuild(1, self.random_chunks(3))

    def test_content_rebuild(self):
        """Rebuild one chunk of a DAT_LEGIT_SIZE content."""
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(1))

    def test_content_rebuild_advanced(self):
        """Rebuild three chunks of a DAT_LEGIT_SIZE content."""
        self._test_rebuild(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_content_rebuild_unrecoverable(self):
        """Expect UnrecoverableContent when four positions are broken."""
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          DAT_LEGIT_SIZE, self.random_chunks(4))

    def _new_content(self, data, broken_pos_list=None):
        """Upload `data`, delete chunks at `broken_pos_list`, reload it.

        :param data: payload to upload
        :param broken_pos_list: positions of the chunks to delete after
            the upload (none when omitted)
        :returns: the content as reloaded from the factory by ID
        """
        # `None` replaces the former mutable `[]` default.
        broken_pos_list = broken_pos_list or []
        old_content = self.content_factory.new(self.container_id, self.content,
                                               len(data), self.stgpol)
        self.assertEqual(type(old_content), ECContent)

        old_content.create(BytesIO(data))

        # break content
        for pos in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        """Rebuilding an unknown chunk ID must raise OrphanChunk."""
        content = self._new_content(random_data(10))
        self.assertRaises(OrphanChunk, content.rebuild_chunk, "invalid")

    def _test_fetch(self, data_size, broken_pos_list=None):
        """Fetch a content (optionally broken) and compare it to the input.

        :param data_size: number of random bytes to upload
        :param broken_pos_list: positions of the chunks deleted before
            fetching (none when omitted)
        """
        broken_pos_list = broken_pos_list or []
        test_data = random_data(data_size)
        content = self._new_content(test_data, broken_pos_list)

        data = b''.join(content.fetch())

        self.assertEqual(len(data), len(test_data))
        self.assertEqual(md5_data(data), md5_data(test_data))

        # verify that chunks are broken
        for pos in broken_pos_list:
            chunk = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete,
                              chunk.url)

    def test_fetch_content_0_byte(self):
        """Fetch an empty content."""
        self._test_fetch(0)

    def test_fetch_content_1_byte(self):
        """Fetch a one-byte content."""
        self._test_fetch(1)

    def test_fetch_content(self):
        """Fetch a DAT_LEGIT_SIZE content."""
        self._test_fetch(DAT_LEGIT_SIZE)

    def test_fetch_content_0_byte_broken(self):
        """Fetch an empty content with three chunks deleted."""
        self._test_fetch(0, self.random_chunks(3))

    def test_fetch_content_1_byte_broken(self):
        """Fetch a one-byte content with three chunks deleted."""
        self._test_fetch(1, self.random_chunks(3))

    def test_fetch_content_broken(self):
        """Fetch a DAT_LEGIT_SIZE content with three chunks deleted."""
        self._test_fetch(DAT_LEGIT_SIZE, self.random_chunks(3))

    def test_fetch_content_unrecoverable(self):
        """Expect OioException when four chunks have been deleted."""
        broken_chunks = self.random_chunks(4)
        self.assertRaises(OioException, self._test_fetch, DAT_LEGIT_SIZE,
                          broken_chunks)
Exemplo n.º 10
0
class ObjectStorageApi(object):
    """
    The Object Storage API.

    High level API that wraps `AccountClient`, `ContainerClient` and
    `DirectoryClient` classes.

    Every method that takes a `kwargs` argument accepts at least
    the following keywords:

        - `headers`: `dict` of extra headers to pass to the proxy
        - `connection_timeout`: `float`
        - `read_timeout`: `float`
        - `write_timeout`: `float`
    """
    TIMEOUT_KEYS = ('connection_timeout', 'read_timeout', 'write_timeout')

    def __init__(self, namespace, logger=None, **kwargs):
        """
        Build the directory, container and account clients of a namespace.

        :param namespace: name of the namespace to interact with
        :type namespace: `str`
        :param logger: logger to use; when not set, one is obtained
            from `get_logger`

        :keyword connection_timeout: connection timeout towards rawx services
        :type connection_timeout: `float` seconds
        :keyword read_timeout: timeout for rawx responses and data reads from
            the caller (when uploading)
        :type read_timeout: `float` seconds
        :keyword write_timeout: timeout for rawx write requests
        :type write_timeout: `float` seconds
        :keyword pool_manager: a pooled connection manager that will be used
            for all HTTP based APIs (except rawx)
        :type pool_manager: `urllib3.PoolManager`
        :keyword endpoint: passed as is to the directory and container
            clients, and as `proxy_endpoint` to the account client
        """
        self.namespace = namespace
        conf = {"namespace": self.namespace}
        self.logger = logger or get_logger(conf)

        # Remember only the timeout settings, converted with float_value()
        timeouts = {}
        for key, value in kwargs.items():
            if key in self.__class__.TIMEOUT_KEYS:
                timeouts[key] = float_value(value, None)
        self.timeouts = timeouts

        from oio.account.client import AccountClient
        from oio.container.client import ContainerClient
        from oio.directory.client import DirectoryClient
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        self.container = ContainerClient(conf, logger=self.logger, **kwargs)

        # In AccountClient, "endpoint" is the account service, not the proxy
        acct_kwargs = dict(kwargs)
        proxy_endpoint = acct_kwargs.pop("endpoint", None)
        acct_kwargs["proxy_endpoint"] = proxy_endpoint
        self.account = AccountClient(conf, logger=self.logger, **acct_kwargs)

    def _patch_timeouts(self, kwargs):
        """
        Insert timeout settings from this class's constructor into `kwargs`,
        if they are not already there.
        """
        for tok, tov in self.timeouts.items():
            if tok not in kwargs:
                kwargs[tok] = tov

    def account_create(self, account, **kwargs):
        """
        Create an account.

        :param account: name of the account to create
        :type account: `str`
        :returns: `True` if the account has been created
        """
        return self.account.account_create(account, **kwargs)

    @handle_account_not_found
    def account_delete(self, account, **kwargs):
        """
        Ask the account client to delete an account. Nothing is returned.

        :param account: name of the account to delete
        :type account: `str`
        """
        account_client = self.account
        account_client.account_delete(account, **kwargs)

    @handle_account_not_found
    def account_show(self, account, **kwargs):
        """
        Fetch the description of an account from the account client.

        :param account: name of the account to describe
        :returns: whatever the account client returns for this account
        """
        description = self.account.account_show(account, **kwargs)
        return description

    def account_list(self, **kwargs):
        """
        List known accounts.

        Notice that account creation is asynchronous, and an autocreated
        account may appear in the listing only after several seconds.
        """
        return self.account.account_list(**kwargs)

    @handle_account_not_found
    def account_update(self, account, metadata, to_delete=None, **kwargs):
        """
        :deprecated: use `account_set_properties`

        Forward `metadata` and `to_delete` to the account client's
        `account_update`, after emitting a `DeprecationWarning`.
        """
        deprecation_msg = "You'd better use account_set_properties()"
        warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
        account_client = self.account
        account_client.account_update(account, metadata, to_delete, **kwargs)

    @handle_account_not_found
    def account_set_properties(self, account, properties, **kwargs):
        """
        Call the account client's `account_update` with `properties`
        as second argument and `None` as third argument.

        :param account: name of the account
        :param properties: properties to pass to the account client
        """
        to_set, to_delete = properties, None
        self.account.account_update(account, to_set, to_delete, **kwargs)

    @handle_account_not_found
    def account_del_properties(self, account, properties, **kwargs):
        """
        Call the account client's `account_update` with `None`
        as second argument and `properties` as third argument.

        :param account: name of the account
        :param properties: properties to pass to the account client
        """
        to_set, to_delete = None, properties
        self.account.account_update(account, to_set, to_delete, **kwargs)

    def container_create(self, account, container, properties=None,
                         **kwargs):
        """
        Create a container.

        :param account: account in which to create the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: properties to set on the container
        :type properties: `dict`
        :returns: True if the container has been created,
                  False if it already exists
        """
        return self.container.container_create(account, container,
                                               properties=properties,
                                               **kwargs)

    @handle_container_not_found
    @ensure_headers
    @ensure_request_id
    def container_touch(self, account, container, **kwargs):
        """
        Trigger a notification about the container state.
        Nothing is returned.

        :param account: account the container belongs to
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        """
        container_client = self.container
        container_client.container_touch(account, container, **kwargs)

    def container_create_many(self, account, containers, properties=None,
                              **kwargs):
        """
        Create Many containers

        :param account: account in which to create the containers
        :type account: `str`
        :param containers: names of the containers
        :type containers: `list`
        :param properties: properties to set on the containers
        :type properties: `dict`
        """
        return self.container.container_create_many(account,
                                                    containers,
                                                    properties=properties,
                                                    **kwargs)

    @handle_container_not_found
    def container_delete(self, account, container, **kwargs):
        """
        Ask the container client to delete a container.
        Nothing is returned.

        :param account: account from which to delete the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        """
        container_client = self.container
        container_client.container_delete(account, container, **kwargs)

    @handle_account_not_found
    def container_list(self, account, limit=None, marker=None,
                       end_marker=None, prefix=None, delimiter=None,
                       **kwargs):
        """
        List the containers of an account.

        Only the "listing" entry of the account client's response
        is returned.

        :param account: account from which to get the container list
        :type account: `str`
        :keyword limit: maximum number of results to return
        :type limit: `int`
        :keyword marker: name of the container from where to start the listing
        :type marker: `str`
        :keyword end_marker:
        :keyword prefix:
        :keyword delimiter:
        :return: the list of containers of an account
        :rtype: `list` of items (`list`) with 4 fields:
            name, number of objects, number of bytes, and 1 if the item
            is a prefix or 0 if the item is actually a container
        """
        return self.account.container_list(
            account, limit=limit, marker=marker, end_marker=end_marker,
            prefix=prefix, delimiter=delimiter, **kwargs)["listing"]

    @handle_container_not_found
    def container_show(self, account, container, **kwargs):
        """
        Fetch the description of a container (user properties)
        from the container client.

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :returns: a `dict` with "properties" containing a `dict`
            of user properties.
        """
        description = self.container.container_show(
            account, container, **kwargs)
        return description

    @handle_container_not_found
    def container_snapshot(self, account, container, dst_account,
                           dst_container, batch=100, **kwargs):
        """
        Create a copy of the container (only the content of the database)

        The source container is frozen during the operation and
        `container_enable` is always called on it at the end, even
        when an error occurred.

        :param account: account in which the target is
        :type account: `str`
        :param container: name of the target
        :type container: `str`
        :param dst_account: account in which the snapshot will be.
        :type dst_account: `str`
        :param dst_container: name of the snapshot
        :type dst_container: `str`
        :param batch: a raw update of the destination container is sent
            as soon as more than this number of target beans are pending
        :type batch: `int`

        NOTE(review): `kwargs` is accepted but not forwarded to any of
        the calls made here, and the object listing is done with the
        default parameters of `object_list` -- confirm this is intended.
        """
        try:
            # Freeze the source, then snapshot its database
            self.container.container_freeze(account, container)
            self.container.container_snapshot(
                account, container, dst_account, dst_container)
            # Objects are listed from the freshly created snapshot...
            resp = self.object_list(dst_account, dst_container)
            obj_gen = resp['objects']
            target_beans = []
            copy_beans = []
            for obj in obj_gen:
                # ...but chunks are located in the source container
                data = self.object_locate(
                    account, container, obj["name"])
                chunks = [chunk['url'] for chunk in data[1]]
                copies = self._generate_copy(chunks)
                fullpath = self._generate_fullpath(
                    dst_account, dst_container, obj['name'], obj['version'])
                self._send_copy(chunks, copies, fullpath[0])
                t_beans, c_beans = self._prepare_update_meta2(
                    data[1], copies, dst_account, dst_container,
                    obj['content'])
                target_beans.extend(t_beans)
                copy_beans.extend(c_beans)
                # Flush pending beans once the batch threshold is exceeded
                if len(target_beans) > batch:
                    self.container.container_raw_update(
                        target_beans, copy_beans,
                        dst_account, dst_container,
                        frozen=True)
                    target_beans = []
                    copy_beans = []
            # Flush what remains after the last object
            if target_beans:
                self.container.container_raw_update(
                    target_beans, copy_beans,
                    dst_account, dst_container,
                    frozen=True)
        finally:
            # Re-enable the source container, whatever happened
            self.container.container_enable(account, container)

    @handle_container_not_found
    def container_get_properties(self, account, container, properties=None,
                                 **kwargs):
        """
        Fetch user and system properties of a container
        from the container client.

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: *ignored*
        :returns: a `dict` with "properties" and "system" entries,
            containing respectively a `dict` of user properties and
            a `dict` of system properties.
        """
        found = self.container.container_get_properties(
            account, container, properties=properties, **kwargs)
        return found

    @handle_container_not_found
    def container_set_properties(self, account, container, properties=None,
                                 clear=False, **kwargs):
        """
        Ask the container client to set properties on a container.

        :param account: name of the account
        :type account: `str`
        :param container: name of the container where to set properties
        :type container: `str`
        :param properties: a dictionary of properties
        :type properties: `dict`
        :param clear: passed as is to the container client
        :type clear: `bool`
        :keyword system: dictionary of system properties to set
        :returns: the result of the container client
        """
        outcome = self.container.container_set_properties(
            account, container, properties, clear=clear, **kwargs)
        return outcome

    @handle_container_not_found
    def container_del_properties(self, account, container, properties,
                                 **kwargs):
        """
        Ask the container client to delete properties of a container.

        :param account: name of the account
        :type account: `str`
        :param container: name of the container to deal with
        :type container: `str`
        :param properties: a list of property keys
        :type properties: `list`
        :returns: the result of the container client
        """
        outcome = self.container.container_del_properties(
            account, container, properties, **kwargs)
        return outcome

    def container_update(self, account, container, metadata, clear=False,
                         **kwargs):
        warnings.warn("You'd better use container_set_properties()",
                      DeprecationWarning)
        if not metadata:
            self.container_del_properties(
                account, container, [], **kwargs)
        else:
            self.container_set_properties(
                account, container, metadata, clear, **kwargs)

    @handle_container_not_found
    @ensure_headers
    @ensure_request_id
    def object_create(self, account, container, file_or_path=None, data=None,
                      etag=None, obj_name=None, mime_type=None,
                      metadata=None, policy=None, key_file=None,
                      append=False, properties=None, **kwargs):
        """
        Create an object or append data to object in *container* of *account*
        with data taken from either *data* (`str` or `generator`) or
        *file_or_path* (path to a file or file-like object).
        The object will be named after *obj_name* if specified, or after
        the base name of *file_or_path*.

        :param account: name of the account where to create the object
        :type account: `str`
        :param container: name of the container where to create the object
        :type container: `str`
        :param file_or_path: file-like object or path to a file from which
            to read object data
        :type file_or_path: `str` or file-like object
        :param data: object data (if `file_or_path` is not set)
        :type data: `str` or `generator`
        :keyword etag: entity tag of the object
        :type etag: `str`
        :keyword obj_name: name of the object to create. If not set, will use
            the base name of `file_or_path`.
        :keyword mime_type: MIME type of the object
        :type mime_type: `str`
        :keyword metadata: deprecated alias of `properties`. When both are
            set, they are merged and `metadata` entries take precedence.
        :type metadata: `dict`
        :keyword properties: a dictionary of properties
            (never modified by this method)
        :type properties: `dict`
        :keyword policy: name of the storage policy
        :type policy: `str`
        :keyword key_file:
        :param append: if set, data will be appended to the existing object
            (or the object will be created if it does not exist)
        :type append: `bool`

        :returns: `list` of chunks, size and hash of what has been uploaded
        :raises MissingData: if neither `data` nor `file_or_path` is set
        :raises FileNotFound: if `file_or_path` is a path to a file
            that does not exist
        :raises MissingName: if no object name can be determined
        """
        if (data, file_or_path) == (None, None):
            raise exc.MissingData()
        src = data if data is not None else file_or_path
        if src is file_or_path:
            if isinstance(file_or_path, basestring):
                if not os.path.exists(file_or_path):
                    raise exc.FileNotFound("File '%s' not found." %
                                           file_or_path)
                file_name = os.path.basename(file_or_path)
            else:
                try:
                    file_name = os.path.basename(file_or_path.name)
                except AttributeError:
                    # file-like object without a name
                    file_name = None
            obj_name = obj_name or file_name
        elif isgenerator(src):
            # Wrap the generator so it can be read like a file
            file_or_path = GeneratorIO(src)
            src = file_or_path
        if not obj_name:
            raise exc.MissingName(
                "No name for the object has been specified"
            )

        sysmeta = {'mime_type': mime_type,
                   'etag': etag}
        if metadata:
            warnings.warn(
                "You'd better use 'properties' instead of 'metadata'",
                DeprecationWarning, stacklevel=4)
            if not properties:
                properties = metadata
            else:
                # Merge into a copy: the dictionary given by the caller
                # must not be modified behind its back.
                merged = dict(properties)
                merged.update(metadata)
                properties = merged

        if src is data:
            # In-memory data (not a generator, which has been wrapped above)
            return self._object_create(
                account, container, obj_name, BytesIO(data), sysmeta,
                properties=properties, policy=policy,
                key_file=key_file, append=append, **kwargs)
        elif hasattr(file_or_path, "read"):
            # File-like object (or wrapped generator)
            return self._object_create(
                account, container, obj_name, src, sysmeta,
                properties=properties, policy=policy, key_file=key_file,
                append=append, **kwargs)
        else:
            # Path to a file
            with open(file_or_path, "rb") as f:
                return self._object_create(
                    account, container, obj_name, f, sysmeta,
                    properties=properties, policy=policy,
                    key_file=key_file, append=append, **kwargs)

    @ensure_headers
    @ensure_request_id
    def object_touch(self, account, container, obj,
                     version=None, **kwargs):
        """
        Trigger a notification about an object
        (as if it just had been created). Nothing is returned.

        :param account: name of the account the object belongs to
        :type account: `str`
        :param container: name of the container the object belongs to
        :type container: `str`
        :param obj: name of the object to touch
        :param version: version of the object to touch
        """
        container_client = self.container
        container_client.content_touch(
            account, container, obj, version=version, **kwargs)

    def object_drain(self, account, container, obj,
                     version=None, **kwargs):
        """
        Remove all the chunks of a content, but keep all the metadata.

        :param account: name of the account where the object is present
        :type account: `str`
        :param container: name of the container where the object is present
        :type container: `str`
        :param obj: name of the object to drain
        """
        self.container.content_drain(account, container, obj,
                                     version=version, **kwargs)

    @handle_object_not_found
    @ensure_headers
    @ensure_request_id
    def object_delete(self, account, container, obj,
                      version=None, **kwargs):
        """
        Delete an object from a container. If versioning is enabled and no
        version is specified, the object will be marked as deleted but not
        actually deleted.

        :param account: name of the account the object belongs to
        :type account: `str`
        :param container: name of the container the object belongs to
        :type container: `str`
        :param obj: name of the object to delete
        :param version: version of the object to delete
        :returns: the result of the container client,
            documented as True on success
        """
        deleted = self.container.content_delete(
            account, container, obj, version=version, **kwargs)
        return deleted

    @ensure_headers
    @ensure_request_id
    def object_delete_many(self, account, container, objs, **kwargs):
        """
        Ask the container client to delete several objects
        of a container at once.

        :param account: name of the account the objects belong to
        :param container: name of the container the objects belong to
        :param objs: the objects to delete, passed as is to the
            container client
        :returns: the result of the container client
        """
        results = self.container.content_delete_many(
            account, container, objs, **kwargs)
        return results

    @handle_object_not_found
    @ensure_headers
    @ensure_request_id
    def object_truncate(self, account, container, obj,
                        version=None, size=None, **kwargs):
        """
        Shrink an object to the specified size (growing is not supported).
        When the new size does not fall on the start of a chunk,
        the beginning of that chunk is downloaded and uploaded again.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :param version: version of the object to query
        :param size: new size of object
        :raises OioException: if no chunk covers the requested size
        """

        # Chunk lookup copied from object_fetch (should be factorized!)
        meta, raw_chunks = self.object_locate(
            account, container, obj, version=version, **kwargs)
        storage_method = STORAGE_METHODS.load(meta['chunk_method'])
        chunks = _sort_chunks(raw_chunks, storage_method.ec)

        # Look for the first metachunk whose span contains `size`
        boundary = None
        for pos in sorted(chunks.keys()):
            candidate = chunks[pos][0]
            start = candidate['offset']
            if start <= size <= start + candidate['size']:
                boundary = candidate
                break
        if boundary is None:
            raise exc.OioException("No chunk found at position %d" % size)

        if boundary['offset'] != size:
            # retrieve partial chunk
            fetched = self.object_fetch(
                account, container, obj, version=version,
                ranges=[(boundary['offset'], size-1)])
            # TODO implement a proper object_update
            meta_pos = int(boundary['pos'].partition('.')[0])
            self.object_create(account, container, obj_name=obj,
                               data=fetched[1], meta_pos=meta_pos,
                               content_id=meta['id'])

        truncated = self.container.content_truncate(
            account, container, obj, version=version, size=size, **kwargs)
        return truncated

    @handle_container_not_found
    def object_list(self, account, container, limit=None, marker=None,
                    delimiter=None, prefix=None, end_marker=None,
                    properties=False, versions=False, deleted=False,
                    **kwargs):
        """
        Lists objects inside a container.

        In each object description, 'mime-type' is renamed 'mime_type'
        and 'ver' is renamed 'version' (when they are set).

        :param properties: if True, list object properties along with objects
        :param versions: if True, list all versions of objects
        :param deleted: if True, list also the deleted objects

        :returns: a dict which contains
           * 'objects': the list of objects
           * 'prefixes': common prefixes (only if delimiter and prefix are set)
           * 'properties': a dict of container properties
           * 'system': a dict of system metadata
        """
        _, listing = self.container.content_list(
            account, container, limit=limit, marker=marker,
            end_marker=end_marker, prefix=prefix, delimiter=delimiter,
            properties=properties, versions=versions, deleted=deleted,
            **kwargs)

        renamed_keys = (('mime-type', 'mime_type'), ('ver', 'version'))
        for entry in listing['objects']:
            for old_key, new_key in renamed_keys:
                if entry.get(old_key) is not None:
                    entry[new_key] = entry.pop(old_key)

        return listing

    @handle_object_not_found
    def object_locate(self, account, container, obj,
                      version=None, **kwargs):
        """
        Ask the container client for the description of an object
        and the list of its chunks.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :param version: version of the object to query
        :returns: a tuple with object metadata `dict` as first element
            and chunk `list` as second element
        """
        meta, chunk_list = self.container.content_locate(
            account, container, obj, version=version, **kwargs)
        return (meta, chunk_list)

    def object_analyze(self, *args, **kwargs):
        """
        :deprecated: use `object_locate`
        """
        warnings.warn("You'd better use object_locate()",
                      DeprecationWarning)
        return self.object_locate(*args, **kwargs)

    @ensure_headers
    @ensure_request_id
    def object_fetch(self, account, container, obj, version=None, ranges=None,
                     key_file=None, **kwargs):
        """
        Locate an object and build a stream to download its data.

        The stream implementation depends on the storage method of
        the object (erasure coding, Backblaze, or the default one).

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to fetch
        :param version: version of the object to fetch
        :param ranges: ranges to fetch, passed as is to the stream builder
        :param key_file: only used with the Backblaze storage method
        :returns: a tuple with the object metadata (completed with
            'container_id' and 'ns') and the data stream
        """
        meta, raw_chunks = self.object_locate(
            account, container, obj, version=version, **kwargs)
        storage_method = STORAGE_METHODS.load(meta['chunk_method'])
        chunks = _sort_chunks(raw_chunks, storage_method.ec)
        meta['container_id'] = cid_from_name(account, container).upper()
        meta['ns'] = self.namespace
        self._patch_timeouts(kwargs)

        if storage_method.ec:
            return meta, fetch_stream_ec(
                chunks, ranges, storage_method, **kwargs)
        if storage_method.backblaze:
            return meta, self._fetch_stream_backblaze(
                meta, chunks, ranges, storage_method, key_file, **kwargs)
        return meta, fetch_stream(chunks, ranges, storage_method, **kwargs)

    @handle_object_not_found
    def object_get_properties(self, account, container, obj, **kwargs):
        """
        Fetch the properties of an object from the container client.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :returns: the result of the container client
        """
        found = self.container.content_get_properties(
            account, container, obj, **kwargs)
        return found

    @handle_object_not_found
    def object_show(self, account, container, obj, version=None, **kwargs):
        """
        Fetch the description of a content, along with its user
        properties, from the container client.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :param version: version of the object to query
        :returns: a `dict` describing the object

        .. python::

            {'hash': '6BF60C17CC15EEA108024903B481738F',
             'ctime': '1481031763',
             'deleted': 'False',
             'properties': {
                 u'projet': u'OpenIO-SDS'},
             'length': '43518',
             'hash_method': 'md5',
             'chunk_method': 'ec/algo=liberasurecode_rs_vand,k=6,m=3',
             'version': '1481031762951972',
             'policy': 'EC',
             'id': '20BF2194FD420500CD4729AE0B5CBC07',
             'mime_type': 'application/octet-stream',
             'name': 'Makefile'}
        """
        description = self.container.content_show(
            account, container, obj, version=version, **kwargs)
        return description

    def object_update(self, account, container, obj, metadata,
                      version=None, clear=False, **kwargs):
        warnings.warn("You'd better use object_set_properties()",
                      DeprecationWarning, stacklevel=2)
        if clear:
            self.object_del_properties(
                account, container, obj, [], version=version, **kwargs)
        if metadata:
            self.object_set_properties(
                account, container, obj, metadata, version=version, **kwargs)

    @handle_object_not_found
    def object_set_properties(self, account, container, obj, properties,
                              version=None, **kwargs):
        """
        Ask the container client to set properties on an object.

        `properties` is wrapped in a dictionary, under the 'properties'
        key, before being passed to the container client.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object
        :param properties: the properties to set
        :param version: version of the object
        :returns: the result of the container client
        """
        wrapped = {'properties': properties}
        return self.container.content_set_properties(
            account, container, obj, properties=wrapped,
            version=version, **kwargs)

    @handle_object_not_found
    def object_del_properties(self, account, container, obj, properties,
                              version=None, **kwargs):
        """
        Ask the container client to delete properties of an object.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object
        :param properties: the properties to delete, passed as is
            to the container client
        :param version: version of the object
        :returns: the result of the container client
        """
        outcome = self.container.content_del_properties(
            account, container, obj, properties=properties,
            version=version, **kwargs)
        return outcome

    def _content_preparer(self, account, container, obj_name,
                          policy=None, **kwargs):
        """
        Prepare the upload of an object.

        :returns: a tuple with the object metadata returned by the first
            `content_prepare` call, and a generator function yielding
            chunk lists, one per metachunk, with their "pos" field
            renumbered from `meta_pos` (taken from `kwargs`, 0 by default)
        """
        # TODO: optimize by asking more than one metachunk at a time
        obj_meta, first_body = self.container.content_prepare(
            account, container, obj_name, size=1, stgpol=policy,
            autocreate=True, **kwargs)
        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])

        def _renumber(chunks, metachunk_pos):
            # Rewrite chunk positions so they belong to `metachunk_pos`
            for chunk in chunks:
                pos_parts = chunk["pos"].split(".")
                if not storage_method.ec:
                    chunk["pos"] = str(metachunk_pos)
                    continue
                # With erasure coding, keep the sub-position of the chunk
                chunk['num'] = int(pos_parts[1])
                chunk["pos"] = "%d.%d" % (metachunk_pos, chunk['num'])

        def _metachunk_preparer():
            metachunk_pos = kwargs.get('meta_pos', 0)
            body = first_body
            while True:
                _renumber(body, metachunk_pos)
                yield body
                # The caller wants one more metachunk: prepare it
                metachunk_pos += 1
                _, body = self.container.content_prepare(
                        account, container, obj_name, 1, stgpol=policy,
                        autocreate=True, **kwargs)

        return obj_meta, _metachunk_preparer

    def _generate_fullpath(self, account, container_name, path, version):
        return ['{0}/{1}/{2}/{3}'.format(quote_plus(account),
                                         quote_plus(container_name),
                                         quote_plus(path),
                                         version)]

    def _object_create(self, account, container, obj_name, source,
                       sysmeta, properties=None, policy=None,
                       key_file=None, **kwargs):
        """
        Upload the data read from `source`, then register the object
        in the container.

        :param source: object from which the write handler reads the data
        :param sysmeta: `dict` merged into the prepared object metadata
            ('etag', when set, is checked against the computed checksum)
        :param properties: properties to save along with the object
        :param policy: name of the storage policy
        :param key_file: only used with the Backblaze storage method
        :returns: a tuple with the list of chunks, the number of bytes
            transferred and the checksum of the content
        :raises EtagMismatch: if the given etag does not match
            the computed checksum
        """
        self._patch_timeouts(kwargs)
        obj_meta, chunk_prep = self._content_preparer(
            account, container, obj_name, policy=policy, **kwargs)
        obj_meta.update(sysmeta)
        obj_meta['content_path'] = obj_name
        obj_meta['container_id'] = cid_from_name(account, container).upper()
        obj_meta['ns'] = self.namespace
        obj_meta['full_path'] = self._generate_fullpath(
            account, container, obj_name, obj_meta['version'])
        if not obj_meta.get('oio_version'):
            obj_meta['oio_version'] = OIO_VERSION

        # XXX content_id is necessary to update an existing object
        kwargs.setdefault('content_id', obj_meta['id'])

        # Select the write handler matching the storage method
        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])
        if storage_method.ec:
            handler = ECWriteHandler(
                source, obj_meta, chunk_prep, storage_method, **kwargs)
        elif storage_method.backblaze:
            backblaze_info = self._b2_credentials(storage_method, key_file)
            handler = BackblazeWriteHandler(
                source, obj_meta, chunk_prep, storage_method,
                backblaze_info, **kwargs)
        else:
            handler = ReplicatedWriteHandler(
                source, obj_meta, chunk_prep, storage_method, **kwargs)

        upload_result = handler.stream()
        final_chunks, bytes_transferred, content_checksum = upload_result

        expected_etag = obj_meta.get('etag')
        if expected_etag:
            if expected_etag.lower() != content_checksum.lower():
                raise exc.EtagMismatch(
                    "given etag %s != computed %s" % (expected_etag,
                                                      content_checksum))
        obj_meta['etag'] = content_checksum

        # FIXME: we may just pass **obj_meta
        self.container.content_create(
            account, container, obj_name, size=bytes_transferred,
            checksum=content_checksum,
            data={'chunks': final_chunks, 'properties': properties or {}},
            stgpol=obj_meta['policy'],
            version=obj_meta['version'], mime_type=obj_meta['mime_type'],
            chunk_method=obj_meta['chunk_method'],
            **kwargs)
        return final_chunks, bytes_transferred, content_checksum

    def _b2_credentials(self, storage_method, key_file):
        """
        Load Backblaze credentials with `BackblazeUtils.get_credentials`.

        :param key_file: path to the key file. When not set,
            '/etc/oio/sds/b2-appkey.conf' is used.
        :raises ConfigurationException: if the credentials cannot be loaded
        """
        if not key_file:
            key_file = '/etc/oio/sds/b2-appkey.conf'
        try:
            credentials = BackblazeUtils.get_credentials(
                storage_method, key_file)
        except BackblazeUtilsException as err:
            raise exc.ConfigurationException(str(err))
        return credentials

    def _fetch_stream_backblaze(self, meta, chunks, ranges,
                                storage_method, key_file,
                                **kwargs):
        backblaze_info = self._b2_credentials(storage_method, key_file)
        total_bytes = 0
        current_offset = 0
        size = None
        offset = 0
        for pos in range(len(chunks)):
            if ranges:
                offset = ranges[pos][0]
                size = ranges[pos][1]

            if size is None:
                size = int(meta["length"])
            chunk_size = int(chunks[pos][0]["size"])
            if total_bytes >= size:
                break
            if current_offset + chunk_size > offset:
                if current_offset < offset:
                    _offset = offset - current_offset
                else:
                    _offset = 0
                if chunk_size + total_bytes > size:
                    _size = size - total_bytes
                else:
                    _size = chunk_size
            handler = BackblazeChunkDownloadHandler(
                meta, chunks[pos], _offset, _size,
                backblaze_info=backblaze_info)
            stream = handler.get_stream()
            if not stream:
                raise exc.OioException("Error while downloading")
            total_bytes += len(stream)
            yield stream
            current_offset += chunk_size

    @handle_container_not_found
    def container_refresh(self, account, container, attempts=3, **kwargs):
        """
        Reset the container entry in the account service, then touch the
        container.

        :param account: name of the account owning the container.
        :param container: name of the container.
        :param attempts: maximum number of reset requests sent when the
            account service answers with a conflict.
        :raises exc.Conflict: when the last reset attempt conflicts.
        :raises exc.ClientException: when touching the container fails
            with a status other than 406 or 431.
        """
        for i in range(attempts):
            try:
                self.account.container_reset(account, container, time.time())
            except exc.Conflict:
                if i >= attempts - 1:
                    raise
            else:
                # The reset has been accepted: do not send it again.
                break
        try:
            self.container.container_touch(account, container)
        except exc.ClientException as err:
            if err.status not in (406, 431):
                raise
            # CODE_USER_NOTFOUND or CODE_CONTAINER_NOTFOUND
            metadata = {"dtime": time.time()}
            self.account.container_update(account, container, metadata)

    @handle_account_not_found
    def account_refresh(self, account, **kwargs):
        """
        Refresh an account, then every container listed for it.

        Containers are listed page by page, the name of the last entry
        of a page being the marker of the next request. Containers that
        raise `exc.NoSuchContainer` during their refresh are ignored.

        :param account: name of the account to refresh.
        """
        def _refresh_page(entries):
            # The first item of each listing entry is the container name.
            for entry in entries:
                try:
                    self.container_refresh(account, entry[0])
                except exc.NoSuchContainer:
                    # The container was removed in the meantime.
                    continue

        self.account.account_refresh(account)

        page = self.container_list(account)
        _refresh_page(page)
        while page:
            page = self.container_list(account, marker=page[-1][0])
            if page:
                _refresh_page(page)

    def all_accounts_refresh(self, **kwargs):
        """
        Refresh every account returned by `account_list`.

        Accounts that raise `exc.NoSuchAccount` during their refresh
        are ignored.
        """
        for account_name in self.account_list():
            try:
                self.account_refresh(account_name)
            except exc.NoSuchAccount:
                # The account was removed in the meantime.
                continue

    @handle_account_not_found
    def account_flush(self, account):
        """
        Ask the account service to flush an account.

        :param account: name of the account, passed as-is to
            `self.account.account_flush`.
        """
        account_client = self.account
        account_client.account_flush(account)

    def _random_buffer(self, dictionary, n):
        return ''.join(random.choice(dictionary) for _ in range(n))

    def _generate_copy(self, chunks, random_hex=60):
        # random_hex is the number of hexadecimals characters to generate for
        # the copy path
        copies = []
        for c in chunks:
            tmp = ''.join([c[:-random_hex],
                           self._random_buffer('0123456789ABCDEF',
                                               random_hex)])
            copies.append(tmp)
        return copies

    def _send_copy(self, targets, copies, fullpath):
        headers = {"x-oio-chunk-meta-full-path": fullpath}
        if not hasattr(self, "blob_client"):
            from oio.blob.client import BlobClient
            self.blob_client = BlobClient()
        for t, c in zip(targets, copies):
            self.blob_client.chunk_link(t, c, headers=headers).status

    def _prepare_update_meta2(self, targets, copies, account, container,
                              content):
        targets_beans = []
        copies_beans = []
        for t, c in zip(targets, copies):
            targets_beans.append(self._meta2bean(t['url'], t, content))
            copies_beans.append(self._meta2bean(c, t, content))
        return targets_beans, copies_beans

    def _meta2bean(self, url, meta, content):
        return {"type": "chunk",
                "id": url,
                "hash": meta['hash'],
                "size": int(meta["size"]),
                "pos": meta["pos"],
                "content": content}
class TestStorageTierer(BaseTestCase):
    """
    Functional tests of the container and content listings of
    StorageTiererWorker.
    """

    def setUp(self):
        super(TestStorageTierer, self).setUp()
        self.namespace = self.conf['namespace']
        self.test_account = "test_storage_tiering_%f" % time.time()
        self.gridconf = {"namespace": self.namespace,
                         "container_fetch_limit": 2,
                         "content_fetch_limit": 2,
                         "account": self.test_account,
                         "outdated_threshold": 0,
                         "new_policy": "RAIN"}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self._populate()

    def _populate(self):
        """
        Create three containers in the test account: an empty one,
        one with a single content and one with two contents.
        """
        self.container_0_name = "container_empty"
        self.container_0_id = cid_from_name(
            self.test_account, self.container_0_name)
        self.container_client.container_create(
            acct=self.test_account, ref=self.container_0_name)

        self.container_1_name = "container_with_1_content"
        self.container_1_id = cid_from_name(
            self.test_account, self.container_1_name)
        self.container_client.container_create(
            acct=self.test_account, ref=self.container_1_name)
        self.container_1_content_0_name = "container_1_content_0"
        self.container_1_content_0 = self._new_content(
            self.container_1_id, self.container_1_content_0_name, "SINGLE")

        self.container_2_name = "container_with_2_contents"
        self.container_2_id = cid_from_name(
            self.test_account, self.container_2_name)
        self.container_client.container_create(
            acct=self.test_account, ref=self.container_2_name)
        self.container_2_content_0_name = "container_2_content_0"
        self.container_2_content_0 = self._new_content(
            self.container_2_id, self.container_2_content_0_name, "SINGLE")
        self.container_2_content_1_name = "container_2_content_1"
        self.container_2_content_1 = self._new_content(
            self.container_2_id, self.container_2_content_1_name, "TWOCOPIES")

    def _new_content(self, container_id, content_name, stgpol):
        """Upload 10 bytes of random data and return the new content."""
        data = random_data(10)
        content = self.content_factory.new(container_id, content_name,
                                           len(data), stgpol)

        content.upload(StringIO.StringIO(data))
        return content

    def tearDown(self):
        super(TestStorageTierer, self).tearDown()

    def test_iter_container_list(self):
        worker = StorageTiererWorker(self.gridconf, Mock())
        gen = worker._list_containers()
        self.assertEqual(next(gen), self.container_0_name)
        self.assertEqual(next(gen), self.container_1_name)
        self.assertEqual(next(gen), self.container_2_name)
        self.assertRaises(StopIteration, next, gen)

    def test_iter_content_list_outdated_threshold_0(self):
        self.gridconf["outdated_threshold"] = 0
        worker = StorageTiererWorker(self.gridconf, Mock())
        gen = worker._list_contents()
        self.assertEqual(next(gen), (
            self.container_1_id, self.container_1_content_0.content_id))
        self.assertEqual(next(gen), (
            self.container_2_id, self.container_2_content_0.content_id))
        self.assertEqual(next(gen), (
            self.container_2_id, self.container_2_content_1.content_id))
        self.assertRaises(StopIteration, next, gen)

    def test_iter_content_list_outdated_threshold_9999999999(self):
        self.gridconf["outdated_threshold"] = 9999999999
        worker = StorageTiererWorker(self.gridconf, Mock())
        gen = worker._list_contents()
        self.assertRaises(StopIteration, next, gen)

    def test_iter_content_list_outdated_threshold_2(self):
        # add a new content created after the three previous contents
        now = int(time.time())
        time.sleep(2)
        self._new_content(self.container_2_id, "titi", "TWOCOPIES")

        self.gridconf["outdated_threshold"] = 2
        worker = StorageTiererWorker(self.gridconf, Mock())
        with mock.patch('oio.crawler.storage_tierer.time.time',
                        mock.MagicMock(return_value=now+2)):
            gen = worker._list_contents()
            # The iterator is consumed lazily: the patched clock must
            # still be in place while the items are pulled, hence the
            # assertions inside the `with` block.
            self.assertEqual(next(gen), (
                self.container_1_id, self.container_1_content_0.content_id))
            self.assertEqual(next(gen), (
                self.container_2_id, self.container_2_content_0.content_id))
            self.assertEqual(next(gen), (
                self.container_2_id, self.container_2_content_1.content_id))
            self.assertRaises(StopIteration, next, gen)

    def test_iter_content_list_skip_good_policy(self):
        self.gridconf["new_policy"] = "SINGLE"
        worker = StorageTiererWorker(self.gridconf, Mock())
        gen = worker._list_contents()
        self.assertEqual(next(gen), (
            self.container_2_id, self.container_2_content_1.content_id))
        self.assertRaises(StopIteration, next, gen)
Exemplo n.º 12
0
class TestFilters(BaseTestCase):
    """
    Functional tests of content creation and deletion with and without
    the admin mode. Each test is skipped unless the matching
    environment variable (SLAVE or WORM) is set.
    """

    def setUp(self):
        super(TestFilters, self).setUp()
        self.account = self.conf['account']
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {'namespace': self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = 'TestFilter%f' % time.time()
        self.container_client = ContainerClient(self.gridconf, admin_mode=True)
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.stgpol = "SINGLE"

    def _new_content(self, data, path, admin_mode=True):
        """
        Create a content holding `data` at `path`, then load it again
        from the content factory and return it.
        """
        old_content = self.content_factory.new(self.container_id,
                                               path,
                                               len(data),
                                               self.stgpol,
                                               admin_mode=admin_mode)
        old_content.create(BytesIO(data), admin_mode=admin_mode)
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_slave_and_admin(self):
        if not os.getenv("SLAVE"):
            self.skipTest("must be in slave mode")
        data = random_data(10)
        path = 'test_slave'
        try:
            self._new_content(data, path)
        except ClientException as err:
            print(str(err))
            self.assertIn('NS slave!', str(err))
        content = self._new_content(data, path)
        content.delete(admin_mode=True)

    def test_worm_and_admin(self):
        if not os.getenv("WORM"):
            self.skipTest("must be in worm mode")
        data = random_data(10)
        path = 'test_worm'
        content = self._new_content(data, path)

        # Overwrite without admin mode
        data2 = random_data(11)
        try:
            content = self._new_content(data2, path, admin_mode=False)
        except ClientException as err:
            self.assertIsInstance(err, Conflict)

        # Overwrite with admin mode
        content = self._new_content(data2, path)

        # Delete without admin mode
        try:
            content.delete()
        except ClientException as err:
            self.assertIn('worm', str(err).lower())
        # The content is uploaded from a BytesIO: join the fetched parts
        # as bytes (identical to ''.join on Python 2).
        downloaded_data = b''.join(content.fetch())
        self.assertEqual(downloaded_data, data2)

        # Delete with admin mode
        content.delete(admin_mode=True)
Exemplo n.º 13
0
class TestMeta2EventsEmission(BaseTestCase):
    """
    Check the events found in the 'oio-preserved' tube after the
    creation and the deletion of a container.
    """

    def setUp(self):
        super(TestMeta2EventsEmission, self).setUp()
        self.container_name = 'TestEventsEmission%f' % time.time()
        self.container_id = cid_from_name(self.account, self.container_name)
        self.container_client = ContainerClient(self.conf)
        self.storage_api = ObjectStorageApi(self.conf['namespace'])
        self.beanstalkd0.drain_tube('oio-preserved')

    def _next_preserved_event(self, types):
        """Wait for the next event of one of `types` (None on timeout)."""
        return self.wait_for_event('oio-preserved',
                                   types=types,
                                   timeout=REASONABLE_EVENT_DELAY)

    def _expected_url(self):
        """Build the URL description expected in each event."""
        return {
            'ns': self.ns,
            'account': self.account,
            'user': self.container_name,
            'id': self.container_id
        }

    def _directory_meta2_hosts(self):
        """List (sorted) the meta2 hosts the directory knows."""
        listing = self.storage_api.directory.list(self.account,
                                                  self.container_name,
                                                  cid=self.container_id)
        services = listing['srv']
        return sorted(
            [srv.get('host') for srv in services
             if srv.get('type') == 'meta2'])

    def wait_for_all_events(self, types):
        """
        Pull events of the given types until no more event arrives.

        :returns: a dict mapping each type to the list of its events.
        """
        pulled_events = dict(
            (event_type, list()) for event_type in types)

        event = self._next_preserved_event(types)
        while event is not None:
            pulled_events[event.event_type].append(event)
            event = self._next_preserved_event(types)
        return pulled_events

    def test_container_create(self):
        # Fire up the event
        self.container_client.container_create(self.account,
                                               self.container_name)

        # Grab all events, keep the two types of interest
        by_type = self.wait_for_all_events(
            [EventTypes.CONTAINER_NEW, EventTypes.ACCOUNT_SERVICES])
        created = by_type[EventTypes.CONTAINER_NEW]
        services = by_type[EventTypes.ACCOUNT_SERVICES]
        self.assertEqual(1, len(created))
        self.assertEqual(1, len(services))
        # Exactly one event of each type
        created_event = created[0]
        services_event = services[0]

        # Basic info
        for event in (created_event, services_event):
            self.assertEqual(self._expected_url(), event.url)

        # The peers received must be the ones of the directory
        expected_peers_list = self._directory_meta2_hosts()
        received_peers_list = sorted(
            [srv.get('host') for srv in services_event.data
             if srv.get('type') == 'meta2'])
        self.assertListEqual(expected_peers_list, received_peers_list)

    def test_container_delete(self):
        # Create the container first
        self.container_client.container_create(self.account,
                                               self.container_name)

        # Get the peers list
        expected_peers_list = self._directory_meta2_hosts()

        self.beanstalkd0.drain_tube('oio-preserved')
        # Fire up the event
        self.container_client.container_delete(self.account,
                                               self.container_name)

        # Grab all events, keep the two types of interest
        by_type = self.wait_for_all_events(
            [EventTypes.CONTAINER_DELETED, EventTypes.META2_DELETED])
        container_deleted = by_type[EventTypes.CONTAINER_DELETED]
        meta2_deleted = by_type[EventTypes.META2_DELETED]
        self.assertEqual(1, len(container_deleted))
        self.assertEqual(len(expected_peers_list), len(meta2_deleted))

        # Basic info
        for event in (container_deleted + meta2_deleted):
            self.assertDictEqual(self._expected_url(), event.url)

        # One deletion event per peer of the directory
        received_peers = sorted(
            [event.data.get("peer") for event in meta2_deleted])
        self.assertListEqual(expected_peers_list, received_peers)
Exemplo n.º 14
0
class TestBlobAuditorFunctional(BaseTestCase):
    """
    Functional tests of BlobAuditorWorker.chunk_audit: a chunk is
    uploaded to the first rawx, then the chunk file, its extended
    attributes or the content description are altered.
    """

    def setUp(self):
        super(TestBlobAuditorFunctional, self).setUp()
        self.namespace = self.conf['namespace']
        self.account = self.conf['account']

        self.test_dir = self.conf['sds_path']

        self.chars = ''.join((string.ascii_lowercase,
                              string.ascii_uppercase,
                              string.digits))
        self.chars_id = ''.join((string.digits, 'ABCDEF'))

        self.rawx = ''.join(('http://', self.conf["rawx"][0]['addr']))

        self.h = hashlib.new('md5')

        worker_conf = {"namespace": self.namespace}
        self.auditor = BlobAuditorWorker(worker_conf, get_logger(None), None)
        self.container_c = ContainerClient(worker_conf)
        self.blob_c = BlobClient()

        self.ref = rand_generator(self.chars, 8)

        self.container_c.container_create(self.account, self.ref)

        self.url_rand = rand_generator(self.chars_id, 64)

        self.data = rand_generator(self.chars, 1280)
        self.h.update(self.data)
        self.hash_rand = self.h.hexdigest().lower()

        content_path = rand_generator(self.chars, 6)
        self.content = TestContent(
            content_path, len(self.data), self.url_rand, 1)

        container_id = cid_from_name(self.account, self.ref)
        self.content.id_container = container_id.upper()
        self.chunk = TestChunk(self.content.size, self.url_rand, 0,
                               self.hash_rand)

        self.chunk_url = "%s/%s" % (self.rawx, self.chunk.id_chunk)
        self.chunk_proxy = {
            "hash": self.chunk.md5,
            "pos": "0",
            "size": self.chunk.size,
            "url": self.chunk_url,
        }

        chunk_meta = {
            'content_size': self.content.size,
            'content_chunksnb': self.content.nb_chunks,
            'content_path': self.content.path,
            'content_cid': self.content.id_container,
            'content_id': '0000',
            'content_version': 1,
            'chunk_id': self.chunk.id_chunk,
            'chunk_pos': self.chunk.pos,
        }
        self.blob_c.chunk_put(self.chunk_url, chunk_meta, self.data)

        self.chunk_path = ''.join((self.test_dir,
                                   '/data/NS-rawx-1/',
                                   self.chunk.id_chunk[0:2],
                                   "/",
                                   self.chunk.id_chunk))
        self.bad_container_id = '0'*64

    def tearDown(self):
        super(TestBlobAuditorFunctional, self).tearDown()

        # Best-effort cleanup: each step may fail, depending on what the
        # test did, and must not prevent the next ones.
        cleanups = (
            lambda: self.container_c.content_delete(
                self.account, self.ref, self.content.path),
            lambda: self.container_c.container_destroy(
                self.account, self.ref),
            lambda: os.remove(self.chunk_path),
        )
        for cleanup in cleanups:
            try:
                cleanup()
            except Exception:
                pass

    def init_content(self):
        """Declare the content, with its single chunk, in the container."""
        self.container_c.content_create(
            self.account, self.ref, self.content.path, self.chunk.size,
            self.hash_rand, data=[self.chunk_proxy])

    def _assert_audit_raises(self, expected):
        """Check that the audit of the chunk raises `expected`."""
        with self.assertRaises(expected):
            self.auditor.chunk_audit(self.chunk_path)

    def _overwrite_chunk(self, length):
        """Replace the chunk file by `length` random characters."""
        with open(self.chunk_path, "w") as chunk_file:
            chunk_file.write(rand_generator(self.chars, length))

    def _check_bad_xattr(self, name, value, expected):
        """
        Declare the content, overwrite one extended attribute of the
        chunk file and check that the audit raises `expected`.
        """
        self.init_content()
        xattr.setxattr(self.chunk_path, name, value)
        self._assert_audit_raises(expected)

    def test_chunk_audit(self):
        self.init_content()
        self.auditor.chunk_audit(self.chunk_path)

    def test_content_deleted(self):
        self._assert_audit_raises(exc.OrphanChunk)

    def test_container_deleted(self):
        self.container_c.container_destroy(self.account, self.ref)

        self._assert_audit_raises(exc.OrphanChunk)

    def test_chunk_corrupted(self):
        self.init_content()
        self._overwrite_chunk(1280)

        self._assert_audit_raises(exc.CorruptedChunk)

    def test_chunk_bad_size(self):
        self.init_content()
        self._overwrite_chunk(320)

        self._assert_audit_raises(exc.FaultyChunk)

    def test_xattr_bad_content_nbchunk(self):
        self._check_bad_xattr(
            'user.grid.content.nbchunk', '42', exc.FaultyChunk)

    def test_xattr_bad_chunk_size(self):
        self._check_bad_xattr(
            'user.grid.chunk.size', '-1', exc.FaultyChunk)

    def test_xattr_bad_chunk_hash(self):
        self._check_bad_xattr(
            'user.grid.chunk.hash', 'WRONG_HASH', exc.CorruptedChunk)

    def test_xattr_bad_content_size(self):
        self._check_bad_xattr(
            'user.grid.content.size', '-1', exc.FaultyChunk)

    def test_xattr_bad_content_path(self):
        self._check_bad_xattr(
            'user.grid.content.path', 'WRONG_PATH', exc.OrphanChunk)

    def test_xattr_bad_chunk_id(self):
        self._check_bad_xattr(
            'user.grid.chunk.id', 'WRONG_ID', exc.OrphanChunk)

    def test_xattr_bad_content_container(self):
        self._check_bad_xattr(
            'user.grid.content.container', self.bad_container_id,
            exc.OrphanChunk)

    def test_xattr_bad_chunk_position(self):
        self._check_bad_xattr(
            'user.grid.chunk.position', '42', exc.FaultyChunk)

    def test_chunk_bad_hash(self):
        # Feeding the data a second time gives a hash that no longer
        # matches the uploaded chunk.
        self.h.update(self.data)
        self.hash_rand = self.h.hexdigest().lower()
        self.chunk.md5 = self.hash_rand
        self.chunk_proxy['hash'] = self.chunk.md5
        self.init_content()

        self._assert_audit_raises(exc.FaultyChunk)

    def test_chunk_bad_length(self):
        self.chunk.size = 320
        self.chunk_proxy['size'] = self.chunk.size
        self.init_content()

        self._assert_audit_raises(exc.FaultyChunk)

    def test_chunk_bad_chunk_size(self):
        self.chunk.size = 320
        self.chunk_proxy['size'] = self.chunk.size
        self.init_content()

        self._assert_audit_raises(exc.FaultyChunk)

    def test_chunk_bad_url(self):
        self.chunk_proxy['url'] = '%s/WRONG_ID' % self.rawx
        self.init_content()

        self._assert_audit_raises(exc.OrphanChunk)

    def test_content_bad_path(self):
        self.content.path = 'BAD_PATH'
        self.init_content()

        self._assert_audit_raises(exc.OrphanChunk)
Exemplo n.º 15
0
class TestBlobAuditorFunctional(BaseTestCase):
    """
    Functional tests of BlobAuditorWorker.chunk_audit: a chunk is
    uploaded to a rawx service, then the chunk file, its extended
    attributes or the content description are altered.
    """

    def setUp(self):
        super(TestBlobAuditorFunctional, self).setUp()
        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.ref = random_str(8)

        _, rawx_loc, rawx_addr, rawx_uuid = self.get_service_url('rawx')
        # Identify the service by its UUID when it has one.
        self.rawx_id = 'http://' + (rawx_uuid or rawx_addr)

        self.auditor = BlobAuditorWorker(self.conf, get_logger(None), None)
        self.container_client = ContainerClient(self.conf)
        self.blob_client = BlobClient(conf=self.conf)

        self.container_client.container_create(self.account, self.ref)
        self.content = TestContent(self.account, self.ref)
        self.chunk = TestChunk(self.rawx_id, rawx_loc, self.content.size,
                               self.content.hash)

        content, chunk = self.content, self.chunk
        chunk_meta = {
            'container_id': content.cid,
            'content_path': content.path,
            'version': content.version,
            'id': content.id,
            'full_path': content.fullpath,
            'chunk_method': 'plain/nb_copy=3',
            'policy': 'TESTPOLICY',
            'chunk_id': chunk.id,
            'chunk_pos': chunk.pos,
            'chunk_hash': chunk.metachunk_hash,
            'chunk_size': chunk.metachunk_size,
            'metachunk_hash': chunk.metachunk_hash,
            'metachunk_size': chunk.metachunk_size,
            'oio_version': OIO_VERSION
        }
        self.blob_client.chunk_put(chunk.url, chunk_meta, content.data)

    def tearDown(self):
        super(TestBlobAuditorFunctional, self).tearDown()

        # Best-effort cleanup: each step may fail, depending on what the
        # test did, and must not prevent the next ones.
        cleanups = (
            lambda: self.container_client.content_delete(
                self.account, self.ref, self.content.path),
            lambda: self.container_client.container_delete(
                self.account, self.ref),
            lambda: os.remove(self.chunk.path),
        )
        for cleanup in cleanups:
            try:
                cleanup()
            except Exception:
                pass

    def init_content(self):
        """Declare the content, with its single chunk, in the container."""
        chunk_proxy = {
            "url": self.chunk.url,
            "pos": str(self.chunk.pos),
            "hash": self.chunk.metachunk_hash,
            "size": self.chunk.metachunk_size
        }
        self.container_client.content_create(
            self.account,
            self.ref,
            self.content.path,
            version=int(time.time() * 1000000),
            content_id=self.content.id,
            size=self.content.size,
            checksum=self.content.hash,
            data={'chunks': [chunk_proxy]},
            stgpol="SINGLE")

    def _assert_audit_raises(self, expected):
        """Check that the audit of the chunk raises `expected`."""
        with self.assertRaises(expected):
            self.auditor.chunk_audit(self.chunk.path, self.chunk.id)

    def _overwrite_chunk(self, length):
        """Replace the chunk file by `length` random bytes."""
        with open(self.chunk.path, "wb") as outf:
            outf.write(os.urandom(length))

    def _fullpath_xattr(self, chunk_id):
        """Name of the extended attribute holding a chunk's full path."""
        return 'user.' + CHUNK_XATTR_CONTENT_FULLPATH_PREFIX + chunk_id

    def test_chunk_audit(self):
        self.init_content()
        self.auditor.chunk_audit(self.chunk.path, self.chunk.id)

    def test_content_deleted(self):
        self._assert_audit_raises(exc.OrphanChunk)

    def test_container_deleted(self):
        self.container_client.container_delete(self.account, self.ref)

        self._assert_audit_raises(exc.OrphanChunk)

    def test_chunk_corrupted(self):
        self.init_content()
        self._overwrite_chunk(1280)

        self._assert_audit_raises(exc.CorruptedChunk)

    def test_chunk_bad_chunk_size(self):
        self.init_content()
        self._overwrite_chunk(320)

        # Either exception is accepted here.
        self._assert_audit_raises((exc.FaultyChunk, exc.CorruptedChunk))

    def test_xattr_bad_xattr_metachunk_size(self):
        self.init_content()
        xattr.setxattr(self.chunk.path,
                       'user.' + CHUNK_XATTR_KEYS['metachunk_size'], b'320')

        self._assert_audit_raises(exc.FaultyChunk)

    def test_xattr_bad_xattr_metachunk_hash(self):
        self.init_content()
        xattr.setxattr(self.chunk.path,
                       'user.' + CHUNK_XATTR_KEYS['metachunk_hash'],
                       b'0123456789ABCDEF0123456789ABCDEF')

        self._assert_audit_raises(exc.FaultyChunk)

    def test_xattr_bad_xattr_chunk_id(self):
        self.init_content()
        # Move the full path from the real chunk ID to a wrong one.
        xattr.removexattr(self.chunk.path,
                          self._fullpath_xattr(str(self.chunk.id)))
        xattr.setxattr(self.chunk.path,
                       self._fullpath_xattr('WRONG_ID'),
                       self.content.fullpath.encode('utf-8'))

        self._assert_audit_raises(exc.FaultyChunk)

    def test_xattr_bad_xattr_content_container(self):
        self.init_content()
        wrong_fullpath = encode_fullpath(
            self.account, 'WRONG_REF', self.content.path,
            self.content.version, self.content.id)
        xattr.setxattr(self.chunk.path,
                       self._fullpath_xattr(str(self.chunk.id)),
                       wrong_fullpath.encode('utf-8'))

        self._assert_audit_raises(exc.OrphanChunk)

    def test_xattr_bad_xattr_content_id(self):
        self.init_content()
        wrong_fullpath = encode_fullpath(
            self.account, self.ref, self.content.path,
            self.content.version, '0123456789ABCDEF')
        xattr.setxattr(self.chunk.path,
                       self._fullpath_xattr(str(self.chunk.id)),
                       wrong_fullpath.encode('utf-8'))

        self._assert_audit_raises(exc.OrphanChunk)

    def test_xattr_bad_xattr_chunk_position(self):
        self.init_content()
        xattr.setxattr(self.chunk.path,
                       'user.' + CHUNK_XATTR_KEYS['chunk_pos'], b'42')

        self._assert_audit_raises(exc.FaultyChunk)

    def test_chunk_bad_meta2_metachunk_size(self):
        self.content.size = 320
        self.chunk.metachunk_size = 320
        self.init_content()

        self._assert_audit_raises(exc.FaultyChunk)

    def test_chunk_bad_meta2_metachunk_hash(self):
        self.chunk.metachunk_hash = '0123456789ABCDEF0123456789ABCDEF'
        self.init_content()

        self._assert_audit_raises(exc.FaultyChunk)

    def test_chunk_bad_meta2_chunk_url(self):
        self.chunk.url = '%s/0123456789ABCDEF' % self.rawx_id
        self.init_content()

        self._assert_audit_raises(exc.OrphanChunk)
Exemplo n.º 16
0
class TestDupContent(BaseTestCase):
    """Functional tests for DupContent upload, rebuild and download.

    The tests use the SINGLE, TWOCOPIES and THREECOPIES storage policies
    and are skipped when fewer than 3 rawx services are configured.
    """

    # Expected number of chunk copies per position, for each storage policy.
    _NB_COPIES_BY_POLICY = {"THREECOPIES": 3, "TWOCOPIES": 2, "SINGLE": 1}

    def setUp(self):
        """Create the clients and a uniquely named container."""
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        # The timestamp keeps the container name unique between runs.
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        """Upload random data, then verify content metadata and chunks.

        :param stgpol: storage policy name; must be a key of
            `_NB_COPIES_BY_POLICY`.
        :param data_size: number of random bytes to upload.
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        # Fail with a clear message instead of an unbound local variable
        # when the policy is not one this helper knows about.
        if stgpol not in self._NB_COPIES_BY_POLICY:
            self.fail("Unexpected storage policy: %s" % stgpol)
        nb_copy = self._NB_COPIES_BY_POLICY[stgpol]

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = pos * self.chunk_size + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                # Use a distinct name so the content metadata fetched above
                # is not shadowed by the per-chunk metadata.
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(chunk_meta['content_size'], str(len(data)))
                self.assertEqual(chunk_meta['content_path'], "titi")
                self.assertEqual(chunk_meta['content_cid'],
                                 self.container_id)
                # Compare against the uploaded content's ID (the original
                # compared the value with itself, which can never fail).
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], str(pos))
                self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        # Chunks of the other positions must not be left behind.
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        """Upload `data` as "titi" and delete the requested chunks.

        :param stgpol: storage policy name.
        :param data: payload to upload.
        :param broken_pos_list: iterable of (position, index) pairs naming
            the chunks to delete after the upload; None means no deletion.
        :returns: a tuple (content, broken_chunks_info) where `content` is
            re-read through the content factory and `broken_chunks_info`
            maps position -> index -> details of each deleted chunk
            ("url", "id", "hash", "dl_meta", "dl_hash").
        """
        if broken_pos_list is None:
            broken_pos_list = []

        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            # Download the chunk before deleting it, so that a rebuilt copy
            # can later be compared with what was stored.
            meta, stream = self.blob_client.chunk_get(c.url)
            broken_chunks_info.setdefault(pos, {})[idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.hash,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(self.container_id,
                                         old_content.content_id),
                broken_chunks_info)

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        """Break some chunks, rebuild one of them and verify the result.

        :param stgpol: storage policy name.
        :param data_size: number of random bytes to upload.
        :param broken_pos_list: (position, index) pairs of chunks to delete.
        :param full_rebuild_pos: (position, index) of the deleted chunk to
            rebuild; must be one of `broken_pos_list`.
        """
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"])

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)
        self.assertEqual(type(rebuilt_content), DupContent)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream), rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.hash, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            # The chunk ID differs after a rebuild; drop it from both sides
            # before comparing the rest of the metadata.
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", self.chunk_size, [(0, 0), (0, 1)],
                           (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        # Every copy of position 0 is broken: nothing to rebuild from.
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0))

    def _test_download(self, stgpol, data_size, broken_pos_list):
        """Download a content with some chunks deleted and compare it with
        the uploaded data.

        :param stgpol: storage policy name.
        :param data_size: number of random bytes to upload.
        :param broken_pos_list: (position, index) pairs of chunks to delete
            before downloading.
        """
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt: the URL of the chunk that was
            # actually deleted (not always index 0) must still be missing.
            broken_url = broken_chunks_info[pos][idx]["url"]
            self.assertRaises(NotFound, self.blob_client.chunk_delete,
                              broken_url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        # The error is only raised when the generator is first advanced.
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        # NOTE(review): the test name says "chunksize plus 1" but the size
        # used is chunk_size * 2 -- confirm which one is intended.
        self._test_download("SINGLE", self.chunk_size * 2, [])
Exemplo n.º 17
0
class TestFilters(BaseTestCase):
    """Functional tests of the admin mode flag in "slave" and "worm" modes.

    Each test is skipped unless the matching environment variable
    (SLAVE or WORM) is set.
    """

    def setUp(self):
        """Create the clients and a uniquely named container (admin mode)."""
        super(TestFilters, self).setUp()
        self.account = self.conf['account']
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {'namespace': self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        # The timestamp keeps the container name unique between runs.
        self.container_name = 'TestFilter%f' % time.time()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name,
                                               admin_mode=True)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.stgpol = "SINGLE"

    def _prepare_content(self, path, content_id, admin_mode):
        """Call content_prepare for a 1-byte content in the test container.

        :param path: content path.
        :param content_id: content ID to request, or None.
        :param admin_mode: value forwarded as `admin_mode`.
        """
        self.container_client.content_prepare(account=self.account,
                                              reference=self.container_name,
                                              path=path,
                                              content_id=content_id,
                                              size=1,
                                              stgpol=self.stgpol,
                                              autocreate=True,
                                              admin_mode=admin_mode)

    def _new_content(self, data, path, admin_mode):
        """Create a content holding `data` and return it re-read through
        the content factory.

        :param data: payload (bytes) to store.
        :param path: content path.
        :param admin_mode: value forwarded as `admin_mode` to both the
            factory and the creation call.
        """
        old_content = self.content_factory.new(self.container_id,
                                               path,
                                               len(data),
                                               self.stgpol,
                                               admin_mode=admin_mode)
        old_content.create(BytesIO(data), admin_mode=admin_mode)
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_slave_and_admin(self):
        if not os.getenv("SLAVE"):
            self.skipTest("must be in slave mode")
        data = random_data(10)
        path = 'test_slave'
        # Without admin mode the creation must be refused.
        try:
            self._new_content(data, path, False)
        except ClientException as err:
            self.assertIn('NS slave!', text_type(err))
        else:
            self.fail("New content: no exception")
        content = self._new_content(data, path, True)
        content.delete(admin_mode=True)

    def test_worm_and_admin(self):
        if not os.getenv("WORM"):
            self.skipTest("must be in worm mode")
        data = random_data(10)
        path = 'test_worm'
        content = self._new_content(data, path, True)

        # Prepare without admin mode:
        # Since the 'prepare' step is done in the proxy, there is no check
        # on the pre-existence of the content. The subsequent prepare MUST
        # now work despite the presence of the content.
        self._prepare_content(path, None, False)
        self._prepare_content(path, content.content_id, False)
        self._prepare_content('test_worm_prepare', content.content_id, False)
        self._prepare_content(path, random_id(32), False)

        # Overwrite without admin mode
        data2 = random_data(11)
        self.assertRaises(Conflict, self._new_content, data2, path, False)

        # Prepare with admin mode
        self._prepare_content(path, None, True)
        self._prepare_content(path, content.content_id, True)
        self._prepare_content('test_worm_prepare', content.content_id, True)
        self._prepare_content(path, random_id(32), True)

        # Overwrite with admin mode
        content = self._new_content(data2, path, True)

        # Delete without admin mode
        try:
            content.delete()
        except ClientException as err:
            # text_type keeps this consistent with test_slave_and_admin.
            self.assertIn('worm', text_type(err))
        else:
            self.fail("Delete without admin mode: no exception")
        # The payload is uploaded through BytesIO, so join with a bytes
        # separator: ''.join() on byte chunks raises TypeError on Python 3
        # (b'' is the same as '' on Python 2).
        downloaded_data = b''.join(content.fetch())
        self.assertEqual(downloaded_data, data2)

        # Delete with admin mode
        content.delete(admin_mode=True)
Exemplo n.º 18
0
class CheckMeta2(CheckService):
    """Probe for meta2 services.

    For a given meta2 host, `_cycle` forces a probe reference onto that
    host and runs a prepare/create/locate/delete sequence on one content
    through the proxy's "content" endpoint.
    """

    account_name = "_meta2_probe"

    def __init__(self, namespace, **kwargs):
        """Build the proxy endpoint and the clients used by the probe.

        :param namespace: name of the namespace to probe.
        :param kwargs: forwarded to the parent constructor.
        """
        # "http:/" has a single slash on purpose: joining the parts with
        # "/" produces "http://<proxy>/v3.0/<namespace>/content".
        ep_parts = ["http:/",
                    load_namespace_conf(namespace).get('proxy'),
                    "v3.0",
                    namespace,
                    "content"]

        super(CheckMeta2, self).__init__(namespace, "meta2",
                                         endpoint="/".join(ep_parts), **kwargs)

        self.account = AccountClient({"namespace": self.ns})
        self.container = ContainerClient({"namespace": self.ns})
        self.directory = DirectoryClient({"namespace": self.ns})
        # Random 64-character hexadecimal reference used for the probe.
        self.reference = random_buffer('0123456789ABCDEF', 64)

    def _get_params(self):
        """Return request parameters targeting a new random content path."""
        path = random_buffer('0123456789ABCDEF', 64)
        return {'acct': self.account_name, 'ref': self.reference, 'path': path}

    def _compare_chunks(self, chunks1, chunks2):
        """Tell whether two chunk lists hold the same (url, hash) pairs.

        The order of the chunks is ignored, as are all their other fields.

        :param chunks1: iterable of dicts with "url" and "hash" keys.
        :param chunks2: iterable of dicts with "url" and "hash" keys.
        :returns: True when both lists match, False when they differ or
            when an argument is not an iterable of subscriptable items
            (e.g. a None body).
        """
        def light_chunks(chunks):
            # Tuples sort on both Python 2 and Python 3, unlike dicts
            # (which are unorderable on Python 3).
            return sorted((chunk["url"], chunk["hash"]) for chunk in chunks)

        try:
            # Plain equality replaces cmp(), which Python 3 removed.
            return light_chunks(chunks1) == light_chunks(chunks2)
        except TypeError:
            return False

    def _cycle(self, meta2_host):
        """Run one probe cycle against `meta2_host`.

        :param meta2_host: host forced as the meta2 service of the probe
            reference.
        :returns: True when every request got its expected status and the
            located chunks match the prepared ones.
        """
        # Re-link the probe reference to the meta2 host under test.
        self.directory.unlink(
            account=self.account_name, reference=self.reference,
            service_type=self.service_type)
        service = {"host": meta2_host, "type": self.service_type, "args": "",
                   "seq": 1}
        self.directory.force(
            account=self.account_name, reference=self.reference,
            service_type=self.service_type, services=service)

        params = self._get_params()
        global_success = True

        # The content must not exist yet.
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=404)
        global_success &= success
        headers = {'X-oio-action-mode': 'autocreate'}
        _, body, success = self._request(
            "POST", "/prepare", params=params, headers=headers,
            json={"size": "1024"}, expected_status=200)
        global_success &= success
        chunks = body
        # Preparing must not make the content visible.
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=404)
        global_success &= success
        headers = {"x-oio-content-meta-length": "1024"}
        _, _, success = self._request(
            "POST", "/create", params=params, headers=headers, json=chunks,
            expected_status=204)
        global_success &= success
        # Once created, the content must be located with the same chunks.
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=200)
        global_success &= success
        success = self._compare_chunks(chunks, body)
        global_success &= success
        _, _, success = self._request(
            "POST", "/delete", params=params, expected_status=204)
        global_success &= success
        # After deletion the content must be gone again.
        _, body, success = self._request(
            "GET", "/locate", params=params, expected_status=404)
        global_success &= success

        return global_success

    def run(self):
        """Create the probe container, run the parent check, then delete
        the container and the probe account.

        Any exception is printed, not propagated.
        """
        # NOTE(review): when an exception is raised before the cleanup
        # calls, the container and account are left in place.
        try:
            self.container.container_create(account=self.account_name,
                                            reference=self.reference)
            super(CheckMeta2, self).run()
            self.container.container_delete(account=self.account_name,
                                            reference=self.reference)
            sleep(1)
            self.account.account_delete(self.account_name)
        except Exception as exc:
            print("Exception - " + str(exc))
class TestBlobAuditorFunctional(BaseTestCase):
    """Functional checks of BlobAuditorWorker.chunk_audit on one chunk.

    setUp stores a single chunk through the blob client. Each test then
    alters the chunk file, its extended attributes or the content
    registered in the container, and runs the audit on the chunk file.
    """

    def setUp(self):
        """Create a container and upload one 1280-character chunk."""
        super(TestBlobAuditorFunctional, self).setUp()
        self.namespace = self.conf['namespace']
        self.account = self.conf['account']

        self.test_dir = self.conf['sds_path']

        # Only the address of the rawx service is used here.
        _rawx_num, _rawx_path, rawx_addr = self.get_service_url('rawx')
        self.rawx = 'http://' + rawx_addr

        self.h = hashlib.new('md5')

        client_conf = {"namespace": self.namespace}
        self.auditor = BlobAuditorWorker(client_conf, get_logger(None), None)
        self.container_c = ContainerClient(client_conf)
        self.blob_c = BlobClient()

        self.ref = random_str(8)

        self.container_c.container_create(self.account, self.ref)

        self.url_rand = random_id(64)

        self.data = random_str(1280)
        self.h.update(self.data)
        self.hash_rand = self.h.hexdigest().lower()

        self.content = TestContent(
            random_str(6), len(self.data), self.url_rand, 1)

        container_id = cid_from_name(self.account, self.ref).upper()
        self.content.id_container = container_id
        self.chunk = TestChunk(self.content.size, self.url_rand, 0,
                               self.hash_rand)

        chunk_id = self.chunk.id_chunk
        self.chunk_url = "%s/%s" % (self.rawx, chunk_id)
        # Chunk description later sent to the container (see init_content).
        self.chunk_proxy = {
            "hash": self.chunk.md5,
            "pos": "0",
            "size": self.chunk.size,
            "url":  self.chunk_url,
        }

        xattr_meta = {
            'content_path': self.content.path,
            'container_id': self.content.id_container,
            'content_chunkmethod': 'plain/nb_copy=3',
            'content_policy': 'TESTPOLICY',
            'content_id': '0000',
            'content_version': 1,
            'chunk_id': self.chunk.id_chunk,
            'chunk_pos': self.chunk.pos,
        }
        self.blob_c.chunk_put(self.chunk_url, xattr_meta, self.data)

        # NOTE(review): the path assumes the chunk landed on the service
        # whose directory is "<namespace>-rawx-1" -- confirm this matches
        # the service returned by get_service_url().
        rawx_dir = self.test_dir + '/data/' + self.namespace + '-rawx-1/'
        self.chunk_path = rawx_dir + chunk_id[0:3] + "/" + chunk_id
        self.bad_container_id = '0'*64

    def tearDown(self):
        """Best-effort removal of the content, the container and the chunk
        file; each cleanup step ignores its own failure."""
        super(TestBlobAuditorFunctional, self).tearDown()

        cleanups = (
            lambda: self.container_c.content_delete(
                self.account, self.ref, self.content.path),
            lambda: self.container_c.container_destroy(
                self.account, self.ref),
            lambda: os.remove(self.chunk_path),
        )
        for cleanup in cleanups:
            try:
                cleanup()
            except Exception:
                pass

    def init_content(self):
        """Register the content, with its single chunk, in the container."""
        self.container_c.content_create(
            self.account, self.ref, self.content.path, self.chunk.size,
            self.hash_rand, data=[self.chunk_proxy])

    def _assert_audit_raises(self, error_class):
        """Check that auditing the chunk file raises `error_class`."""
        self.assertRaises(error_class, self.auditor.chunk_audit,
                          self.chunk_path)

    def _set_chunk_xattr(self, key_name, value):
        """Set the xattr registered as `key_name` in chunk_xattr_keys."""
        xattr.setxattr(
            self.chunk_path, 'user.' + chunk_xattr_keys[key_name], value)

    def _overwrite_chunk_file(self, length):
        """Replace the chunk file with `length` random characters."""
        with open(self.chunk_path, "w") as chunk_file:
            chunk_file.write(random_str(length))

    def _declare_chunk_size(self, size):
        """Set `size` on the chunk and on its container description."""
        self.chunk.size = size
        self.chunk_proxy['size'] = self.chunk.size

    def test_chunk_audit(self):
        # Untouched chunk: the audit must not raise.
        self.init_content()
        self.auditor.chunk_audit(self.chunk_path)

    def test_content_deleted(self):
        # The content is never registered in the container.
        self._assert_audit_raises(exc.OrphanChunk)

    def test_container_deleted(self):
        self.container_c.container_destroy(self.account, self.ref)

        self._assert_audit_raises(exc.OrphanChunk)

    def test_chunk_corrupted(self):
        # Same length as the original data, different random payload.
        self.init_content()
        self._overwrite_chunk_file(1280)

        self._assert_audit_raises(exc.CorruptedChunk)

    def test_chunk_bad_size(self):
        # Shorter payload than the 1280 characters stored in setUp.
        self.init_content()
        self._overwrite_chunk_file(320)

        self._assert_audit_raises(exc.FaultyChunk)

    def test_xattr_bad_chunk_size(self):
        self.init_content()
        self._set_chunk_xattr('chunk_size', '-1')

        self._assert_audit_raises(exc.FaultyChunk)

    def test_xattr_bad_chunk_hash(self):
        self.init_content()
        self._set_chunk_xattr('chunk_hash', 'WRONG_HASH')
        self._assert_audit_raises(exc.CorruptedChunk)

    def test_xattr_bad_content_path(self):
        self.init_content()
        self._set_chunk_xattr('content_path', 'WRONG_PATH')

        self._assert_audit_raises(exc.OrphanChunk)

    def test_xattr_bad_chunk_id(self):
        self.init_content()
        self._set_chunk_xattr('chunk_id', 'WRONG_ID')

        self._assert_audit_raises(exc.OrphanChunk)

    def test_xattr_bad_content_container(self):
        self.init_content()
        self._set_chunk_xattr('container_id', self.bad_container_id)
        self._assert_audit_raises(exc.OrphanChunk)

    def test_xattr_bad_chunk_position(self):
        self.init_content()
        # The position is written under a literal key as well as under the
        # key registered in chunk_xattr_keys.
        xattr.setxattr(self.chunk_path, 'user.grid.chunk.position', '42')

        self._set_chunk_xattr('chunk_pos', '42')
        self._assert_audit_raises(exc.FaultyChunk)

    def test_chunk_bad_hash(self):
        # Feeding the data to the hash object a second time produces a
        # digest that differs from the one computed in setUp.
        self.h.update(self.data)
        self.hash_rand = self.h.hexdigest().lower()
        self.chunk.md5 = self.hash_rand
        self.chunk_proxy['hash'] = self.chunk.md5
        self.init_content()

        self._assert_audit_raises(exc.FaultyChunk)

    def test_chunk_bad_length(self):
        # NOTE(review): same steps as test_chunk_bad_chunk_size.
        self._declare_chunk_size(320)
        self.init_content()

        self._assert_audit_raises(exc.FaultyChunk)

    def test_chunk_bad_chunk_size(self):
        self._declare_chunk_size(320)
        self.init_content()

        self._assert_audit_raises(exc.FaultyChunk)

    def test_chunk_bad_url(self):
        self.chunk_proxy['url'] = '%s/WRONG_ID' % self.rawx
        self.init_content()

        self._assert_audit_raises(exc.OrphanChunk)
Exemplo n.º 20
0
class TestContentFactory(BaseTestCase):
    def setUp(self):
        """Create the factory, the clients and a uniquely named container."""
        super(TestContentFactory, self).setUp()

        # Storage policy names used throughout the tests.
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"
        self.stgpol_ec = "EC"

        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        # The timestamp keeps the container name unique between runs.
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(
            account=self.account, reference=self.container_name)
        container_id = cid_from_name(self.account, self.container_name)
        self.container_id = container_id.upper()

    def tearDown(self):
        # No cleanup of its own: only the parent tearDown runs.
        super(TestContentFactory, self).tearDown()

    def test_get_ec(self):
        # With content_locate mocked to describe an erasure-coded content,
        # the factory must return an ECContent exposing the same metadata
        # and chunks.
        expected_id = "3FA2C4A1ED2605005335A276890EC458"
        expected_version = "1450176946676289"
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": expected_id,
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_ec,
            "version": expected_version
        }
        chunks = [
            {"url": "http://127.0.0.1:6012/A0A0", "pos": "0.0",
             "size": 512, "hash": "E7D4E4AD460971CA2E3141F2102308D4"},
            {"url": "http://127.0.0.1:6010/A01", "pos": "0.1",
             "size": 146, "hash": "760AB5DA7C51A3654F1CA622687CD6C3"},
            {"url": "http://127.0.0.1:6011/A00", "pos": "0.2",
             "size": 512, "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"},
            {"url": "http://127.0.0.1:6013/A0A1", "pos": "0.3",
             "size": 512, "hash": "DA9D7F72AEEA5791565724424CE45C16"},
        ]
        locate_mock = Mock(return_value=(meta, chunks))
        self.content_factory.container_client.content_locate = locate_mock

        c = self.content_factory.get("xxx_container_id", "xxx_content_id")

        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, expected_id)
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, expected_version)
        # TODO test storage method
        self.assertEqual(len(c.chunks), 4)
        # Chunks are expected in the same order as the mocked reply.
        for index, expected_chunk in enumerate(chunks):
            self.assertEqual(c.chunks[index].raw(), expected_chunk)

    def test_get_plain(self):
        # With content_locate mocked to describe a two-copy plain content,
        # the factory must return a PlainContent exposing the same metadata
        # and chunks.
        expected_id = "3FA2C4A1ED2605005335A276890EC458"
        expected_version = "1450176946676289"
        content_hash = "E952A419957A6E405BFC53EC65483F73"
        meta = {
            "chunk_method": "plain/nb_copy=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": content_hash,
            "hash_method": "md5",
            "id": expected_id,
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_twocopies,
            "version": expected_version
        }
        chunks = [
            {"url": "http://127.0.0.1:6010/A0", "pos": "0",
             "size": 658, "hash": content_hash},
            {"url": "http://127.0.0.1:6011/A1", "pos": "0",
             "size": 658, "hash": content_hash},
        ]
        locate_mock = Mock(return_value=(meta, chunks))
        self.content_factory.container_client.content_locate = locate_mock

        c = self.content_factory.get("xxx_container_id", "xxx_content_id")

        self.assertEqual(type(c), PlainContent)
        self.assertEqual(c.content_id, expected_id)
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, expected_version)
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 2)
        # Chunks are expected in the same order as the mocked reply.
        for index, expected_chunk in enumerate(chunks):
            self.assertEqual(c.chunks[index].raw(), expected_chunk)

    def test_get_unknown_content(self):
        # "1234" is not the ID of any content created by this test.
        unknown_content_id = "1234"
        factory_get = self.content_factory.get
        self.assertRaises(ContentNotFound, factory_get,
                          self.container_id, unknown_content_id)

    def test_new_ec(self):
        # With content_prepare mocked to describe an erasure-coded content,
        # the factory must return an ECContent whose chunks come back in
        # the reverse order of the mocked reply.
        expected_id = "F4B1C8DD132705007DE8B43D0709DAA2"
        expected_version = "1450341162332663"
        empty_hash = "00000000000000000000000000000000"
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash_method": "md5",
            "id": expected_id,
            "length": "1000",
            "mime_type": "application/octet-stream",
            "name": "titi",
            "policy": self.stgpol_ec,
            "version": expected_version
        }
        # The reply lists the chunks by decreasing position.
        chunks = [
            {"url": "http://127.0.0.1:6010/0_p1", "pos": "0.3",
             "size": 1048576, "hash": empty_hash},
            {"url": "http://127.0.0.1:6011/0_p0", "pos": "0.2",
             "size": 1048576, "hash": empty_hash},
            {"url": "http://127.0.0.1:6016/0_1", "pos": "0.1",
             "size": 1048576, "hash": empty_hash},
            {"url": "http://127.0.0.1:6017/0_0", "pos": "0.0",
             "size": 1048576, "hash": empty_hash},
        ]
        prepare_mock = Mock(return_value=(meta, chunks))
        self.content_factory.container_client.content_prepare = prepare_mock

        c = self.content_factory.new("xxx_container_id", "titi", 1000,
                                     self.stgpol_ec)

        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, expected_id)
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, expected_version)
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 4)
        for index, expected_chunk in enumerate(reversed(chunks)):
            self.assertEqual(c.chunks[index].raw(), expected_chunk)

    def _new_content(self, stgpol, data, path="titi"):
        """Create a content holding `data` and return it re-read through
        the content factory.

        :param stgpol: storage policy name.
        :param data: payload (bytes) to store.
        :param path: content path, "titi" by default.
        """
        factory = self.content_factory
        created = factory.new(self.container_id, path, len(data), stgpol)
        created.create(BytesIO(data))
        return factory.get(self.container_id, created.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        """Create a content, change its storage policy and verify the
        result.

        Checks that the old content can no longer be shown, that the new
        content has the class expected for `new_policy` and that its data
        is unchanged.

        :param data_size: number of random bytes to store.
        :param old_policy: storage policy used to create the content.
        :param new_policy: storage policy to switch to.
        """
        data = random_data(data_size)
        # Content class expected for each storage policy.
        obj_type = {
            self.stgpol: PlainContent,
            self.stgpol_twocopies: PlainContent,
            self.stgpol_threecopies: PlainContent,
            self.stgpol_ec: ECContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        self.assertRaises(NotFound,
                          self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        # The payload is uploaded through BytesIO, so join with a bytes
        # separator: "".join() on byte chunks raises TypeError on Python 3
        # (b"" is the same as "" on Python 2).
        downloaded_data = b"".join(new_content.fetch())

        self.assertEqual(downloaded_data, data)

    @ec
    def test_change_content_0_byte_policy_single_to_ec(self):
        # Empty object, moved from self.stgpol to self.stgpol_ec
        self._test_change_policy(
            data_size=0, old_policy=self.stgpol, new_policy=self.stgpol_ec)

    @ec
    def test_change_content_0_byte_policy_ec_to_twocopies(self):
        # Empty object, moved from self.stgpol_ec to self.stgpol_twocopies
        self._test_change_policy(
            data_size=0,
            old_policy=self.stgpol_ec,
            new_policy=self.stgpol_twocopies)

    @ec
    def test_change_content_1_byte_policy_single_to_ec(self):
        # One-byte object, moved from self.stgpol to self.stgpol_ec
        self._test_change_policy(
            data_size=1, old_policy=self.stgpol, new_policy=self.stgpol_ec)

    @ec
    def test_change_content_chunksize_bytes_policy_twocopies_to_ec(self):
        # Object of exactly self.chunk_size bytes, twocopies -> ec
        self._test_change_policy(
            data_size=self.chunk_size,
            old_policy=self.stgpol_twocopies,
            new_policy=self.stgpol_ec)

    @ec
    def test_change_content_2xchunksize_bytes_policy_threecopies_to_ec(self):
        # Object of twice self.chunk_size bytes, threecopies -> ec
        self._test_change_policy(
            data_size=self.chunk_size * 2,
            old_policy=self.stgpol_threecopies,
            new_policy=self.stgpol_ec)

    @ec
    def test_change_content_1_byte_policy_ec_to_threecopies(self):
        # One-byte object, moved from self.stgpol_ec to self.stgpol_threecopies
        self._test_change_policy(
            data_size=1,
            old_policy=self.stgpol_ec,
            new_policy=self.stgpol_threecopies)

    @ec
    def test_change_content_chunksize_bytes_policy_ec_to_twocopies(self):
        # Object of exactly self.chunk_size bytes, ec -> twocopies
        self._test_change_policy(
            data_size=self.chunk_size,
            old_policy=self.stgpol_ec,
            new_policy=self.stgpol_twocopies)

    @ec
    def test_change_content_2xchunksize_bytes_policy_ec_to_single(self):
        # Object of twice self.chunk_size bytes, ec -> self.stgpol
        self._test_change_policy(
            data_size=self.chunk_size * 2,
            old_policy=self.stgpol_ec,
            new_policy=self.stgpol)

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        # Empty object, twocopies -> threecopies
        self._test_change_policy(
            data_size=0,
            old_policy=self.stgpol_twocopies,
            new_policy=self.stgpol_threecopies)

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        # Object of exactly self.chunk_size bytes, self.stgpol -> twocopies
        self._test_change_policy(
            data_size=self.chunk_size,
            old_policy=self.stgpol,
            new_policy=self.stgpol_twocopies)

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        # Object of twice self.chunk_size bytes, threecopies -> self.stgpol
        self._test_change_policy(
            data_size=self.chunk_size * 2,
            old_policy=self.stgpol_threecopies,
            new_policy=self.stgpol)

    def test_change_content_with_same_policy(self):
        # Requesting the policy the object already has must keep its id
        policy = self.stgpol_twocopies
        original = self._new_content(policy, random_data(10))
        result = self.content_factory.change_policy(
            original.container_id, original.content_id, policy)
        self.assertEqual(original.content_id, result.content_id)

    def test_change_policy_unknown_content(self):
        # "1234" is not the id of any content created by this test
        change_policy = self.content_factory.change_policy
        self.assertRaises(ContentNotFound, change_policy,
                          self.container_id, "1234", self.stgpol)

    def test_change_policy_unknown_storage_policy(self):
        # An existing content with a policy name that is expected to be
        # rejected with ClientException
        existing = self._new_content(self.stgpol_twocopies, random_data(10))
        change_policy = self.content_factory.change_policy
        self.assertRaises(ClientException, change_policy,
                          self.container_id, existing.content_id, "UnKnOwN")

    def _test_move_chunk(self, policy):
        """Move one chunk of a fresh object and check the relocated copy.

        An object of `self.chunk_size` random bytes is created with `policy`,
        the first chunk found at metachunk position 0 is moved, and the
        content is reloaded. The test then checks that no chunk at that
        position keeps the moved chunk's id, that no two of them share a
        host, and that the new chunk has the same data hash and the same
        metadata (ignoring "chunk_id") as the original one.

        :param policy: storage policy used to create the object
        """
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)

        # Look the chunk up once: both its id and its url are needed
        moved = content.chunks.filter(metapos=0)[0]
        chunk_id = moved.id
        chunk_url = moved.url
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)

        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            # Each host may appear only once at this position
            self.assertThat(hosts, Not(Contains(c.host)))
            # assertNotEqual replaces the deprecated assertNotEquals alias
            self.assertNotEqual(c.id, chunk_id)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)

        self.assertEqual(new_chunk_hash, chunk_hash)

        # The chunk id necessarily differs; everything else must match
        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        # Chunk-move scenario with self.stgpol
        self._test_move_chunk(policy=self.stgpol)

    def test_twocopies_move_chunk(self):
        # Chunk-move scenario with self.stgpol_twocopies
        self._test_move_chunk(policy=self.stgpol_twocopies)

    @ec
    def test_ec_move_chunk(self):
        # Chunk-move scenario with self.stgpol_ec
        self._test_move_chunk(policy=self.stgpol_ec)

    def test_move_chunk_not_in_content(self):
        # "1234" is not the id of any chunk of the freshly created content
        payload = random_data(self.chunk_size)
        target = self._new_content(self.stgpol_twocopies, payload)
        with ExpectedException(OrphanChunk):
            target.move_chunk("1234")

    def test_strange_paths(self):
        # Objects whose paths contain unusual characters must keep the exact
        # path they were given and must show up in the container listing.
        strange_paths = [
            "Annual report.txt",
            "foo+bar=foobar.txt",
            "100%_bug_free.c",
            "forward/slash/allowed",
            "I\\put\\backslashes\\and$dollar$signs$in$file$names",
            "Je suis tombé sur la tête, mais ça va bien.",
            "%s%f%u%d%%",
            "carriage\rreturn",
            "line\nfeed",
            "ta\tbu\tla\ttion",
            "controlchars",
        ]
        answers = dict()
        try:
            # Creation happens inside the try block so that objects already
            # created are still cleaned up when a later creation fails.
            for cname in strange_paths:
                answers[cname] = self._new_content(
                    self.stgpol, "nobody cares", cname)
            _, listing = self.container_client.content_list(
                self.account, self.container_name)
            obj_set = {
                k["name"].encode("utf8", "ignore")
                for k in listing["objects"]
            }
            # Ensure the saved path is the one we gave the object
            for cname in answers:
                self.assertEqual(cname, answers[cname].path)
            # Ensure all objects appear in listing
            for cname in strange_paths:
                self.assertIn(cname, obj_set)
        finally:
            # Cleanup: delete every created object, not only the last one
            for created in answers.values():
                try:
                    created.delete()
                except Exception:
                    # Best effort: a failed deletion must not hide the
                    # outcome of the assertions above
                    pass
Exemplo n.º 21
0
class TestDupContent(BaseTestCase):
    """Upload, rebuild and download scenarios for `DupContent` objects.

    Each test works in its own freshly created container, and is skipped
    when fewer than three rawx entries are configured.
    """

    def setUp(self):
        super(TestDupContent, self).setUp()

        if len(self.conf['rawx']) < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        # The timestamp makes the container name differ between tests
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        """Upload `data_size` random bytes with `stgpol` and inspect them.

        Checks the content metadata returned by `content_show`, the number
        of chunks per position, and for every chunk its data hash and the
        metadata returned by `chunk_get`.

        :param stgpol: one of "THREECOPIES", "TWOCOPIES" or "SINGLE"
        :param data_size: number of random bytes to upload
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        copies_by_policy = {"THREECOPIES": 3, "TWOCOPIES": 2, "SINGLE": 1}
        # An unsupported policy fails here with a readable message instead
        # of an unbound-variable error further down
        self.assertIn(stgpol, copies_by_policy)
        nb_copy = copies_by_policy[stgpol]

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = data_begin + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(chunk_meta['content_size'], str(len(data)))
                self.assertEqual(chunk_meta['content_path'], "titi")
                self.assertEqual(chunk_meta['content_cid'],
                                 self.container_id)
                # Compare with the uploaded content: the original compared
                # the value with itself, which could never fail
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], str(pos))
                self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        self._test_upload("TWOCOPIES", 0)

    def test_twocopies_upload_1_byte(self):
        self._test_upload("TWOCOPIES", 1)

    def test_twocopies_upload_chunksize_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        self._test_upload("TWOCOPIES", self.chunk_size + 1)

    def test_single_upload_0_byte(self):
        self._test_upload("SINGLE", 0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        self._test_upload("SINGLE", self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos=1):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        # Chunks of the other positions must not be left behind
        for chunk in content.chunks.exclude(pos=1):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        """Upload `data` with `stgpol`, then delete the listed chunks.

        :param stgpol: storage policy of the new content
        :param data: payload to upload
        :param broken_pos_list: iterable of `(pos, idx)` pairs, each naming
            the `idx`-th chunk at position `pos`; defaults to no chunk
        :returns: `(content, broken_chunks_info)` where `content` is the
            content reloaded from the factory and `broken_chunks_info`
            maps `pos` then `idx` to a dict with the keys "url", "id",
            "hash", "dl_meta" and "dl_hash" recorded before deletion
        """
        if broken_pos_list is None:
            # None instead of a shared mutable default list
            broken_pos_list = []

        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), stgpol)
        self.assertEqual(type(old_content), DupContent)

        old_content.upload(StringIO.StringIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            broken_chunks_info.setdefault(pos, {})[idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.hash,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(
            self.container_id, old_content.content_id), broken_chunks_info)

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        """Break some chunks, rebuild one of them and check the result.

        :param stgpol: storage policy of the content
        :param data_size: number of random bytes to upload
        :param broken_pos_list: `(pos, idx)` pairs of the chunks to delete
        :param full_rebuild_pos: `(pos, idx)` of the chunk to rebuild; it
            must be one of `broken_pos_list`
        """
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(stgpol,
                                                        data, broken_pos_list)

        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"])

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)
        self.assertEqual(type(rebuilt_content), DupContent)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.hash, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            # The chunk id differs by construction; the rest must match
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild("TWOCOPIES", 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild("TWOCOPIES", 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        if len(self.conf['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild("THREECOPIES", 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        # Both copies are gone: there is nothing left to rebuild from
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild("TWOCOPIES", 0, [(0, 0), (0, 1)], (0, 0))

    def _test_download(self, stgpol, data_size, broken_pos_list):
        """Download a content with broken chunks and compare the bytes.

        :param stgpol: storage policy of the content
        :param data_size: number of random bytes to upload
        :param broken_pos_list: `(pos, idx)` pairs of the chunks to delete
            before downloading
        """
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt: look at the chunk selected
            # by idx, the same index that was used to break it
            c = content.chunks.filter(pos=pos)[idx]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 0, [])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 0, [(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        self._test_download("TWOCOPIES", 1, [])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        self._test_download("TWOCOPIES", 1, [(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        self._test_download("TWOCOPIES", self.chunk_size, [])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_download("TWOCOPIES", self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        # The next() builtin works on both Python 2 and 3 iterators
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        self._test_download("SINGLE", 1, [])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        # NOTE(review): the name says "chunksize plus 1" but the size used
        # is twice the chunk size; confirm which one is intended
        self._test_download("SINGLE", self.chunk_size * 2, [])
Exemplo n.º 22
0
class TestMeta2EventsEmission(BaseTestCase):
    """Check the events found in beanstalkd after container operations.

    Both tests stop the service named by `event_agent_name` before acting,
    then read the events straight from the beanstalkd tube. `tearDown`
    starts that service again.
    """

    def setUp(self):
        super(TestMeta2EventsEmission, self).setUp()
        name = 'TestEventsEmission%f' % time.time()
        self.container_name = name
        self.container_id = cid_from_name(self.account, name)
        self.container_client = ContainerClient(self.conf)
        self.storage_api = ObjectStorageApi(self.conf['namespace'])
        self.event_agent_name = 'event-agent-1'
        self.bt_connections = []
        self._bt_make_connections(self.conf['services']['beanstalkd'])

    def tearDown(self):
        super(TestMeta2EventsEmission, self).tearDown()
        # The tests stop the event agent: bring it back
        self._service(self.event_agent_name, 'start', wait=3)

    def _bt_make_connections(self, bt_list):
        """Open one beanstalk connection per entry of `bt_list`.

        :param bt_list: iterable of dicts providing an 'addr' key
        """
        for entry in bt_list:
            url = 'beanstalk://{0}'.format(entry['addr'])
            self.bt_connections.append(Beanstalk.from_url(url))

    def _bt_watch(self, tube):
        """Make every open connection watch `tube`."""
        for conn in self.bt_connections:
            conn.watch(tube)

    def _bt_pull_events_by_type(self, event_type):
        """Drain every connection and return the events of `event_type`.

        Each reserved job is decoded from JSON and then deleted, whatever
        its type. Draining a connection ends on `ResponseError` or when a
        reserved job id is None.

        :param event_type: value compared with the "event" key of each job
        :returns: list of decoded events whose "event" equals `event_type`
        """
        events = []

        for conn in self.bt_connections:
            while True:
                try:
                    job_id, raw = conn.reserve(timeout=4)
                    events.append(json.loads(raw))
                    conn.delete(job_id)
                except ResponseError:
                    break
                if job_id is None:
                    break
        return [evt for evt in events if evt.get("event") == event_type]

    def test_container_create(self):
        if len(self.bt_connections) > 1:
            self.skipTest("Unsupported on multi-beanstalk setups.")

        # With the event agent stopped, events stay in the tube
        self._service(self.event_agent_name, 'stop', wait=3)
        self._bt_watch(DEFAULT_TUBE)

        # Trigger the event
        self.container_client.container_create(self.account,
                                               self.container_name)

        # Keep only the events of the wanted type
        wanted_events = self._bt_pull_events_by_type(
            EventTypes.ACCOUNT_SERVICES)

        self.assertEqual(len(wanted_events), 1)
        event = wanted_events[0]

        # The event must identify the container
        received_url = event.get("url")
        expected_url = {
            'ns': self.ns,
            'account': self.account,
            'user': self.container_name,
            'id': self.container_id,
        }
        self.assertEqual(received_url, expected_url)

        # The meta2 hosts of the event must be those listed by the directory
        services = self.storage_api.directory.list(
            self.account, self.container_name, cid=self.container_id)['srv']
        expected_peers_list = sorted(
            srv.get('host') for srv in services
            if srv.get('type') == 'meta2')

        received_peers_list = sorted(
            srv.get('host') for srv in event.get('data')
            if srv.get('type') == 'meta2')

        self.assertListEqual(received_peers_list, expected_peers_list)

    def test_container_delete(self):
        if len(self.bt_connections) > 1:
            self.skipTest("Unsupported on multi-beanstalk setups.")
        self._service(self.event_agent_name, 'stop', wait=3)
        self._bt_watch(DEFAULT_TUBE)

        # The container has to exist before it can be deleted
        self.container_client.container_create(self.account,
                                               self.container_name)

        # Record the meta2 hosts while the container still exists
        services = self.storage_api.directory.list(
            self.account, self.container_name, cid=self.container_id)['srv']
        expected_peers_list = sorted(
            srv.get('host') for srv in services
            if srv.get('type') == 'meta2')

        # Trigger the event
        self.container_client.container_delete(self.account,
                                               self.container_name)

        # Keep only the events of the wanted type
        wanted_events = self._bt_pull_events_by_type(
            EventTypes.CONTAINER_DELETED)

        # One event is expected per meta2 host
        self.assertEqual(len(wanted_events), len(expected_peers_list))

        # Every event must identify the container
        for event in wanted_events:
            received_url = event.get("url")
            expected_url = {
                'ns': self.ns,
                'account': self.account,
                'user': self.container_name,
                'id': self.container_id,
            }
            self.assertEqual(received_url, expected_url)

        received_peers = sorted(
            str(event.get("data").get("peers")[0])
            for event in wanted_events)

        self.assertListEqual(received_peers, expected_peers_list)
Exemplo n.º 23
0
class TestRainContent(BaseTestCase):
    """Upload, rebuild and download scenarios for `RainContent` objects.

    Each test works in its own freshly created container, and is skipped
    when fewer than twelve rawx entries are configured.
    """

    def setUp(self):
        super(TestRainContent, self).setUp()

        if len(self.conf['rawx']) < 12:
            # The message now agrees with the check: 12 rawx are enough
            self.skipTest("Not enough rawx. "
                          "Rain tests need at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        # The timestamp makes the container name differ between tests
        self.container_name = "TestRainContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestRainContent, self).tearDown()

    def _test_upload(self, data_size):
        """Upload `data_size` random bytes with "RAIN" and inspect them.

        Checks the content metadata returned by `content_show`, the number
        of data and parity chunks per metachunk, the metadata of every
        chunk, and that the data chunks of a metachunk hash to the same
        value as the matching slice of the uploaded data.

        :param data_size: number of random bytes to upload
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        # Bounds used below for the data (k) and parity (m) chunk counts;
        # presumably the parameters of the "RAIN" policy of the test
        # namespace -- TODO confirm
        k = 6
        m = 2
        self.assertEqual(type(content), RainContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], "RAIN")
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        nb_chunks_min = metachunk_nb * (1 + m)
        nb_chunks_max = metachunk_nb * (k + m)
        self.assertGreaterEqual(len(chunks), nb_chunks_min)
        self.assertLessEqual(len(chunks), nb_chunks_max)

        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            data_chunks_at_pos = chunks_at_pos.filter(is_parity=False)
            parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True)

            # Dedicated assertions report both values when they fail
            self.assertGreaterEqual(len(data_chunks_at_pos), 1)
            self.assertLessEqual(len(data_chunks_at_pos), k)
            self.assertEqual(len(parity_chunks_at_pos), m)

            for chunk in chunks_at_pos:
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk.hash)
                self.assertEqual(chunk_meta['content_size'], str(len(data)))
                self.assertEqual(chunk_meta['content_path'], "titi")
                self.assertEqual(chunk_meta['content_cid'],
                                 self.container_id)
                # Compare with the uploaded content: the original compared
                # the value with itself, which could never fail
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], chunk.pos)
                self.assertEqual(chunk_meta['chunk_hash'], chunk.hash)

            data_begin = metapos * self.chunk_size
            data_end = data_begin + self.chunk_size
            target_metachunk_hash = md5_data(data[data_begin:data_end])

            # Hash the data chunks one after the other, in listing order
            metachunk_hash = hashlib.md5()
            for chunk in data_chunks_at_pos:
                _, stream = self.blob_client.chunk_get(chunk.url)
                for d in stream:
                    metachunk_hash.update(d)
            self.assertEqual(metachunk_hash.hexdigest().upper(),
                             target_metachunk_hash)

    def test_upload_0_byte(self):
        self._test_upload(0)

    def test_upload_1_byte(self):
        self._test_upload(1)

    def test_upload_chunksize_bytes(self):
        self._test_upload(self.chunk_size)

    def test_upload_chunksize_plus_1_bytes(self):
        self._test_upload(self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        self.assertEqual(type(content), RainContent)

        # set bad url for position 1
        for chunk in content.chunks.filter(pos="1.p0"):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        # Chunks of the other positions must not be left behind
        for chunk in content.chunks.exclude(pos="1.p0"):
            self.assertRaises(NotFound,
                              self.blob_client.chunk_head, chunk.url)

    def _test_rebuild(self, data_size, broken_pos_list):
        """Delete the chunks at the given positions, rebuild and compare.

        `rebuild_chunk` is called once, with the id of the chunk at the
        last position of `broken_pos_list`; every listed position is then
        expected to hold a chunk with the original hash and metadata at a
        different url.

        :param data_size: number of random bytes to upload
        :param broken_pos_list: non-empty list of chunk positions, such as
            "0.0" or "0.p0", whose chunks are deleted
        """
        data = os.urandom(data_size)
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        # get the new structure of the uploaded content
        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        old_info = {}
        for pos in broken_pos_list:
            c = uploaded_content.chunks.filter(pos=pos)[0]
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos] = {
                "url": c.url,
                "id": c.id,
                "hash": c.hash,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream),
            }
            chunk_id_to_rebuild = c.id
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # rebuild the broken chunks
        uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        self.assertEqual(type(rebuilt_content), RainContent)

        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.hash, old_info[pos]["hash"])
            self.assertThat(c.url, NotEquals(old_info[pos]["url"]))
            # The chunk id differs by construction; the rest must match
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild_pos_0_0(self):
        self._test_rebuild(0, ["0.0"])

    def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(0, ["0.0", "0.p0"])

    def test_content_1_byte_rebuild_pos_0_0(self):
        self._test_rebuild(1, ["0.0"])

    def test_content_1_byte_rebuild_pos_0_p0(self):
        self._test_rebuild(1, ["0.p0"])

    def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(1, ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self):
        self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"])

    def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self):
        # Three chunks are deleted while the tests use m = 2 parity chunks
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          self.conf["chunk_size"], ["0.0", "0.1", "0.2"])

    def _new_content(self, data, broken_pos_list=None):
        """Upload `data` with "RAIN", then delete the listed chunks.

        :param data: payload to upload
        :param broken_pos_list: chunk positions, such as "0.0" or "0.p0",
            whose first matching chunk is deleted; defaults to none
        :returns: the content reloaded from the factory
        """
        if broken_pos_list is None:
            # None instead of a shared mutable default list
            broken_pos_list = []

        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        for pos in broken_pos_list:
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        content = self._new_content(random_data(10))

        self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId")

    def test_rebuild_on_the_fly(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.p0"])

        stream = content.rebuild_metachunk("0", on_the_fly=True)

        dl_data = "".join(stream)

        self.assertEqual(dl_data, data)

        del_chunk_0_0 = content.chunks.filter(pos="0.0")[0]
        del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0]

        # The deleted chunks must still be missing afterwards
        self.assertRaises(NotFound,
                          self.blob_client.chunk_get, del_chunk_0_0.url)
        self.assertRaises(NotFound,
                          self.blob_client.chunk_get, del_chunk_0_p0.url)

    def _test_download(self, data_size, broken_pos_list):
        """Download a content with broken chunks and compare the bytes.

        :param data_size: number of random bytes to upload
        :param broken_pos_list: positions of the chunks to delete before
            downloading
        """
        data = random_data(data_size)
        content = self._new_content(data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos in broken_pos_list:
            # The download must not have put the deleted chunk back
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_download_content_0_byte_without_broken_chunks(self):
        self._test_download(0, [])

    def test_download_content_1_byte_without_broken_chunks(self):
        self._test_download(1, [])

    def test_download_content_chunksize_bytes_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"], [])

    def test_download_content_chunksize_plus_1_without_broken_chunks(self):
        self._test_download(self.conf["chunk_size"] + 1, [])

    def test_download_content_0_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(0, ["0.0", "0.p0"])

    def test_download_content_1_byte_with_broken_0_0_and_0_p0(self):
        self._test_download(1, ["0.0", "0.p0"])

    def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self):
        self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"])

    def test_download_content_chunksize_bytes_with_3_broken_chunks(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.1", "0.2"])
        gen = content.download()
        # The next() builtin works on both Python 2 and 3 iterators
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_download_interrupt_close(self):
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.p0"])

        download_iter = content.download()

        dl_data = ""
        for buf in download_iter:
            dl_data += buf
        self.assertEqual(len(dl_data), len(data))
        self.assertEqual(dl_data, data)
        download_iter.close()
Exemplo n.º 24
0
class TestContentFactory(BaseTestCase):
    """Tests of `ContentFactory`.

    Covers `_extract_datasec`, the construction of content objects from
    container metadata (`get` and `new`, with a mocked container client)
    and `change_policy` against a real container.
    """

    def setUp(self):
        """Create the factory, the container client and a fresh container."""
        super(TestContentFactory, self).setUp()
        conf = self.conf
        self.namespace = conf['namespace']
        self.chunk_size = conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        # Timestamp-based container name.
        self.container_name = "TestContentFactory%f" % time.time()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(
            acct=self.account, ref=self.container_name)
        container_id = cid_from_name(self.account, self.container_name)
        self.container_id = container_id.upper()

    def tearDown(self):
        """Only delegates to the base class."""
        super(TestContentFactory, self).tearDown()

    def test_extract_datasec(self):
        """Check `_extract_datasec` for RAIN, DUP and unknown policies."""
        data_security = {
            "DUPONETWO": "DUP:distance=1|nb_copy=2",
            "RAIN": "RAIN:k=6|m=2|algo=liber8tion"
        }
        storage_policy = {
            "RAIN": "NONE:RAIN:NONE",
            "SINGLE": "NONE:NONE:NONE",
            "TWOCOPIES": "NONE:DUPONETWO:NONE"
        }
        self.content_factory.ns_info = {
            "data_security": data_security,
            "storage_policy": storage_policy
        }

        # (policy name, expected type, expected arguments)
        expectations = [
            ("RAIN", "RAIN", {"k": "6", "m": "2", "algo": "liber8tion"}),
            ("SINGLE", "DUP", {"nb_copy": "1", "distance": "0"}),
            ("TWOCOPIES", "DUP", {"nb_copy": "2", "distance": "1"}),
        ]
        extract = self.content_factory._extract_datasec
        for policy, wanted_type, wanted_args in expectations:
            ds_type, ds_args = extract(policy)
            self.assertEqual(ds_type, wanted_type)
            self.assertEqual(ds_args, wanted_args)

        self.assertRaises(InconsistentContent, extract, "UnKnOwN")

    def test_get_rain(self):
        """`get` must build a `RainContent` from RAIN metadata."""
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "RAIN",
            "version": "1450176946676289"
        }
        chunks = [
            {"url": "http://127.0.0.1:6012/A0A0",
             "pos": "0.p0",
             "size": 512,
             "hash": "E7D4E4AD460971CA2E3141F2102308D4"},
            {"url": "http://127.0.0.1:6010/A01",
             "pos": "0.1",
             "size": 146,
             "hash": "760AB5DA7C51A3654F1CA622687CD6C3"},
            {"url": "http://127.0.0.1:6011/A00",
             "pos": "0.0",
             "size": 512,
             "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"},
            {"url": "http://127.0.0.1:6013/A0A1",
             "pos": "0.p1",
             "size": 512,
             "hash": "DA9D7F72AEEA5791565724424CE45C16"},
        ]
        factory = self.content_factory
        factory.container_client.content_show = Mock(
            return_value=(meta, chunks))

        content = factory.get("xxx_container_id", "xxx_content_id")

        self.assertEqual(type(content), RainContent)
        self.assertEqual(content.content_id,
                         "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(content.length, 658)
        self.assertEqual(content.path, "tox.ini")
        self.assertEqual(content.version, "1450176946676289")
        self.assertEqual(content.algo, "liber8tion")
        self.assertEqual(content.k, 6)
        self.assertEqual(content.m, 2)
        self.assertEqual(len(content.chunks), 4)
        # Expected order of positions: "0.0", "0.1", "0.p0", "0.p1".
        for rank, source in enumerate((2, 1, 0, 3)):
            self.assertEqual(content.chunks[rank].raw(), chunks[source])

    def test_get_dup(self):
        """`get` must build a `DupContent` from TWOCOPIES metadata."""
        content_hash = "E952A419957A6E405BFC53EC65483F73"
        meta = {
            "chunk-method": "plain/bytes",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": content_hash,
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "TWOCOPIES",
            "version": "1450176946676289"
        }
        chunks = [
            {"url": "http://127.0.0.1:6010/A0",
             "pos": "0",
             "size": 658,
             "hash": content_hash},
            {"url": "http://127.0.0.1:6011/A1",
             "pos": "0",
             "size": 658,
             "hash": content_hash},
        ]
        factory = self.content_factory
        factory.container_client.content_show = Mock(
            return_value=(meta, chunks))

        content = factory.get("xxx_container_id", "xxx_content_id")

        self.assertEqual(type(content), DupContent)
        self.assertEqual(content.content_id,
                         "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(content.length, 658)
        self.assertEqual(content.path, "tox.ini")
        self.assertEqual(content.version, "1450176946676289")
        self.assertEqual(content.nb_copy, 2)
        self.assertEqual(content.distance, 1)
        self.assertEqual(len(content.chunks), 2)
        for rank in (0, 1):
            self.assertEqual(content.chunks[rank].raw(), chunks[rank])

    def test_new_rain(self):
        """`new` must build a `RainContent` from prepared RAIN metadata."""
        zero_hash = "00000000000000000000000000000000"
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash-method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime-type": "application/octet-stream",
            "name": "titi",
            "policy": "RAIN",
            "version": "1450341162332663"
        }
        locations = (
            ("http://127.0.0.1:6010/0_p1", "0.p1"),
            ("http://127.0.0.1:6011/0_p0", "0.p0"),
            ("http://127.0.0.1:6016/0_1", "0.1"),
            ("http://127.0.0.1:6017/0_0", "0.0"),
        )
        chunks = [
            {"url": url, "pos": pos, "size": 1048576, "hash": zero_hash}
            for url, pos in locations
        ]
        factory = self.content_factory
        factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))

        content = factory.new("xxx_container_id", "titi", 1000, "RAIN")

        self.assertEqual(type(content), RainContent)
        self.assertEqual(content.content_id,
                         "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(content.length, 1000)
        self.assertEqual(content.path, "titi")
        self.assertEqual(content.version, "1450341162332663")
        self.assertEqual(content.algo, "liber8tion")
        self.assertEqual(content.k, 6)
        self.assertEqual(content.m, 2)
        self.assertEqual(len(content.chunks), 4)
        # Expected order of positions: "0.0", "0.1", "0.p0", "0.p1".
        for rank, source in enumerate((3, 2, 1, 0)):
            self.assertEqual(content.chunks[rank].raw(), chunks[source])

    def _new_content(self, stgpol, data):
        """Upload `data` as "titi" with policy `stgpol`, then reload it.

        Returns the content object obtained from `ContentFactory.get` after
        the upload.
        """
        created = self.content_factory.new(
            self.container_id, "titi", len(data), stgpol)
        created.upload(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        created.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        """Upload with `old_policy`, switch to `new_policy` and verify.

        The old content must not be found anymore, the new content must
        have the class matching `new_policy` and must download the original
        data. The test is skipped when RAIN is involved and fewer than
        8 rawx are configured.
        """
        involves_rain = "RAIN" in (old_policy, new_policy)
        if involves_rain and len(self.conf['rawx']) < 8:
            self.skipTest("RAIN: Need more than 8 rawx to run")

        payload = random_data(data_size)
        expected_class = {
            "SINGLE": DupContent,
            "TWOCOPIES": DupContent,
            "THREECOPIES": DupContent,
            "RAIN": RainContent
        }
        before = self._new_content(old_policy, payload)
        self.assertEqual(type(before), expected_class[old_policy])

        changed = self.content_factory.change_policy(
            before.container_id, before.content_id, new_policy)

        # The content with the old id must be gone.
        self.assertRaises(NotFound, self.container_client.content_show,
                          self.account,
                          cid=before.container_id,
                          content=before.content_id)

        after = self.content_factory.get(self.container_id,
                                         changed.content_id)
        self.assertEqual(type(after), expected_class[new_policy])

        received = "".join(after.download())
        self.assertEqual(received, payload)

    def test_change_content_0_byte_policy_single_to_rain(self):
        """Empty content, SINGLE to RAIN."""
        old_policy, new_policy = "SINGLE", "RAIN"
        self._test_change_policy(0, old_policy, new_policy)

    def test_change_content_0_byte_policy_rain_to_twocopies(self):
        """Empty content, RAIN to TWOCOPIES."""
        old_policy, new_policy = "RAIN", "TWOCOPIES"
        self._test_change_policy(0, old_policy, new_policy)

    def test_change_content_1_byte_policy_single_to_rain(self):
        """One-byte content, SINGLE to RAIN."""
        old_policy, new_policy = "SINGLE", "RAIN"
        self._test_change_policy(1, old_policy, new_policy)

    def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self):
        """`chunk_size` bytes, TWOCOPIES to RAIN."""
        size = self.chunk_size
        self._test_change_policy(size, "TWOCOPIES", "RAIN")

    def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self):
        """2 x `chunk_size` bytes, THREECOPIES to RAIN."""
        size = self.chunk_size * 2
        self._test_change_policy(size, "THREECOPIES", "RAIN")

    def test_change_content_1_byte_policy_rain_to_threecopies(self):
        """One-byte content, RAIN to THREECOPIES."""
        old_policy, new_policy = "RAIN", "THREECOPIES"
        self._test_change_policy(1, old_policy, new_policy)

    def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self):
        """`chunk_size` bytes, RAIN to TWOCOPIES."""
        size = self.chunk_size
        self._test_change_policy(size, "RAIN", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_rain_to_single(self):
        """2 x `chunk_size` bytes, RAIN to SINGLE."""
        size = self.chunk_size * 2
        self._test_change_policy(size, "RAIN", "SINGLE")

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        """Empty content, TWOCOPIES to THREECOPIES."""
        old_policy, new_policy = "TWOCOPIES", "THREECOPIES"
        self._test_change_policy(0, old_policy, new_policy)

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        """`chunk_size` bytes, SINGLE to TWOCOPIES."""
        size = self.chunk_size
        self._test_change_policy(size, "SINGLE", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        """2 x `chunk_size` bytes, THREECOPIES to SINGLE."""
        size = self.chunk_size * 2
        self._test_change_policy(size, "THREECOPIES", "SINGLE")

    def test_change_content_with_same_policy(self):
        """Changing to the current policy must keep the same content id."""
        policy = "TWOCOPIES"
        before = self._new_content(policy, random_data(10))
        changed = self.content_factory.change_policy(
            before.container_id, before.content_id, policy)
        self.assertEqual(before.content_id, changed.content_id)

    def test_change_policy_unknown_content(self):
        """An unknown content id must raise `ContentNotFound`."""
        change_policy = self.content_factory.change_policy
        self.assertRaises(ContentNotFound, change_policy,
                          self.container_id, "1234", "SINGLE")

    def test_change_policy_unknown_storage_policy(self):
        """An unknown storage policy must raise `ClientException`."""
        before = self._new_content("TWOCOPIES", random_data(10))
        change_policy = self.content_factory.change_policy
        self.assertRaises(ClientException, change_policy,
                          self.container_id, before.content_id, "UnKnOwN")
Exemplo n.º 25
0
class TestRainContent(BaseTestCase):
    """Functional tests of `RainContent`: upload, rebuild and download.

    Every test works in a freshly created container. The whole class is
    skipped when fewer than 12 rawx services are configured.
    """

    def setUp(self):
        """Create the clients and a dedicated container for the test."""
        super(TestRainContent, self).setUp()

        if len(self.conf['rawx']) < 12:
            self.skipTest("Not enough rawx. "
                          "Rain tests need at least 12 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestRainContent%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        """Only delegates to the base class."""
        super(TestRainContent, self).tearDown()

    def _test_upload(self, data_size):
        """Upload `data_size` random bytes with policy "RAIN" and verify.

        Checks, in order: the content metadata returned by `content_show`,
        the total number of chunks, the number of data and parity chunks of
        every metachunk, the metadata and hash of every stored chunk, and
        that the data chunks of a metachunk, read in the order returned by
        the filter, hash to the same MD5 as the matching slice of the
        uploaded data.
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        # Number of data (k) and parity (m) chunks expected per metachunk.
        # NOTE(review): hard-coded; presumably mirrors the "RAIN" policy of
        # the test namespace -- confirm.
        k = 6
        m = 2
        self.assertEqual(type(content), RainContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], "RAIN")
        self.assertEqual(meta['name'], "titi")

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        # The last metachunk may hold between 1 and k data chunks.
        nb_chunks_min = metachunk_nb * (k + m) - (k - 1)
        nb_chunks_max = metachunk_nb * (k + m)
        self.assertGreaterEqual(len(chunks), nb_chunks_min)
        self.assertLessEqual(len(chunks), nb_chunks_max)

        for metapos in range(metachunk_nb):
            chunks_at_pos = content.chunks.filter(metapos=metapos)
            data_chunks_at_pos = chunks_at_pos.filter(is_parity=False)
            parity_chunks_at_pos = chunks_at_pos.filter(is_parity=True)

            if metapos < metachunk_nb - 1:
                self.assertEqual(len(data_chunks_at_pos), k)
            else:
                self.assertGreaterEqual(len(data_chunks_at_pos), 1)
                self.assertLessEqual(len(data_chunks_at_pos), k)
            self.assertEqual(len(parity_chunks_at_pos), m)

            for chunk in chunks_at_pos:
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk.hash)
                self.assertEqual(chunk_meta['content_size'], str(len(data)))
                self.assertEqual(chunk_meta['content_path'], "titi")
                self.assertEqual(chunk_meta['content_cid'],
                                 self.container_id)
                # Compare with the id of the uploaded content (the previous
                # check compared the value with itself and could not fail).
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], chunk.pos)
                self.assertEqual(chunk_meta['chunk_hash'], chunk.hash)

            data_begin = metapos * self.chunk_size
            data_end = metapos * self.chunk_size + self.chunk_size
            target_metachunk_hash = md5_data(data[data_begin:data_end])

            metachunk_hash = hashlib.md5()
            for chunk in data_chunks_at_pos:
                _, stream = self.blob_client.chunk_get(chunk.url)
                for d in stream:
                    metachunk_hash.update(d)
            self.assertEqual(metachunk_hash.hexdigest().upper(),
                             target_metachunk_hash)

    def test_upload_0_byte(self):
        """Upload an empty content."""
        self._test_upload(0)

    def test_upload_1_byte(self):
        """Upload a one-byte content."""
        self._test_upload(1)

    def test_upload_chunksize_bytes(self):
        """Upload exactly `chunk_size` bytes."""
        self._test_upload(self.chunk_size)

    def test_upload_chunksize_plus_1_bytes(self):
        """Upload `chunk_size` + 1 bytes."""
        self._test_upload(self.chunk_size + 1)

    def test_chunks_cleanup_when_upload_failed(self):
        """A failed upload must not leave chunks behind.

        The chunk at position "1.p0" is given an unreachable URL so that
        the upload fails; every other chunk must then be reported as
        `NotFound` by `chunk_head`.
        """
        data = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), "RAIN")
        self.assertEqual(type(content), RainContent)

        # set a bad url for position "1.p0"
        for chunk in content.chunks.filter(pos="1.p0"):
            chunk.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload, StringIO.StringIO(data))
        for chunk in content.chunks.exclude(pos="1.p0"):
            self.assertRaises(NotFound, self.blob_client.chunk_head, chunk.url)

    def _test_rebuild(self, data_size, broken_pos_list):
        """Upload a content, delete some chunks, rebuild and verify.

        For every position of `broken_pos_list` the chunk is downloaded
        (metadata and hash are kept), then deleted. `rebuild_chunk` is
        called once, with the id of the last deleted chunk. Each rebuilt
        chunk must have the same data hash and metadata as before (apart
        from "chunk_id") and a different URL.

        `broken_pos_list` must not be empty, otherwise no chunk id is
        available for the rebuild call.
        """
        data = os.urandom(data_size)
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        # get the new structure of the uploaded content
        uploaded_content = self.content_factory.get(self.container_id,
                                                    old_content.content_id)

        old_info = {}
        for pos in broken_pos_list:
            old_info[pos] = {}
            c = uploaded_content.chunks.filter(pos=pos)[0]
            old_info[pos]["url"] = c.url
            old_info[pos]["id"] = c.id
            old_info[pos]["hash"] = c.hash
            chunk_id_to_rebuild = c.id
            meta, stream = self.blob_client.chunk_get(c.url)
            old_info[pos]["dl_meta"] = meta
            old_info[pos]["dl_hash"] = md5_stream(stream)
            # delete the chunk
            self.blob_client.chunk_delete(c.url)

        # rebuild the broken chunks
        # NOTE(review): a single call is made although several chunks may
        # be broken; presumably rebuild_chunk restores every missing chunk
        # of the metachunk -- confirm.
        uploaded_content.rebuild_chunk(chunk_id_to_rebuild)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   uploaded_content.content_id)
        self.assertEqual(type(rebuilt_content), RainContent)

        for pos in broken_pos_list:
            c = rebuilt_content.chunks.filter(pos=pos)[0]
            rebuilt_meta, rebuilt_stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(rebuilt_meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(rebuilt_stream),
                             old_info[pos]["dl_hash"])
            self.assertEqual(c.hash, old_info[pos]["hash"])
            self.assertThat(c.url, NotEquals(old_info[pos]["url"]))
            # The chunk id is dropped from both sides before comparing the
            # rest of the metadata.
            del old_info[pos]["dl_meta"]["chunk_id"]
            del rebuilt_meta["chunk_id"]
            self.assertEqual(rebuilt_meta, old_info[pos]["dl_meta"])

    def test_content_0_byte_rebuild_pos_0_0(self):
        """Empty content, rebuild "0.0"."""
        self._test_rebuild(0, ["0.0"])

    def test_content_0_byte_rebuild_pos_0_0_and_0_p0(self):
        """Empty content, rebuild "0.0" and "0.p0"."""
        self._test_rebuild(0, ["0.0", "0.p0"])

    def test_content_1_byte_rebuild_pos_0_0(self):
        """One-byte content, rebuild "0.0"."""
        self._test_rebuild(1, ["0.0"])

    def test_content_1_byte_rebuild_pos_0_p0(self):
        """One-byte content, rebuild "0.p0"."""
        self._test_rebuild(1, ["0.p0"])

    def test_content_1_byte_rebuild_pos_0_0_and_0_p0(self):
        """One-byte content, rebuild "0.0" and "0.p0"."""
        self._test_rebuild(1, ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0(self):
        """`chunk_size` bytes, rebuild "0.0"."""
        self._test_rebuild(self.conf["chunk_size"], ["0.0"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_1(self):
        """`chunk_size` bytes, rebuild "0.0" and "0.1"."""
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.1"])

    def test_content_chunksize_bytes_rebuild_pos_0_0_and_0_p0(self):
        """`chunk_size` bytes, rebuild "0.0" and "0.p0"."""
        self._test_rebuild(self.conf["chunk_size"], ["0.0", "0.p0"])

    def test_content_chunksize_bytes_rebuild_pos_0_p0_and_0_p1(self):
        """`chunk_size` bytes, rebuild both parity chunks."""
        self._test_rebuild(self.conf["chunk_size"], ["0.p0", "0.p1"])

    def test_content_chunksize_bytes_rebuild_more_than_k_chunk(self):
        """Three broken chunks must raise `UnrecoverableContent`."""
        self.assertRaises(UnrecoverableContent, self._test_rebuild,
                          self.conf["chunk_size"], ["0.0", "0.1", "0.2"])

    def _new_content(self, data, broken_pos_list=None):
        """Upload `data` as "titi" with policy "RAIN" and break some chunks.

        The chunk found at each position of `broken_pos_list` (none by
        default) is deleted from its rawx after the upload. Returns the
        content reloaded through `ContentFactory.get`.
        """
        old_content = self.content_factory.new(self.container_id, "titi",
                                               len(data), "RAIN")
        self.assertEqual(type(old_content), RainContent)

        old_content.upload(StringIO.StringIO(data))

        # `None` stands for "nothing to break" (no shared mutable default).
        for pos in broken_pos_list or ():
            c = old_content.chunks.filter(pos=pos)[0]
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def test_orphan_chunk(self):
        """Rebuilding an unknown chunk id must raise `OrphanChunk`."""
        content = self._new_content(random_data(10))

        self.assertRaises(OrphanChunk, content.rebuild_chunk, "uNkNoWnId")

    def test_rebuild_on_the_fly(self):
        """On-the-fly rebuild returns the data without restoring chunks.

        With "0.0" and "0.p0" deleted, `rebuild_metachunk("0",
        on_the_fly=True)` must stream the original data, and both deleted
        chunks must still be reported as `NotFound` afterwards.
        """
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.p0"])

        stream = content.rebuild_metachunk("0", on_the_fly=True)

        dl_data = "".join(stream)

        self.assertEqual(dl_data, data)

        del_chunk_0_0 = content.chunks.filter(pos="0.0")[0]
        del_chunk_0_p0 = content.chunks.filter(pos="0.p0")[0]

        self.assertRaises(NotFound, self.blob_client.chunk_get,
                          del_chunk_0_0.url)
        self.assertRaises(NotFound, self.blob_client.chunk_get,
                          del_chunk_0_p0.url)

    def _test_download(self, data_size, broken_pos_list):
        """Upload, break the chunks of `broken_pos_list`, download, compare.

        After the download, deleting each broken chunk must still raise
        `NotFound`.
        """
        data = random_data(data_size)
        content = self._new_content(data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos in broken_pos_list:
            c = content.chunks.filter(pos=pos)[0]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_download_content_0_byte_without_broken_chunks(self):
        """Download an empty content, nothing broken."""
        self._test_download(0, [])

    def test_download_content_1_byte_without_broken_chunks(self):
        """Download a one-byte content, nothing broken."""
        self._test_download(1, [])

    def test_download_content_chunksize_bytes_without_broken_chunks(self):
        """Download `chunk_size` bytes, nothing broken."""
        self._test_download(self.conf["chunk_size"], [])

    def test_download_content_chunksize_plus_1_without_broken_chunks(self):
        """Download `chunk_size` + 1 bytes, nothing broken."""
        self._test_download(self.conf["chunk_size"] + 1, [])

    def test_download_content_0_byte_with_broken_0_0_and_0_p0(self):
        """Download an empty content with "0.0" and "0.p0" broken."""
        self._test_download(0, ["0.0", "0.p0"])

    def test_download_content_1_byte_with_broken_0_0_and_0_p0(self):
        """Download a one-byte content with "0.0" and "0.p0" broken."""
        self._test_download(1, ["0.0", "0.p0"])

    def test_download_content_2xchunksize_with_broken_0_2_and_1_0(self):
        """Download 2 x `chunk_size` bytes with "0.2" and "1.0" broken."""
        self._test_download(2 * self.conf["chunk_size"], ["0.2", "1.0"])

    def test_download_content_chunksize_bytes_with_3_broken_chunks(self):
        """Three broken data chunks must raise `UnrecoverableContent`.

        The error is expected when the first piece of data is requested,
        not when `download()` is called.
        """
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.0", "0.1", "0.2"])
        gen = content.download()
        self.assertRaises(UnrecoverableContent, gen.next)

    def test_download_interrupt_close(self):
        """Read only the first buffer of a download, then close it."""
        data = random_data(self.conf["chunk_size"])
        content = self._new_content(data, ["0.p0"])

        download_iter = content.download()

        # NOTE(review): the slice holds READ_CHUNK_SIZE - 1 bytes; confirm
        # this is the size of the first buffer really yielded.
        self.assertEqual(download_iter.next(), data[0:READ_CHUNK_SIZE - 1])
        download_iter.close()
Exemplo n.º 26
0
class TestBlobAuditorFunctional(BaseTestCase):
    """Functional tests of `BlobAuditorWorker.chunk_audit`.

    `setUp` stores one chunk on the first configured rawx; each test then
    alters the chunk file, its extended attributes or the content
    registered in the container, and checks what the auditor raises.
    """

    def setUp(self):
        """Create a container and put one random chunk on a rawx."""
        super(TestBlobAuditorFunctional, self).setUp()
        self.namespace = self.conf['namespace']
        self.account = self.conf['account']

        self.test_dir = self.conf['sds_path']

        # Alphabets used to draw random names and random hexadecimal ids.
        self.chars = string.ascii_letters + string.digits
        self.chars_id = string.digits + 'ABCDEF'

        self.rawx = 'http://' + self.conf["rawx"][0]['addr']

        self.h = hashlib.new('md5')

        grid_conf = {"namespace": self.namespace}
        self.auditor = BlobAuditorWorker(grid_conf, get_logger(None), None)
        self.container_c = ContainerClient(grid_conf)
        self.blob_c = BlobClient()

        self.ref = rand_generator(self.chars, 8)
        self.container_c.container_create(self.account, self.ref)

        self.url_rand = rand_generator(self.chars_id, 64)

        self.data = rand_generator(self.chars, 1280)
        self.h.update(self.data)
        self.hash_rand = self.h.hexdigest().lower()

        content = TestContent(rand_generator(self.chars, 6),
                              len(self.data), self.url_rand, 1)
        self.content = content
        content.id_container = cid_from_name(self.account,
                                             self.ref).upper()

        chunk = TestChunk(content.size, self.url_rand, 0, self.hash_rand)
        self.chunk = chunk

        self.chunk_url = "%s/%s" % (self.rawx, chunk.id_chunk)
        # Description of the chunk as sent to the container service.
        self.chunk_proxy = {
            "hash": chunk.md5,
            "pos": "0",
            "size": chunk.size,
            "url": self.chunk_url
        }

        chunk_meta = {
            'content_size': content.size,
            'content_chunksnb': content.nb_chunks,
            'content_path': content.path,
            'content_cid': content.id_container,
            'content_mimetype': 'application/octet-stream',
            'content_chunkmethod': 'bytes',
            'content_policy': 'TESTPOLICY',
            'content_id': '0000',
            'content_version': 1,
            'chunk_id': chunk.id_chunk,
            'chunk_pos': chunk.pos
        }
        self.blob_c.chunk_put(self.chunk_url, chunk_meta, self.data)

        # Path of the chunk file audited by the tests.
        self.chunk_path = "".join([
            self.test_dir, '/data/', self.namespace, '-rawx-1/',
            chunk.id_chunk[0:3], "/", chunk.id_chunk])
        self.bad_container_id = '0' * 64

    def tearDown(self):
        """Best-effort removal of the content, the container and the chunk."""
        super(TestBlobAuditorFunctional, self).tearDown()

        cleanups = (
            lambda: self.container_c.content_delete(
                self.account, self.ref, self.content.path),
            lambda: self.container_c.container_destroy(
                self.account, self.ref),
            lambda: os.remove(self.chunk_path),
        )
        # Each step is attempted even when a previous one failed.
        for cleanup in cleanups:
            try:
                cleanup()
            except Exception:
                pass

    def init_content(self):
        """Register the content and its chunk in the container."""
        self.container_c.content_create(
            self.account, self.ref, self.content.path, self.chunk.size,
            self.hash_rand, data=[self.chunk_proxy])

    def _audit_must_raise(self, expected_exc):
        """Assert that auditing the chunk file raises `expected_exc`."""
        self.assertRaises(expected_exc, self.auditor.chunk_audit,
                          self.chunk_path)

    def _overwrite_chunk(self, length):
        """Replace the chunk file with `length` random characters."""
        with open(self.chunk_path, "w") as chunk_file:
            chunk_file.write(rand_generator(self.chars, length))

    def test_chunk_audit(self):
        """A consistent chunk is audited without error."""
        self.init_content()
        self.auditor.chunk_audit(self.chunk_path)

    def test_content_deleted(self):
        """The content is not registered: `OrphanChunk` expected."""
        self._audit_must_raise(exc.OrphanChunk)

    def test_container_deleted(self):
        """The container is destroyed: `OrphanChunk` expected."""
        self.container_c.container_destroy(self.account, self.ref)

        self._audit_must_raise(exc.OrphanChunk)

    def test_chunk_corrupted(self):
        """Same size but different bytes on disk: `CorruptedChunk`."""
        self.init_content()
        self._overwrite_chunk(1280)

        self._audit_must_raise(exc.CorruptedChunk)

    def test_chunk_bad_size(self):
        """Chunk file shortened to 320 characters: `FaultyChunk`."""
        self.init_content()
        self._overwrite_chunk(320)

        self._audit_must_raise(exc.FaultyChunk)

    def test_xattr_bad_content_nbchunk(self):
        """Wrong 'user.grid.content.nbchunk' xattr: `FaultyChunk`."""
        self.init_content()
        xattr.setxattr(self.chunk_path, 'user.grid.content.nbchunk', '42')

        self._audit_must_raise(exc.FaultyChunk)

    def test_xattr_bad_chunk_size(self):
        """Wrong 'user.grid.chunk.size' xattr: `FaultyChunk`."""
        self.init_content()
        xattr.setxattr(self.chunk_path, 'user.grid.chunk.size', '-1')

        self._audit_must_raise(exc.FaultyChunk)

    def test_xattr_bad_chunk_hash(self):
        """Wrong 'user.grid.chunk.hash' xattr: `CorruptedChunk`."""
        self.init_content()
        xattr.setxattr(self.chunk_path, 'user.grid.chunk.hash', 'WRONG_HASH')

        self._audit_must_raise(exc.CorruptedChunk)

    def test_xattr_bad_content_size(self):
        """Wrong 'user.grid.content.size' xattr: `FaultyChunk`."""
        self.init_content()
        xattr.setxattr(self.chunk_path, 'user.grid.content.size', '-1')

        self._audit_must_raise(exc.FaultyChunk)

    def test_xattr_bad_content_path(self):
        """Wrong 'user.grid.content.path' xattr: `OrphanChunk`."""
        self.init_content()
        xattr.setxattr(self.chunk_path, 'user.grid.content.path', 'WRONG_PATH')

        self._audit_must_raise(exc.OrphanChunk)

    def test_xattr_bad_chunk_id(self):
        """Wrong 'user.grid.chunk.id' xattr: `OrphanChunk`."""
        self.init_content()
        xattr.setxattr(self.chunk_path, 'user.grid.chunk.id', 'WRONG_ID')

        self._audit_must_raise(exc.OrphanChunk)

    def test_xattr_bad_content_container(self):
        """Wrong 'user.grid.content.container' xattr: `OrphanChunk`."""
        self.init_content()
        xattr.setxattr(self.chunk_path, 'user.grid.content.container',
                       self.bad_container_id)
        self._audit_must_raise(exc.OrphanChunk)

    def test_xattr_bad_chunk_position(self):
        """Wrong 'user.grid.chunk.position' xattr: `FaultyChunk`."""
        self.init_content()
        xattr.setxattr(self.chunk_path, 'user.grid.chunk.position', '42')

        self._audit_must_raise(exc.FaultyChunk)

    def test_chunk_bad_hash(self):
        """Content registered with a different chunk hash: `FaultyChunk`."""
        # Feeding the data a second time yields a digest that differs from
        # the one computed in setUp.
        self.h.update(self.data)
        self.hash_rand = self.h.hexdigest().lower()
        self.chunk.md5 = self.hash_rand
        self.chunk_proxy['hash'] = self.chunk.md5
        self.init_content()

        self._audit_must_raise(exc.FaultyChunk)

    def test_chunk_bad_length(self):
        """Content registered with a chunk size of 320: `FaultyChunk`."""
        self.chunk.size = 320
        self.chunk_proxy['size'] = self.chunk.size
        self.init_content()

        self._audit_must_raise(exc.FaultyChunk)

    def test_chunk_bad_chunk_size(self):
        """Same scenario as `test_chunk_bad_length`: `FaultyChunk`."""
        self.chunk.size = 320
        self.chunk_proxy['size'] = self.chunk.size
        self.init_content()

        self._audit_must_raise(exc.FaultyChunk)

    def test_chunk_bad_url(self):
        """Content registered with another chunk URL: `OrphanChunk`."""
        self.chunk_proxy['url'] = '%s/WRONG_ID' % self.rawx
        self.init_content()

        self._audit_must_raise(exc.OrphanChunk)

    def test_content_bad_path(self):
        """Content registered under another path: `OrphanChunk`."""
        self.content.path = 'BAD_PATH'
        self.init_content()

        self._audit_must_raise(exc.OrphanChunk)
Exemplo n.º 27
0
class TestMeta2Indexing(BaseTestCase):
    """Functional test of ``Meta2IndexingWorker`` against the rdir index.

    A random set of containers is created, the indexing worker is run on
    every meta2 volume hosting one of them, and the rdir records of each
    volume are compared with the containers expected on it.
    """

    def setUp(self):
        super(TestMeta2Indexing, self).setUp()
        self.rdir_client = RdirClient(self.conf)
        self.directory_client = DirectoryClient(self.conf)
        self.container_client = ContainerClient(self.conf)
        # Between 1 and 10 names produced by random_str(14).
        self.containers = [random_str(14) for _ in range(randint(1, 10))]
        # container name -> list of its meta2 services; filled by the test.
        self.containers_svcs = {}
        self.event_agent_name = 'event-agent-1'

    def tearDown(self):
        try:
            super(TestMeta2Indexing, self).tearDown()
            self._containers_cleanup()
        finally:
            # The test stops the event agent: restart it even when the
            # cleanup above fails, otherwise it stays down for later tests.
            self._service(self.event_agent_name, 'start', wait=3)

    def _containers_cleanup(self):
        """
        Delete every container created for the test, along with the rdir
        index records of each meta2 service known to manage it.
        """
        for container in self.containers:
            self.container_client.container_delete(self.account, container)
            # The test may have failed before the services were collected;
            # a missing entry must not turn the cleanup into a KeyError.
            for svc in self.containers_svcs.get(container, ()):
                self.rdir_client.meta2_index_delete(
                    volume_id=svc['host'],
                    container_path="{0}/{1}/{2}".format(
                        self.ns, self.account, container),
                    container_id=cid_from_name(self.account, container))

    def _filter_by_managing_svc(self, all_containers, svc_of_interest):
        """
        Filters through the containers returning only those that have
        svc_of_interest in their list of managing services.

        :param all_containers: dict mapping a container name to a list of
            service dicts, each carrying a 'host' key
        :param svc_of_interest: host value to look for
        :return: sorted list of the matching container names
        """
        return sorted(
            name for name, svcs in all_containers.items()
            if any(svc['host'] == svc_of_interest for svc in svcs))

    def test_volume_indexing_worker(self):
        """
        Test steps:
        - Generate a list of container names and create them
        - Collect their respective meta2 servers
        - For each meta2 server:
            - Run a meta2 indexing worker
            - List all rdir index records and match them with the
              services we're expecting.
        :return:
        """
        # NOTE(review): presumably stopped so that only the worker under
        # test populates the index - confirm. tearDown() restarts it.
        self._service(self.event_agent_name, "stop", wait=3)

        for container in self.containers:
            self.container_client.container_create(account=self.account,
                                                   reference=container)

        # Keep only the meta2 entries of each container's service list.
        for container in self.containers:
            located = self.directory_client.list(account=self.account,
                                                 reference=container)
            self.containers_svcs[container] = [
                svc for svc in located['srv'] if svc['type'] == 'meta2']

        # Service identifier (service_id, else address) -> volume path.
        meta2_data_paths = {}
        for svc in self.conf['services']['meta2']:
            svc_host = svc.get('service_id', svc['addr'])
            meta2_data_paths[svc_host] = svc['path']

        distinct_meta2_servers = set()
        for svc_list in self.containers_svcs.values():
            distinct_meta2_servers.update(svc['host'] for svc in svc_list)

        for svc in distinct_meta2_servers:
            expected_containers = self._filter_by_managing_svc(
                self.containers_svcs, svc)
            worker = Meta2IndexingWorker(meta2_data_paths[svc], self.conf)
            worker.crawl_volume()
            # The container name is the last segment of the indexed URL.
            indexed_containers = sorted(
                record['container_url'].split('/')[-1]
                for record in self.rdir_client.meta2_index_fetch_all(
                    volume_id=svc))

            for cont in expected_containers:
                self.assertIn(cont, indexed_containers)
Exemplo n.º 28
0
class TestRebuilderCrawler(BaseTestCase):
    """Functional tests of ``BlobRebuilderWorker.chunk_rebuild``.

    Every test works in a container created by ``setUp`` and targets the
    first rawx service listed in the test configuration.
    """

    def setUp(self):
        super(TestRebuilderCrawler, self).setUp()

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']

        self.gridconf = {"namespace": self.namespace}
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()

        # Sub-second resolution, so that two tests starting within the same
        # second do not end up sharing one container.
        self.container_name = "TestRebuilderCrawler%f" % time.time()
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)

    def _push_content(self, content):
        """Upload each chunk of ``content``, then register it in meta2."""
        for c in content.chunks:
            self.blob_client.chunk_put(c.url, c.get_create_xattr(), c.data)

        self.container_client.content_create(acct=content.account,
                                             ref=content.container_name,
                                             path=content.content_name,
                                             size=content.size,
                                             checksum=content.hash,
                                             content_id=content.content_id,
                                             stgpol=content.stgpol,
                                             data=content.get_create_meta2())

    def _push_new_content(self, stgpol, nb_copies):
        """Create and push "mycontent" holding ``nb_copies`` copies of "azerty".

        Copy number i is stored at position '0' on rawx number i.
        """
        content = TestContent(self.conf, self.account,
                              self.container_name, "mycontent", stgpol)
        data = "azerty"
        for rawx_idx in range(nb_copies):
            content.add_chunk(data, pos='0', rawx=rawx_idx)

        self._push_content(content)
        return content

    def _first_rawx_rebuilder(self):
        """Build a rebuilder worker bound to the first configured rawx."""
        return BlobRebuilderWorker(self.gridconf, None,
                                   self.conf['rawx'][0]['addr'])

    def tearDown(self):
        super(TestRebuilderCrawler, self).tearDown()

    def test_rebuild_chunk(self):
        # push a new content
        content = self._push_new_content("TWOCOPIES", 2)

        # rebuild the first rawx
        rebuilder = self._first_rawx_rebuilder()

        rebuilder.chunk_rebuild(content.container_id, content.content_id,
                                content.chunks[0].id)

        # check meta2 information
        _, res = self.container_client.content_show(acct=content.account,
                                                    ref=content.container_name,
                                                    content=content.content_id)

        # The rebuilt chunk is the one whose URL matches none of the pushed
        # chunks (the last such entry wins, as before).
        pushed_urls = (content.chunks[0].url, content.chunks[1].url)
        new_chunk_info = None
        for c in res:
            if c['url'] not in pushed_urls:
                new_chunk_info = c

        # Fail with a clear assertion instead of a TypeError on None.
        self.assertIsNotNone(new_chunk_info)

        new_chunk_id = new_chunk_info['url'].split('/')[-1]

        self.assertEqual(new_chunk_info['hash'], content.chunks[0].hash)
        self.assertEqual(new_chunk_info['pos'], content.chunks[0].pos)
        self.assertEqual(new_chunk_info['size'], content.chunks[0].size)

        # check chunk information
        meta, stream = self.blob_client.chunk_get(new_chunk_info['url'])

        self.assertEqual(meta['content_size'], str(content.chunks[0].size))
        self.assertEqual(meta['content_path'], content.content_name)
        self.assertEqual(meta['content_cid'], content.container_id)
        self.assertEqual(meta['content_id'], content.content_id)
        self.assertEqual(meta['chunk_id'], new_chunk_id)
        self.assertEqual(meta['chunk_pos'], content.chunks[0].pos)
        self.assertEqual(meta['content_version'], content.version)
        self.assertEqual(meta['chunk_hash'], content.chunks[0].hash)

        # next() works with both Python 2 and Python 3 iterators.
        self.assertEqual(next(stream), content.chunks[0].data)

        # check rtime flag in rdir
        rdir_client = RdirClient(self.gridconf)
        res = rdir_client.chunk_fetch(self.conf['rawx'][0]['addr'])
        key = (content.container_id, content.content_id, content.chunks[0].id)
        # Pre-bound so that a missing record is reported as an assertion
        # failure rather than an unbound local variable.
        check_value = None
        for i_container, i_content, i_chunk, i_value in res:
            if (i_container, i_content, i_chunk) == key:
                check_value = i_value

        self.assertIsNotNone(check_value)
        self.assertIsNotNone(check_value.get('rtime'))

    @unittest.skipIf(len(get_config()['rawx']) != 3,
                     "The number of rawx must be 3")
    def test_rebuild_no_spare(self):
        # push a new content using every one of the three rawx
        content = self._push_new_content("THREECOPIES", 3)

        # rebuild the first rawx
        rebuilder = self._first_rawx_rebuilder()

        self.assertRaises(SpareChunkException, rebuilder.chunk_rebuild,
                          content.container_id, content.content_id,
                          content.chunks[0].id)

    def test_rebuild_upload_failed(self):
        # push a new content
        content = self._push_new_content("TWOCOPIES", 2)

        # rebuild the first rawx
        rebuilder = self._first_rawx_rebuilder()

        # Force upload to raise an exception
        with patch('oio.content.content.BlobClient') as MockClass:
            instance = MockClass.return_value
            instance.chunk_copy.side_effect = Exception("xx")
            self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild,
                              content.container_id, content.content_id,
                              content.chunks[0].id)

    def test_rebuild_nonexistent_chunk(self):
        rebuilder = self._first_rawx_rebuilder()

        # try to rebuild a nonexistent chunk (all-zero identifiers)
        self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild,
                          64 * '0', 32 * '0', 64 * '0')

    def test_rebuild_orphan_chunk(self):
        # push a new content
        content = self._push_new_content("TWOCOPIES", 2)

        # rebuild the first rawx
        rebuilder = self._first_rawx_rebuilder()

        # try to rebuild a chunk id unknown to an existing content
        self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild,
                          content.container_id, content.content_id, 64 * '0')

    def test_rebuild_with_no_copy(self):
        # push a new content made of a single copy
        content = self._push_new_content("SINGLE", 1)

        # rebuild the first rawx
        rebuilder = self._first_rawx_rebuilder()

        # try to rebuild chunk without copy
        self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild,
                          content.container_id, content.content_id,
                          content.chunks[0].id)
Exemplo n.º 29
0
class TestContentFactory(BaseTestCase):
    """Tests of ``ContentFactory``: lookup, creation, policy change, moves.

    ``setUp`` creates a dedicated container; several tests replace the
    factory's container client methods with ``Mock`` objects, the others
    go through the real clients.
    """

    def setUp(self):
        super(TestContentFactory, self).setUp()
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_extract_datasec(self):
        # Namespace information is injected so that no service is queried.
        self.content_factory.ns_info = {
            "data_security": {
                "DUPONETWO": "DUP:distance=1|nb_copy=2",
                "RAIN": "RAIN:k=6|m=2|algo=liber8tion"
            },
            "storage_policy": {
                "RAIN": "NONE:RAIN:NONE",
                "SINGLE": "NONE:NONE:NONE",
                "TWOCOPIES": "NONE:DUPONETWO:NONE"
            }
        }

        ds_type, ds_args = self.content_factory._extract_datasec("RAIN")
        self.assertEqual(ds_type, "RAIN")
        self.assertEqual(ds_args, {"k": "6", "m": "2", "algo": "liber8tion"})

        # A policy without data security is expected as a single-copy DUP.
        ds_type, ds_args = self.content_factory._extract_datasec("SINGLE")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {"nb_copy": "1", "distance": "0"})

        ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {"nb_copy": "2", "distance": "1"})

        self.assertRaises(InconsistentContent,
                          self.content_factory._extract_datasec, "UnKnOwN")

    def test_get_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "RAIN",
            "version": "1450176946676289"
        }
        chunks = [{
            "url": "http://127.0.0.1:6012/A0A0",
            "pos": "0.p0",
            "size": 512,
            "hash": "E7D4E4AD460971CA2E3141F2102308D4"
        }, {
            "url": "http://127.0.0.1:6010/A01",
            "pos": "0.1",
            "size": 146,
            "hash": "760AB5DA7C51A3654F1CA622687CD6C3"
        }, {
            "url": "http://127.0.0.1:6011/A00",
            "pos": "0.0",
            "size": 512,
            "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"
        }, {
            "url": "http://127.0.0.1:6013/A0A1",
            "pos": "0.p1",
            "size": 512,
            "hash": "DA9D7F72AEEA5791565724424CE45C16"
        }]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        # Expected chunk order by position: 0.0, 0.1, 0.p0, 0.p1
        self.assertEqual(c.chunks[0].raw(), chunks[2])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[0])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_dup(self):
        meta = {
            "chunk-method": "plain/bytes",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "TWOCOPIES",
            "version": "1450176946676289"
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/A0",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }, {
            "url": "http://127.0.0.1:6011/A1",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), DupContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.nb_copy, 2)
        self.assertEqual(c.distance, 1)
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash-method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime-type": "application/octet-stream",
            "name": "titi",
            "policy": "RAIN",
            "version": "1450341162332663"
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/0_p1",
            "pos": "0.p1",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6011/0_p0",
            "pos": "0.p0",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6016/0_1",
            "pos": "0.1",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6017/0_0",
            "pos": "0.0",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id", "titi", 1000, "RAIN")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        # Expected chunk order by position: 0.0, 0.1, 0.p0, 0.p1
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self, stgpol, data, path="titi"):
        """Upload ``data`` as a new content and return it freshly reloaded.

        :param stgpol: name of the storage policy to create the content with
        :param data: payload to upload
        :param path: content path inside the test container
        :return: the content as returned by ``content_factory.get``
        """
        old_content = self.content_factory.new(self.container_id, path,
                                               len(data), stgpol)
        old_content.upload(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        """Upload random data with ``old_policy``, then switch it over.

        Checks that the old content no longer exists, that the new one has
        the class expected for ``new_policy`` and that its data is intact.
        The test is skipped when RAIN is involved with fewer than 8 rawx.
        """
        if "RAIN" in (old_policy, new_policy) \
                and len(self.conf['rawx']) < 8:
            # The guard accepts exactly 8 services, so say "at least".
            self.skipTest("RAIN: Need at least 8 rawx to run")

        data = random_data(data_size)
        obj_type = {
            "SINGLE": DupContent,
            "TWOCOPIES": DupContent,
            "THREECOPIES": DupContent,
            "RAIN": RainContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        # The original content must be gone after the policy change.
        self.assertRaises(NotFound,
                          self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        downloaded_data = "".join(new_content.download())

        self.assertEqual(downloaded_data, data)

    def test_change_content_0_byte_policy_single_to_rain(self):
        self._test_change_policy(0, "SINGLE", "RAIN")

    def test_change_content_0_byte_policy_rain_to_twocopies(self):
        self._test_change_policy(0, "RAIN", "TWOCOPIES")

    def test_change_content_1_byte_policy_single_to_rain(self):
        self._test_change_policy(1, "SINGLE", "RAIN")

    def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self):
        self._test_change_policy(self.chunk_size, "TWOCOPIES", "RAIN")

    def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "RAIN")

    def test_change_content_1_byte_policy_rain_to_threecopies(self):
        self._test_change_policy(1, "RAIN", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "RAIN", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_rain_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "RAIN", "SINGLE")

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(0, "TWOCOPIES", "THREECOPIES")

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(self.chunk_size, "SINGLE", "TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(self.chunk_size * 2, "THREECOPIES", "SINGLE")

    def test_change_content_with_same_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, "TWOCOPIES")
        # Same policy: the content id is expected to stay the same.
        self.assertEqual(old_content.content_id, changed_content.content_id)

    def test_change_policy_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.change_policy,
                          self.container_id, "1234", "SINGLE")

    def test_change_policy_unknown_storage_policy(self):
        data = random_data(10)
        old_content = self._new_content("TWOCOPIES", data)
        self.assertRaises(ClientException, self.content_factory.change_policy,
                          self.container_id, old_content.content_id, "UnKnOwN")

    def _test_move_chunk(self, policy):
        """Move the first chunk at metaposition 0 and verify the result.

        After the move, the chunks at metaposition 0 must all be on distinct
        hosts, none may keep the moved chunk's id, and the new chunk must
        carry the same data hash and the same metadata (chunk id aside).
        """
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)

        # Look the chunk up once instead of filtering twice.
        moved_chunk = content.chunks.filter(metapos=0)[0]
        chunk_id = moved_chunk.id
        chunk_url = moved_chunk.url
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)

        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            # assertNotEquals is a deprecated alias of assertNotEqual.
            self.assertNotEqual(c.id, chunk_id)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)

        self.assertEqual(new_chunk_hash, chunk_hash)

        # The chunk ids necessarily differ: compare everything else.
        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        self._test_move_chunk("SINGLE")

    def test_twocopies_move_chunk(self):
        self._test_move_chunk("TWOCOPIES")

    def test_rain_move_chunk(self):
        if len(self.conf['rawx']) < 9:
            self.skipTest("Need more than 8 rawx")
        self._test_move_chunk("RAIN")

    def test_move_chunk_not_in_content(self):
        data = random_data(self.chunk_size)
        content = self._new_content("TWOCOPIES", data)
        with ExpectedException(OrphanChunk):
            content.move_chunk("1234")

    def test_strange_paths(self):
        for cname in (
                "Annual report.txt",
                "foo+bar=foobar.txt",
                "100%_bug_free.c",
                "forward/slash/allowed",
                "I\\put\\backslashes\\and$dollar$signs$in$file$names",
                "Je suis tombé sur la tête, mais ça va bien.",
                "%s%f%u%d%%",
                "carriage\rreturn",
                "line\nfeed",
                "ta\tbu\tla\ttion",
                "controlchars",
        ):
            content = self._new_content("SINGLE", "nobody cares", cname)
            # The path read back must be exactly the one given at creation.
            self.assertEqual(cname, content.path)
            # TODO: delete the content
Exemplo n.º 30
0
class TestContentFactory(BaseTestCase):
    def setUp(self):
        super(TestContentFactory, self).setUp()
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_extract_datasec(self):
        self.content_factory.ns_info = {
            "data_security": {
                "DUPONETWO": "DUP:distance=1|nb_copy=2",
                "RAIN": "RAIN:k=6|m=2|algo=liber8tion"
            },
            "storage_policy": {
                "RAIN": "NONE:RAIN:NONE",
                "SINGLE": "NONE:NONE:NONE",
                "TWOCOPIES": "NONE:DUPONETWO:NONE"
            }
        }

        ds_type, ds_args = self.content_factory._extract_datasec("RAIN")
        self.assertEqual(ds_type, "RAIN")
        self.assertEqual(ds_args, {
            "k": "6",
            "m": "2",
            "algo": "liber8tion"
        })

        ds_type, ds_args = self.content_factory._extract_datasec("SINGLE")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {
            "nb_copy": "1",
            "distance": "0"
        })

        ds_type, ds_args = self.content_factory._extract_datasec("TWOCOPIES")
        self.assertEqual(ds_type, "DUP")
        self.assertEqual(ds_args, {
            "nb_copy": "2",
            "distance": "1"
        })

        self.assertRaises(InconsistentContent,
                          self.content_factory._extract_datasec,
                          "UnKnOwN")

    def test_get_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "RAIN",
            "version": "1450176946676289"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6012/A0A0",
                "pos": "0.p0", "size": 512,
                "hash": "E7D4E4AD460971CA2E3141F2102308D4"},
            {
                "url": "http://127.0.0.1:6010/A01",
                "pos": "0.1", "size": 146,
                "hash": "760AB5DA7C51A3654F1CA622687CD6C3"},
            {
                "url": "http://127.0.0.1:6011/A00",
                "pos": "0.0", "size": 512,
                "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"},
            {
                "url": "http://127.0.0.1:6013/A0A1",
                "pos": "0.p1", "size": 512,
                "hash": "DA9D7F72AEEA5791565724424CE45C16"}
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), RainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.algo, "liber8tion")
        self.assertEqual(c.k, 6)
        self.assertEqual(c.m, 2)
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[2])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[0])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_dup(self):
        meta = {
            "chunk-method": "plain/bytes",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "TWOCOPIES",
            "version": "1450176946676289"
        }
        chunks = [
            {
                "url": "http://127.0.0.1:6010/A0",
                "pos": "0", "size": 658,
                "hash": "E952A419957A6E405BFC53EC65483F73"},
            {
                "url": "http://127.0.0.1:6011/A1",
                "pos": "0", "size": 658,
                "hash": "E952A419957A6E405BFC53EC65483F73"}
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id", "xxx_content_id")
        self.assertEqual(type(c), DupContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(c.nb_copy, 2)
        self.assertEqual(c.distance, 1)
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_rain(self):
        """Check that ``new`` builds a RainContent from a RAIN chunk method.

        ``content_prepare`` is replaced by a Mock returning the metadata and
        chunk list defined here. The final assertions expect the content's
        chunks in the reverse order of that list.
        """
        def raw_chunk(url, pos):
            # All four fixture chunks share the same size and hash.
            return {"url": url, "pos": pos, "size": 1048576,
                    "hash": "00000000000000000000000000000000"}

        prepared_meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash-method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime-type": "application/octet-stream",
            "name": "titi",
            "policy": "RAIN",
            "version": "1450341162332663"
        }
        prepared_chunks = [
            raw_chunk("http://127.0.0.1:6010/0_p1", "0.p1"),
            raw_chunk("http://127.0.0.1:6011/0_p0", "0.p0"),
            raw_chunk("http://127.0.0.1:6016/0_1", "0.1"),
            raw_chunk("http://127.0.0.1:6017/0_0", "0.0"),
        ]
        client = self.content_factory.container_client
        client.content_prepare = Mock(
            return_value=(prepared_meta, prepared_chunks))

        content = self.content_factory.new("xxx_container_id", "titi",
                                           1000, "RAIN")

        self.assertEqual(type(content), RainContent)
        self.assertEqual(content.content_id,
                         "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(content.length, 1000)
        self.assertEqual(content.path, "titi")
        self.assertEqual(content.version, "1450341162332663")
        self.assertEqual(content.algo, "liber8tion")
        self.assertEqual(content.k, 6)
        self.assertEqual(content.m, 2)
        self.assertEqual(len(content.chunks), 4)
        for index, expected in enumerate(reversed(prepared_chunks)):
            self.assertEqual(content.chunks[index].raw(), expected)

    def _new_content(self, stgpol, data, path="titi"):
        """Create a content, upload ``data`` to it and return it re-fetched.

        :param stgpol: storage policy passed to the content factory
        :param data: payload to upload; its length is the declared size
        :param path: name of the content inside the test container
        :returns: the content as returned by ``content_factory.get``
        """
        size = len(data)
        created = self.content_factory.new(self.container_id, path,
                                           size, stgpol)
        stream = StringIO.StringIO(data)
        created.upload(stream)
        content_id = created.content_id
        return self.content_factory.get(self.container_id, content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        """Create a content with one policy, switch it, and verify the result.

        The test is skipped when RAIN is involved and fewer than 8 rawx
        services are configured.

        :param data_size: number of random bytes to store
        :param old_policy: storage policy used to create the content
        :param new_policy: storage policy to convert the content to
        """
        if "RAIN" in (old_policy, new_policy) \
                and len(self.conf['rawx']) < 8:
            # The guard lets exactly 8 rawx through, so say "at least".
            self.skipTest("RAIN: Need at least 8 rawx to run")

        data = random_data(data_size)
        # Content class expected for each storage policy.
        obj_type = {
            "SINGLE": DupContent,
            "TWOCOPIES": DupContent,
            "THREECOPIES": DupContent,
            "RAIN": RainContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), obj_type[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        # The original content must no longer be visible.
        self.assertRaises(NotFound, self.container_client.content_show,
                          self.account,
                          cid=old_content.container_id,
                          content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), obj_type[new_policy])

        # The payload must survive the policy change unchanged.
        downloaded_data = "".join(new_content.download())

        self.assertEqual(downloaded_data, data)

    def test_change_content_0_byte_policy_single_to_rain(self):
        """Change an empty content from SINGLE to RAIN."""
        self._test_change_policy(data_size=0, old_policy="SINGLE",
                                 new_policy="RAIN")

    def test_change_content_0_byte_policy_rain_to_twocopies(self):
        """Change an empty content from RAIN to TWOCOPIES."""
        self._test_change_policy(data_size=0, old_policy="RAIN",
                                 new_policy="TWOCOPIES")

    def test_change_content_1_byte_policy_single_to_rain(self):
        """Change a one-byte content from SINGLE to RAIN."""
        self._test_change_policy(data_size=1, old_policy="SINGLE",
                                 new_policy="RAIN")

    def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self):
        """Change a chunk-sized content from TWOCOPIES to RAIN."""
        self._test_change_policy(data_size=self.chunk_size,
                                 old_policy="TWOCOPIES",
                                 new_policy="RAIN")

    def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self):
        """Change a two-chunk content from THREECOPIES to RAIN."""
        self._test_change_policy(data_size=self.chunk_size * 2,
                                 old_policy="THREECOPIES",
                                 new_policy="RAIN")

    def test_change_content_1_byte_policy_rain_to_threecopies(self):
        """Change a one-byte content from RAIN to THREECOPIES."""
        self._test_change_policy(data_size=1, old_policy="RAIN",
                                 new_policy="THREECOPIES")

    def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self):
        """Change a chunk-sized content from RAIN to TWOCOPIES."""
        self._test_change_policy(data_size=self.chunk_size,
                                 old_policy="RAIN",
                                 new_policy="TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_rain_to_single(self):
        """Change a two-chunk content from RAIN to SINGLE."""
        self._test_change_policy(data_size=self.chunk_size * 2,
                                 old_policy="RAIN",
                                 new_policy="SINGLE")

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        """Change an empty content from TWOCOPIES to THREECOPIES."""
        self._test_change_policy(data_size=0, old_policy="TWOCOPIES",
                                 new_policy="THREECOPIES")

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        """Change a chunk-sized content from SINGLE to TWOCOPIES."""
        self._test_change_policy(data_size=self.chunk_size,
                                 old_policy="SINGLE",
                                 new_policy="TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        """Change a two-chunk content from THREECOPIES to SINGLE."""
        self._test_change_policy(data_size=self.chunk_size * 2,
                                 old_policy="THREECOPIES",
                                 new_policy="SINGLE")

    def test_change_content_with_same_policy(self):
        """Changing to the current policy must keep the same content ID."""
        policy = "TWOCOPIES"
        original = self._new_content(policy, random_data(10))
        result = self.content_factory.change_policy(
            original.container_id, original.content_id, policy)
        self.assertEqual(original.content_id, result.content_id)

    def test_change_policy_unknown_content(self):
        """Changing the policy of an unknown content raises ContentNotFound."""
        change = self.content_factory.change_policy
        missing_id = "1234"
        self.assertRaises(ContentNotFound, change,
                          self.container_id, missing_id, "SINGLE")

    def test_change_policy_unknown_storage_policy(self):
        """Requesting an unknown storage policy raises ClientException."""
        existing = self._new_content("TWOCOPIES", random_data(10))
        change = self.content_factory.change_policy
        self.assertRaises(ClientException, change,
                          self.container_id, existing.content_id, "UnKnOwN")

    def _test_move_chunk(self, policy):
        """Move the first chunk at metaposition 0 and check the result.

        The moved chunk must have a new ID, all chunks at metaposition 0
        must sit on distinct hosts, and the new chunk must carry the same
        data hash and the same metadata (chunk ID aside) as the old one.

        :param policy: storage policy used to create the test content
        """
        content = self._new_content(policy, random_data(self.chunk_size))

        moved = content.chunks.filter(metapos=0)[0]
        chunk_id = moved.id
        chunk_url = moved.url
        old_meta, old_stream = self.blob_client.chunk_get(chunk_url)
        old_hash = md5_stream(old_stream)
        new_chunk = content.move_chunk(chunk_id)

        refreshed = self.content_factory.get(self.container_id,
                                             content.content_id)

        seen_hosts = []
        for chunk in refreshed.chunks.filter(metapos=0):
            self.assertThat(seen_hosts, Not(Contains(chunk.host)))
            self.assertNotEquals(chunk.id, chunk_id)
            seen_hosts.append(chunk.host)

        new_meta, new_stream = self.blob_client.chunk_get(new_chunk["url"])
        self.assertEqual(md5_stream(new_stream), old_hash)

        # Chunk IDs differ by construction; compare everything else.
        for meta in (old_meta, new_meta):
            del meta["chunk_id"]
        self.assertEqual(new_meta, old_meta)

    def test_single_move_chunk(self):
        """Move a chunk of a SINGLE content."""
        self._test_move_chunk(policy="SINGLE")

    def test_twocopies_move_chunk(self):
        """Move a chunk of a TWOCOPIES content."""
        self._test_move_chunk(policy="TWOCOPIES")

    def test_rain_move_chunk(self):
        """Move a chunk of a RAIN content; skipped with fewer than 9 rawx."""
        rawx_count = len(self.conf['rawx'])
        if rawx_count < 9:
            self.skipTest("Need more than 8 rawx")
        self._test_move_chunk(policy="RAIN")

    def test_move_chunk_not_in_content(self):
        """Moving a chunk ID the content does not own raises OrphanChunk."""
        payload = random_data(self.chunk_size)
        target = self._new_content("TWOCOPIES", payload)
        unknown_chunk_id = "1234"
        with ExpectedException(OrphanChunk):
            target.move_chunk(unknown_chunk_id)

    def test_strange_paths(self):
        """Create contents with unusual names and check they are preserved.

        Each content must report the exact path it was created with, and
        every name must appear in the container listing. All created
        contents are deleted afterwards on a best-effort basis.
        """
        strange_paths = [
                "Annual report.txt",
                "foo+bar=foobar.txt",
                "100%_bug_free.c",
                "forward/slash/allowed",
                "I\\put\\backslashes\\and$dollar$signs$in$file$names",
                "Je suis tombé sur la tête, mais ça va bien.",
                "%s%f%u%d%%",
                "carriage\rreturn",
                "line\nfeed",
                "ta\tbu\tla\ttion",
                "controlchars",
                ]
        answers = dict()
        for cname in strange_paths:
            content = self._new_content("SINGLE", "nobody cares", cname)
            answers[cname] = content
        listing = self.container_client.container_list(self.account,
                                                       self.container_name)
        obj_set = {k["name"].encode("utf8", "ignore")
                   for k in listing["objects"]}
        try:
            # Ensure the saved path is the one we gave the object
            for cname in answers:
                self.assertEqual(cname, answers[cname].path)
            # Ensure all objects appear in listing
            for cname in strange_paths:
                self.assertIn(cname, obj_set)
        finally:
            # Cleanup: delete every content created above, not just the
            # last one bound to the loop variable.
            for cname in answers:
                try:
                    answers[cname].delete()
                except Exception:
                    # Best effort: a failed delete must not mask the
                    # outcome of the assertions.
                    pass
Exemplo n.º 31
0
class TestStorageTierer(BaseTestCase):
    """Functional tests for the listing generators of StorageTiererWorker.

    ``setUp`` creates a dedicated account with three containers: an empty
    one, one holding a single SINGLE content, and one holding a SINGLE and
    a TWOCOPIES content.
    """

    def setUp(self):
        """Build the worker configuration and populate the test account."""
        super(TestStorageTierer, self).setUp()
        self.namespace = self.conf['namespace']
        self.test_account = "test_storage_tiering_%f" % time.time()
        self.gridconf = {
            "namespace": self.namespace,
            "container_fetch_limit": 2,
            "content_fetch_limit": 2,
            "account": self.test_account,
            "outdated_threshold": 0,
            "new_policy": "EC"
        }
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self._populate()

    def _populate(self):
        """Create the three test containers and their contents."""
        self.container_0_name = "container_empty"
        self.container_0_id = cid_from_name(self.test_account,
                                            self.container_0_name)
        self.container_client.container_create(account=self.test_account,
                                               reference=self.container_0_name)

        self.container_1_name = "container_with_1_content"
        self.container_1_id = cid_from_name(self.test_account,
                                            self.container_1_name)
        self.container_client.container_create(account=self.test_account,
                                               reference=self.container_1_name)
        self.container_1_content_0_name = "container_1_content_0"
        # Pass the name of the container the content really lives in.
        self.container_1_content_0 = self._new_content(
            self.container_1_id, self.container_1_content_0_name, "SINGLE",
            self.test_account, self.container_1_name)

        self.container_2_name = "container_with_2_contents"
        self.container_2_id = cid_from_name(self.test_account,
                                            self.container_2_name)
        self.container_client.container_create(account=self.test_account,
                                               reference=self.container_2_name)
        self.container_2_content_0_name = "container_2_content_0"
        self.container_2_content_0 = self._new_content(
            self.container_2_id, self.container_2_content_0_name, "SINGLE",
            self.test_account, self.container_2_name)
        self.container_2_content_1_name = "container_2_content_1"
        self.container_2_content_1 = self._new_content(
            self.container_2_id, self.container_2_content_1_name, "TWOCOPIES",
            self.test_account, self.container_2_name)

    def _new_content(self, container_id, content_name, stgpol, account,
                     reference):
        """Create a 10-byte random content and return it.

        :param container_id: ID of the container receiving the content
        :param content_name: name of the content
        :param stgpol: storage policy of the content
        :param account: account name forwarded to the content factory
        :param reference: container name forwarded to the content factory
        """
        data = random_data(10)
        content = self.content_factory.new(container_id,
                                           content_name,
                                           len(data),
                                           stgpol,
                                           account=account,
                                           container_name=reference)

        content.create(BytesIO(data))
        return content

    def tearDown(self):
        super(TestStorageTierer, self).tearDown()

    def test_iter_container_list(self):
        """The worker must list the same containers as the storage API."""
        worker = StorageTiererWorker(self.gridconf, Mock())
        api = ObjectStorageApi(self.namespace)
        actual = [x[0] for x in api.container_list(self.test_account)]
        if len(actual) < 3:
            print("Slow event propagation!")
            # account events have not yet propagated
            time.sleep(3.0)
            # NOTE(review): this retry indexes [0] on the result while the
            # first call above does not -- confirm which one matches the
            # return shape of ObjectStorageApi.container_list.
            actual = [x[0] for x in api.container_list(self.test_account)[0]]
        gen = worker._list_containers()
        self.assertListEqual(list(gen), actual)

    def test_iter_content_list_outdated_threshold_0(self):
        """With a threshold of 0, all three contents are expected."""
        self.gridconf["outdated_threshold"] = 0
        worker = StorageTiererWorker(self.gridconf, Mock())
        gen = worker._list_contents()
        self.assertEqual(
            next(gen),
            (self.container_1_id, self.container_1_content_0.content_id))
        self.assertEqual(
            next(gen),
            (self.container_2_id, self.container_2_content_0.content_id))
        self.assertEqual(
            next(gen),
            (self.container_2_id, self.container_2_content_1.content_id))
        self.assertRaises(StopIteration, next, gen)

    def test_iter_content_list_outdated_threshold_9999999999(self):
        """With a huge threshold, no content is expected."""
        self.gridconf["outdated_threshold"] = 9999999999
        worker = StorageTiererWorker(self.gridconf, Mock())
        gen = worker._list_contents()
        self.assertRaises(StopIteration, next, gen)

    def test_iter_content_list_outdated_threshold_2(self):
        """A content created 2 seconds later must not be listed."""
        # add a new content created after the three previous contents
        now = int(time.time())
        time.sleep(2)
        self._new_content(self.container_2_id, "titi", "TWOCOPIES",
                          self.test_account, self.container_2_name)

        self.gridconf["outdated_threshold"] = 2
        worker = StorageTiererWorker(self.gridconf, Mock())
        # NOTE(review): only the call to _list_contents() happens under the
        # patch; the values are pulled after the ``with`` block exits. If
        # _list_contents is a lazy generator, time.time is no longer
        # patched while it runs -- confirm this is intended.
        with mock.patch('oio.crawler.storage_tierer.time.time',
                        mock.MagicMock(return_value=now + 1)):
            gen = worker._list_contents()
        self.assertEqual(
            next(gen),
            (self.container_1_id, self.container_1_content_0.content_id))
        self.assertEqual(
            next(gen),
            (self.container_2_id, self.container_2_content_0.content_id))
        self.assertEqual(
            next(gen),
            (self.container_2_id, self.container_2_content_1.content_id))
        self.assertRaises(StopIteration, next, gen)

    def test_iter_content_list_skip_good_policy(self):
        """With new_policy SINGLE, only the TWOCOPIES content is expected."""
        self.gridconf["new_policy"] = "SINGLE"
        worker = StorageTiererWorker(self.gridconf, Mock())
        gen = worker._list_contents()
        self.assertEqual(
            next(gen),
            (self.container_2_id, self.container_2_content_1.content_id))
        self.assertRaises(StopIteration, next, gen)
Exemplo n.º 32
0
class TestContentFactory(BaseTestCase):
    """Functional tests for ContentFactory: get, new, move_chunk, paths.

    ``setUp`` creates a fresh container per test. The ``test_get_*`` and
    ``test_new_*`` tests replace the container client's methods with Mocks
    that return the metadata and chunks defined in the test.
    """

    def setUp(self):
        """Wait for meta2 scores, then create the test container."""
        super(TestContentFactory, self).setUp()

        self.wait_for_score(('meta2', ))
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.blob_client = BlobClient(conf=self.conf)
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"
        self.stgpol_ec = "EC"

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    def test_get_ec(self):
        """``get`` must build an ECContent from an ``ec/`` chunk method."""
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_ec,
            "version": "1450176946676289",
            "oio_version": "4.2",
        }
        chunks = [{
            "url": "http://127.0.0.1:6012/A0A0",
            "pos": "0.0",
            "size": 512,
            "hash": "E7D4E4AD460971CA2E3141F2102308D4"
        }, {
            "url": "http://127.0.0.1:6010/A01",
            "pos": "0.1",
            "size": 146,
            "hash": "760AB5DA7C51A3654F1CA622687CD6C3"
        }, {
            "url": "http://127.0.0.1:6011/A00",
            "pos": "0.2",
            "size": 512,
            "hash": "B1D08B86B8CAA90A2092CCA0DF9201DB"
        }, {
            "url": "http://127.0.0.1:6013/A0A1",
            "pos": "0.3",
            "size": 512,
            "hash": "DA9D7F72AEEA5791565724424CE45C16"
        }]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id",
                                     "xxx_content_id",
                                     account=self.account,
                                     container_name=self.container_name)
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(
            c.full_path,
            encode_fullpath(self.account, self.container_name, "tox.ini",
                            meta['version'], meta['id']))
        self.assertEqual(c.version, "1450176946676289")
        # TODO test storage method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])
        self.assertEqual(c.chunks[2].raw(), chunks[2])
        self.assertEqual(c.chunks[3].raw(), chunks[3])

    def test_get_plain(self):
        """``get`` must build a PlainContent from a ``plain/`` chunk method."""
        meta = {
            "chunk_method": "plain/nb_copy=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash_method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime_type": "application/octet-stream",
            "name": "tox.ini",
            "policy": self.stgpol_twocopies,
            "version": "1450176946676289",
            "oio_version": "4.2",
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/A0",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }, {
            "url": "http://127.0.0.1:6011/A1",
            "pos": "0",
            "size": 658,
            "hash": "E952A419957A6E405BFC53EC65483F73"
        }]
        self.content_factory.container_client.content_locate = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.get("xxx_container_id",
                                     "xxx_content_id",
                                     account=self.account,
                                     container_name=self.container_name)
        self.assertEqual(type(c), PlainContent)
        self.assertEqual(c.content_id, "3FA2C4A1ED2605005335A276890EC458")
        self.assertEqual(c.length, 658)
        self.assertEqual(c.path, "tox.ini")
        self.assertEqual(c.version, "1450176946676289")
        self.assertEqual(
            c.full_path,
            encode_fullpath(self.account, self.container_name, "tox.ini",
                            meta['version'], meta['id']))
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 2)
        self.assertEqual(c.chunks[0].raw(), chunks[0])
        self.assertEqual(c.chunks[1].raw(), chunks[1])

    def test_get_unknown_content(self):
        """An unknown content ID must raise ContentNotFound."""
        self.assertRaises(ContentNotFound, self.content_factory.get,
                          self.container_id, "1234")

    def test_new_ec(self):
        """``new`` must build an ECContent from an ``ec/`` chunk method.

        The final assertions expect the content's chunks in the reverse
        order of the list returned by the mocked ``content_prepare``.
        """
        meta = {
            "chunk_method": "ec/algo=liberasurecode_rs_vand,k=6,m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash_method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime_type": "application/octet-stream",
            "name": "titi",
            "policy": self.stgpol_ec,
            "version": "1450341162332663",
            "oio_version": "4.2",
        }
        chunks = [{
            "url": "http://127.0.0.1:6010/0_p1",
            "pos": "0.3",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6011/0_p0",
            "pos": "0.2",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6016/0_1",
            "pos": "0.1",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }, {
            "url": "http://127.0.0.1:6017/0_0",
            "pos": "0.0",
            "size": 1048576,
            "hash": "00000000000000000000000000000000"
        }]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))
        c = self.content_factory.new("xxx_container_id",
                                     "titi",
                                     1000,
                                     self.stgpol_ec,
                                     account=self.account,
                                     container_name=self.container_name)
        self.assertEqual(type(c), ECContent)
        self.assertEqual(c.content_id, "F4B1C8DD132705007DE8B43D0709DAA2")
        self.assertEqual(c.length, 1000)
        self.assertEqual(c.path, "titi")
        self.assertEqual(c.version, "1450341162332663")
        # TODO test storage_method
        self.assertEqual(len(c.chunks), 4)
        self.assertEqual(c.chunks[0].raw(), chunks[3])
        self.assertEqual(c.chunks[1].raw(), chunks[2])
        self.assertEqual(c.chunks[2].raw(), chunks[1])
        self.assertEqual(c.chunks[3].raw(), chunks[0])

    def _new_content(self,
                     stgpol,
                     data,
                     path="titi",
                     account=None,
                     container_name=None,
                     mime_type=None,
                     properties=None):
        """Create a content in the test container and return it re-fetched.

        :param stgpol: storage policy of the new content
        :param data: payload to store; its length is the declared size
        :param path: name of the content
        :param account: account name forwarded to ``content_factory.new``
        :param container_name: container name forwarded to
            ``content_factory.new``
        :param mime_type: if truthy, set on the content before creation
        :param properties: if truthy, set on the content before creation
        :returns: the content as returned by ``content_factory.get``
        """
        old_content = self.content_factory.new(self.container_id,
                                               path,
                                               len(data),
                                               stgpol,
                                               account=account,
                                               container_name=container_name)
        if properties:
            old_content.properties = properties
        if mime_type:
            old_content.mime_type = mime_type
        old_content.create(BytesIO(data))
        return self.content_factory.get(self.container_id,
                                        old_content.content_id)

    def _test_move_chunk(self, policy):
        """Move the first chunk at metaposition 0 and check the result.

        The moved chunk must get a new URL, all chunks at metaposition 0
        must sit on distinct hosts, and the new chunk must carry the same
        data hash and metadata (chunk ID and mtime aside) as the old one.

        :param policy: storage policy used to create the test content
        """
        data = random_data(self.chunk_size)
        content = self._new_content(policy, data)

        mc = content.chunks.filter(metapos=0)
        chunk_id = mc[0].id
        chunk_url = mc[0].url
        chunk_host = mc[0].host
        chunk_meta, chunk_stream = self.blob_client.chunk_get(chunk_url)
        chunk_hash = md5_stream(chunk_stream)
        new_chunk = content.move_chunk(chunk_id, service_id=chunk_host)

        content_updated = self.content_factory.get(self.container_id,
                                                   content.content_id)

        hosts = []
        for c in content_updated.chunks.filter(metapos=0):
            self.assertThat(hosts, Not(Contains(c.host)))
            self.assertNotEqual(c.url, chunk_url)
            hosts.append(c.host)

        new_chunk_meta, new_chunk_stream = self.blob_client.chunk_get(
            new_chunk["url"])
        new_chunk_hash = md5_stream(new_chunk_stream)

        self.assertEqual(new_chunk_hash, chunk_hash)
        self.assertGreaterEqual(new_chunk_meta['chunk_mtime'],
                                chunk_meta['chunk_mtime'])

        # ID and mtime are expected to differ; compare everything else.
        del chunk_meta["chunk_id"]
        del new_chunk_meta["chunk_id"]
        del chunk_meta["chunk_mtime"]
        del new_chunk_meta["chunk_mtime"]
        self.assertEqual(new_chunk_meta, chunk_meta)

    def test_single_move_chunk(self):
        """Move a chunk of a SINGLE content."""
        self._test_move_chunk(self.stgpol)

    def test_twocopies_move_chunk(self):
        """Move a chunk of a TWOCOPIES content."""
        self._test_move_chunk(self.stgpol_twocopies)

    @ec
    def test_ec_move_chunk(self):
        """Move a chunk of an EC content."""
        self._test_move_chunk(self.stgpol_ec)

    def test_move_chunk_not_in_content(self):
        """Moving a chunk ID the content does not own raises OrphanChunk."""
        data = random_data(self.chunk_size)
        content = self._new_content(self.stgpol_twocopies, data)
        with ExpectedException(OrphanChunk):
            content.move_chunk("1234")

    def test_strange_paths(self):
        """Create contents with unusual names and check they are preserved.

        Each content must report the path it was created with and the
        matching full path, and every name must appear in the container
        listing. All created contents are deleted afterwards on a
        best-effort basis.
        """
        answers = dict()
        for cname in strange_paths:
            content = self._new_content(self.stgpol, b"nobody cares", cname)
            answers[cname] = content

        _, listing = self.container_client.content_list(
            self.account, self.container_name)
        if PY2:
            obj_set = {k["name"].encode('utf-8') for k in listing["objects"]}
        else:
            obj_set = {k["name"] for k in listing["objects"]}
        try:
            # Ensure the saved path is the one we gave the object
            for cname in answers:
                self.assertEqual(cname, answers[cname].path)
                fullpath = encode_fullpath(self.account, self.container_name,
                                           cname, answers[cname].version,
                                           answers[cname].content_id)
                self.assertEqual(answers[cname].full_path, fullpath)
            # Ensure all objects appear in listing
            for cname in strange_paths:
                self.assertIn(cname, obj_set)

        finally:
            # Cleanup: delete every content created above, not just the
            # last one bound to the loop variable.
            for cname in answers:
                try:
                    answers[cname].delete()
                except Exception:
                    # Best effort: a failed delete must not mask the
                    # outcome of the assertions.
                    pass
Exemplo n.º 33
0
class TestContentRebuildFilter(BaseTestCase):
    def setUp(self):
        """Create a test container and drain the rebuilder tube."""
        super(TestContentRebuildFilter, self).setUp()
        self.namespace = self.conf['namespace']
        self.gridconf = {"namespace": self.namespace}
        self.container = "TestContentRebuildFilter%f" % time.time()
        self.ref = self.container
        self.container_client = ContainerClient(self.conf)
        self.container_client.container_create(self.account, self.container)
        props = self.container_client.container_get_properties(
            self.account, self.container)
        # The container ID is the part of 'sys.name' before the first dot.
        self.container_id = props['system']['sys.name'].split('.', 1)[0]
        self.object_storage_api = ObjectStorageApi(namespace=self.namespace)
        # Use one of the beanstalkd services listed in the configuration.
        self.queue_url = choice(self.conf['services']['beanstalkd'])['addr']
        self.conf['queue_url'] = 'beanstalk://' + self.queue_url
        self.conf['tube'] = DEFAULT_REBUILDER_TUBE
        self.notify_filter = NotifyFilter(app=_App, conf=self.conf)
        # Empty the tube so that leftover jobs do not affect this test.
        beanstalk = Beanstalk.from_url(self.conf['queue_url'])
        beanstalk.drain_tube(DEFAULT_REBUILDER_TUBE)
        beanstalk.close()

    def _create_event(self, content_name, present_chunks, missing_chunks,
                      content_id):
        event = {}
        event["when"] = time.time()
        event["event"] = "storage.content.broken"
        event["data"] = {
            "present_chunks": present_chunks,
            "missing_chunks": missing_chunks
        }
        event["url"] = {
            "ns": self.namespace,
            "account": self.account,
            "user": self.container,
            "path": content_name,
            "id": self.container_id,
            "content": content_id
        }
        return event

    def _is_chunks_created(self, previous, after, pos_created):
        remain = list(after)
        for p in previous:
            for r in remain:
                if p["url"] == r["url"]:
                    remain.remove(r)
                    break
        if len(remain) != len(pos_created):
            return False
        for r in remain:
            if r["pos"] in pos_created:
                remain.remove(r)
            else:
                return False
        return True

    def _rebuild(self, event, job_id=0):
        """Run `oio-blob-rebuilder` on the test queue for three seconds.

        The process is started against `self.queue_url`, given three
        seconds to work, then killed and reaped.

        :param event: accepted for the callers' convenience, not used here
        :param job_id: not used here
        """
        self.blob_rebuilder = subprocess.Popen([
            'oio-blob-rebuilder', self.namespace,
            '--beanstalkd=' + self.queue_url
        ])
        try:
            time.sleep(3)
        finally:
            # Always stop the child, even if the sleep is interrupted,
            # and wait for it so it does not linger as a zombie process.
            self.blob_rebuilder.kill()
            self.blob_rebuilder.wait()

    def _remove_chunks(self, chunks, content_id):
        if not chunks:
            return
        for chunk in chunks:
            chunk['id'] = chunk['url']
            chunk['content'] = content_id
            chunk['type'] = 'chunk'
        self.container_client.container_raw_delete(self.account,
                                                   self.container,
                                                   data=chunks)

    def _check_rebuild(self,
                       content_name,
                       chunks,
                       missing_pos,
                       meta,
                       chunks_to_remove,
                       chunk_created=True):
        """Break a content, notify it, run the rebuilder and check chunks.

        :param content_name: name of the object under test
        :param chunks: chunks still referenced, sent as "present_chunks"
        :param missing_pos: positions sent as "missing_chunks"
        :param meta: object metadata; only meta['id'] is read
        :param chunks_to_remove: chunks deleted from the container first
        :param chunk_created: expected result of `_is_chunks_created`
        """
        content_id = meta['id']
        self._remove_chunks(chunks_to_remove, content_id)
        broken_event = self._create_event(content_name, chunks, missing_pos,
                                          content_id)
        self.notify_filter.process(env=broken_event, cb=None)
        self._rebuild(broken_event)
        _, located = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        outcome = self._is_chunks_created(chunks, located, missing_pos)
        self.assertIs(chunk_created, outcome)

    def test_nothing_missing(self):
        """Send a broken-content event although no chunk was removed."""
        content_name = "test_nothing_missing"
        self.object_storage_api.object_create(
            account=self.account, container=self.container, data="test",
            policy="THREECOPIES", obj_name=content_name)

        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        for chunk in chunks:
            chunk.pop('score', None)

        # No chunk removed and no position declared missing: the check
        # passes when no new chunk shows up.
        self._check_rebuild(content_name, chunks, [], meta, [],
                            chunk_created=True)

    def test_missing_1_chunk(self):
        """Remove one chunk of a THREECOPIES object and expect a rebuild."""
        content_name = "test_missing_1_chunk"
        self.object_storage_api.object_create(
            account=self.account, container=self.container, data="test",
            policy="THREECOPIES", obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        removed = [chunks.pop(0)]
        for chunk in chunks:
            chunk.pop('score', None)

        self._check_rebuild(content_name, chunks, ["0"], meta, removed)

    def test_missing_last_chunk(self):
        """Remove one chunk of a 4 MiB object and declare "3" missing."""
        content_name = "test_missing_last_chunk"
        payload = random_str(1024 * 1024 * 4)
        self.object_storage_api.object_create(
            account=self.account, container=self.container, data=payload,
            policy="THREECOPIES", obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        # NOTE(review): this presumes the first located chunk belongs to
        # position "3" -- confirm the ordering returned by object_locate.
        removed = [chunks.pop(0)]
        for chunk in chunks:
            chunk.pop('score', None)

        self._check_rebuild(content_name, chunks, ["3"], meta, removed)

    def test_missing_2_chunks(self):
        """Remove two chunks of a THREECOPIES object and expect rebuilds."""
        content_name = "test_missing_2_chunks"
        self.object_storage_api.object_create(
            account=self.account, container=self.container, data="test",
            policy="THREECOPIES", obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        removed = [chunks.pop(0) for _ in range(2)]
        for chunk in chunks:
            chunk.pop('score', None)

        self._check_rebuild(content_name, chunks, ["0", "0"], meta, removed)

    def test_missing_all_chunks(self):
        """Remove a chunk of a SINGLE object: no chunk must be created."""
        content_name = "test_missing_all_chunks"
        self.object_storage_api.object_create(
            account=self.account, container=self.container, data="test",
            policy="SINGLE", obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        removed = [chunks.pop(0)]
        for chunk in chunks:
            chunk.pop('score', None)

        self._check_rebuild(content_name, chunks, ["0"], meta, removed,
                            chunk_created=False)

    def test_missing_all_chunks_of_a_pos(self):
        """Remove three chunks of a THREECOPIES object: nothing is rebuilt.

        With every removed chunk gone, the check expects that no new
        chunk is created for position "0".
        """
        # Named after this test; the name used to be copy-pasted from
        # test_missing_2_chunks, which was misleading when debugging.
        content_name = "test_missing_all_chunks_of_a_pos"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = [chunks.pop(0) for _ in range(3)]

        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0"]
        self._check_rebuild(content_name,
                            chunks,
                            missing_pos,
                            meta,
                            chunks_to_remove,
                            chunk_created=False)

    def test_missing_multiple_chunks(self):
        """Remove four chunks of a 4 MiB object and expect four rebuilds."""
        content_name = "test_missing_multiple_chunks"
        payload = random_str(1024 * 1024 * 4)
        self.object_storage_api.object_create(
            account=self.account, container=self.container, data=payload,
            policy="THREECOPIES", obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        # Pop from the highest index down so earlier pops do not shift
        # the indexes of the later ones.
        removed = [chunks.pop(index) for index in (9, 6, 4, 0)]
        for chunk in chunks:
            chunk.pop('score', None)

        self._check_rebuild(content_name, chunks, ["0", "1", "2", "3"],
                            meta, removed)

    def test_missing_1_chunk_ec(self):
        """Remove one chunk of an EC object and expect it to be rebuilt."""
        rawx_count = len(self.conf['services']['rawx'])
        if rawx_count < 9:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 9 rawx to run")
        content_name = "test_missing_1_chunk_ec"
        self.object_storage_api.object_create(
            account=self.account, container=self.container, data="test",
            policy="EC", obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        removed = [chunks.pop(0)]
        for chunk in chunks:
            chunk.pop('score', None)

        self._check_rebuild(content_name, chunks, ["0.1"], meta, removed)

    def test_missing_m_chunk_ec(self):
        """Remove three chunks of an EC object and expect three rebuilds."""
        rawx_count = len(self.conf['services']['rawx'])
        if rawx_count < 9:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 9 rawx to run")
        content_name = "test_missing_m_chunk_ec"
        self.object_storage_api.object_create(
            account=self.account, container=self.container, data="test",
            policy="EC", obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        removed = [chunks.pop(0) for _ in range(3)]
        for chunk in chunks:
            chunk.pop('score', None)

        self._check_rebuild(content_name, chunks, ["0.1", "0.2", "0.3"],
                            meta, removed)

    def test_missing_m_chunk_ec_2(self):
        """Remove three non-adjacent chunks of an EC object.

        The chunks are popped at indexes 0, 3 and 5 (each index applies to
        the list as left by the previous pop) and three rebuilt chunks are
        expected.
        """
        if len(self.conf['services']['rawx']) < 9:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 9 rawx to run")
        # Named after this test; the name used to be copy-pasted from
        # test_missing_m_chunk_ec, which was misleading when debugging.
        content_name = "test_missing_m_chunk_ec_2"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="EC",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = [chunks.pop(index) for index in (0, 3, 5)]
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0.1", "0.5", "0.8"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_m1_chunk_ec(self):
        """Remove four chunks of an EC object: no chunk must be created."""
        rawx_count = len(self.conf['services']['rawx'])
        if rawx_count < 9:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 9 rawx to run")
        content_name = "test_missing_m1_chunk_ec"
        self.object_storage_api.object_create(
            account=self.account, container=self.container, data="test",
            policy="EC", obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        removed = [chunks.pop(0) for _ in range(4)]
        for chunk in chunks:
            chunk.pop('score', None)

        self._check_rebuild(content_name, chunks,
                            ["0.1", "0.2", "0.3", "0.4"], meta, removed,
                            chunk_created=False)
Exemplo n.º 34
0
class TestBlobAuditorFunctional(BaseTestCase):
    def setUp(self):
        """Create a container and upload one chunk directly to a rawx.

        The content is not registered in the container here; tests call
        `init_content()` when they need it.
        """
        super(TestBlobAuditorFunctional, self).setUp()
        self.namespace = self.conf['namespace']
        self.account = self.conf['account']

        self.test_dir = self.conf['sds_path']

        # Only the address of the returned service is used.
        _, _, rawx_addr = self.get_service_url('rawx')
        self.rawx = 'http://' + rawx_addr

        self.h = hashlib.new('md5')

        grid_conf = {"namespace": self.namespace}
        self.auditor = BlobAuditorWorker(grid_conf, get_logger(None), None)
        self.container_c = ContainerClient(grid_conf)
        self.blob_c = BlobClient()

        self.ref = random_str(8)
        self.container_c.container_create(self.account, self.ref)

        self.url_rand = random_id(64)

        self.data = random_str(1280)
        self.h.update(self.data)
        self.hash_rand = self.h.hexdigest().lower()

        self.content = TestContent(
            random_str(6), len(self.data), self.url_rand, 1)
        self.content.id_container = cid_from_name(
            self.account, self.ref).upper()
        self.chunk = TestChunk(
            self.content.size, self.url_rand, 0, self.hash_rand)

        self.chunk_url = "%s/%s" % (self.rawx, self.chunk.id_chunk)
        # Description of the chunk as sent to the container service.
        self.chunk_proxy = {
            "hash": self.chunk.md5,
            "pos": "0",
            "size": self.chunk.size,
            "url": self.chunk_url
        }

        # Metadata sent along with the chunk data to the rawx.
        upload_meta = {
            'content_path': self.content.path,
            'container_id': self.content.id_container,
            'chunk_method': 'plain/nb_copy=3',
            'policy': 'TESTPOLICY',
            'id': '0000',
            'version': 1,
            'chunk_id': self.chunk.id_chunk,
            'chunk_pos': self.chunk.pos,
            'chunk_hash': self.chunk.md5,
            'full_path':
            ['%s/%s/%s' % (self.account, self.ref, self.content.path)],
            'oio_version': OIO_VERSION
        }
        self.blob_c.chunk_put(self.chunk_url, upload_meta, self.data)

        # NOTE(review): the on-disk path assumes the chunk landed in the
        # "<namespace>-rawx-1" directory -- confirm this matches the rawx
        # returned by get_service_url().
        chunk_id = self.chunk.id_chunk
        self.chunk_path = "%s/data/%s-rawx-1/%s/%s" % (
            self.test_dir, self.namespace, chunk_id[0:3], chunk_id)
        self.bad_container_id = '0' * 64

    def tearDown(self):
        """Remove the content, the container and the chunk file.

        Each step is best-effort: a failure is ignored so that the next
        steps still run.
        """
        super(TestBlobAuditorFunctional, self).tearDown()

        cleanup_steps = (
            lambda: self.container_c.content_delete(
                self.account, self.ref, self.content.path),
            lambda: self.container_c.container_destroy(
                self.account, self.ref),
            lambda: os.remove(self.chunk_path),
        )
        for step in cleanup_steps:
            try:
                step()
            except Exception:
                pass

    def init_content(self):
        """Register the test content, with its single chunk, in the
        container."""
        chunk_list = [self.chunk_proxy]
        self.container_c.content_create(
            self.account, self.ref, self.content.path,
            self.chunk.size, self.hash_rand, data=chunk_list)

    def test_chunk_audit(self):
        """Auditing a chunk whose content is registered raises nothing."""
        self.init_content()
        audited_path = self.chunk_path
        self.auditor.chunk_audit(audited_path)

    def test_content_deleted(self):
        """A chunk whose content is not registered is an orphan."""
        # init_content() is deliberately not called here.
        with self.assertRaises(exc.OrphanChunk):
            self.auditor.chunk_audit(self.chunk_path)

    def test_container_deleted(self):
        """A chunk whose container was deleted is an orphan."""
        self.container_c.container_delete(self.account, self.ref)

        with self.assertRaises(exc.OrphanChunk):
            self.auditor.chunk_audit(self.chunk_path)

    def test_chunk_corrupted(self):
        """Overwriting the chunk file with other data is a corruption."""
        self.init_content()
        # Same length as the original data (1280), different content.
        with open(self.chunk_path, "w") as chunk_file:
            chunk_file.write(random_str(1280))

        with self.assertRaises(exc.CorruptedChunk):
            self.auditor.chunk_audit(self.chunk_path)

    def test_chunk_bad_size(self):
        """A chunk file shorter than the uploaded data is faulty."""
        self.init_content()
        # 320 characters instead of the 1280 uploaded in setUp().
        with open(self.chunk_path, "w") as chunk_file:
            chunk_file.write(random_str(320))

        with self.assertRaises(exc.FaultyChunk):
            self.auditor.chunk_audit(self.chunk_path)

    def test_xattr_bad_chunk_size(self):
        """A chunk whose size xattr is '-1' is faulty."""
        self.init_content()
        attr_name = 'user.' + chunk_xattr_keys['chunk_size']
        xattr.setxattr(self.chunk_path, attr_name, '-1')

        with self.assertRaises(exc.FaultyChunk):
            self.auditor.chunk_audit(self.chunk_path)

    def test_xattr_bad_chunk_hash(self):
        """A chunk whose hash xattr was overwritten is corrupted."""
        self.init_content()
        attr_name = 'user.' + chunk_xattr_keys['chunk_hash']
        xattr.setxattr(self.chunk_path, attr_name, 'WRONG_HASH')
        with self.assertRaises(exc.CorruptedChunk):
            self.auditor.chunk_audit(self.chunk_path)

    def test_xattr_bad_content_path(self):
        """A chunk whose content path xattr is wrong is an orphan."""
        self.init_content()
        attr_name = 'user.' + chunk_xattr_keys['content_path']
        xattr.setxattr(self.chunk_path, attr_name, 'WRONG_PATH')

        with self.assertRaises(exc.OrphanChunk):
            self.auditor.chunk_audit(self.chunk_path)

    def test_xattr_bad_chunk_id(self):
        """A chunk whose chunk ID xattr is wrong is an orphan."""
        self.init_content()
        attr_name = 'user.' + chunk_xattr_keys['chunk_id']
        xattr.setxattr(self.chunk_path, attr_name, 'WRONG_ID')

        with self.assertRaises(exc.OrphanChunk):
            self.auditor.chunk_audit(self.chunk_path)

    def test_xattr_bad_content_container(self):
        """A chunk whose container ID xattr is wrong is an orphan."""
        self.init_content()
        attr_name = 'user.' + chunk_xattr_keys['container_id']
        xattr.setxattr(self.chunk_path, attr_name, self.bad_container_id)
        with self.assertRaises(exc.OrphanChunk):
            self.auditor.chunk_audit(self.chunk_path)

    def test_xattr_bad_chunk_position(self):
        """A chunk whose position xattrs are set to '42' is faulty."""
        self.init_content()
        # Two attributes are overwritten: a literal name and the one
        # looked up in chunk_xattr_keys.
        position_attrs = ('user.grid.chunk.position',
                          'user.' + chunk_xattr_keys['chunk_pos'])
        for attr_name in position_attrs:
            xattr.setxattr(self.chunk_path, attr_name, '42')
        with self.assertRaises(exc.FaultyChunk):
            self.auditor.chunk_audit(self.chunk_path)

    def test_chunk_bad_hash(self):
        """Registering the content with another hash makes the chunk
        faulty."""
        # Feeding the data a second time to the same hash object yields a
        # digest different from the one computed in setUp().
        self.h.update(self.data)
        wrong_hash = self.h.hexdigest().lower()
        self.hash_rand = wrong_hash
        self.chunk.md5 = wrong_hash
        self.chunk_proxy['hash'] = self.chunk.md5
        self.init_content()

        with self.assertRaises(exc.FaultyChunk):
            self.auditor.chunk_audit(self.chunk_path)

    def test_chunk_bad_length(self):
        """Registering the content with size 320 makes the chunk faulty."""
        declared_size = 320
        self.chunk.size = declared_size
        self.chunk_proxy['size'] = self.chunk.size
        self.init_content()

        with self.assertRaises(exc.FaultyChunk):
            self.auditor.chunk_audit(self.chunk_path)

    def test_chunk_bad_chunk_size(self):
        """Registering the chunk with size 320 makes the chunk faulty."""
        declared_size = 320
        self.chunk.size = declared_size
        self.chunk_proxy['size'] = self.chunk.size
        self.init_content()

        with self.assertRaises(exc.FaultyChunk):
            self.auditor.chunk_audit(self.chunk_path)

    def test_chunk_bad_url(self):
        """Registering the chunk under another URL makes it an orphan."""
        wrong_url = '%s/WRONG_ID' % self.rawx
        self.chunk_proxy['url'] = wrong_url
        self.init_content()

        with self.assertRaises(exc.OrphanChunk):
            self.auditor.chunk_audit(self.chunk_path)
Exemplo n.º 35
0
class TestAccountClient(BaseTestCase):
    def setUp(self):
        """Create a fresh account holding "container1" and "container2".

        The account creation is attempted up to four times, two seconds
        apart, when it fails with a ClientException.
        """
        super(TestAccountClient, self).setUp()
        self.account_id = "test_account_%f" % time.time()

        self.account_client = AccountClient(self.conf)
        self.container_client = ContainerClient(self.conf)

        retry = 3
        for attempt in range(retry + 1):
            try:
                self.account_client.account_create(self.account_id)
            except ClientException:
                if attempt >= retry:
                    raise
                time.sleep(2)
            else:
                break
        for name in ("container1", "container2"):
            self.container_client.container_create(account=self.account_id,
                                                   reference=name)
        time.sleep(.5)  # ensure container event have been processed

    def test_container_list(self):
        """List the containers of the account, with limit and marker.

        The reported container count stays at 2 in every case; only the
        returned listing depends on `limit` and `marker`.
        """
        # `assertEqual` is used throughout: the `assertEquals` alias is
        # deprecated and no longer exists in recent Python versions.
        resp = self.account_client.container_list(self.account_id)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"],
                         [["container1", 0, 0, 0], ["container2", 0, 0, 0]])

        resp = self.account_client.container_list(self.account_id, limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0]])

        # The marker is exclusive: listing resumes after "container1".
        resp = self.account_client.container_list(self.account_id,
                                                  marker="container1",
                                                  limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container2", 0, 0, 0]])

        # Nothing is expected after the last container.
        resp = self.account_client.container_list(self.account_id,
                                                  marker="container2",
                                                  limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [])

    # TODO: move this test somewhere under tests/unit/
    def test_account_service_refresh(self):
        """A network error makes the client look up the account address.

        After the failed request, `_get_account_addr` must have been
        called once and the endpoint must contain the address it returned.
        """
        client = self.account_client
        client.endpoint = "126.0.0.1:6666"
        client._last_refresh = time.time()
        client._get_account_addr = Mock(return_value="126.0.0.1:6667")
        with self.assertRaises(OioNetworkException):
            client.account_list()
        client._get_account_addr.assert_called_once()
        self.assertIn("126.0.0.1:6667", client.endpoint)

    def test_container_reset(self):
        """Resetting a container brings its object and byte counts to 0."""
        metadata = {"mtime": time.time(), "bytes": 42, "objects": 12}
        self.account_client.container_update(
            self.account_id, "container1", metadata=metadata)

        self.account_client.container_reset(
            self.account_id, "container1", time.time())
        resp = self.account_client.container_list(
            self.account_id, prefix="container1")
        for name, nb_objects, nb_bytes, _ in resp["listing"]:
            if name != 'container1':
                continue
            self.assertEqual(nb_objects, 0)
            self.assertEqual(nb_bytes, 0)
            return
        self.fail("No container container1")

    def test_account_refresh(self):
        """After a refresh, the account reports its container's usage."""
        metadata = {"mtime": time.time(), "bytes": 42, "objects": 12}
        self.account_client.container_update(
            self.account_id, "container1", metadata=metadata)

        self.account_client.account_refresh(self.account_id)

        account_info = self.account_client.account_show(self.account_id)
        self.assertEqual(account_info["bytes"], 42)
        self.assertEqual(account_info["objects"], 12)

    def test_account_flush(self):
        """After a flush, the account is empty and lists no container."""
        metadata = {"mtime": time.time(), "bytes": 42, "objects": 12}
        self.account_client.container_update(
            self.account_id, "container1", metadata=metadata)

        self.account_client.account_flush(self.account_id)

        account_info = self.account_client.account_show(self.account_id)
        self.assertEqual(account_info["bytes"], 0)
        self.assertEqual(account_info["objects"], 0)

        listing = self.account_client.container_list(self.account_id)
        self.assertEqual(len(listing["listing"]), 0)
Exemplo n.º 36
0
class TestDupContent(BaseTestCase):
    def setUp(self):
        """Create the clients and a fresh container for each test.

        The test is skipped when fewer than 3 rawx are configured.
        """
        super(TestDupContent, self).setUp()

        rawx_count = len(self.conf['rawx'])
        if rawx_count < 3:
            self.skipTest("Not enough rawx. "
                          "Dup tests needs more than 2 rawx to run")

        settings = self.conf
        self.namespace = settings['namespace']
        self.account = settings['account']
        self.chunk_size = settings['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()
        self.container_name = "TestDupContent%f" % time.time()
        self.container_client.container_create(
            acct=self.account, ref=self.container_name)
        container_id = cid_from_name(self.account, self.container_name)
        self.container_id = container_id.upper()

    def tearDown(self):
        """Delegate to the parent class; nothing specific to clean up."""
        super(TestDupContent, self).tearDown()

    def _test_upload(self, stgpol, data_size):
        """Upload random data and check the content and all its chunks.

        :param stgpol: storage policy name; one of "THREECOPIES",
            "TWOCOPIES" or "SINGLE" (any other value fails the test)
        :param data_size: number of bytes to upload
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(data), stgpol)
        self.assertEqual(type(content), DupContent)

        content.upload(StringIO.StringIO(data))

        meta, chunks = self.container_client.content_show(
            cid=self.container_id, content=content.content_id)
        chunks = ChunksHelper(chunks)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], "titi")

        # An empty content still has one (empty) metachunk.
        metachunk_nb = max(
            1, int(math.ceil(float(len(data)) / self.chunk_size)))

        copies_by_policy = {"THREECOPIES": 3, "TWOCOPIES": 2, "SINGLE": 1}
        if stgpol not in copies_by_policy:
            # Fail explicitly instead of hitting an unbound local below.
            self.fail("Unsupported storage policy: %s" % stgpol)
        nb_copy = copies_by_policy[stgpol]

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = data_begin + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                # Use a dedicated name so the content metadata fetched
                # above is not shadowed by the chunk metadata.
                chunk_meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(chunk_meta['content_size'], str(len(data)))
                self.assertEqual(chunk_meta['content_path'], "titi")
                self.assertEqual(chunk_meta['content_cid'],
                                 self.container_id)
                # This used to compare the value with itself, which could
                # never fail; compare it with the uploaded content's ID.
                self.assertEqual(chunk_meta['content_id'],
                                 content.content_id)
                self.assertEqual(chunk_meta['chunk_id'], chunk.id)
                self.assertEqual(chunk_meta['chunk_pos'], str(pos))
                self.assertEqual(chunk_meta['chunk_hash'], chunk_hash)

    def test_twocopies_upload_0_byte(self):
        """Upload an empty content with the TWOCOPIES policy."""
        self._test_upload(stgpol="TWOCOPIES", data_size=0)

    def test_twocopies_upload_1_byte(self):
        """Upload a 1-byte content with the TWOCOPIES policy."""
        self._test_upload(stgpol="TWOCOPIES", data_size=1)

    def test_twocopies_upload_chunksize_bytes(self):
        """Upload exactly one chunk of data with the TWOCOPIES policy."""
        size = self.chunk_size
        self._test_upload(stgpol="TWOCOPIES", data_size=size)

    def test_twocopies_upload_chunksize_plus_1_bytes(self):
        """Upload one chunk plus one byte with the TWOCOPIES policy."""
        size = self.chunk_size + 1
        self._test_upload(stgpol="TWOCOPIES", data_size=size)

    def test_single_upload_0_byte(self):
        """Upload an empty content with the SINGLE policy."""
        self._test_upload(stgpol="SINGLE", data_size=0)

    def test_single_upload_chunksize_plus_1_bytes(self):
        """Upload one chunk plus one byte with the SINGLE policy."""
        size = self.chunk_size + 1
        self._test_upload(stgpol="SINGLE", data_size=size)

    def test_chunks_cleanup_when_upload_failed(self):
        """A failed upload must not leave chunks behind.

        The chunks at position 1 get an unusable URL so the upload fails;
        the chunks at the other positions must then be absent.
        """
        payload = random_data(2 * self.chunk_size)
        content = self.content_factory.new(self.container_id, "titi",
                                           len(payload), "TWOCOPIES")
        self.assertEqual(type(content), DupContent)

        # set bad url for position 1
        for broken in content.chunks.filter(pos=1):
            broken.url = "http://127.0.0.1:9/DEADBEEF"

        self.assertRaises(Exception, content.upload,
                          StringIO.StringIO(payload))
        for other in content.chunks.exclude(pos=1):
            with self.assertRaises(NotFound):
                self.blob_client.chunk_head(other.url)

    def _new_content(self, stgpol, data, broken_pos_list):
        """Upload a content, delete some of its chunks and reload it.

        :param stgpol: storage policy name
        :param data: data to upload
        :param broken_pos_list: (position, index) pairs; for each pair
            the chunk at that index among those of the position is deleted
        :returns: the content as fetched again from the content factory
        """
        uploaded = self.content_factory.new(self.container_id, "titi",
                                            len(data), stgpol)
        self.assertEqual(type(uploaded), DupContent)

        uploaded.upload(StringIO.StringIO(data))

        for pos, idx in broken_pos_list:
            victim = uploaded.chunks.filter(pos=pos)[idx]
            self.blob_client.chunk_delete(victim.url)

        # get the new structure of the uploaded content
        content_id = uploaded.content_id
        return self.content_factory.get(self.container_id, content_id)

    def _test_download(self, stgpol, data_size, broken_pos_list):
        """Download a content with some chunks deleted and compare data.

        :param stgpol: storage policy name
        :param data_size: number of random bytes to upload
        :param broken_pos_list: (position, index) pairs of the chunks to
            delete before downloading
        """
        data = random_data(data_size)
        content = self._new_content(stgpol, data, broken_pos_list)

        downloaded_data = "".join(content.download())

        self.assertEqual(downloaded_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt: deleting the broken chunk
            # again must fail. Look it up with the same index that was
            # used to delete it (this used to be hard-coded to 0).
            c = content.chunks.filter(pos=pos)[idx]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_download_content_0_byte_without_broken_chunks(self):
        """Download an empty TWOCOPIES content with all chunks present."""
        self._test_download(stgpol="TWOCOPIES", data_size=0,
                            broken_pos_list=[])

    def test_twocopies_download_content_0_byte_with_broken_0_0(self):
        """Download an empty TWOCOPIES content missing one chunk."""
        self._test_download(stgpol="TWOCOPIES", data_size=0,
                            broken_pos_list=[(0, 0)])

    def test_twocopies_download_content_1_byte_without_broken_chunks(self):
        """Download a 1-byte TWOCOPIES content with all chunks present."""
        self._test_download(stgpol="TWOCOPIES", data_size=1,
                            broken_pos_list=[])

    def test_twocopies_download_content_1_byte_with_broken_0_0(self):
        """Download a 1-byte TWOCOPIES content missing one chunk."""
        self._test_download(stgpol="TWOCOPIES", data_size=1,
                            broken_pos_list=[(0, 0)])

    def test_twocopies_download_chunksize_bytes_without_broken_chunks(self):
        """Download one full chunk of TWOCOPIES data, nothing broken."""
        size = self.chunk_size
        self._test_download(stgpol="TWOCOPIES", data_size=size,
                            broken_pos_list=[])

    def test_twocopies_download_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        """Download two chunks of data with one copy missing at each
        position."""
        size = self.chunk_size * 2
        self._test_download(stgpol="TWOCOPIES", data_size=size,
                            broken_pos_list=[(0, 0), (1, 0)])

    def test_twocopies_download_content_chunksize_bytes_2_broken_chunks(self):
        """Downloading fails when both copies of a position are missing."""
        data = random_data(self.chunk_size)
        content = self._new_content("TWOCOPIES", data, [(0, 0), (0, 1)])
        gen = content.download()
        # Use the builtin next(): the `.next()` method of generators only
        # exists on Python 2, while next() works on Python 2.6+ and 3.
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_single_download_content_1_byte_without_broken_chunks(self):
        """Download a 1-byte SINGLE content with its chunk present."""
        self._test_download(stgpol="SINGLE", data_size=1,
                            broken_pos_list=[])

    def test_single_download_chunksize_bytes_plus_1_without_broken_chunk(self):
        """Download one chunk plus one byte of SINGLE data, nothing broken.

        The size now matches the test name (it used to be twice the chunk
        size), so the one-byte last chunk is actually exercised.
        """
        self._test_download("SINGLE", self.chunk_size + 1, [])
Exemplo n.º 37
0
class TestRebuilderCrawler(BaseTestCase):
    """
    Tests of `BlobRebuilderWorker.chunk_rebuild` on contents pushed chunk
    by chunk into a container created during `setUp`.
    """

    def setUp(self):
        super(TestRebuilderCrawler, self).setUp()

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']

        self.gridconf = {"namespace": self.namespace}
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient()

        self.container_name = "TestRebuilderCrawler%d" % int(time.time())
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)

    def tearDown(self):
        super(TestRebuilderCrawler, self).tearDown()

    def _push_content(self, content):
        """
        Upload every chunk of *content* with the blob client, then declare
        the content with the container client.
        """
        for c in content.chunks:
            self.blob_client.chunk_put(c.url, c.get_create_xattr(), c.data)

        self.container_client.content_create(acct=content.account,
                                             ref=content.container_name,
                                             path=content.content_name,
                                             size=content.size,
                                             checksum=content.hash,
                                             content_id=content.content_id,
                                             stgpol=content.stgpol,
                                             data=content.get_create_meta2())

    def _push_new_content(self, stgpol, nb_copies):
        """
        Build "mycontent" with *nb_copies* identical chunks at position '0'
        (one per rawx index, starting at 0), push it and return it.
        """
        content = TestContent(self.conf, self.account, self.container_name,
                              "mycontent", stgpol)
        for rawx in range(nb_copies):
            content.add_chunk("azerty", pos='0', rawx=rawx)
        self._push_content(content)
        return content

    def _new_rebuilder(self):
        """
        Build a rebuilder worker from the address of the first rawx listed
        in the configuration.
        """
        return BlobRebuilderWorker(self.gridconf, None,
                                   self.conf['rawx'][0]['addr'])

    def test_rebuild_chunk(self):
        # push a new content
        content = self._push_new_content("TWOCOPIES", 2)
        lost_chunk = content.chunks[0]

        # rebuild the first rawx
        rebuilder = self._new_rebuilder()
        rebuilder.chunk_rebuild(content.container_id, content.content_id,
                                lost_chunk.id)

        # check meta2 information
        _, res = self.container_client.content_show(acct=content.account,
                                                    ref=content.container_name,
                                                    content=content.content_id)

        # the new chunk is the one whose URL matches none of the pushed ones
        known_urls = (content.chunks[0].url, content.chunks[1].url)
        new_chunk_info = None
        for c in res:
            if c['url'] not in known_urls:
                new_chunk_info = c

        # fail with an assertion rather than a TypeError on None
        self.assertIsNotNone(new_chunk_info,
                             "no new chunk found in the content")

        new_chunk_id = new_chunk_info['url'].split('/')[-1]

        self.assertEqual(new_chunk_info['hash'], lost_chunk.hash)
        self.assertEqual(new_chunk_info['pos'], lost_chunk.pos)
        self.assertEqual(new_chunk_info['size'], lost_chunk.size)

        # check chunk information
        meta, stream = self.blob_client.chunk_get(new_chunk_info['url'])

        self.assertEqual(meta['content_size'], str(lost_chunk.size))
        self.assertEqual(meta['content_path'], content.content_name)
        self.assertEqual(meta['content_cid'], content.container_id)
        self.assertEqual(meta['content_id'], content.content_id)
        self.assertEqual(meta['chunk_id'], new_chunk_id)
        self.assertEqual(meta['chunk_pos'], lost_chunk.pos)
        self.assertEqual(meta['content_version'], content.version)
        self.assertEqual(meta['chunk_hash'], lost_chunk.hash)

        self.assertEqual(next(stream), lost_chunk.data)

        # check rtime flag in rdir
        rdir_client = RdirClient(self.gridconf)
        res = rdir_client.chunk_fetch(self.conf['rawx'][0]['addr'])
        key = (content.container_id, content.content_id, lost_chunk.id)
        # initialised so that a missing entry is reported as a test failure
        # instead of a NameError on an unbound local
        check_value = None
        for i_container, i_content, i_chunk, i_value in res:
            if (i_container, i_content, i_chunk) == key:
                check_value = i_value

        self.assertIsNotNone(check_value,
                             "chunk entry not found in rdir")
        self.assertIsNotNone(check_value.get('rtime'))

    @unittest.skipIf(
        len(get_config()['rawx']) != 3, "The number of rawx must be 3")
    def test_rebuild_no_spare(self):
        # push a new content: one copy on each of the three rawx
        content = self._push_new_content("THREECOPIES", 3)

        # rebuild the first rawx
        rebuilder = self._new_rebuilder()

        self.assertRaises(SpareChunkException, rebuilder.chunk_rebuild,
                          content.container_id, content.content_id,
                          content.chunks[0].id)

    def test_rebuild_upload_failed(self):
        # push a new content
        content = self._push_new_content("TWOCOPIES", 2)

        # rebuild the first rawx
        rebuilder = self._new_rebuilder()

        # Force upload to raise an exception
        with patch('oio.content.content.BlobClient') as MockClass:
            instance = MockClass.return_value
            instance.chunk_copy.side_effect = Exception("xx")
            self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild,
                              content.container_id, content.content_id,
                              content.chunks[0].id)

    def test_rebuild_nonexistent_chunk(self):
        rebuilder = self._new_rebuilder()

        # try to rebuild a nonexistent chunk (unknown container and content)
        self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild, 64 * '0',
                          32 * '0', 64 * '0')

    def test_rebuild_orphan_chunk(self):
        # push a new content
        content = self._push_new_content("TWOCOPIES", 2)

        # rebuild the first rawx
        rebuilder = self._new_rebuilder()

        # try to rebuild a nonexistent chunk of an existing content
        self.assertRaises(OrphanChunk, rebuilder.chunk_rebuild,
                          content.container_id, content.content_id, 64 * '0')

    def test_rebuild_with_no_copy(self):
        # push a new content made of a single chunk
        content = self._push_new_content("SINGLE", 1)

        # rebuild the first rawx
        rebuilder = self._new_rebuilder()

        # try to rebuild chunk without copy
        self.assertRaises(UnrecoverableContent, rebuilder.chunk_rebuild,
                          content.container_id, content.content_id,
                          content.chunks[0].id)
Exemplo n.º 38
0
class TestContentFactory(BaseTestCase):
    """
    Tests of `ContentFactory`: storage policy parsing, content objects
    built from mocked container answers, and storage policy changes on
    uploaded contents.
    """

    def setUp(self):
        super(TestContentFactory, self).setUp()
        self.namespace = self.conf['namespace']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_name = "TestContentFactory%f" % time.time()
        self.container_client = ContainerClient(self.gridconf)
        self.container_client.container_create(acct=self.account,
                                               ref=self.container_name)
        raw_cid = cid_from_name(self.account, self.container_name)
        self.container_id = raw_cid.upper()

    def tearDown(self):
        super(TestContentFactory, self).tearDown()

    @staticmethod
    def _raw_chunk(url, pos, size, chunk_hash):
        """Build one chunk description for the mocked container answers."""
        return {"url": url, "pos": pos, "size": size, "hash": chunk_hash}

    def _assert_attrs(self, obj, expected):
        """Check, in order, each `(attribute name, value)` pair on *obj*."""
        for name, value in expected:
            self.assertEqual(getattr(obj, name), value)

    def _assert_chunk_order(self, content, raw_chunks, order):
        """
        Check that *content* holds `len(order)` chunks and that the chunk
        at rank `i` is built from `raw_chunks[order[i]]`.
        """
        self.assertEqual(len(content.chunks), len(order))
        for rank, raw_index in enumerate(order):
            self.assertEqual(content.chunks[rank].raw(),
                             raw_chunks[raw_index])

    def test_extract_datasec(self):
        self.content_factory.ns_info = {
            "data_security": {
                "DUPONETWO": "DUP:distance=1|nb_copy=2",
                "RAIN": "RAIN:k=6|m=2|algo=liber8tion"
            },
            "storage_policy": {
                "RAIN": "NONE:RAIN:NONE",
                "SINGLE": "NONE:NONE:NONE",
                "TWOCOPIES": "NONE:DUPONETWO:NONE"
            }
        }

        # (storage policy, expected type, expected arguments)
        expectations = (
            ("RAIN", "RAIN", {"k": "6", "m": "2", "algo": "liber8tion"}),
            ("SINGLE", "DUP", {"nb_copy": "1", "distance": "0"}),
            ("TWOCOPIES", "DUP", {"nb_copy": "2", "distance": "1"}),
        )
        for stgpol, expected_type, expected_args in expectations:
            ds_type, ds_args = self.content_factory._extract_datasec(stgpol)
            self.assertEqual(ds_type, expected_type)
            self.assertEqual(ds_args, expected_args)

        with self.assertRaises(InconsistentContent):
            self.content_factory._extract_datasec("UnKnOwN")

    def test_get_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "RAIN",
            "version": "1450176946676289"
        }
        chunks = [
            self._raw_chunk("http://127.0.0.1:6012/A0A0", "0.p0", 512,
                            "E7D4E4AD460971CA2E3141F2102308D4"),
            self._raw_chunk("http://127.0.0.1:6010/A01", "0.1", 146,
                            "760AB5DA7C51A3654F1CA622687CD6C3"),
            self._raw_chunk("http://127.0.0.1:6011/A00", "0.0", 512,
                            "B1D08B86B8CAA90A2092CCA0DF9201DB"),
            self._raw_chunk("http://127.0.0.1:6013/A0A1", "0.p1", 512,
                            "DA9D7F72AEEA5791565724424CE45C16"),
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))

        c = self.content_factory.get("xxx_container_id", "xxx_content_id")

        self.assertEqual(type(c), RainContent)
        self._assert_attrs(c, (
            ("content_id", "3FA2C4A1ED2605005335A276890EC458"),
            ("length", 658),
            ("path", "tox.ini"),
            ("version", "1450176946676289"),
            ("algo", "liber8tion"),
            ("k", 6),
            ("m", 2),
        ))
        self._assert_chunk_order(c, chunks, (2, 1, 0, 3))

    def test_get_dup(self):
        meta = {
            "chunk-method": "plain/bytes",
            "ctime": "1450176946",
            "deleted": "False",
            "hash": "E952A419957A6E405BFC53EC65483F73",
            "hash-method": "md5",
            "id": "3FA2C4A1ED2605005335A276890EC458",
            "length": "658",
            "mime-type": "application/octet-stream",
            "name": "tox.ini",
            "policy": "TWOCOPIES",
            "version": "1450176946676289"
        }
        chunks = [
            self._raw_chunk("http://127.0.0.1:6010/A0", "0", 658,
                            "E952A419957A6E405BFC53EC65483F73"),
            self._raw_chunk("http://127.0.0.1:6011/A1", "0", 658,
                            "E952A419957A6E405BFC53EC65483F73"),
        ]
        self.content_factory.container_client.content_show = Mock(
            return_value=(meta, chunks))

        c = self.content_factory.get("xxx_container_id", "xxx_content_id")

        self.assertEqual(type(c), DupContent)
        self._assert_attrs(c, (
            ("content_id", "3FA2C4A1ED2605005335A276890EC458"),
            ("length", 658),
            ("path", "tox.ini"),
            ("version", "1450176946676289"),
            ("nb_copy", 2),
            ("distance", 1),
        ))
        self._assert_chunk_order(c, chunks, (0, 1))

    def test_new_rain(self):
        meta = {
            "chunk-method": "plain/rain?algo=liber8tion&k=6&m=2",
            "ctime": "1450341162",
            "deleted": "False",
            "hash": "",
            "hash-method": "md5",
            "id": "F4B1C8DD132705007DE8B43D0709DAA2",
            "length": "1000",
            "mime-type": "application/octet-stream",
            "name": "titi",
            "policy": "RAIN",
            "version": "1450341162332663"
        }
        blank_hash = "00000000000000000000000000000000"
        chunks = [
            self._raw_chunk("http://127.0.0.1:6010/0_p1", "0.p1", 1048576,
                            blank_hash),
            self._raw_chunk("http://127.0.0.1:6011/0_p0", "0.p0", 1048576,
                            blank_hash),
            self._raw_chunk("http://127.0.0.1:6016/0_1", "0.1", 1048576,
                            blank_hash),
            self._raw_chunk("http://127.0.0.1:6017/0_0", "0.0", 1048576,
                            blank_hash),
        ]
        self.content_factory.container_client.content_prepare = Mock(
            return_value=(meta, chunks))

        c = self.content_factory.new("xxx_container_id", "titi", 1000, "RAIN")

        self.assertEqual(type(c), RainContent)
        self._assert_attrs(c, (
            ("content_id", "F4B1C8DD132705007DE8B43D0709DAA2"),
            ("length", 1000),
            ("path", "titi"),
            ("version", "1450341162332663"),
            ("algo", "liber8tion"),
            ("k", 6),
            ("m", 2),
        ))
        self._assert_chunk_order(c, chunks, (3, 2, 1, 0))

    def _new_content(self, stgpol, data):
        """
        Upload *data* as "titi" with the *stgpol* storage policy, then
        return the content as fetched again from the factory.
        """
        uploaded = self.content_factory.new(self.container_id, "titi",
                                            len(data), stgpol)
        uploaded.upload(StringIO.StringIO(data))
        return self.content_factory.get(self.container_id,
                                        uploaded.content_id)

    def _test_change_policy(self, data_size, old_policy, new_policy):
        """
        Upload random data with *old_policy*, switch it to *new_policy*,
        then check the old content is gone and the new one holds the data.
        """
        involves_rain = "RAIN" in (old_policy, new_policy)
        if involves_rain and len(self.conf['rawx']) < 8:
            self.skipTest("RAIN: Need more than 8 rawx to run")

        data = random_data(data_size)
        # class expected from the factory for each storage policy
        expected_class = {
            "SINGLE": DupContent,
            "TWOCOPIES": DupContent,
            "THREECOPIES": DupContent,
            "RAIN": RainContent
        }
        old_content = self._new_content(old_policy, data)
        self.assertEqual(type(old_content), expected_class[old_policy])

        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, new_policy)

        # the content known under the old identifier must be gone
        with self.assertRaises(NotFound):
            self.container_client.content_show(
                self.account,
                cid=old_content.container_id,
                content=old_content.content_id)

        new_content = self.content_factory.get(self.container_id,
                                               changed_content.content_id)
        self.assertEqual(type(new_content), expected_class[new_policy])

        pieces = new_content.download()
        self.assertEqual("".join(pieces), data)

    # TODO add tests with RAIN empty contents when supported

    def test_change_content_1_byte_policy_single_to_rain(self):
        self._test_change_policy(data_size=1,
                                 old_policy="SINGLE", new_policy="RAIN")

    def test_change_content_chunksize_bytes_policy_twocopies_to_rain(self):
        self._test_change_policy(data_size=self.chunk_size,
                                 old_policy="TWOCOPIES", new_policy="RAIN")

    def test_change_content_2xchunksize_bytes_policy_threecopies_to_rain(self):
        self._test_change_policy(data_size=self.chunk_size * 2,
                                 old_policy="THREECOPIES", new_policy="RAIN")

    def test_change_content_1_byte_policy_rain_to_threecopies(self):
        self._test_change_policy(data_size=1,
                                 old_policy="RAIN", new_policy="THREECOPIES")

    def test_change_content_chunksize_bytes_policy_rain_to_twocopies(self):
        self._test_change_policy(data_size=self.chunk_size,
                                 old_policy="RAIN", new_policy="TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_rain_to_single(self):
        self._test_change_policy(data_size=self.chunk_size * 2,
                                 old_policy="RAIN", new_policy="SINGLE")

    def test_change_content_0_byte_policy_twocopies_to_threecopies(self):
        self._test_change_policy(data_size=0,
                                 old_policy="TWOCOPIES",
                                 new_policy="THREECOPIES")

    def test_change_content_chunksize_bytes_policy_single_to_twocopies(self):
        self._test_change_policy(data_size=self.chunk_size,
                                 old_policy="SINGLE", new_policy="TWOCOPIES")

    def test_change_content_2xchunksize_bytes_policy_3copies_to_single(self):
        self._test_change_policy(data_size=self.chunk_size * 2,
                                 old_policy="THREECOPIES",
                                 new_policy="SINGLE")

    def test_change_content_with_same_policy(self):
        # asking for the policy the content already has must hand back a
        # content carrying the same identifier
        payload = random_data(10)
        old_content = self._new_content("TWOCOPIES", payload)
        changed_content = self.content_factory.change_policy(
            old_content.container_id, old_content.content_id, "TWOCOPIES")
        self.assertEqual(old_content.content_id, changed_content.content_id)

    def test_change_policy_unknown_content(self):
        with self.assertRaises(ContentNotFound):
            self.content_factory.change_policy(self.container_id, "1234",
                                               "SINGLE")

    def test_change_policy_unknown_storage_policy(self):
        payload = random_data(10)
        old_content = self._new_content("TWOCOPIES", payload)
        with self.assertRaises(ClientException):
            self.content_factory.change_policy(self.container_id,
                                               old_content.content_id,
                                               "UnKnOwN")
Exemplo n.º 39
0
class ObjectStorageApi(object):
    """
    The Object Storage API.

    High level API that wraps `AccountClient`, `ContainerClient` and
    `DirectoryClient` classes.
    """
    def __init__(self, namespace, **kwargs):
        """
        Initialize the object storage API.

        :param namespace: name of the namespace to interact with
        :type namespace: `str`

        :keyword connection_timeout: connection timeout towards rawx services
        :type connection_timeout: `float` seconds
        :keyword read_timeout: timeout for rawx responses and data reads from
            the caller (when uploading)
        :type read_timeout: `float` seconds
        :keyword write_timeout: timeout for rawx write requests
        :type write_timeout: `float` seconds

        All the keyword arguments are also forwarded as is to the
        directory, account and container clients.
        """
        self.namespace = namespace

        # Each optional timeout goes through utils.float_value(), which is
        # given None as its second argument.
        timeout_options = ("connection_timeout",
                           "read_timeout",
                           "write_timeout")
        for option in timeout_options:
            setattr(self, option,
                    utils.float_value(kwargs.get(option), None))

        # FIXME: share session between all the clients
        # Each client receives its own configuration dictionary.
        self.directory = DirectoryClient(dict(namespace=self.namespace),
                                         **kwargs)
        self.account = AccountClient(dict(namespace=self.namespace), **kwargs)
        self.container = ContainerClient(dict(namespace=self.namespace),
                                         **kwargs)

    def account_create(self, account, headers=None):
        """
        Create an account.

        :param account: name of the account to create
        :type account: `str`
        :keyword headers: passed as is to the account client
        :returns: `True` if the account has been created
        """
        created = self.account.account_create(account, headers=headers)
        return created

    @handle_account_not_found
    def account_delete(self, account, headers=None):
        """
        Delete an account.

        :param account: name of the account to delete
        :type account: `str`
        :keyword headers: passed as is to the account client
        """
        delete = self.account.account_delete
        delete(account, headers=headers)

    @handle_account_not_found
    def account_show(self, account, headers=None):
        """
        Get information about an account.

        :param account: name of the account to show
        :keyword headers: passed as is to the account client
        :returns: whatever the account client returns for this account
        """
        info = self.account.account_show(account, headers=headers)
        return info

    def account_list(self, headers=None):
        """
        List accounts.

        :keyword headers: passed as is to the account client
        :returns: whatever the account client returns
        """
        accounts = self.account.account_list(headers=headers)
        return accounts

    # FIXME:
    @handle_account_not_found
    def account_update(self, account, metadata, to_delete=None, headers=None):
        """
        Update an account through the account client.

        :param account: name of the account to update
        :param metadata: forwarded as second positional argument
        :param to_delete: forwarded as third positional argument
        :keyword headers: passed as is to the account client
        """
        update = self.account.account_update
        update(account, metadata, to_delete, headers=headers)

    @handle_account_not_found
    def account_set_properties(self, account, properties, headers=None):
        """
        Set properties on an account: *properties* is handed to
        `account_update` as the metadata.

        :param account: name of the account
        :param properties: properties to set
        :keyword headers: passed as is to `account_update`
        """
        update = self.account_update
        update(account, properties, headers=headers)

    @handle_account_not_found
    def account_del_properties(self, account, properties, headers=None):
        """
        Delete properties of an account: *properties* is handed to
        `account_update` as `to_delete`, with no metadata.

        :param account: name of the account
        :param properties: properties to delete
        :keyword headers: passed as is to `account_update`
        """
        update = self.account_update
        update(account, None, properties, headers=headers)

    def container_create(self,
                         account,
                         container,
                         properties=None,
                         headers=None,
                         **kwargs):
        """
        Create a container.

        The container client is always called with `autocreate=True`.

        :param account: account in which to create the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: properties to set on the container
        :type properties: `dict`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: True if the container has been created,
                  False if it already exists
        """
        create = self.container.container_create
        return create(account, container,
                      properties=properties, headers=headers,
                      autocreate=True, **kwargs)

    @handle_container_not_found
    def container_touch(self, account, container, headers=None, **kwargs):
        """
        Trigger a notification about the container state.

        An 'X-oio-req-id' header is generated with `utils.request_id()`
        when the caller did not provide one.

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :keyword headers: extra headers to send to the proxy; the
            dictionary passed by the caller is left untouched
        :type headers: `dict`
        """
        # Work on a copy: the request id used to be written into the
        # caller's own dictionary whenever it was not empty.
        headers = dict(headers) if headers else dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        self.container.container_touch(account,
                                       container,
                                       headers=headers,
                                       **kwargs)

    def container_create_many(self,
                              account,
                              containers,
                              properties=None,
                              headers=None,
                              **kwargs):
        """
        Create many containers.

        The container client is always called with `autocreate=True`.

        :param account: account in which to create the containers
        :type account: `str`
        :param containers: names of the containers
        :type containers: `list`
        :param properties: properties to set on the containers
        :type properties: `dict`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: whatever the container client returns
        """
        create_many = self.container.container_create_many
        return create_many(account, containers,
                           properties=properties, headers=headers,
                           autocreate=True, **kwargs)

    @handle_container_not_found
    def container_delete(self, account, container, headers=None, **kwargs):
        """
        Delete a container.

        :param account: account from which to delete the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`

        Extra keyword arguments are forwarded to the container client.
        """
        delete = self.container.container_delete
        delete(account, container, headers=headers, **kwargs)

    @handle_account_not_found
    def container_list(self,
                       account,
                       limit=None,
                       marker=None,
                       end_marker=None,
                       prefix=None,
                       delimiter=None,
                       headers=None):
        """
        Get the list of containers of an account.

        :param account: account from which to get the container list
        :type account: `str`
        :keyword limit: maximum number of results to return
        :type limit: `int`
        :keyword marker: name of the container from where to start the listing
        :type marker: `str`
        :keyword end_marker:
        :keyword prefix:
        :keyword delimiter:
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: the "listing" entry of the account client's answer
        """
        listing_filters = {
            "limit": limit,
            "marker": marker,
            "end_marker": end_marker,
            "prefix": prefix,
            "delimiter": delimiter,
        }
        resp = self.account.container_list(account,
                                           headers=headers,
                                           **listing_filters)
        return resp["listing"]

    @handle_container_not_found
    def container_show(self, account, container, headers=None):
        """
        Get information about a container (user properties).

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: a `dict` with "properties" containing a `dict`
            of user properties.
        """
        show = self.container.container_show
        return show(account, container, headers=headers)

    @handle_container_not_found
    def container_get_properties(self,
                                 account,
                                 container,
                                 properties=None,
                                 headers=None):
        """
        Get information about a container (user and system properties).

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: *ignored*
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: a `dict` with "properties" and "system" entries,
            containing respectively a `dict` of user properties and
            a `dict` of system properties.
        """
        get_properties = self.container.container_get_properties
        return get_properties(account, container,
                              properties=properties, headers=headers)

    @handle_container_not_found
    def container_set_properties(self,
                                 account,
                                 container,
                                 properties=None,
                                 clear=False,
                                 headers=None,
                                 **kwargs):
        """
        Set properties on a container.

        :param account: name of the account
        :type account: `str`
        :param container: name of the container where to set properties
        :type container: `str`
        :param properties: a dictionary of properties
        :type properties: `dict`
        :param clear: forwarded as is to the container client
        :type clear: `bool`
        :param headers: extra headers to pass to the proxy
        :type headers: `dict`
        :keyword system: dictionary of system properties to set
        :returns: whatever the container client returns
        """
        set_properties = self.container.container_set_properties
        return set_properties(account, container, properties,
                              clear=clear, headers=headers, **kwargs)

    @handle_container_not_found
    def container_del_properties(self,
                                 account,
                                 container,
                                 properties,
                                 headers=None,
                                 **kwargs):
        """
        Delete properties of a container.

        :param account: name of the account
        :param container: name of the container
        :param properties: properties to delete, forwarded as is to the
            container client
        :keyword headers: extra headers to pass to the proxy
        :returns: whatever the container client returns
        """
        del_properties = self.container.container_del_properties
        return del_properties(account, container, properties,
                              headers=headers, **kwargs)

    def container_update(self,
                         account,
                         container,
                         metadata,
                         clear=False,
                         headers=None):
        """
        Set or delete properties of a container depending on *metadata*.

        :param account: name of the account
        :param container: name of the container
        :param metadata: when truthy, passed to `container_set_properties`;
            otherwise `container_del_properties` is called with an empty
            list of properties
        :param clear: forwarded to `container_set_properties` only
        :keyword headers: forwarded to whichever method gets called
        """
        if metadata:
            self.container_set_properties(account,
                                          container,
                                          metadata,
                                          clear,
                                          headers=headers)
            return
        # No metadata at all: ask for a deletion with an empty list.
        self.container_del_properties(account,
                                      container, [],
                                      headers=headers)

    @handle_container_not_found
    def object_create(self,
                      account,
                      container,
                      file_or_path=None,
                      data=None,
                      etag=None,
                      obj_name=None,
                      mime_type=None,
                      metadata=None,
                      policy=None,
                      headers=None,
                      key_file=None,
                      **_kwargs):
        """
        Create an object in *container* of *account*.

        The payload comes from *data* (`str` or `generator`) when it is
        set, otherwise from *file_or_path* (a path or a file-like object).
        The object is named after *obj_name* if given, else after the base
        name of *file_or_path*.

        :param account: name of the account where to create the object
        :type account: `str`
        :param container: name of the container where to create the object
        :type container: `str`
        :param file_or_path: file-like object or path to a file from which
            to read object data
        :type file_or_path: `str` or file-like object
        :param data: object data (takes precedence over `file_or_path`)
        :type data: `str` or `generator`
        :keyword etag: entity tag of the object
        :type etag: `str`
        :keyword obj_name: name of the object to create. If not set, the
            base name of `file_or_path` is used.
        :keyword mime_type: MIME type of the object
        :type mime_type: `str`
        :keyword metadata: a dictionary of properties
        :type metadata: `dict`
        :keyword policy: name of the storage policy
        :type policy: `str`
        :param headers: extra headers to pass to the proxy
        :type headers: `dict`
        :keyword key_file: forwarded to `_object_create`
        :returns: the result of `_object_create`
        :raises exc.MissingData: if neither `data` nor `file_or_path` is set
        :raises exc.FileNotFound: if `file_or_path` is a path that does not
            exist
        :raises exc.MissingName: if no object name could be determined
        """
        if (data, file_or_path) == (None, None):
            raise exc.MissingData()

        if data is not None:
            src = data
        else:
            src = file_or_path

        if src is file_or_path:
            if isinstance(file_or_path, basestring):
                if not os.path.exists(file_or_path):
                    raise exc.FileNotFound("File '%s' not found." %
                                           file_or_path)
                file_name = os.path.basename(file_or_path)
            else:
                # File-like objects may or may not expose a usable name.
                try:
                    file_name = os.path.basename(file_or_path.name)
                except AttributeError:
                    file_name = None
            obj_name = obj_name or file_name
        elif isgenerator(src):
            # Wrap generators so they can be read like a file.
            file_or_path = utils.GeneratorIO(src)
            src = file_or_path

        if not obj_name:
            raise exc.MissingName("No name for the object has been specified")

        sysmeta = {'mime_type': mime_type, 'etag': etag}

        headers = headers or dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()

        def _upload(stream):
            # Single place where the upload parameters are spelled out.
            return self._object_create(account,
                                       container,
                                       obj_name,
                                       stream,
                                       sysmeta,
                                       properties=metadata,
                                       policy=policy,
                                       headers=headers,
                                       key_file=key_file)

        if src is data:
            return _upload(BytesIO(data))
        if hasattr(file_or_path, "read"):
            return _upload(src)
        with open(file_or_path, "rb") as f:
            return _upload(f)

    def object_touch(self, account, container, obj, headers=None, **kwargs):
        """
        Trigger a notification about an object
        (as if it just had been created).

        A request ID header is added when the caller did not provide one.

        :param account: name of the account holding the object
        :type account: `str`
        :param container: name of the container holding the object
        :type container: `str`
        :param obj: name of the object to touch
        :param headers: extra headers to pass to the proxy
        """
        headers = headers or dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        self.container.content_touch(
            account, container, obj, headers=headers, **kwargs)

    @handle_object_not_found
    def object_delete(self, account, container, obj, headers=None, **kwargs):
        """
        Delete an object through the container client.

        A request ID header is added when the caller did not provide one.

        :param account: name of the account holding the object
        :param container: name of the container holding the object
        :param obj: name of the object to delete
        :param headers: extra headers to pass to the proxy
        :returns: the result of the container client call
        """
        headers = headers or dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        return self.container.content_delete(
            account, container, obj, headers=headers, **kwargs)

    def object_delete_many(self,
                           account,
                           container,
                           objs,
                           headers=None,
                           **kwargs):
        """
        Delete several objects of a container with one container client
        call.

        A request ID header is added when the caller did not provide one.

        :param account: name of the account holding the objects
        :param container: name of the container holding the objects
        :param objs: objects to delete, forwarded unchanged
        :param headers: extra headers to pass to the proxy
        :returns: the result of the container client call
        """
        headers = headers or dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        return self.container.content_delete_many(
            account, container, objs, headers=headers, **kwargs)

    @handle_container_not_found
    def object_list(self,
                    account,
                    container,
                    limit=None,
                    marker=None,
                    delimiter=None,
                    prefix=None,
                    end_marker=None,
                    headers=None,
                    properties=False,
                    **kwargs):
        """
        Lists objects inside a container.

        Each listed object that carries a non-empty 'mime-type' entry has
        it renamed to 'mime_type'.

        :returns: a dict which contains
           * 'objects': the list of objects
           * 'prefixes': common prefixes (only if delimiter and prefix are set)
           * 'properties': a dict of container properties
           * 'system': system metadata
        """
        _, listing = self.container.content_list(account,
                                                 container,
                                                 limit=limit,
                                                 marker=marker,
                                                 end_marker=end_marker,
                                                 prefix=prefix,
                                                 delimiter=delimiter,
                                                 properties=properties,
                                                 headers=headers,
                                                 **kwargs)

        for entry in listing['objects']:
            mime = entry.get('mime-type')
            if not mime:
                continue
            # Expose the MIME type under the underscore-style key.
            entry['mime_type'] = mime
            del entry['mime-type']

        return listing

    # FIXME:
    @handle_object_not_found
    def object_locate(self, account, container, obj, headers=None):
        """
        Get the metadata of an object along with the list of its chunks.

        :param account: name of the account holding the object
        :param container: name of the container holding the object
        :param obj: name of the object to locate
        :param headers: extra headers to pass to the proxy (for example the
            request ID set by `object_fetch`)
        :returns: a tuple `(obj_meta, body)` as returned by the container
            client
        """
        # Forward the headers: they used to be accepted but dropped, which
        # lost the request ID set by callers such as `object_fetch`.
        obj_meta, body = self.container.content_locate(account,
                                                       container,
                                                       obj,
                                                       headers=headers)
        return obj_meta, body

    def object_analyze(self, *args, **kwargs):
        """
        Backward-compatible alias of `object_locate`.

        :deprecated: use `object_locate`
        """
        located = self.object_locate(*args, **kwargs)
        return located

    def object_fetch(self,
                     account,
                     container,
                     obj,
                     ranges=None,
                     headers=None,
                     key_file=None):
        """
        Locate an object and build a generator yielding its data.

        The download strategy is selected from the object's chunk method.

        :param account: name of the account holding the object
        :param container: name of the container holding the object
        :param obj: name of the object to fetch
        :param ranges: optional ranges, forwarded to the stream builder
        :param headers: extra headers to pass to the proxy; a request ID is
            added when missing
        :param key_file: forwarded to the Backblaze stream builder
        :returns: a tuple `(meta, stream)` where `meta` is the object
            metadata (enriched with 'container_id' and 'ns') and `stream`
            is a generator of data
        """
        headers = headers or dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()

        meta, raw_chunks = self.object_locate(
            account, container, obj, headers=headers)
        storage_method = STORAGE_METHODS.load(meta['chunk_method'])
        chunks = _sort_chunks(raw_chunks, storage_method.ec)
        meta['container_id'] = utils.name2cid(account, container).upper()
        meta['ns'] = self.namespace

        if storage_method.ec:
            return meta, self._fetch_stream_ec(
                meta, chunks, ranges, storage_method, headers)
        if storage_method.backblaze:
            return meta, self._fetch_stream_backblaze(
                meta, chunks, ranges, storage_method, key_file)
        return meta, self._fetch_stream(
            meta, chunks, ranges, storage_method, headers)

    @handle_object_not_found
    def object_get_properties(self, account, container, obj, headers=None):
        """
        Get the properties of an object through the container client.

        :param account: name of the account holding the object
        :param container: name of the container holding the object
        :param obj: name of the object to query
        :param headers: extra headers to pass to the proxy
        :returns: the result of the container client call
        """
        # Forward the headers (previously accepted but silently dropped),
        # in line with `object_show`.
        return self.container.content_get_properties(account,
                                                     container,
                                                     obj,
                                                     headers=headers)

    def object_show(self, account, container, obj, headers=None):
        """
        Get a description of the content along with its user properties.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :param headers: extra headers to pass to the proxy
        :returns: a `dict` describing the object

        .. code-block:: python

            {'hash': '6BF60C17CC15EEA108024903B481738F',
             'ctime': '1481031763',
             'deleted': 'False',
             'properties': {
                 u'projet': u'OpenIO-SDS'},
             'length': '43518',
             'hash_method': 'md5',
             'chunk_method': 'ec/algo=liberasurecode_rs_vand,k=6,m=3',
             'version': '1481031762951972',
             'policy': 'EC',
             'id': '20BF2194FD420500CD4729AE0B5CBC07',
             'mime_type': 'application/octet-stream',
             'name': 'Makefile'}
        """
        description = self.container.content_show(
            account, container, obj, headers=headers)
        return description

    def object_update(self,
                      account,
                      container,
                      obj,
                      metadata,
                      clear=False,
                      headers=None):
        """
        Update the properties of an object.

        :param account: name of the account holding the object
        :param container: name of the container holding the object
        :param obj: name of the object to update
        :param metadata: properties to set; nothing is set when empty
        :param clear: when true, `object_del_properties` is first called
            with an empty list
        :param headers: extra headers forwarded to the underlying calls
        """
        target = (account, container, obj)
        if clear:
            self.object_del_properties(*(target + ([],)), headers=headers)
        if not metadata:
            return
        self.object_set_properties(*(target + (metadata,)), headers=headers)

    @handle_object_not_found
    def object_set_properties(self,
                              account,
                              container,
                              obj,
                              properties,
                              clear=False,
                              headers=None,
                              **kwargs):
        """
        Set properties on an object.

        :param account: name of the account holding the object
        :param container: name of the container holding the object
        :param obj: name of the object to update
        :param properties: properties to set
        :param clear: when true, existing properties are removed first
            (with the same empty-list deletion call `object_update` uses)
        :param headers: extra headers to pass to the proxy
        :returns: the result of the container client "set" call
        """
        if clear:
            # `clear` used to be accepted but ignored. This is a separate
            # request, so clearing and setting are not atomic.
            self.object_del_properties(account,
                                       container,
                                       obj, [],
                                       headers=headers,
                                       **kwargs)
        return self.container.content_set_properties(
            account,
            container,
            obj,
            properties={'properties': properties},
            headers=headers,
            **kwargs)

    @handle_object_not_found
    def object_del_properties(self,
                              account,
                              container,
                              obj,
                              properties,
                              headers=None,
                              **kwargs):
        """
        Delete properties of an object by delegating to the container
        client.

        :param account: name of the account holding the object
        :param container: name of the container holding the object
        :param obj: name of the object to update
        :param properties: properties to delete, forwarded unchanged
        :param headers: extra headers to pass to the proxy
        :returns: the result of the container client call
        """
        client = self.container
        return client.content_del_properties(
            account,
            container,
            obj,
            properties=properties,
            headers=headers,
            **kwargs)

    # FIXME: remove and call self.container.content_prepare() directly
    def _content_prepare(self,
                         account,
                         container,
                         obj_name,
                         size,
                         policy=None,
                         headers=None):
        return self.container.content_prepare(account,
                                              container,
                                              obj_name,
                                              size,
                                              stgpol=policy,
                                              autocreate=True,
                                              headers=headers)

    def _content_preparer(self,
                          account,
                          container,
                          obj_name,
                          policy=None,
                          headers=None):
        """
        Prepare an object upload and build a metachunk generator.

        A first `content_prepare` call (size 1) provides the object
        metadata and the chunks of the first metachunk. The returned
        generator function yields that first list of chunks, then asks for
        a new one each time it is advanced, rewriting chunk positions so
        that metachunks are numbered 0, 1, 2, ...

        :returns: a tuple `(obj_meta, metachunk_generator_function)`
        """
        # TODO: optimize by asking more than one metachunk at a time
        obj_meta, first_body = self.container.content_prepare(
            account,
            container,
            obj_name,
            size=1,
            stgpol=policy,
            autocreate=True,
            headers=headers)
        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])

        def _fix_mc_pos(chunks, mc_pos):
            # Renumber the chunks so they belong to metachunk `mc_pos`.
            for chunk in chunks:
                pos_parts = chunk["pos"].split(".")
                if not storage_method.ec:
                    chunk["pos"] = str(mc_pos)
                    continue
                # EC positions are "<metachunk>.<chunk number>".
                chunk['num'] = int(pos_parts[1])
                chunk["pos"] = "%d.%d" % (mc_pos, chunk['num'])

        def _metachunk_preparer():
            mc_pos = 0
            body = first_body
            while True:
                _fix_mc_pos(body, mc_pos)
                yield body
                mc_pos += 1
                _, body = self._content_prepare(account, container,
                                                obj_name, 1, policy,
                                                headers)

        return obj_meta, _metachunk_preparer

    def _object_create(self,
                       account,
                       container,
                       obj_name,
                       source,
                       sysmeta,
                       properties=None,
                       policy=None,
                       headers=None,
                       key_file=None):
        """
        Upload the data read from *source* and register the object.

        The write handler is selected from the chunk method returned by the
        preparation step (EC, Backblaze or replicated). Once the data is
        streamed, the computed checksum is compared to the etag found in
        *sysmeta* (if any) and the object is declared to the container
        client.

        :param source: file-like object to read the data from
        :param sysmeta: system metadata merged into the object metadata
        :param properties: user properties of the object
        :param policy: name of the storage policy
        :param headers: extra headers to pass to the proxy
        :param key_file: forwarded to `_b2_credentials` (Backblaze only)
        :returns: a tuple `(final_chunks, bytes_transferred,
            content_checksum)`
        :raises exc.EtagMismatch: if the given etag differs (ignoring case)
            from the computed checksum
        """
        obj_meta, chunk_prep = self._content_preparer(
            account, container, obj_name, policy=policy, headers=headers)
        obj_meta.update(sysmeta)
        obj_meta['content_path'] = obj_name
        obj_meta['container_id'] = utils.name2cid(account, container).upper()
        obj_meta['ns'] = self.namespace

        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])

        def _timed_handler(handler_cls):
            # EC and replicated handlers share the same constructor shape.
            return handler_cls(
                source,
                obj_meta,
                chunk_prep,
                storage_method,
                headers=headers,
                write_timeout=self.write_timeout,
                read_timeout=self.read_timeout,
                connection_timeout=self.connection_timeout)

        if storage_method.ec:
            handler = _timed_handler(ECWriteHandler)
        elif storage_method.backblaze:
            backblaze_info = self._b2_credentials(storage_method, key_file)
            handler = BackblazeWriteHandler(source, obj_meta, chunk_prep,
                                            storage_method, headers,
                                            backblaze_info)
        else:
            handler = _timed_handler(ReplicatedWriteHandler)

        final_chunks, bytes_transferred, content_checksum = handler.stream()

        expected_etag = obj_meta.get('etag')
        if expected_etag and \
                expected_etag.lower() != content_checksum.lower():
            raise exc.EtagMismatch("given etag %s != computed %s" %
                                   (expected_etag, content_checksum))
        obj_meta['etag'] = content_checksum

        # FIXME: we may just pass **obj_meta
        self.container.content_create(
            account,
            container,
            obj_name,
            size=bytes_transferred,
            checksum=content_checksum,
            data={'chunks': final_chunks, 'properties': properties or {}},
            content_id=obj_meta['id'],
            stgpol=obj_meta['policy'],
            version=obj_meta['version'],
            mime_type=obj_meta['mime_type'],
            chunk_method=obj_meta['chunk_method'],
            headers=headers)
        return final_chunks, bytes_transferred, content_checksum

    def _fetch_stream(self, meta, chunks, ranges, storage_method, headers):
        """
        Generator yielding the data of a replicated object.

        :param meta: object metadata (unused here)
        :param chunks: chunks of the object, indexed by metachunk position
        :param ranges: list of `(start, end)` tuples, or a false value to
            use `[(None, None)]`
        :param storage_method: storage method of the object (unused here)
        :param headers: extra headers sent with each chunk request; the
            mapping is not modified
        :raises exc.OioException: if the reader of a position cannot be
            started
        """
        base_headers = headers or {}
        ranges = ranges or [(None, None)]

        meta_range_list = get_meta_ranges(ranges, chunks)

        for meta_range_dict in meta_range_list:
            # Plain dict iteration order is not guaranteed: walk the
            # positions in ascending order so data comes out in sequence.
            for pos in sorted(meta_range_dict):
                meta_range = meta_range_dict[pos]
                meta_start, meta_end = meta_range
                # Use a per-chunk copy so that a 'Range' header neither
                # leaks into the caller's dict nor into the next chunk.
                chunk_headers = dict(base_headers)
                if meta_start is not None and meta_end is not None:
                    chunk_headers['Range'] = http_header_from_ranges(
                        [meta_range])
                reader = io.ChunkReader(
                    iter(chunks[pos]),
                    io.READ_CHUNK_SIZE,
                    chunk_headers,
                    connection_timeout=self.connection_timeout,
                    response_timeout=self.read_timeout,
                    read_timeout=self.read_timeout)
                try:
                    it = reader.get_iter()
                except Exception as err:
                    raise exc.OioException(
                        "Error while downloading position %d: %s" % (pos, err))
                for part in it:
                    for d in part['iter']:
                        yield d

    def _fetch_stream_ec(self, meta, chunks, ranges, storage_method, headers):
        """
        Generator yielding the data of an erasure-coded object.

        :param meta: object metadata (unused here)
        :param chunks: chunks of the object, indexed by metachunk position
        :param ranges: list of `(start, end)` tuples, or a false value to
            use `[(None, None)]`
        :param storage_method: storage method of the object
        :param headers: extra headers passed to the download handler
        """
        ranges = ranges or [(None, None)]

        meta_range_list = get_meta_ranges(ranges, chunks)

        for meta_range_dict in meta_range_list:
            # Plain dict iteration order is not guaranteed: walk the
            # positions in ascending order so data comes out in sequence.
            for pos in sorted(meta_range_dict):
                meta_start, meta_end = meta_range_dict[pos]
                handler = ECChunkDownloadHandler(
                    storage_method,
                    chunks[pos],
                    meta_start,
                    meta_end,
                    headers,
                    connection_timeout=self.connection_timeout,
                    response_timeout=self.read_timeout,
                    read_timeout=self.read_timeout)
                stream = handler.get_stream()
                try:
                    for part_info in stream:
                        for d in part_info['iter']:
                            yield d
                finally:
                    # Also runs when the consumer stops early or an error
                    # is raised while reading.
                    stream.close()

    def _b2_credentials(self, storage_method, key_file):
        """
        Load Backblaze credentials for *storage_method*.

        :param key_file: path to the application key file; when not set,
            '/etc/oio/sds/b2-appkey.conf' is used
        :raises exc.ConfigurationException: if the credentials cannot be
            loaded
        """
        if not key_file:
            key_file = '/etc/oio/sds/b2-appkey.conf'
        try:
            credentials = BackblazeUtils.get_credentials(storage_method,
                                                         key_file)
        except BackblazeUtilsException as err:
            raise exc.ConfigurationException(str(err))
        return credentials

    def _fetch_stream_backblaze(self, meta, chunks, ranges, storage_method,
                                key_file):
        """
        Generator yielding the data of an object stored on Backblaze, one
        downloaded chunk at a time.

        :param meta: object metadata; 'length' is used when no size is
            requested
        :param chunks: chunks of the object, indexed by position
        :param ranges: optional per-position `(offset, size)` pairs
        :param storage_method: storage method of the object
        :param key_file: forwarded to `_b2_credentials`
        :raises exc.OioException: if a chunk download returns nothing
        """
        backblaze_info = self._b2_credentials(storage_method, key_file)
        total_bytes = 0
        current_offset = 0
        size = None
        offset = 0
        for pos in range(len(chunks)):
            if ranges:
                # NOTE(review): ranges is indexed by chunk position here,
                # unlike the other fetch paths -- confirm the intended
                # format with the callers.
                offset = ranges[pos][0]
                size = ranges[pos][1]

            if size is None:
                size = int(meta["length"])
            chunk_size = int(chunks[pos][0]["size"])
            if total_bytes >= size:
                break
            if current_offset + chunk_size <= offset:
                # The chunk ends before the requested offset: skip it.
                # Previously the download went ahead with unbound (first
                # chunk) or stale (later chunks) offset and size.
                current_offset += chunk_size
                continue
            if current_offset < offset:
                _offset = offset - current_offset
            else:
                _offset = 0
            # Never read past the requested size.
            _size = min(chunk_size, size - total_bytes)
            handler = BackblazeChunkDownloadHandler(
                meta,
                chunks[pos],
                _offset,
                _size,
                backblaze_info=backblaze_info)
            stream = handler.get_stream()
            if not stream:
                raise exc.OioException("Error while downloading")
            total_bytes += len(stream)
            yield stream
            current_offset += chunk_size
Exemplo n.º 40
0
class TestPlainContent(BaseTestCase):
    """
    Functional tests for replicated ("plain") contents: creation, chunk
    rebuild and fetch, with and without broken chunks.
    """

    def setUp(self):
        super(TestPlainContent, self).setUp()

        if len(self.conf['services']['rawx']) < 4:
            self.skipTest(
                "Plain tests needs more than 3 rawx to run")

        self.namespace = self.conf['namespace']
        self.account = self.conf['account']
        self.chunk_size = self.conf['chunk_size']
        self.gridconf = {"namespace": self.namespace}
        self.content_factory = ContentFactory(self.gridconf)
        self.container_client = ContainerClient(self.gridconf)
        self.blob_client = BlobClient(self.conf)
        self.container_name = "TestPlainContent-%f" % time.time()
        self.container_client.container_create(account=self.account,
                                               reference=self.container_name)
        self.container_id = cid_from_name(self.account,
                                          self.container_name).upper()
        self.content = random_str(64)
        self.stgpol = "SINGLE"
        self.stgpol_twocopies = "TWOCOPIES"
        self.stgpol_threecopies = "THREECOPIES"

    def _test_create(self, stgpol, data_size):
        """
        Create a content of `data_size` random bytes with policy `stgpol`,
        then check the content metadata and every chunk copy.
        """
        data = random_data(data_size)
        content = self.content_factory.new(self.container_id, self.content,
                                           len(data), stgpol)

        content.create(BytesIO(data))

        meta, chunks = self.container_client.content_locate(
            cid=self.container_id, content=content.content_id)
        self.assertEqual(meta['hash'], md5_data(data))
        self.assertEqual(meta['length'], str(len(data)))
        self.assertEqual(meta['policy'], stgpol)
        self.assertEqual(meta['name'], self.content)

        metachunk_nb = int(math.ceil(float(len(data)) / self.chunk_size))
        if metachunk_nb == 0:
            metachunk_nb = 1  # special case for empty content

        chunks = ChunksHelper(chunks)

        # TODO: read the number of copies from the storage policy itself
        copies_by_policy = {
            self.stgpol_threecopies: 3,
            self.stgpol_twocopies: 2,
            self.stgpol: 1,
        }
        # Fail with a clear message instead of an unbound local variable.
        self.assertIn(stgpol, copies_by_policy)
        nb_copy = copies_by_policy[stgpol]

        self.assertEqual(len(chunks), metachunk_nb * nb_copy)

        for pos in range(metachunk_nb):
            chunks_at_pos = chunks.filter(pos=pos)
            self.assertEqual(len(chunks_at_pos), nb_copy)

            data_begin = pos * self.chunk_size
            data_end = data_begin + self.chunk_size
            chunk_hash = md5_data(data[data_begin:data_end])

            for chunk in chunks_at_pos:
                meta, stream = self.blob_client.chunk_get(chunk.url)
                self.assertEqual(md5_stream(stream), chunk_hash)
                self.assertEqual(meta['content_path'], self.content)
                self.assertEqual(meta['container_id'], self.container_id)
                # Compare with the created content (the previous check
                # compared the value with itself).
                self.assertEqual(meta['content_id'], content.content_id)
                self.assertEqual(meta['chunk_id'], chunk.id)
                self.assertEqual(meta['chunk_pos'], str(pos))
                # Check that chunk data matches chunk hash from xattr
                self.assertEqual(meta['chunk_hash'], chunk_hash)
                # Check that chunk data matches chunk hash from database
                self.assertEqual(chunk.checksum, chunk_hash)
                full_path = encode_fullpath(
                    self.account, self.container_name, self.content,
                    meta['content_version'], meta['content_id'])
                self.assertEqual(meta['full_path'], full_path)
                self.assertEqual(meta['oio_version'], '4.2')

    def test_twocopies_create_0_byte(self):
        self._test_create(self.stgpol_twocopies, 0)

    def test_twocopies_create_1_byte(self):
        self._test_create(self.stgpol_twocopies, 1)

    def test_twocopies_create_chunksize_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size)

    def test_twocopies_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol_twocopies, self.chunk_size + 1)

    def test_twocopies_create_6294503_bytes(self):
        self._test_create(self.stgpol_twocopies, 6294503)

    def test_single_create_0_byte(self):
        self._test_create(self.stgpol, 0)

    def test_single_create_chunksize_plus_1_bytes(self):
        self._test_create(self.stgpol, self.chunk_size + 1)

    def _new_content(self, stgpol, data, broken_pos_list=None):
        """
        Upload `data` with policy `stgpol`, then delete the chunks listed
        in `broken_pos_list` (a list of `(position, copy index)` pairs).

        :returns: a tuple `(content, broken_chunks_info)` where `content`
            is reloaded after the upload and `broken_chunks_info` maps
            position -> copy index -> details of the deleted chunk
        """
        old_content = self.content_factory.new(
            self.container_id, self.content, len(data), stgpol)

        old_content.create(BytesIO(data))

        broken_chunks_info = {}
        for pos, idx in broken_pos_list or []:
            c = old_content.chunks.filter(pos=pos)[idx]
            meta, stream = self.blob_client.chunk_get(c.url)
            broken_chunks_info.setdefault(pos, {})[idx] = {
                "url": c.url,
                "id": c.id,
                "hash": c.checksum,
                "dl_meta": meta,
                "dl_hash": md5_stream(stream)
            }
            self.blob_client.chunk_delete(c.url)

        # get the new structure of the uploaded content
        return (self.content_factory.get(
            self.container_id, old_content.content_id), broken_chunks_info)

    def _rebuild_and_check(self, content, broken_chunks_info, full_rebuild_pos,
                           allow_frozen_container=False):
        """
        Rebuild the broken chunk designated by `full_rebuild_pos`
        (`(position, copy index)`), then check that the rebuilt chunk
        matches the original one and that other broken chunks at the same
        position are still missing.
        """
        rebuild_pos, rebuild_idx = full_rebuild_pos
        rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
        content.rebuild_chunk(rebuild_chunk_info["id"],
                              allow_frozen_container=allow_frozen_container)

        # get the new structure of the content
        rebuilt_content = self.content_factory.get(self.container_id,
                                                   content.content_id)

        # find the rebuilt chunk
        for c in rebuilt_content.chunks.filter(pos=rebuild_pos):
            if len(content.chunks.filter(id=c.id)) > 0:
                # not the rebuilt chunk
                # if this chunk is broken, it must not have been rebuilt
                for b_c_i in broken_chunks_info[rebuild_pos].values():
                    if c.id == b_c_i["id"]:
                        with ExpectedException(NotFound):
                            _, _ = self.blob_client.chunk_get(c.url)
                continue
            meta, stream = self.blob_client.chunk_get(c.url)
            self.assertEqual(meta["chunk_id"], c.id)
            self.assertEqual(md5_stream(stream),
                             rebuild_chunk_info["dl_hash"])
            self.assertEqual(c.checksum, rebuild_chunk_info["hash"])
            self.assertThat(c.url, NotEquals(rebuild_chunk_info["url"]))
            # The chunk ID is expected to differ: compare everything else.
            del meta["chunk_id"]
            del rebuild_chunk_info["dl_meta"]["chunk_id"]
            self.assertEqual(meta, rebuild_chunk_info["dl_meta"])

    def _test_rebuild(self, stgpol, data_size, broken_pos_list,
                      full_rebuild_pos):
        """Create a content with broken chunks, then rebuild one chunk."""
        data = random_data(data_size)
        content, broken_chunks_info = self._new_content(
            stgpol, data, broken_pos_list)

        self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos)

    def test_2copies_content_0_byte_1broken_rebuild_pos_0_idx_0(self):
        self._test_rebuild(self.stgpol_twocopies, 0, [(0, 0)], (0, 0))

    def test_2copies_content_1_byte_1broken_rebuild_pos_0_idx_1(self):
        self._test_rebuild(self.stgpol_twocopies, 1, [(0, 1)], (0, 1))

    def test_3copies_content_chunksize_bytes_2broken_rebuild_pos_0_idx_1(self):
        if len(self.conf['services']['rawx']) <= 3:
            self.skipTest("Need more than 3 rawx")
        self._test_rebuild(self.stgpol_threecopies, self.chunk_size,
                           [(0, 0), (0, 1)], (0, 1))

    def test_3copies_content_2xchksize_bytes_2broken_rebuild_pos_1_idx_2(self):
        self._test_rebuild(self.stgpol_threecopies, 2 * self.chunk_size,
                           [(1, 0), (1, 2)], (1, 2))

    def test_2copies_content_0_byte_2broken_rebuild_pos_0_idx_0(self):
        with ExpectedException(UnrecoverableContent):
            self._test_rebuild(
                self.stgpol_twocopies, 0, [(0, 0), (0, 1)], (0, 0))

    def test_rebuild_chunk_in_frozen_container(self):
        data = random_data(self.chunk_size)
        content, broken_chunks_info = self._new_content(
            self.stgpol_twocopies, data, [(0, 0)])
        system = dict()
        system['sys.status'] = str(OIO_DB_FROZEN)
        self.container_client.container_set_properties(
            self.account, self.container_name, None, system=system)

        # Bound before the try block since it is used after it as well.
        full_rebuild_pos = (0, 0)
        rebuild_pos, rebuild_idx = full_rebuild_pos
        try:
            rebuild_chunk_info = broken_chunks_info[rebuild_pos][rebuild_idx]
            self.assertRaises(ServiceBusy,
                              content.rebuild_chunk, rebuild_chunk_info["id"])
        finally:
            # Always re-enable the container, even if the check failed.
            system['sys.status'] = str(OIO_DB_ENABLED)
            self.container_client.container_set_properties(
                self.account, self.container_name, None, system=system)

        self._rebuild_and_check(content, broken_chunks_info, full_rebuild_pos,
                                allow_frozen_container=True)

    def _test_fetch(self, stgpol, data_size, broken_pos_list):
        """
        Create a content with broken chunks, fetch it, and check that the
        data is intact and that no broken chunk has been rebuilt.
        """
        data = random_data(data_size)
        content, _ = self._new_content(stgpol, data, broken_pos_list)

        fetched_data = b"".join(content.fetch())

        self.assertEqual(fetched_data, data)

        for pos, idx in broken_pos_list:
            # check nothing has been rebuilt: the copy that was broken
            # (not always the first one) must still be missing
            c = content.chunks.filter(pos=pos)[idx]
            self.assertRaises(NotFound, self.blob_client.chunk_delete, c.url)

    def test_twocopies_fetch_content_0_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 0, [])

    def test_twocopies_fetch_content_0_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 0, [(0, 0)])

    def test_twocopies_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, 1, [])

    def test_twocopies_fetch_content_1_byte_with_broken_0_0(self):
        self._test_fetch(self.stgpol_twocopies, 1, [(0, 0)])

    def test_twocopies_fetch_chunksize_bytes_without_broken_chunks(self):
        self._test_fetch(self.stgpol_twocopies, self.chunk_size, [])

    def test_twocopies_fetch_2xchuksize_bytes_with_broken_0_0_and_1_0(self):
        self._test_fetch(
            self.stgpol_twocopies, self.chunk_size * 2, [(0, 0), (1, 0)])

    def test_twocopies_fetch_content_chunksize_bytes_2_broken_chunks(self):
        data = random_data(self.chunk_size)
        content, _ = self._new_content(
            self.stgpol_twocopies, data, [(0, 0), (0, 1)])
        gen = content.fetch()
        self.assertRaises(UnrecoverableContent, next, gen)

    def test_single_fetch_content_1_byte_without_broken_chunks(self):
        self._test_fetch(self.stgpol, 1, [])

    def test_single_fetch_chunksize_bytes_plus_1_without_broken_chunk(self):
        # One byte more than a chunk, as the test name states.
        self._test_fetch(self.stgpol, self.chunk_size + 1, [])
Exemplo n.º 41
0
class TestBlobAuditorFunctional(BaseTestCase):
    def setUp(self):
        """Create a container and upload one chunk for the auditor tests.

        The content itself is NOT registered in the container here; tests
        that need it call ``init_content()``.
        """
        super(TestBlobAuditorFunctional, self).setUp()
        self.namespace = self.conf["namespace"]
        self.account = self.conf["account"]
        self.test_dir = self.conf["sds_path"]

        # Only the third value returned for the rawx service is used.
        _, _, rawx_addr = self.get_service_url("rawx")
        self.rawx = "http://" + rawx_addr

        self.h = hashlib.new("md5")

        client_conf = {"namespace": self.namespace}
        self.auditor = BlobAuditorWorker(client_conf, get_logger(None), None)
        self.container_c = ContainerClient(client_conf)
        self.blob_c = BlobClient()

        self.ref = random_str(8)
        self.container_c.container_create(self.account, self.ref)

        self.url_rand = random_id(64)

        # Payload and the lower-cased hex MD5 digest that goes with it.
        self.data = random_str(1280)
        self.h.update(self.data)
        self.hash_rand = self.h.hexdigest().lower()

        self.content = TestContent(random_str(6), len(self.data), self.url_rand, 1)
        self.content.id_container = cid_from_name(self.account, self.ref).upper()
        self.chunk = TestChunk(self.content.size, self.url_rand, 0, self.hash_rand)

        self.chunk_url = "%s/%s" % (self.rawx, self.chunk.id_chunk)
        # Chunk description later sent to the container by init_content().
        self.chunk_proxy = {
            "hash": self.chunk.md5,
            "pos": "0",
            "size": self.chunk.size,
            "url": self.chunk_url,
        }

        # Metadata sent along with the chunk data.
        chunk_meta = {
            "content_path": self.content.path,
            "container_id": self.content.id_container,
            "chunk_method": "plain/nb_copy=3",
            "policy": "TESTPOLICY",
            "id": "0000",
            "version": 1,
            "chunk_id": self.chunk.id_chunk,
            "chunk_pos": self.chunk.pos,
            "chunk_hash": self.chunk.md5,
        }
        self.blob_c.chunk_put(self.chunk_url, chunk_meta, self.data)

        # Path handed to the auditor by the tests.
        # NOTE(review): the "-rawx-1" directory is hard-coded while the first
        # two values returned by get_service_url() are ignored -- confirm it
        # always matches the service the chunk was uploaded to.
        chunk_id = self.chunk.id_chunk
        self.chunk_path = "".join(
            [self.test_dir, "/data/", self.namespace, "-rawx-1/", chunk_id[0:3], "/", chunk_id]
        )
        self.bad_container_id = "0" * 64

    def tearDown(self):
        """Best-effort removal of the content, the container and the chunk file.

        Each step is attempted independently and any ``Exception`` it raises
        is ignored, so one failed cleanup does not prevent the next ones.
        """
        super(TestBlobAuditorFunctional, self).tearDown()

        # Lambdas defer attribute lookups so that they happen inside the try.
        cleanup_steps = (
            lambda: self.container_c.content_delete(self.account, self.ref, self.content.path),
            lambda: self.container_c.container_destroy(self.account, self.ref),
            lambda: os.remove(self.chunk_path),
        )
        for step in cleanup_steps:
            try:
                step()
            except Exception:
                pass

    def init_content(self):
        """Register the content in the container with ``self.chunk_proxy`` as its only chunk."""
        chunks = [self.chunk_proxy]
        self.container_c.content_create(
            self.account,
            self.ref,
            self.content.path,
            self.chunk.size,
            self.hash_rand,
            data=chunks,
        )

    def test_chunk_audit(self):
        self.init_content()
        self.auditor.chunk_audit(self.chunk_path)

    def test_content_deleted(self):
        # init_content() is deliberately not called: the chunk uploaded by
        # setUp has no content registered in the container.
        audit = self.auditor.chunk_audit
        self.assertRaises(exc.OrphanChunk, audit, self.chunk_path)

    def test_container_deleted(self):
        # Destroy the container created by setUp, then audit the chunk that
        # still refers to it.
        self.container_c.container_destroy(self.account, self.ref)

        audit = self.auditor.chunk_audit
        self.assertRaises(exc.OrphanChunk, audit, self.chunk_path)

    def test_chunk_corrupted(self):
        self.init_content()
        # Overwrite the chunk file with fresh random_str(1280) output, the
        # same call setUp used to produce the uploaded data.
        with open(self.chunk_path, "w") as chunk_file:
            chunk_file.write(random_str(1280))

        audit = self.auditor.chunk_audit
        self.assertRaises(exc.CorruptedChunk, audit, self.chunk_path)

    def test_chunk_bad_size(self):
        self.init_content()
        # Overwrite the chunk file with random_str(320) instead of the
        # random_str(1280) payload uploaded by setUp.
        with open(self.chunk_path, "w") as chunk_file:
            chunk_file.write(random_str(320))

        audit = self.auditor.chunk_audit
        self.assertRaises(exc.FaultyChunk, audit, self.chunk_path)

    def test_xattr_bad_chunk_size(self):
        self.init_content()
        # Replace the chunk-size extended attribute with "-1".
        attr_name = "user." + chunk_xattr_keys["chunk_size"]
        xattr.setxattr(self.chunk_path, attr_name, "-1")

        audit = self.auditor.chunk_audit
        self.assertRaises(exc.FaultyChunk, audit, self.chunk_path)

    def test_xattr_bad_chunk_hash(self):
        self.init_content()
        # Replace the chunk-hash extended attribute with a bogus value.
        attr_name = "user." + chunk_xattr_keys["chunk_hash"]
        xattr.setxattr(self.chunk_path, attr_name, "WRONG_HASH")

        audit = self.auditor.chunk_audit
        self.assertRaises(exc.CorruptedChunk, audit, self.chunk_path)

    def test_xattr_bad_content_path(self):
        self.init_content()
        # Replace the content-path extended attribute with a bogus value.
        attr_name = "user." + chunk_xattr_keys["content_path"]
        xattr.setxattr(self.chunk_path, attr_name, "WRONG_PATH")

        audit = self.auditor.chunk_audit
        self.assertRaises(exc.OrphanChunk, audit, self.chunk_path)

    def test_xattr_bad_chunk_id(self):
        self.init_content()
        # Replace the chunk-id extended attribute with a bogus value.
        attr_name = "user." + chunk_xattr_keys["chunk_id"]
        xattr.setxattr(self.chunk_path, attr_name, "WRONG_ID")

        audit = self.auditor.chunk_audit
        self.assertRaises(exc.OrphanChunk, audit, self.chunk_path)

    def test_xattr_bad_content_container(self):
        self.init_content()
        # Replace the container-id extended attribute with the all-zero id
        # prepared in setUp.
        attr_name = "user." + chunk_xattr_keys["container_id"]
        xattr.setxattr(self.chunk_path, attr_name, self.bad_container_id)

        audit = self.auditor.chunk_audit
        self.assertRaises(exc.OrphanChunk, audit, self.chunk_path)

    def test_xattr_bad_chunk_position(self):
        self.init_content()
        # The position "42" is written under two attribute names: a literal
        # one, then the one looked up in chunk_xattr_keys.
        # NOTE(review): presumably these are a legacy and a current key
        # name -- confirm both writes are still needed.
        wrong_position = "42"
        xattr.setxattr(self.chunk_path, "user.grid.chunk.position", wrong_position)

        attr_name = "user." + chunk_xattr_keys["chunk_pos"]
        xattr.setxattr(self.chunk_path, attr_name, wrong_position)

        audit = self.auditor.chunk_audit
        self.assertRaises(exc.FaultyChunk, audit, self.chunk_path)

    def test_chunk_bad_hash(self):
        # setUp already fed self.data to the hasher once; feeding it again
        # yields the digest of the data repeated twice, which is what gets
        # registered with the content below.
        self.h.update(self.data)
        doubled_digest = self.h.hexdigest().lower()
        self.hash_rand = self.chunk.md5 = doubled_digest
        self.chunk_proxy["hash"] = self.chunk.md5
        self.init_content()

        audit = self.auditor.chunk_audit
        self.assertRaises(exc.FaultyChunk, audit, self.chunk_path)

    def test_chunk_bad_length(self):
        # Register the content with a size of 320 instead of the size the
        # chunk was created with in setUp.
        # NOTE(review): this body is identical to test_chunk_bad_chunk_size.
        wrong_size = 320
        self.chunk.size = wrong_size
        self.chunk_proxy["size"] = self.chunk.size
        self.init_content()

        audit = self.auditor.chunk_audit
        self.assertRaises(exc.FaultyChunk, audit, self.chunk_path)

    def test_chunk_bad_chunk_size(self):
        # Register the content with a size of 320 instead of the size the
        # chunk was created with in setUp.
        # NOTE(review): this body is identical to test_chunk_bad_length.
        wrong_size = 320
        self.chunk.size = wrong_size
        self.chunk_proxy["size"] = self.chunk.size
        self.init_content()

        audit = self.auditor.chunk_audit
        self.assertRaises(exc.FaultyChunk, audit, self.chunk_path)

    def test_chunk_bad_url(self):
        # Register the content with a chunk URL on the same rawx address
        # but with a bogus chunk id.
        wrong_url = "%s/WRONG_ID" % self.rawx
        self.chunk_proxy["url"] = wrong_url
        self.init_content()

        audit = self.auditor.chunk_audit
        self.assertRaises(exc.OrphanChunk, audit, self.chunk_path)