Example #1
def get_to_final_state():
    replicators = Manager(["account-replicator", "container-replicator", "object-replicator"])
    replicators.stop()
    updaters = Manager(["container-updater", "object-updater"])
    updaters.stop()

    replicators.once()
    updaters.once()
    replicators.once()
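
This helper stops the background consistency daemons, then drives them through a replicate / update / replicate cycle so the cluster settles into its eventual state before a test makes assertions. A minimal usage sketch (a hypothetical test body; BrainSplitter and the half stop/start methods appear in the later examples):

    # after re-enabling whatever servers the test had stopped ...
    self.brain.start_handoff_half()
    # ... settle the cluster, then assert on the final state
    get_to_final_state()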
Example #2
File: common.py Project: 701/swift
def get_to_final_state():
    replicators = Manager(['account-replicator', 'container-replicator',
                           'object-replicator'])
    replicators.stop()
    updaters = Manager(['container-updater', 'object-updater'])
    updaters.stop()

    replicators.once()
    updaters.once()
    replicators.once()
Example #3
def get_to_final_state():
    replicators = Manager(
        ['account-replicator', 'container-replicator', 'object-replicator'])
    replicators.stop()
    updaters = Manager(['container-updater', 'object-updater'])
    updaters.stop()

    replicators.once()
    updaters.once()
    replicators.once()
Example #4
class TestObjectExpirer(ReplProbeTest):

    def setUp(self):
        self.expirer = Manager(['object-expirer'])
        self.expirer.start()
        err = self.expirer.stop()
        if err:
            raise unittest.SkipTest('Unable to verify object-expirer service')

        conf_files = []
        for server in self.expirer.servers:
            conf_files.extend(server.conf_files())
        conf_file = conf_files[0]
        self.client = InternalClient(conf_file, 'probe-test', 3)

        super(TestObjectExpirer, self).setUp()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()
        self.brain = BrainSplitter(self.url, self.token, self.container_name,
                                   self.object_name)

    def _check_obj_in_container_listing(self):
        for obj in self.client.iter_objects(self.account,
                                            self.container_name):

            if self.object_name == obj['name']:
                return True

        return False

    @unittest.skipIf(len(ENABLED_POLICIES) < 2, "Need more than one policy")
    def test_expirer_object_split_brain(self):
        old_policy = random.choice(ENABLED_POLICIES)
        wrong_policy = random.choice([p for p in ENABLED_POLICIES
                                      if p != old_policy])
        # create an expiring object and a container with the wrong policy
        self.brain.stop_primary_half()
        self.brain.put_container(int(old_policy))
        self.brain.put_object(headers={'X-Delete-After': 2})
        # get the object timestamp
        metadata = self.client.get_object_metadata(
            self.account, self.container_name, self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
        create_timestamp = Timestamp(metadata['x-timestamp'])
        self.brain.start_primary_half()
        # get the expiring object updates in their queue, while we have all
        # the servers up
        Manager(['object-updater']).once()
        self.brain.stop_handoff_half()
        self.brain.put_container(int(wrong_policy))
        # don't start handoff servers, only wrong policy is available

        # make sure auto-created containers get in the account listing
        Manager(['container-updater']).once()
        # this run should no-op since the expirer is unable to expire the object
        self.expirer.once()

        self.brain.start_handoff_half()
        self.get_to_final_state()

        # validate object is expired
        found_in_policy = None
        metadata = self.client.get_object_metadata(
            self.account, self.container_name, self.object_name,
            acceptable_statuses=(4,),
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
        self.assertIn('x-backend-timestamp', metadata)
        self.assertEqual(Timestamp(metadata['x-backend-timestamp']),
                         create_timestamp)

        # but it is still in the listing
        self.assertTrue(self._check_obj_in_container_listing(),
                        msg='Did not find listing for %s' % self.object_name)

        # clear proxy cache
        client.post_container(self.url, self.token, self.container_name, {})
        # run the expirer again after replication
        self.expirer.once()

        # object is not in the listing
        self.assertFalse(self._check_obj_in_container_listing(),
                         msg='Found listing for %s' % self.object_name)

        # and validate object is tombstoned
        found_in_policy = None
        for policy in ENABLED_POLICIES:
            metadata = self.client.get_object_metadata(
                self.account, self.container_name, self.object_name,
                acceptable_statuses=(4,),
                headers={'X-Backend-Storage-Policy-Index': int(policy)})
            if 'x-backend-timestamp' in metadata:
                if found_in_policy:
                    self.fail('found object in %s and also %s' %
                              (found_in_policy, policy))
                found_in_policy = policy
                self.assertIn('x-backend-timestamp', metadata)
                self.assertGreater(Timestamp(metadata['x-backend-timestamp']),
                                   create_timestamp)

    def test_expirer_doesnt_make_async_pendings(self):
        # The object expirer cleans up its own queue. The inner loop
        # basically looks like this:
        #
        #    for obj in stuff_to_delete:
        #        delete_the_object(obj)
        #        remove_the_queue_entry(obj)
        #
        # By default, upon receipt of a DELETE request for an expiring
        # object, the object servers will create async_pending records to
        # clean the expirer queue. Since the expirer cleans its own queue,
        # this is unnecessary. The expirer can make requests in such a way
        # that the object server does not write out any async pendings; this
        # test asserts that this is the case.
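        #
        # For illustration only (a sketch, not something this test
        # executes): the expirer is believed to suppress those records by
        # sending a backend header on its conditional DELETEs, roughly:
        #
        #     headers = {'X-If-Delete-At': str(delete_at),
        #                'X-Backend-Clean-Expiring-Object-Queue': 'no'}
        #
        # (the queue-suppression header name is an assumption here; the
        # test below only asserts the observable effect on async pendings)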

        # Make an expiring object in each policy
        for policy in ENABLED_POLICIES:
            container_name = "expirer-test-%d" % policy.idx
            container_headers = {'X-Storage-Policy': policy.name}
            client.put_container(self.url, self.token, container_name,
                                 headers=container_headers)

            now = time.time()
            delete_at = int(now + 2.0)
            client.put_object(
                self.url, self.token, container_name, "some-object",
                headers={'X-Delete-At': str(delete_at),
                         'X-Timestamp': Timestamp(now).normal},
                contents='dontcare')

        time.sleep(2.0)
        # make sure auto-created expirer-queue containers get in the account
        # listing so the expirer can find them
        Manager(['container-updater']).once()

        # Make sure there's no async_pendings anywhere. Probe tests only run
        # on single-node installs anyway, so this set should be small enough
        # that an exhaustive check doesn't take too long.
        all_obj_nodes = self.get_all_object_nodes()
        pendings_before = self.gather_async_pendings(all_obj_nodes)

        # expire the objects
        Manager(['object-expirer']).once()
        pendings_after = self.gather_async_pendings(all_obj_nodes)
        self.assertEqual(pendings_after, pendings_before)

    def test_expirer_object_should_not_be_expired(self):

        # The object-expirer verifies, via the x-if-delete-at header, that
        # it may delete an object. If an object either has no x-delete-at
        # metadata or has an x-delete-at value different from the
        # x-if-delete-at value, the object-expirer's delete fails with
        # 412 Precondition Failed. However, if some of the object's
        # replicas are on handoff nodes, the expirer can still put a
        # tombstone (with a timestamp equal to x-delete-at) on the primary
        # nodes, and the inconsistency is later resolved in favor of the
        # newer timestamp (notably, the case of an overwrite without
        # x-delete-at). This test asserts that, at minimum, an overwritten
        # object whose timestamp is later than the original expiration
        # date is safe.
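        #
        # For illustration, the expirer's DELETE is conditional, roughly:
        #
        #     DELETE /v1/<account>/<container>/<obj>
        #     X-If-Delete-At: <timestamp the queue entry was created for>
        #
        # and the object server compares that header against the object's
        # current x-delete-at metadata, answering 412 on a mismatch.
        # (A sketch only; the exact request construction lives in the
        # expirer daemon, not in this test.)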

        def put_object(headers):
            # use internal client to PUT objects so that X-Timestamp in headers
            # is effective
            headers['Content-Length'] = '0'
            path = self.client.make_path(
                self.account, self.container_name, self.object_name)
            try:
                self.client.make_request('PUT', path, headers, (2,))
            except UnexpectedResponse as e:
                self.fail(
                    'Expected 201 for PUT object but got %s' % e.resp.status)

        obj_brain = BrainSplitter(self.url, self.token, self.container_name,
                                  self.object_name, 'object', self.policy)

        # T(obj_created) < T(obj_deleted with x-delete-at) < T(obj_recreated)
        #   < T(expirer_executed)
        # The recreated obj should appear in any split-brain case

        obj_brain.put_container()

        # T(obj_deleted with x-delete-at)
        # object-server accepts req only if X-Delete-At is later than 'now'
        # so here, T(obj_created) < T(obj_deleted with x-delete-at)
        now = time.time()
        delete_at = int(now + 2.0)
        recreate_at = delete_at + 1.0
        put_object(headers={'X-Delete-At': str(delete_at),
                            'X-Timestamp': Timestamp(now).normal})

        # stop some object servers to create a situation where the
        # object-expirer can put a tombstone on the primary nodes.
        obj_brain.stop_primary_half()

        # increment the X-Timestamp explicitly
        # (will be T(obj_deleted with x-delete-at) < T(obj_recreated))
        put_object(headers={'X-Object-Meta-Expired': 'False',
                            'X-Timestamp': Timestamp(recreate_at).normal})

        # make sure auto-created containers get in the account listing
        Manager(['container-updater']).once()
        # sanity, the newer object is still there
        try:
            metadata = self.client.get_object_metadata(
                self.account, self.container_name, self.object_name)
        except UnexpectedResponse as e:
            self.fail(
                'Expected 200 for HEAD object but got %s' % e.resp.status)

        self.assertIn('x-object-meta-expired', metadata)

        # some object servers recovered
        obj_brain.start_primary_half()

        # sleep until after recreate_at
        while time.time() <= recreate_at:
            time.sleep(0.1)
        # Now, expirer runs at the time after obj is recreated
        self.expirer.once()

        # verify that original object was deleted by expirer
        obj_brain.stop_handoff_half()
        try:
            metadata = self.client.get_object_metadata(
                self.account, self.container_name, self.object_name,
                acceptable_statuses=(4,))
        except UnexpectedResponse as e:
            self.fail(
                'Expected 404 for HEAD object but got %s' % e.resp.status)
        obj_brain.start_handoff_half()

        # and inconsistent state of objects is recovered by replicator
        Manager(['object-replicator']).once()

        # check if you can get recreated object
        try:
            metadata = self.client.get_object_metadata(
                self.account, self.container_name, self.object_name)
        except UnexpectedResponse as e:
            self.fail(
                'Expected 200 for HEAD object but got %s' % e.resp.status)

        self.assertIn('x-object-meta-expired', metadata)

    def _test_expirer_delete_outdated_object_version(self, object_exists):
        # This test simulates a case where the expirer tries to delete
        # an outdated version of an object.
        # One case is where the expirer gets a 404, whereas the newest version
        # of the object is offline.
        # Another case is where the expirer gets a 412, since the old version
        # of the object mismatches the expiration time sent by the expirer.
        # In any of these cases, the expirer should retry deleting the object
        # later, for as long as a reclaim age has not passed.
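        #
        # In other words (a sketch of the expected behavior, not test
        # code):
        #
        #     404 -> newest version offline; keep the queue entry, retry
        #     412 -> x-if-delete-at mismatch; keep the queue entry, retry
        #     2xx -> object deleted; the queue entry can be cleaned up
        #
        # with retries continuing until the entry ages past reclaim_age.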
        obj_brain = BrainSplitter(self.url, self.token, self.container_name,
                                  self.object_name, 'object', self.policy)

        obj_brain.put_container()

        if object_exists:
            obj_brain.put_object()

        # currently, the object either doesn't exist, or does not have
        # an expiration

        # stop primary servers and put a newer version of the object, this
        # time with an expiration. only the handoff servers will have
        # the new version
        obj_brain.stop_primary_half()
        now = time.time()
        delete_at = int(now + 2.0)
        obj_brain.put_object({'X-Delete-At': str(delete_at)})

        # make sure auto-created containers get in the account listing
        Manager(['container-updater']).once()

        # update object record in the container listing
        Manager(['container-replicator']).once()

        # take handoff servers down, and bring up the outdated primary servers
        obj_brain.start_primary_half()
        obj_brain.stop_handoff_half()

        # wait until object expiration time
        while time.time() <= delete_at:
            time.sleep(0.1)

        # run expirer against the outdated servers. it should fail since
        # the outdated version does not match the expiration time
        self.expirer.once()

        # bring all servers up, and run replicator to update servers
        obj_brain.start_handoff_half()
        Manager(['object-replicator']).once()

        # verify the deletion has failed by checking the container listing
        self.assertTrue(self._check_obj_in_container_listing(),
                        msg='Did not find listing for %s' % self.object_name)

        # run expirer again, delete should now succeed
        self.expirer.once()

        # verify the deletion by checking the container listing
        self.assertFalse(self._check_obj_in_container_listing(),
                         msg='Found listing for %s' % self.object_name)

    def test_expirer_delete_returns_outdated_404(self):
        self._test_expirer_delete_outdated_object_version(object_exists=False)

    def test_expirer_delete_returns_outdated_412(self):
        self._test_expirer_delete_outdated_object_version(object_exists=True)
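
A pattern worth noting from this example: the InternalClient built in setUp() can pass backend-only headers, which is how these tests HEAD the same object in each storage policy, and acceptable_statuses=(4,) makes a 404 count as a valid response instead of raising UnexpectedResponse. Condensed from the calls above:

    metadata = self.client.get_object_metadata(
        self.account, self.container_name, self.object_name,
        acceptable_statuses=(4,),
        headers={'X-Backend-Storage-Policy-Index': int(policy)})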
Example #5
class ProbeTest(unittest.TestCase):
    """
    Don't instantiate this directly, use a child class instead.
    """

    def setUp(self):
        p = Popen("resetswift 2>&1", shell=True, stdout=PIPE)
        stdout, _stderr = p.communicate()
        print(stdout)
        Manager(['all']).stop()
        self.pids = {}
        try:
            self.account_ring = get_ring(
                'account',
                self.acct_cont_required_replicas,
                self.acct_cont_required_devices)
            self.container_ring = get_ring(
                'container',
                self.acct_cont_required_replicas,
                self.acct_cont_required_devices)
            self.policy = get_policy(**self.policy_requirements)
            self.object_ring = get_ring(
                self.policy.ring_name,
                self.obj_required_replicas,
                self.obj_required_devices,
                server='object')
            Manager(['main']).start(wait=False)
            self.port2server = {}
            for server, port in [('account', 6002), ('container', 6001),
                                 ('object', 6000)]:
                for number in xrange(1, 9):
                    self.port2server[port + (number * 10)] = \
                        '%s%d' % (server, number)
            for port in self.port2server:
                check_server(port, self.port2server, self.pids)
            self.port2server[8080] = 'proxy'
            self.url, self.token, self.account = \
                check_server(8080, self.port2server, self.pids)
            self.configs = defaultdict(dict)
            for name in ('account', 'container', 'object'):
                for server_name in (name, '%s-replicator' % name):
                    for server in Manager([server_name]):
                        for i, conf in enumerate(server.conf_files(), 1):
                            self.configs[server.server][i] = conf
            self.replicators = Manager(
                ['account-replicator', 'container-replicator',
                 'object-replicator'])
            self.updaters = Manager(['container-updater', 'object-updater'])
            self.server_port_to_conf = {}
            # get some backend daemon configs loaded up
            for server in ('account', 'container', 'object'):
                self.server_port_to_conf[server] = build_port_to_conf(server)
        except BaseException:
            try:
                raise
            finally:
                try:
                    Manager(['all']).kill()
                except Exception:
                    pass

    def tearDown(self):
        Manager(['all']).kill()

    def device_dir(self, server, node):
        conf = self.server_port_to_conf[server][node['port']]
        return os.path.join(conf['devices'], node['device'])

    def storage_dir(self, server, node, part=None, policy=None):
        policy = policy or self.policy
        device_path = self.device_dir(server, node)
        path_parts = [device_path, get_data_dir(policy)]
        if part is not None:
            path_parts.append(str(part))
        return os.path.join(*path_parts)

    def config_number(self, node):
        _server_type, config_number = get_server_number(
            node['port'], self.port2server)
        return config_number

    def get_to_final_state(self):
        # these .stop()s are probably not strictly necessary,
        # but may prevent race conditions
        self.replicators.stop()
        self.updaters.stop()

        self.replicators.once()
        self.updaters.once()
        self.replicators.once()
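
Since ProbeTest is never instantiated directly, a concrete test subclasses it and supplies the ring/replica requirements that setUp() reads. A minimal sketch (the attribute names are taken from the setUp() above; the values are illustrative):

class MyProbeTest(ProbeTest):
    acct_cont_required_replicas = 3
    acct_cont_required_devices = 4
    obj_required_replicas = 3
    obj_required_devices = 4
    policy_requirements = {}

    def test_something(self):
        # by this point the servers are up and self.url, self.token and
        # self.account have been populated by check_server()
        self.get_to_final_state()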
Example #6
class TestReconstructorRevert(ECProbeTest):
    def setUp(self):
        super(TestReconstructorRevert, self).setUp()
        self.container_name = "container-%s" % uuid.uuid4()
        self.object_name = "object-%s" % uuid.uuid4()

        # sanity
        self.assertEqual(self.policy.policy_type, EC_POLICY)
        self.reconstructor = Manager(["object-reconstructor"])

    def proxy_get(self):
        # GET object
        headers, body = client.get_object(
            self.url, self.token, self.container_name, self.object_name, resp_chunk_size=64 * 2 ** 10
        )
        resp_checksum = md5()
        for chunk in body:
            resp_checksum.update(chunk)
        return resp_checksum.hexdigest()

    def direct_get(self, node, part):
        req_headers = {"X-Backend-Storage-Policy-Index": int(self.policy)}
        headers, data = direct_client.direct_get_object(
            node,
            part,
            self.account,
            self.container_name,
            self.object_name,
            headers=req_headers,
            resp_chunk_size=64 * 2 ** 20,
        )
        hasher = md5()
        for chunk in data:
            hasher.update(chunk)
        return hasher.hexdigest()

    def test_revert_object(self):
        # create EC container
        headers = {"X-Storage-Policy": self.policy.name}
        client.put_container(self.url, self.token, self.container_name, headers=headers)

        # get our node lists
        opart, onodes = self.object_ring.get_nodes(self.account, self.container_name, self.object_name)
        hnodes = self.object_ring.get_more_nodes(opart)

        # kill two primary nodes (a parity count's worth) so we can
        # force data onto handoffs; we do that by renaming dev dirs
        # to induce 507s
        p_dev1 = self.device_dir("object", onodes[0])
        p_dev2 = self.device_dir("object", onodes[1])
        self.kill_drive(p_dev1)
        self.kill_drive(p_dev2)

        # PUT object
        contents = Body()
        headers = {"x-object-meta-foo": "meta-foo"}
        headers_post = {"x-object-meta-bar": "meta-bar"}
        client.put_object(
            self.url, self.token, self.container_name, self.object_name, contents=contents, headers=headers
        )
        client.post_object(self.url, self.token, self.container_name, self.object_name, headers=headers_post)
        # (Some versions of?) swiftclient will mutate the headers dict on post
        headers_post.pop("X-Auth-Token", None)

        # these primaries can't serve the data any more, we expect 507
        # here and not 404 because we're using mount_check to kill nodes
        for onode in (onodes[0], onodes[1]):
            try:
                self.direct_get(onode, opart)
            except direct_client.DirectClientException as err:
                self.assertEqual(err.http_status, 507)
            else:
                self.fail("Node data on %r was not fully destoryed!" % (onode,))

        # now take out another primary
        p_dev3 = self.device_dir("object", onodes[2])
        self.kill_drive(p_dev3)

        # this node can't serve the data any more
        try:
            self.direct_get(onodes[2], opart)
        except direct_client.DirectClientException as err:
            self.assertEqual(err.http_status, 507)
        else:
            self.fail("Node data on %r was not fully destoryed!" % (onode,))

        # make sure we can still GET the object and that it's correct;
        # we're now pulling from handoffs and reconstructing
        etag = self.proxy_get()
        self.assertEqual(etag, contents.etag)

        # rename the dev dirs so they don't 507 anymore
        self.revive_drive(p_dev1)
        self.revive_drive(p_dev2)
        self.revive_drive(p_dev3)

        # fire up reconstructor on handoff nodes only
        for hnode in hnodes:
            hnode_id = (hnode["port"] - 6000) / 10
            self.reconstructor.once(number=hnode_id)

        # first three primaries have data again
        for onode in (onodes[0], onodes[2]):
            self.direct_get(onode, opart)

        # check meta
        meta = client.head_object(self.url, self.token, self.container_name, self.object_name)
        for key in headers_post:
            self.assertTrue(key in meta)
            self.assertEqual(meta[key], headers_post[key])

        # handoffs are empty
        for hnode in hnodes:
            try:
                self.direct_get(hnode, opart)
            except direct_client.DirectClientException as err:
                self.assertEqual(err.http_status, 404)
            else:
                self.fail("Node data on %r was not fully destoryed!" % (hnode,))

    def test_delete_propagate(self):
        # create EC container
        headers = {"X-Storage-Policy": self.policy.name}
        client.put_container(self.url, self.token, self.container_name, headers=headers)

        # get our node lists
        opart, onodes = self.object_ring.get_nodes(self.account, self.container_name, self.object_name)
        hnodes = self.object_ring.get_more_nodes(opart)
        p_dev2 = self.device_dir("object", onodes[1])

        # PUT object
        contents = Body()
        client.put_object(self.url, self.token, self.container_name, self.object_name, contents=contents)

        # now let's shut one down
        self.kill_drive(p_dev2)

        # delete on the ones that are left
        client.delete_object(self.url, self.token, self.container_name, self.object_name)

        # spot check a node
        try:
            self.direct_get(onodes[0], opart)
        except direct_client.DirectClientException as err:
            self.assertEqual(err.http_status, 404)
        else:
            self.fail("Node data on %r was not fully destoryed!" % (onodes[0],))

        # enable the first node again
        self.revive_drive(p_dev2)

        # propagate the delete...
        # fire up reconstructor on handoff nodes only
        for hnode in hnodes:
            hnode_id = (hnode["port"] - 6000) / 10
            self.reconstructor.once(number=hnode_id)

        # check the revived node to make sure it's gone
        try:
            self.direct_get(onodes[1], opart)
        except direct_client.DirectClientException as err:
            self.assertEqual(err.http_status, 404)
        else:
            self.fail("Node data on %r was not fully destoryed!" % (onodes[0]))

        # make sure proxy get can't find it
        try:
            self.proxy_get()
        except Exception as err:
            self.assertEqual(err.http_status, 404)
        else:
            self.fail("Node data on %r was not fully destoryed!" % (onodes[0]))

    def test_reconstruct_from_reverted_fragment_archive(self):
        headers = {"X-Storage-Policy": self.policy.name}
        client.put_container(self.url, self.token, self.container_name, headers=headers)

        # get our node lists
        opart, onodes = self.object_ring.get_nodes(self.account, self.container_name, self.object_name)

        # find a primary server that only has one of its devices in the
        # primary node list
        group_nodes_by_config = defaultdict(list)
        for n in onodes:
            group_nodes_by_config[self.config_number(n)].append(n)
        for config_number, node_list in group_nodes_by_config.items():
            if len(node_list) == 1:
                break
        else:
            self.fail("ring balancing did not use all available nodes")
        primary_node = node_list[0]

        # ... and 507 its device
        primary_device = self.device_dir("object", primary_node)
        self.kill_drive(primary_device)

        # PUT object
        contents = Body()
        etag = client.put_object(self.url, self.token, self.container_name, self.object_name, contents=contents)
        self.assertEqual(contents.etag, etag)

        # fix the primary device and sanity GET
        self.revive_drive(primary_device)
        self.assertEqual(etag, self.proxy_get())

        # find a handoff holding the fragment
        for hnode in self.object_ring.get_more_nodes(opart):
            try:
                reverted_fragment_etag = self.direct_get(hnode, opart)
            except direct_client.DirectClientException as err:
                if err.http_status != 404:
                    raise
            else:
                break
        else:
            self.fail("Unable to find handoff fragment!")

        # we'll force the handoff device to revert instead of potentially
        # racing with rebuild by deleting any other fragments that may be on
        # the same server
        handoff_fragment_etag = None
        for node in onodes:
            if self.is_local_to(node, hnode):
                # we'll keep track of the etag of this fragment we're removing
                # in case we need it later (cue foreshadowing music)...
                try:
                    handoff_fragment_etag = self.direct_get(node, opart)
                except direct_client.DirectClientException as err:
                    if err.http_status != 404:
                        raise
                    # this just means our handoff device was on the same
                    # machine as the primary!
                    continue
                # use the primary node's device - not the hnode device
                part_dir = self.storage_dir("object", node, part=opart)
                shutil.rmtree(part_dir, True)

        # revert from handoff device with reconstructor
        self.reconstructor.once(number=self.config_number(hnode))

        # verify fragment reverted to primary server
        self.assertEqual(reverted_fragment_etag, self.direct_get(primary_node, opart))

        # now we'll remove some data on one of the primary node's partners
        partner = random.choice(reconstructor._get_partners(primary_node["index"], onodes))

        try:
            rebuilt_fragment_etag = self.direct_get(partner, opart)
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
            # partner already had its fragment removed
            if handoff_fragment_etag is not None and self.is_local_to(hnode, partner):
                # oh, well that makes sense then...
                rebuilt_fragment_etag = handoff_fragment_etag
            else:
                # I wonder what happened?
                self.fail("Partner inexplicably missing fragment!")
        part_dir = self.storage_dir("object", partner, part=opart)
        shutil.rmtree(part_dir, True)

        # sanity, it's gone
        try:
            self.direct_get(partner, opart)
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
        else:
            self.fail("successful GET of removed partner fragment archive!?")

        # and force the primary node to do a rebuild
        self.reconstructor.once(number=self.config_number(primary_node))

        # and validate the partner's rebuilt_fragment_etag
        try:
            self.assertEqual(rebuilt_fragment_etag, self.direct_get(partner, opart))
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
            else:
                self.fail("Did not find rebuilt fragment on partner node")
Example #7
class TestReconstructorRevert(ECProbeTest):
    def setUp(self):
        super(TestReconstructorRevert, self).setUp()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()

        # sanity
        self.assertEqual(self.policy.policy_type, EC_POLICY)
        self.reconstructor = Manager(["object-reconstructor"])

    def proxy_get(self):
        # GET object
        headers, body = client.get_object(self.url,
                                          self.token,
                                          self.container_name,
                                          self.object_name,
                                          resp_chunk_size=64 * 2**10)
        resp_checksum = md5()
        for chunk in body:
            resp_checksum.update(chunk)
        return resp_checksum.hexdigest()

    def direct_get(self, node, part):
        req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
        headers, data = direct_client.direct_get_object(node,
                                                        part,
                                                        self.account,
                                                        self.container_name,
                                                        self.object_name,
                                                        headers=req_headers,
                                                        resp_chunk_size=64 *
                                                        2**20)
        hasher = md5()
        for chunk in data:
            hasher.update(chunk)
        return hasher.hexdigest()

    def test_revert_object(self):
        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url,
                             self.token,
                             self.container_name,
                             headers=headers)

        # get our node lists
        opart, onodes = self.object_ring.get_nodes(self.account,
                                                   self.container_name,
                                                   self.object_name)
        hnodes = self.object_ring.get_more_nodes(opart)

        # kill two primary nodes (a parity count's worth) so we can
        # force data onto handoffs; we do that by renaming dev dirs
        # to induce 507s
        p_dev1 = self.device_dir(onodes[0])
        p_dev2 = self.device_dir(onodes[1])
        self.kill_drive(p_dev1)
        self.kill_drive(p_dev2)

        # PUT object
        contents = Body()
        headers = {'x-object-meta-foo': 'meta-foo'}
        headers_post = {'x-object-meta-bar': 'meta-bar'}
        client.put_object(self.url,
                          self.token,
                          self.container_name,
                          self.object_name,
                          contents=contents,
                          headers=headers)
        client.post_object(self.url,
                           self.token,
                           self.container_name,
                           self.object_name,
                           headers=headers_post)
        # (Some versions of?) swiftclient will mutate the headers dict on post
        headers_post.pop('X-Auth-Token', None)

        # these primaries can't serve the data any more, we expect 507
        # here and not 404 because we're using mount_check to kill nodes
        for onode in (onodes[0], onodes[1]):
            try:
                self.direct_get(onode, opart)
            except direct_client.DirectClientException as err:
                self.assertEqual(err.http_status, 507)
            else:
                self.fail('Node data on %r was not fully destroyed!' %
                          (onode, ))

        # now take out another primary
        p_dev3 = self.device_dir(onodes[2])
        self.kill_drive(p_dev3)

        # this node can't serve the data any more
        try:
            self.direct_get(onodes[2], opart)
        except direct_client.DirectClientException as err:
            self.assertEqual(err.http_status, 507)
        else:
            self.fail('Node data on %r was not fully destroyed!' % (onodes[2], ))

        # make sure we can still GET the object and that it's correct;
        # we're now pulling from handoffs and reconstructing
        etag = self.proxy_get()
        self.assertEqual(etag, contents.etag)

        # rename the dev dirs so they don't 507 anymore
        self.revive_drive(p_dev1)
        self.revive_drive(p_dev2)
        self.revive_drive(p_dev3)

        # fire up reconstructor on handoff nodes only
        for hnode in hnodes:
            hnode_id = (hnode['port'] - 6000) // 10
            self.reconstructor.once(number=hnode_id)

        # first three primaries have data again
        for onode in (onodes[0], onodes[2]):
            self.direct_get(onode, opart)

        # check meta
        meta = client.head_object(self.url, self.token, self.container_name,
                                  self.object_name)
        for key in headers_post:
            self.assertIn(key, meta)
            self.assertEqual(meta[key], headers_post[key])

        # handoffs are empty
        for hnode in hnodes:
            try:
                self.direct_get(hnode, opart)
            except direct_client.DirectClientException as err:
                self.assertEqual(err.http_status, 404)
            else:
                self.fail('Node data on %r was not fully destroyed!' %
                          (hnode, ))

    def test_delete_propagate(self):
        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url,
                             self.token,
                             self.container_name,
                             headers=headers)

        # get our node lists
        opart, onodes = self.object_ring.get_nodes(self.account,
                                                   self.container_name,
                                                   self.object_name)
        hnodes = list(
            itertools.islice(self.object_ring.get_more_nodes(opart), 2))

        # PUT object
        contents = Body()
        client.put_object(self.url,
                          self.token,
                          self.container_name,
                          self.object_name,
                          contents=contents)

        # now lets shut down a couple of primaries
        failed_nodes = random.sample(onodes, 2)
        for node in failed_nodes:
            self.kill_drive(self.device_dir(node))

        # Write tombstones over the nodes that are still online
        client.delete_object(self.url, self.token, self.container_name,
                             self.object_name)

        # spot check the primary nodes that are still online
        delete_timestamp = None
        for node in onodes:
            if node in failed_nodes:
                continue
            try:
                self.direct_get(node, opart)
            except direct_client.DirectClientException as err:
                self.assertEqual(err.http_status, 404)
                delete_timestamp = err.http_headers['X-Backend-Timestamp']
            else:
                self.fail('Node data on %r was not fully destroyed!' %
                          (node, ))

        # run the reconstructor on the handoff node multiple times until
        # tombstone is pushed out - each handoff node syncs to a few
        # primaries each time
        iterations = 0
        while iterations < 52:
            self.reconstructor.once(number=self.config_number(hnodes[0]))
            iterations += 1
            # see if the tombstone is reverted
            try:
                self.direct_get(hnodes[0], opart)
            except direct_client.DirectClientException as err:
                self.assertEqual(err.http_status, 404)
                if 'X-Backend-Timestamp' not in err.http_headers:
                    # this means the tombstone is *gone* so it's reverted
                    break
        else:
            self.fail('Still found tombstone on %r after %s iterations' %
                      (hnodes[0], iterations))

        # tombstone is still on the *second* handoff
        try:
            self.direct_get(hnodes[1], opart)
        except direct_client.DirectClientException as err:
            self.assertEqual(err.http_status, 404)
            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
                             delete_timestamp)
        else:
            self.fail('Found obj data on %r' % hnodes[1])

        # repair the primaries
        self.revive_drive(self.device_dir(failed_nodes[0]))
        self.revive_drive(self.device_dir(failed_nodes[1]))

        # run reconstructor on second handoff
        self.reconstructor.once(number=self.config_number(hnodes[1]))

        # verify tombstone is reverted on the first pass
        try:
            self.direct_get(hnodes[1], opart)
        except direct_client.DirectClientException as err:
            self.assertEqual(err.http_status, 404)
            self.assertNotIn('X-Backend-Timestamp', err.http_headers)
        else:
            self.fail('Found obj data on %r' % hnodes[1])

        # sanity make sure proxy get can't find it
        try:
            self.proxy_get()
        except Exception as err:
            self.assertEqual(err.http_status, 404)
        else:
            self.fail('Node data on %r was not fully destroyed!' % (onodes[0]))

    def test_reconstruct_from_reverted_fragment_archive(self):
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url,
                             self.token,
                             self.container_name,
                             headers=headers)

        # get our node lists
        opart, onodes = self.object_ring.get_nodes(self.account,
                                                   self.container_name,
                                                   self.object_name)

        # find a primary server that only has one of its devices in the
        # primary node list
        group_nodes_by_config = defaultdict(list)
        for n in onodes:
            group_nodes_by_config[self.config_number(n)].append(n)
        for config_number, node_list in group_nodes_by_config.items():
            if len(node_list) == 1:
                break
        else:
            self.fail('ring balancing did not use all available nodes')
        primary_node = node_list[0]

        # ... and 507 its device
        primary_device = self.device_dir(primary_node)
        self.kill_drive(primary_device)

        # PUT object
        contents = Body()
        etag = client.put_object(self.url,
                                 self.token,
                                 self.container_name,
                                 self.object_name,
                                 contents=contents)
        self.assertEqual(contents.etag, etag)

        # fix the primary device and sanity GET
        self.revive_drive(primary_device)
        self.assertEqual(etag, self.proxy_get())

        # find a handoff holding the fragment
        for hnode in self.object_ring.get_more_nodes(opart):
            try:
                reverted_fragment_etag = self.direct_get(hnode, opart)
            except direct_client.DirectClientException as err:
                if err.http_status != 404:
                    raise
            else:
                break
        else:
            self.fail('Unable to find handoff fragment!')

        # we'll force the handoff device to revert instead of potentially
        # racing with rebuild by deleting any other fragments that may be on
        # the same server
        handoff_fragment_etag = None
        for node in onodes:
            if self.is_local_to(node, hnode):
                # we'll keep track of the etag of this fragment we're removing
                # in case we need it later (cue foreshadowing music)...
                try:
                    handoff_fragment_etag = self.direct_get(node, opart)
                except direct_client.DirectClientException as err:
                    if err.http_status != 404:
                        raise
                    # this just means our handoff device was on the same
                    # machine as the primary!
                    continue
                # use the primary node's device - not the hnode device
                part_dir = self.storage_dir(node, part=opart)
                shutil.rmtree(part_dir, True)

        # revert from handoff device with reconstructor
        self.reconstructor.once(number=self.config_number(hnode))

        # verify fragment reverted to primary server
        self.assertEqual(reverted_fragment_etag,
                         self.direct_get(primary_node, opart))

        # now we'll remove some data on one of the primary node's partners
        partner = random.choice(
            reconstructor._get_partners(primary_node['index'], onodes))

        try:
            rebuilt_fragment_etag = self.direct_get(partner, opart)
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
            # partner already had its fragment removed
            if (handoff_fragment_etag is not None
                    and self.is_local_to(hnode, partner)):
                # oh, well that makes sense then...
                rebuilt_fragment_etag = handoff_fragment_etag
            else:
                # I wonder what happened?
                self.fail('Partner inexplicably missing fragment!')
        part_dir = self.storage_dir(partner, part=opart)
        shutil.rmtree(part_dir, True)

        # sanity, it's gone
        try:
            self.direct_get(partner, opart)
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
        else:
            self.fail('successful GET of removed partner fragment archive!?')

        # and force the primary node to do a rebuild
        self.reconstructor.once(number=self.config_number(primary_node))

        # and validate the partner's rebuilt_fragment_etag
        try:
            self.assertEqual(rebuilt_fragment_etag,
                             self.direct_get(partner, opart))
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
            else:
                self.fail('Did not find rebuilt fragment on partner node')
Example #8
class TestObjectExpirer(ReplProbeTest):
    def setUp(self):
        self.expirer = Manager(['object-expirer'])
        self.expirer.start()
        err = self.expirer.stop()
        if err:
            raise unittest.SkipTest('Unable to verify object-expirer service')

        conf_files = []
        for server in self.expirer.servers:
            conf_files.extend(server.conf_files())
        conf_file = conf_files[0]
        self.client = InternalClient(conf_file, 'probe-test', 3)

        super(TestObjectExpirer, self).setUp()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()
        self.brain = BrainSplitter(self.url, self.token, self.container_name,
                                   self.object_name)

    def _check_obj_in_container_listing(self):
        for obj in self.client.iter_objects(self.account, self.container_name):

            if self.object_name == obj['name']:
                return True

        return False

    @unittest.skipIf(len(ENABLED_POLICIES) < 2, "Need more than one policy")
    def test_expirer_object_split_brain(self):
        old_policy = random.choice(ENABLED_POLICIES)
        wrong_policy = random.choice(
            [p for p in ENABLED_POLICIES if p != old_policy])
        # create an expiring object and a container with the wrong policy
        self.brain.stop_primary_half()
        self.brain.put_container(int(old_policy))
        self.brain.put_object(headers={'X-Delete-After': 2})
        # get the object timestamp
        metadata = self.client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
        create_timestamp = Timestamp(metadata['x-timestamp'])
        self.brain.start_primary_half()
        # get the expiring object updates in their queue, while we have all
        # the servers up
        Manager(['object-updater']).once()
        self.brain.stop_handoff_half()
        self.brain.put_container(int(wrong_policy))
        # don't start handoff servers, only wrong policy is available

        # make sure auto-created containers get in the account listing
        Manager(['container-updater']).once()
        # this run should no-op since the expirer is unable to expire the object
        self.expirer.once()

        self.brain.start_handoff_half()
        self.get_to_final_state()

        # validate object is expired
        found_in_policy = None
        metadata = self.client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            acceptable_statuses=(4, ),
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
        self.assertIn('x-backend-timestamp', metadata)
        self.assertEqual(Timestamp(metadata['x-backend-timestamp']),
                         create_timestamp)

        # but it is still in the listing
        self.assertTrue(self._check_obj_in_container_listing(),
                        msg='Did not find listing for %s' % self.object_name)

        # clear proxy cache
        client.post_container(self.url, self.token, self.container_name, {})
        # run the expirer again after replication
        self.expirer.once()

        # object is not in the listing
        self.assertFalse(self._check_obj_in_container_listing(),
                         msg='Found listing for %s' % self.object_name)

        # and validate object is tombstoned
        found_in_policy = None
        for policy in ENABLED_POLICIES:
            metadata = self.client.get_object_metadata(
                self.account,
                self.container_name,
                self.object_name,
                acceptable_statuses=(4, ),
                headers={'X-Backend-Storage-Policy-Index': int(policy)})
            if 'x-backend-timestamp' in metadata:
                if found_in_policy:
                    self.fail('found object in %s and also %s' %
                              (found_in_policy, policy))
                found_in_policy = policy
                self.assertIn('x-backend-timestamp', metadata)
                self.assertGreater(Timestamp(metadata['x-backend-timestamp']),
                                   create_timestamp)

    def test_expirer_object_should_not_be_expired(self):

        # The object-expirer verifies, via the x-if-delete-at header, that
        # it may delete an object. If an object either has no x-delete-at
        # metadata or has an x-delete-at value different from the
        # x-if-delete-at value, the object-expirer's delete fails with
        # 412 Precondition Failed. However, if some of the object's
        # replicas are on handoff nodes, the expirer can still put a
        # tombstone (with a timestamp equal to x-delete-at) on the primary
        # nodes, and the inconsistency is later resolved in favor of the
        # newer timestamp (notably, the case of an overwrite without
        # x-delete-at). This test asserts that, at minimum, an overwritten
        # object whose timestamp is later than the original expiration
        # date is safe.

        def put_object(headers):
            # use internal client to PUT objects so that X-Timestamp in headers
            # is effective
            headers['Content-Length'] = '0'
            path = self.client.make_path(self.account, self.container_name,
                                         self.object_name)
            try:
                self.client.make_request('PUT', path, headers, (2, ))
            except UnexpectedResponse as e:
                self.fail('Expected 201 for PUT object but got %s' %
                          e.resp.status)

        obj_brain = BrainSplitter(self.url, self.token, self.container_name,
                                  self.object_name, 'object', self.policy)

        # T(obj_created) < T(obj_deleted with x-delete-at) < T(obj_recreated)
        #   < T(expirer_executed)
        # The recreated obj should appear in any split-brain case

        obj_brain.put_container()

        # T(obj_deleted with x-delete-at)
        # object-server accepts req only if X-Delete-At is later than 'now'
        # so here, T(obj_created) < T(obj_deleted with x-delete-at)
        now = time.time()
        delete_at = int(now + 2.0)
        recreate_at = delete_at + 1.0
        put_object(headers={
            'X-Delete-At': str(delete_at),
            'X-Timestamp': Timestamp(now).normal
        })

        # stop some object servers to create a situation where the
        # object-expirer can put a tombstone on the primary nodes.
        obj_brain.stop_primary_half()

        # increment the X-Timestamp explicitly
        # (will be T(obj_deleted with x-delete-at) < T(obj_recreated))
        put_object(
            headers={
                'X-Object-Meta-Expired': 'False',
                'X-Timestamp': Timestamp(recreate_at).normal
            })

        # make sure auto-created containers get in the account listing
        Manager(['container-updater']).once()
        # sanity, the newer object is still there
        try:
            metadata = self.client.get_object_metadata(self.account,
                                                       self.container_name,
                                                       self.object_name)
        except UnexpectedResponse as e:
            self.fail('Expected 200 for HEAD object but got %s' %
                      e.resp.status)

        self.assertIn('x-object-meta-expired', metadata)

        # some object servers recovered
        obj_brain.start_primary_half()

        # sleep until after recreate_at
        while time.time() <= recreate_at:
            time.sleep(0.1)
        # Now, expirer runs at the time after obj is recreated
        self.expirer.once()

        # verify that original object was deleted by expirer
        obj_brain.stop_handoff_half()
        try:
            metadata = self.client.get_object_metadata(
                self.account,
                self.container_name,
                self.object_name,
                acceptable_statuses=(4, ))
        except UnexpectedResponse as e:
            self.fail('Expected 404 for HEAD object but got %s' %
                      e.resp.status)
        obj_brain.start_handoff_half()

        # and inconsistent state of objects is recovered by replicator
        Manager(['object-replicator']).once()

        # check if you can get recreated object
        try:
            metadata = self.client.get_object_metadata(self.account,
                                                       self.container_name,
                                                       self.object_name)
        except UnexpectedResponse as e:
            self.fail('Expected 200 for HEAD object but got %s' %
                      e.resp.status)

        self.assertIn('x-object-meta-expired', metadata)

    def _test_expirer_delete_outdated_object_version(self, object_exists):
        # This test simulates a case where the expirer tries to delete
        # an outdated version of an object.
        # One case is where the expirer gets a 404, whereas the newest version
        # of the object is offline.
        # Another case is where the expirer gets a 412, since the old version
        # of the object mismatches the expiration time sent by the expirer.
        # In any of these cases, the expirer should retry deleting the object
        # later, for as long as a reclaim age has not passed.
        obj_brain = BrainSplitter(self.url, self.token, self.container_name,
                                  self.object_name, 'object', self.policy)

        obj_brain.put_container()

        if object_exists:
            obj_brain.put_object()

        # currently, the object either doesn't exist, or does not have
        # an expiration

        # stop primary servers and put a newer version of the object, this
        # time with an expiration. only the handoff servers will have
        # the new version
        obj_brain.stop_primary_half()
        now = time.time()
        delete_at = int(now + 2.0)
        obj_brain.put_object({'X-Delete-At': str(delete_at)})

        # make sure auto-created containers get in the account listing
        Manager(['container-updater']).once()

        # update object record in the container listing
        Manager(['container-replicator']).once()

        # take handoff servers down, and bring up the outdated primary servers
        obj_brain.start_primary_half()
        obj_brain.stop_handoff_half()

        # wait until object expiration time
        while time.time() <= delete_at:
            time.sleep(0.1)

        # run expirer against the outdated servers. it should fail since
        # the outdated version does not match the expiration time
        self.expirer.once()

        # bring all servers up, and run replicator to update servers
        obj_brain.start_handoff_half()
        Manager(['object-replicator']).once()

        # verify the deletion has failed by checking the container listing
        self.assertTrue(self._check_obj_in_container_listing(),
                        msg='Did not find listing for %s' % self.object_name)

        # run expirer again, delete should now succeed
        self.expirer.once()

        # this is mainly to paper over lp bug #1652323
        self.get_to_final_state()

        # verify the deletion by checking the container listing
        self.assertFalse(self._check_obj_in_container_listing(),
                         msg='Found listing for %s' % self.object_name)

    def test_expirer_delete_returns_outdated_404(self):
        self._test_expirer_delete_outdated_object_version(object_exists=False)

    def test_expirer_delete_returns_outdated_412(self):
        self._test_expirer_delete_outdated_object_version(object_exists=True)
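
The split-brain choreography these expirer tests rely on follows one recipe: stop half the servers, write state only the running half sees, then swap which half is up and write conflicting state. Roughly, using the BrainSplitter calls from the examples above:

    brain.stop_primary_half()            # one half of the servers goes down
    brain.put_container(int(policy_a))   # state only the running half sees
    brain.start_primary_half()
    brain.stop_handoff_half()            # swap which half is up
    brain.put_container(int(policy_b))   # conflicting state for the other half
    brain.start_handoff_half()
    # the consistency daemons later reconcile the two histories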
Example #9
    def test_reconciler_move_object_twice(self):
        # select some policies
        old_policy = random.choice(list(POLICIES))
        new_policy = random.choice([p for p in POLICIES if p != old_policy])

        # setup a split brain
        self.brain.stop_handoff_half()
        # get old_policy on two primaries
        self.brain.put_container(policy_index=int(old_policy))
        self.brain.start_handoff_half()
        self.brain.stop_primary_half()
        # force a recreate on handoffs
        self.brain.put_container(policy_index=int(old_policy))
        self.brain.delete_container()
        self.brain.put_container(policy_index=int(new_policy))
        self.brain.put_object()  # populate memcache with new_policy
        self.brain.start_primary_half()

        # at this point two primaries have old policy
        container_part, container_nodes = self.container_ring.get_nodes(
            self.account, self.container_name)
        head_responses = []
        for node in container_nodes:
            metadata = direct_client.direct_head_container(
                node, container_part, self.account, self.container_name)
            head_responses.append((node, metadata))
        old_container_node_ids = [
            node['id'] for node, metadata in head_responses if int(old_policy)
            == int(metadata['X-Backend-Storage-Policy-Index'])
        ]
        self.assertEqual(2, len(old_container_node_ids))

        # hopefully memcache still has the new policy cached
        self.brain.put_object()
        # double-check object correctly written to new policy
        conf_files = []
        for server in Manager(['container-reconciler']).servers:
            conf_files.extend(server.conf_files())
        conf_file = conf_files[0]
        client = InternalClient(conf_file, 'probe-test', 3)
        client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(new_policy)})
        client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            acceptable_statuses=(4, ),
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})

        # shutdown the containers that know about the new policy
        self.brain.stop_handoff_half()

        # and get rows enqueued from old nodes
        for server_type in ('container-replicator', 'container-updater'):
            server = Manager([server_type])
            for n in old_container_node_ids:
                server.once(number=n + 1)

        # verify entry in the queue for the "misplaced" new_policy
        for container in client.iter_containers('.misplaced_objects'):
            for obj in client.iter_objects('.misplaced_objects',
                                           container['name']):
                expected = '%d:/%s/%s/%s' % (new_policy, self.account,
                                             self.container_name,
                                             self.object_name)
                self.assertEqual(obj['name'], expected)

        Manager(['container-reconciler']).once()

        # verify object in old_policy
        client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})

        # verify object is *not* in new_policy
        client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            acceptable_statuses=(4, ),
            headers={'X-Backend-Storage-Policy-Index': int(new_policy)})

        get_to_final_state()

        # verify entry in the queue
        client = InternalClient(conf_file, 'probe-test', 3)
        for container in client.iter_containers('.misplaced_objects'):
            for obj in client.iter_objects('.misplaced_objects',
                                           container['name']):
                expected = '%d:/%s/%s/%s' % (old_policy, self.account,
                                             self.container_name,
                                             self.object_name)
                self.assertEqual(obj['name'], expected)

        Manager(['container-reconciler']).once()

        # and now it flops back
        client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(new_policy)})
        client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            acceptable_statuses=(4, ),
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})

        # make sure the queue is settled
        get_to_final_state()
        for container in client.iter_containers('.misplaced_objects'):
            for obj in client.iter_objects('.misplaced_objects',
                                           container['name']):
                self.fail('Found unexpected object %r in the queue' % obj)
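
The queue entries asserted above follow the
'<policy index>:/<account>/<container>/<object>' naming convention. A minimal
sketch of parsing such a name back into its parts (the sample entry string is
made up):

def parse_misplaced_name(name):
    # '<storage policy index>:/<account>/<container>/<object>'
    policy_index, path = name.split(':', 1)
    account, container, obj = path.lstrip('/').split('/', 2)
    return int(policy_index), account, container, obj

assert parse_misplaced_name('1:/AUTH_test/container-x/object-y') == \
    (1, 'AUTH_test', 'container-x', 'object-y')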
Exemplo n.º 10
class TestReconstructorRebuild(ECProbeTest):
    def _make_name(self, prefix):
        return ('%s%s' % (prefix, uuid.uuid4())).encode()

    def setUp(self):
        super(TestReconstructorRebuild, self).setUp()
        self.container_name = self._make_name('container-')
        self.object_name = self._make_name('object-')
        # sanity
        self.assertEqual(self.policy.policy_type, EC_POLICY)
        self.reconstructor = Manager(["object-reconstructor"])

        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url,
                             self.token,
                             self.container_name,
                             headers=headers)

        # PUT object and POST some metadata
        self.proxy_put()
        self.headers_post = {
            self._make_name('x-object-meta-').decode('utf8'):
            self._make_name('meta-bar-').decode('utf8')
        }
        client.post_object(self.url,
                           self.token,
                           self.container_name,
                           self.object_name,
                           headers=dict(self.headers_post))

        self.opart, self.onodes = self.object_ring.get_nodes(
            self.account, self.container_name, self.object_name)

        # stash frag etags and metadata for later comparison
        self.frag_headers, self.frag_etags = self._assert_all_nodes_have_frag()
        for node_index, hdrs in self.frag_headers.items():
            # sanity check
            self.assertIn(
                'X-Backend-Durable-Timestamp', hdrs,
                'Missing durable timestamp in %r' % self.frag_headers)

    def proxy_put(self, extra_headers=None):
        contents = Body()
        headers = {
            self._make_name('x-object-meta-').decode('utf8'):
            self._make_name('meta-foo-').decode('utf8'),
        }
        if extra_headers:
            headers.update(extra_headers)
        self.etag = client.put_object(self.url,
                                      self.token,
                                      self.container_name,
                                      self.object_name,
                                      contents=contents,
                                      headers=headers)

    def proxy_get(self):
        # GET object
        headers, body = client.get_object(self.url,
                                          self.token,
                                          self.container_name,
                                          self.object_name,
                                          resp_chunk_size=64 * 2**10)
        resp_checksum = md5()
        for chunk in body:
            resp_checksum.update(chunk)
        return headers, resp_checksum.hexdigest()

    def direct_get(self, node, part, require_durable=True, extra_headers=None):
        req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
        if extra_headers:
            req_headers.update(extra_headers)
        if not require_durable:
            req_headers.update(
                {'X-Backend-Fragment-Preferences': json.dumps([])})
        # node dict has unicode values so utf8 decode our path parts too in
        # case they have non-ascii characters
        if six.PY2:
            acc, con, obj = (s.decode('utf8')
                             for s in (self.account, self.container_name,
                                       self.object_name))
        else:
            acc, con, obj = self.account, self.container_name, self.object_name
        headers, data = direct_client.direct_get_object(node,
                                                        part,
                                                        acc,
                                                        con,
                                                        obj,
                                                        headers=req_headers,
                                                        resp_chunk_size=64 *
                                                        2**20)
        hasher = md5()
        for chunk in data:
            hasher.update(chunk)
        return headers, hasher.hexdigest()

    def _break_nodes(self, failed, non_durable):
        # delete partitions on the failed nodes and remove durable marker from
        # non-durable nodes
        for i, node in enumerate(self.onodes):
            part_dir = self.storage_dir(node, part=self.opart)
            if i in failed:
                shutil.rmtree(part_dir, True)
                try:
                    self.direct_get(node, self.opart)
                except direct_client.DirectClientException as err:
                    self.assertEqual(err.http_status, 404)
            elif i in non_durable:
                for dirs, subdirs, files in os.walk(part_dir):
                    for fname in files:
                        if fname.endswith('.data'):
                            non_durable_fname = fname.replace('#d', '')
                            os.rename(os.path.join(dirs, fname),
                                      os.path.join(dirs, non_durable_fname))
                            break
                headers, etag = self.direct_get(node,
                                                self.opart,
                                                require_durable=False)
                self.assertNotIn('X-Backend-Durable-Timestamp', headers)
            try:
                os.remove(os.path.join(part_dir, 'hashes.pkl'))
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise

    def _format_node(self, node):
        return '%s#%s' % (node['device'], node['index'])

    def _assert_all_nodes_have_frag(self, extra_headers=None):
        # check all frags are in place
        failures = []
        frag_etags = {}
        frag_headers = {}
        for node in self.onodes:
            try:
                headers, etag = self.direct_get(node,
                                                self.opart,
                                                extra_headers=extra_headers)
                frag_etags[node['index']] = etag
                del headers['Date']  # Date header will vary so remove it
                frag_headers[node['index']] = headers
            except direct_client.DirectClientException as err:
                failures.append((node, err))
        if failures:
            self.fail('\n'.join([
                '    Node %r raised %r' % (self._format_node(node), exc)
                for (node, exc) in failures
            ]))
        return frag_headers, frag_etags

    @contextmanager
    def _annotate_failure_with_scenario(self, failed, non_durable):
        try:
            yield
        except (AssertionError, ClientException) as err:
            self.fail(
                'Scenario with failed nodes: %r, non-durable nodes: %r\n'
                ' failed with:\n%s' %
                ([self._format_node(self.onodes[n]) for n in failed
                  ], [self._format_node(self.onodes[n])
                      for n in non_durable], err))

    def _test_rebuild_scenario(self, failed, non_durable,
                               reconstructor_cycles):
        # helper method to test a scenario with some nodes missing their
        # fragment and some nodes having non-durable fragments
        with self._annotate_failure_with_scenario(failed, non_durable):
            self._break_nodes(failed, non_durable)

        # make sure we can still GET the object and it is correct; the
        # proxy is doing decode on remaining fragments to get the obj
        with self._annotate_failure_with_scenario(failed, non_durable):
            headers, etag = self.proxy_get()
            self.assertEqual(self.etag, etag)
            for key in self.headers_post:
                self.assertIn(key, headers)
                self.assertEqual(self.headers_post[key], headers[key])

        # fire up reconstructor
        for i in range(reconstructor_cycles):
            self.reconstructor.once()

        # check GET via proxy returns expected data and metadata
        with self._annotate_failure_with_scenario(failed, non_durable):
            headers, etag = self.proxy_get()
            self.assertEqual(self.etag, etag)
            for key in self.headers_post:
                self.assertIn(key, headers)
                self.assertEqual(self.headers_post[key], headers[key])
        # check all frags are intact, durable and have expected metadata
        with self._annotate_failure_with_scenario(failed, non_durable):
            frag_headers, frag_etags = self._assert_all_nodes_have_frag()
            self.assertEqual(self.frag_etags, frag_etags)
            # self.frag_headers includes X-Backend-Durable-Timestamp, so this
            # assertion confirms that the rebuilt frags are all durable
            self.assertEqual(self.frag_headers, frag_headers)

    def test_rebuild_missing_frags(self):
        # build up a list of node lists to kill data from,
        # first try a single node
        # then adjacent nodes and then nodes >1 node apart
        single_node = (random.randint(0, 5), )
        adj_nodes = (0, 5)
        far_nodes = (0, 4)

        for failed_nodes in [single_node, adj_nodes, far_nodes]:
            self._test_rebuild_scenario(failed_nodes, [], 1)

    def test_rebuild_non_durable_frags(self):
        # build up a list of node lists to make non-durable,
        # first try a single node
        # then adjacent nodes and then nodes >1 node apart
        single_node = (random.randint(0, 5), )
        adj_nodes = (0, 5)
        far_nodes = (0, 4)

        for non_durable_nodes in [single_node, adj_nodes, far_nodes]:
            self._test_rebuild_scenario([], non_durable_nodes, 1)

    def test_rebuild_with_missing_frags_and_non_durable_frags(self):
        # pick some nodes with parts deleted, some with non-durable fragments
        scenarios = [
            # failed, non-durable
            ((0, 2), (4, )),
            ((0, 4), (2, )),
        ]
        for failed, non_durable in scenarios:
            self._test_rebuild_scenario(failed, non_durable, 3)
        scenarios = [
            # failed, non-durable
            ((0, 1), (2, )),
            ((0, 2), (1, )),
        ]
        for failed, non_durable in scenarios:
            # why 2 repeats? consider missing fragments on nodes 0 and 1 and
            # a missing durable on node 2: the first reconstructor cycle on
            # node 3 will make node 2 durable, the first cycle on node 5 will
            # rebuild on node 0; a second cycle on node 0 or 2 will rebuild on
            # node 1. Note that it is possible for the reconstructor processes
            # on each node to run in an order such that all rebuilds complete
            # in one cycle, but that is not guaranteed, so we allow 2 cycles
            # to be sure.
            self._test_rebuild_scenario(failed, non_durable, 2)
        scenarios = [
            # failed, non-durable
            ((0, 2), (1, 3, 5)),
            ((0, ), (1, 2, 4, 5)),
        ]
        for failed, non_durable in scenarios:
            # why 3 repeats? consider a missing fragment on node 0 and a
            # single durable frag on node 3: the first reconstructor cycle on
            # node 3 will make nodes 2 and 4 durable, a second cycle on nodes
            # 2 and 4 will make nodes 1 and 5 durable, and a third cycle on
            # node 1 or 5 will reconstruct the missing fragment on node 0.
            self._test_rebuild_scenario(failed, non_durable, 3)

    def test_rebuild_partner_down(self):
        # we have to pick a lower index because we have few handoffs
        nodes = self.onodes[:2]
        random.shuffle(nodes)  # left or right is fine
        primary_node, partner_node = nodes

        # capture fragment etag from partner
        failed_partner_meta, failed_partner_etag = self.direct_get(
            partner_node, self.opart)

        # and 507 the failed partner device
        device_path = self.device_dir(partner_node)
        self.kill_drive(device_path)

        # reconstruct from the primary, while one of its partners is 507'd
        self.reconstructor.once(number=self.config_number(primary_node))

        # a handoff will pick up the rebuild
        hnodes = list(self.object_ring.get_more_nodes(self.opart))
        for node in hnodes:
            try:
                found_meta, found_etag = self.direct_get(node, self.opart)
            except DirectClientException as e:
                if e.http_status != 404:
                    raise
            else:
                break
        else:
            self.fail('Unable to fetch rebuilt frag from handoffs %r '
                      'given primary nodes %r with %s unmounted '
                      'trying to rebuild from %s' % (
                          [h['device'] for h in hnodes],
                          [n['device'] for n in self.onodes],
                          partner_node['device'],
                          primary_node['device'],
                      ))
        self.assertEqual(failed_partner_etag, found_etag)
        del failed_partner_meta['Date']
        del found_meta['Date']
        self.assertEqual(failed_partner_meta, found_meta)

        # just to be nice
        self.revive_drive(device_path)

    def test_sync_expired_object(self):
        # verify that missing frag can be rebuilt for an expired object
        delete_after = 2
        self.proxy_put(extra_headers={'x-delete-after': delete_after})
        self.proxy_get()  # sanity check
        orig_frag_headers, orig_frag_etags = self._assert_all_nodes_have_frag(
            extra_headers={'X-Backend-Replication': 'True'})

        # wait for object to expire
        timeout = time.time() + delete_after + 1
        while time.time() < timeout:
            try:
                self.proxy_get()
            except ClientException as e:
                if e.http_status == 404:
                    break
                else:
                    raise
        else:
            self.fail('Timed out waiting for %s/%s to expire after %ss' %
                      (self.container_name, self.object_name, delete_after))

        # sanity check - X-Backend-Replication lets us get the expired frag...
        fail_node = random.choice(self.onodes)
        self.direct_get(fail_node,
                        self.opart,
                        extra_headers={'X-Backend-Replication': 'True'})
        # ...until we remove the frag from fail_node
        self._break_nodes([self.onodes.index(fail_node)], [])
        # ...now it's really gone
        with self.assertRaises(DirectClientException) as cm:
            self.direct_get(fail_node,
                            self.opart,
                            extra_headers={'X-Backend-Replication': 'True'})
        self.assertEqual(404, cm.exception.http_status)
        self.assertNotIn('X-Backend-Timestamp', cm.exception.http_headers)

        # run the reconstructor
        self.reconstructor.once()

        # the missing frag is now in place but expired
        with self.assertRaises(DirectClientException) as cm:
            self.direct_get(fail_node, self.opart)
        self.assertEqual(404, cm.exception.http_status)
        self.assertIn('X-Backend-Timestamp', cm.exception.http_headers)

        # check all frags are intact, durable and have expected metadata
        frag_headers, frag_etags = self._assert_all_nodes_have_frag(
            extra_headers={'X-Backend-Replication': 'True'})
        self.assertEqual(orig_frag_etags, frag_etags)
        self.maxDiff = None
        self.assertEqual(orig_frag_headers, frag_headers)

    def test_sync_unexpired_object_metadata(self):
        # verify that metadata can be sync'd to a frag that has missed a POST
        # and consequently that frag appears to be expired, when in fact the
        # POST removed the x-delete-at header
        client.put_container(self.url,
                             self.token,
                             self.container_name,
                             headers={'x-storage-policy': self.policy.name})
        opart, onodes = self.object_ring.get_nodes(self.account,
                                                   self.container_name,
                                                   self.object_name)
        delete_at = int(time.time() + 3)
        contents = ('body-%s' % uuid.uuid4()).encode()
        headers = {'x-delete-at': delete_at}
        client.put_object(self.url,
                          self.token,
                          self.container_name,
                          self.object_name,
                          headers=headers,
                          contents=contents)
        # fail a primary
        post_fail_node = random.choice(onodes)
        post_fail_path = self.device_dir(post_fail_node)
        self.kill_drive(post_fail_path)
        # post over w/o x-delete-at
        client.post_object(self.url, self.token, self.container_name,
                           self.object_name, {'content-type': 'something-new'})
        # revive failed primary
        self.revive_drive(post_fail_path)
        # wait for the delete_at to pass, and check that it thinks the object
        # is expired
        timeout = time.time() + 5
        err = None
        while time.time() < timeout:
            try:
                direct_client.direct_head_object(
                    post_fail_node,
                    opart,
                    self.account,
                    self.container_name,
                    self.object_name,
                    headers={
                        'X-Backend-Storage-Policy-Index': int(self.policy)
                    })
            except direct_client.ClientException as client_err:
                if client_err.http_status != 404:
                    raise
                err = client_err
                break
            else:
                time.sleep(0.1)
        else:
            self.fail('Failed to get a 404 from node with expired object')
        self.assertEqual(err.http_status, 404)
        self.assertIn('X-Backend-Timestamp', err.http_headers)

        # but from the proxy we've got the whole story
        headers, body = client.get_object(self.url, self.token,
                                          self.container_name,
                                          self.object_name)
        self.assertNotIn('X-Delete-At', headers)
        self.reconstructor.once()

        # ... and all the nodes have the final unexpired state
        for node in onodes:
            headers = direct_client.direct_head_object(
                node,
                opart,
                self.account,
                self.container_name,
                self.object_name,
                headers={'X-Backend-Storage-Policy-Index': int(self.policy)})
            self.assertNotIn('X-Delete-At', headers)
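
_break_nodes() above leans on the EC on-disk naming scheme in which a durable
fragment's .data filename carries a '#d' marker (roughly
'<timestamp>#<frag index>#d.data'); stripping the marker is what makes the
frag non-durable. A small standalone sketch of that rename, with a made-up
timestamp in the trailing comment:

import os

def make_non_durable(part_dir):
    # drop the '#d' durable marker from every .data filename under part_dir
    for dirpath, _subdirs, files in os.walk(part_dir):
        for fname in files:
            if fname.endswith('.data') and '#d' in fname:
                os.rename(os.path.join(dirpath, fname),
                          os.path.join(dirpath, fname.replace('#d', '')))

# e.g. '1712345678.12345#3#d.data' -> '1712345678.12345#3.data'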
Exemplo n.º 11
class ProbeTest(unittest.TestCase):
    """
    Don't instantiate this directly, use a child class instead.
    """

    def _load_rings_and_configs(self):
        self.ipport2server = {}
        self.configs = defaultdict(dict)
        self.account_ring = get_ring(
            'account',
            self.acct_cont_required_replicas,
            self.acct_cont_required_devices,
            ipport2server=self.ipport2server,
            config_paths=self.configs)
        self.container_ring = get_ring(
            'container',
            self.acct_cont_required_replicas,
            self.acct_cont_required_devices,
            ipport2server=self.ipport2server,
            config_paths=self.configs)
        self.policy = get_policy(**self.policy_requirements)
        self.object_ring = get_ring(
            self.policy.ring_name,
            self.obj_required_replicas,
            self.obj_required_devices,
            server='object',
            ipport2server=self.ipport2server,
            config_paths=self.configs)
        for server in Manager(['proxy-server']):
            for conf in server.conf_files():
                self.configs['proxy-server'] = conf

    def setUp(self):
        # previous test may have left DatabaseBroker instances in garbage with
        # open connections to db files which will prevent unmounting devices in
        # resetswift, so collect garbage now
        gc.collect()
        resetswift()
        kill_orphans()
        self._load_rings_and_configs()
        try:
            self.servers_per_port = any(
                int(readconf(c, section_name='object-replicator').get(
                    'servers_per_port', '0'))
                for c in self.configs['object-replicator'].values())

            Manager(['main']).start(wait=True)
            for ipport in self.ipport2server:
                check_server(ipport, self.ipport2server)
            proxy_conf = readconf(self.configs['proxy-server'],
                                  section_name='app:proxy-server')
            proxy_ipport = (proxy_conf.get('bind_ip', '127.0.0.1'),
                            int(proxy_conf.get('bind_port', 8080)))
            self.ipport2server[proxy_ipport] = 'proxy'
            self.url, self.token, self.account = check_server(
                proxy_ipport, self.ipport2server)
            self.account_1 = {
                'url': self.url, 'token': self.token, 'account': self.account}

            rv = _retry_timeout(_check_proxy, args=(
                'test2:tester2', 'testing2'))
            self.account_2 = {
                k: v for (k, v) in zip(('url', 'token', 'account'), rv)}

            self.replicators = Manager(
                ['account-replicator', 'container-replicator',
                 'object-replicator'])
            self.updaters = Manager(['container-updater', 'object-updater'])
        except BaseException:
            try:
                raise
            finally:
                try:
                    Manager(['all']).kill()
                except Exception:
                    pass
        info_url = "%s://%s/info" % (urlparse(self.url).scheme,
                                     urlparse(self.url).netloc)
        proxy_conn = client.http_connection(info_url)
        self.cluster_info = client.get_capabilities(proxy_conn)

    def tearDown(self):
        Manager(['all']).kill()

    def assertLengthEqual(self, obj, length):
        obj_len = len(obj)
        self.assertEqual(obj_len, length, 'len(%r) == %d, not %d' % (
            obj, obj_len, length))

    def device_dir(self, node):
        server_type, config_number = get_server_number(
            (node['ip'], node['port']), self.ipport2server)
        repl_server = '%s-replicator' % server_type
        conf = readconf(self.configs[repl_server][config_number],
                        section_name=repl_server)
        return os.path.join(conf['devices'], node['device'])

    def storage_dir(self, node, part=None, policy=None):
        policy = policy or self.policy
        device_path = self.device_dir(node)
        path_parts = [device_path, get_data_dir(policy)]
        if part is not None:
            path_parts.append(str(part))
        return os.path.join(*path_parts)

    def config_number(self, node):
        _server_type, config_number = get_server_number(
            (node['ip'], node['port']), self.ipport2server)
        return config_number

    def is_local_to(self, node1, node2):
        """
        Return True if both ring devices are "local" to each other (on the
        same "server").
        """
        if self.servers_per_port:
            return node1['ip'] == node2['ip']

        # Without a disambiguating IP, for SAIOs, we have to assume ports
        # uniquely identify "servers".  SAIOs should be configured to *either*
        # have unique IPs per node (e.g. 127.0.0.1, 127.0.0.2, etc.) OR unique
        # ports per server (i.e. sdb1 & sdb5 would have same port numbers in
        # the 8-disk EC ring).
        return node1['port'] == node2['port']

    def get_to_final_state(self):
        # these .stop()s are probably not strictly necessary,
        # but may prevent race conditions
        self.replicators.stop()
        self.updaters.stop()

        self.replicators.once()
        self.updaters.once()
        self.replicators.once()

    def kill_drive(self, device):
        if os.path.ismount(device):
            os.system('sudo umount %s' % device)
        else:
            renamer(device, device + "X")

    def revive_drive(self, device):
        disabled_name = device + "X"
        if os.path.isdir(disabled_name):
            renamer(disabled_name, device)
        else:
            os.system('sudo mount %s' % device)

    def make_internal_client(self):
        tempdir = mkdtemp()
        try:
            conf_path = os.path.join(tempdir, 'internal_client.conf')
            conf_body = """
            [DEFAULT]
            swift_dir = /etc/swift

            [pipeline:main]
            pipeline = catch_errors cache copy proxy-server

            [app:proxy-server]
            use = egg:swift#proxy
            allow_account_management = True

            [filter:copy]
            use = egg:swift#copy

            [filter:cache]
            use = egg:swift#memcache

            [filter:catch_errors]
            use = egg:swift#catch_errors
            """
            with open(conf_path, 'w') as f:
                f.write(dedent(conf_body))
            return internal_client.InternalClient(conf_path, 'test', 1)
        finally:
            shutil.rmtree(tempdir)

    def get_all_object_nodes(self):
        """
        Returns a list of all nodes in all object storage policies.

        :return: a list of node dicts.
        """
        all_obj_nodes = {}
        for policy in ENABLED_POLICIES:
            for dev in policy.object_ring.devs:
                all_obj_nodes[dev['device']] = dev
        return all_obj_nodes.values()

    def gather_async_pendings(self, onodes):
        """
        Returns a list of paths to async pending files found on given nodes.

        :param onodes: a list of nodes.
        :return: a list of file paths.
        """
        async_pendings = []
        for onode in onodes:
            device_dir = self.device_dir(onode)
            for ap_pol_dir in os.listdir(device_dir):
                if not ap_pol_dir.startswith('async_pending'):
                    # skip 'objects', 'containers', etc.
                    continue
                async_pending_dir = os.path.join(device_dir, ap_pol_dir)
                try:
                    ap_dirs = os.listdir(async_pending_dir)
                except OSError as err:
                    if err.errno == errno.ENOENT:
                        pass
                    else:
                        raise
                else:
                    for ap_dir in ap_dirs:
                        ap_dir_fullpath = os.path.join(
                            async_pending_dir, ap_dir)
                        async_pendings.extend([
                            os.path.join(ap_dir_fullpath, ent)
                            for ent in os.listdir(ap_dir_fullpath)])
        return async_pendings

    def run_custom_daemon(self, klass, conf_section, conf_index,
                          custom_conf, **kwargs):
        conf_file = self.configs[conf_section][conf_index]
        conf = utils.readconf(conf_file, conf_section)
        conf.update(custom_conf)
        # Use a CaptureLogAdapter in order to preserve the pattern of tests
        # calling the log accessor methods (e.g. get_lines_for_level) directly
        # on the logger instance
        with capture_logger(conf, conf.get('log_name', conf_section),
                            log_to_console=kwargs.pop('verbose', False),
                            log_route=conf_section) as log_adapter:
            daemon = klass(conf, log_adapter)
            daemon.run_once(**kwargs)
        return daemon
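
A minimal sketch of how a concrete probe test might build on ProbeTest; the
class attributes shown are assumptions inferred from the names that
_load_rings_and_configs() reads, and the replica/device counts are purely
illustrative:

class ExampleProbeTest(ProbeTest):
    # required cluster shape, consumed by _load_rings_and_configs()
    acct_cont_required_replicas = 3
    acct_cont_required_devices = 4
    obj_required_replicas = 3
    obj_required_devices = 4
    policy_requirements = {}  # no constraints, any policy will do

    def test_settle(self):
        # drive the cluster to a consistent state via the shared helper
        self.get_to_final_state()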
Exemplo n.º 12
class TestReconstructorRevert(ECProbeTest):

    def setUp(self):
        super(TestReconstructorRevert, self).setUp()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()

        # sanity
        self.assertEqual(self.policy.policy_type, EC_POLICY)
        self.reconstructor = Manager(["object-reconstructor"])

    def proxy_get(self):
        # GET object
        headers, body = client.get_object(self.url, self.token,
                                          self.container_name,
                                          self.object_name,
                                          resp_chunk_size=64 * 2 ** 10)
        resp_checksum = md5()
        for chunk in body:
            resp_checksum.update(chunk)
        return resp_checksum.hexdigest()

    def direct_get(self, node, part):
        req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
        headers, data = direct_client.direct_get_object(
            node, part, self.account, self.container_name,
            self.object_name, headers=req_headers,
            resp_chunk_size=64 * 2 ** 20)
        hasher = md5()
        for chunk in data:
            hasher.update(chunk)
        return hasher.hexdigest()

    def test_revert_object(self):
        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url, self.token, self.container_name,
                             headers=headers)

        # get our node lists
        opart, onodes = self.object_ring.get_nodes(
            self.account, self.container_name, self.object_name)
        hnodes = self.object_ring.get_more_nodes(opart)

        # kill a parity count (2) of primary nodes so we can force data
        # onto handoffs; we do that by renaming dev dirs to induce 507s
        p_dev1 = self.device_dir('object', onodes[0])
        p_dev2 = self.device_dir('object', onodes[1])
        self.kill_drive(p_dev1)
        self.kill_drive(p_dev2)

        # PUT object
        contents = Body()
        headers = {'x-object-meta-foo': 'meta-foo'}
        headers_post = {'x-object-meta-bar': 'meta-bar'}
        client.put_object(self.url, self.token, self.container_name,
                          self.object_name, contents=contents,
                          headers=headers)
        client.post_object(self.url, self.token, self.container_name,
                           self.object_name, headers=headers_post)
        # (Some versions of?) swiftclient will mutate the headers dict on post
        headers_post.pop('X-Auth-Token', None)

        # these primaries can't serve the data any more, we expect 507
        # here and not 404 because we're using mount_check to kill nodes
        for onode in (onodes[0], onodes[1]):
            try:
                self.direct_get(onode, opart)
            except direct_client.DirectClientException as err:
                self.assertEqual(err.http_status, 507)
            else:
                self.fail('Node data on %r was not fully destroyed!' %
                          (onode,))

        # now take out another primary
        p_dev3 = self.device_dir('object', onodes[2])
        self.kill_drive(p_dev3)

        # this node can't serve the data any more
        try:
            self.direct_get(onodes[2], opart)
        except direct_client.DirectClientException as err:
            self.assertEqual(err.http_status, 507)
        else:
            self.fail('Node data on %r was not fully destroyed!' %
                      (onodes[2],))

        # make sure we can still GET the object and it's correct;
        # we're now pulling from handoffs and reconstructing
        etag = self.proxy_get()
        self.assertEqual(etag, contents.etag)

        # rename the dev dirs so they don't 507 anymore
        self.revive_drive(p_dev1)
        self.revive_drive(p_dev2)
        self.revive_drive(p_dev3)

        # fire up reconstructor on handoff nodes only
        for hnode in hnodes:
            hnode_id = (hnode['port'] - 6000) // 10
            self.reconstructor.once(number=hnode_id)

        # the first and third primaries have data again
        for onode in (onodes[0], onodes[2]):
            self.direct_get(onode, opart)

        # check meta
        meta = client.head_object(self.url, self.token,
                                  self.container_name,
                                  self.object_name)
        for key in headers_post:
            self.assertIn(key, meta)
            self.assertEqual(meta[key], headers_post[key])

        # handoffs are empty
        for hnode in hnodes:
            try:
                self.direct_get(hnode, opart)
            except direct_client.DirectClientException as err:
                self.assertEqual(err.http_status, 404)
            else:
                self.fail('Node data on %r was not fully destroyed!' %
                          (hnode,))

    def test_delete_propagate(self):
        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url, self.token, self.container_name,
                             headers=headers)

        # get our node lists
        opart, onodes = self.object_ring.get_nodes(
            self.account, self.container_name, self.object_name)
        hnodes = list(itertools.islice(
            self.object_ring.get_more_nodes(opart), 2))

        # PUT object
        contents = Body()
        client.put_object(self.url, self.token, self.container_name,
                          self.object_name, contents=contents)

        # now lets shut down a couple of primaries
        failed_nodes = random.sample(onodes, 2)
        for node in failed_nodes:
            self.kill_drive(self.device_dir('object', node))

        # Write tombstones over the nodes that are still online
        client.delete_object(self.url, self.token,
                             self.container_name,
                             self.object_name)

        # spot check the primary nodes that are still online
        delete_timestamp = None
        for node in onodes:
            if node in failed_nodes:
                continue
            try:
                self.direct_get(node, opart)
            except direct_client.DirectClientException as err:
                self.assertEqual(err.http_status, 404)
                delete_timestamp = err.http_headers['X-Backend-Timestamp']
            else:
                self.fail('Node data on %r was not fully destroyed!' %
                          (node,))

        # run the reconstructor on the handoff node multiple times until
        # tombstone is pushed out - each handoff node syncs to a few
        # primaries each time
        iterations = 0
        while iterations < 52:
            self.reconstructor.once(number=self.config_number(hnodes[0]))
            iterations += 1
            # see if the tombstone is reverted
            try:
                self.direct_get(hnodes[0], opart)
            except direct_client.DirectClientException as err:
                self.assertEqual(err.http_status, 404)
                if 'X-Backend-Timestamp' not in err.http_headers:
                    # this means the tombstone is *gone* so it's reverted
                    break
        else:
            self.fail('Still found tombstone on %r after %s iterations' % (
                hnodes[0], iterations))

        # tombstone is still on the *second* handoff
        try:
            self.direct_get(hnodes[1], opart)
        except direct_client.DirectClientException as err:
            self.assertEqual(err.http_status, 404)
            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
                             delete_timestamp)
        else:
            self.fail('Found obj data on %r' % hnodes[1])

        # repair the primaries
        self.revive_drive(self.device_dir('object', failed_nodes[0]))
        self.revive_drive(self.device_dir('object', failed_nodes[1]))

        # run reconstructor on second handoff
        self.reconstructor.once(number=self.config_number(hnodes[1]))

        # verify tombstone is reverted on the first pass
        try:
            self.direct_get(hnodes[1], opart)
        except direct_client.DirectClientException as err:
            self.assertEqual(err.http_status, 404)
            self.assertNotIn('X-Backend-Timestamp', err.http_headers)
        else:
            self.fail('Found obj data on %r' % hnodes[1])

        # sanity: make sure a proxy GET can't find it
        try:
            self.proxy_get()
        except Exception as err:
            self.assertEqual(err.http_status, 404)
        else:
            self.fail('Node data on %r was not fully destroyed!' %
                      (onodes[0],))

    def test_reconstruct_from_reverted_fragment_archive(self):
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url, self.token, self.container_name,
                             headers=headers)

        # get our node lists
        opart, onodes = self.object_ring.get_nodes(
            self.account, self.container_name, self.object_name)

        # find a primary server that only has one of its devices in the
        # primary node list
        group_nodes_by_config = defaultdict(list)
        for n in onodes:
            group_nodes_by_config[self.config_number(n)].append(n)
        for config_number, node_list in group_nodes_by_config.items():
            if len(node_list) == 1:
                break
        else:
            self.fail('ring balancing did not use all available nodes')
        primary_node = node_list[0]

        # ... and 507 its device
        primary_device = self.device_dir('object', primary_node)
        self.kill_drive(primary_device)

        # PUT object
        contents = Body()
        etag = client.put_object(self.url, self.token, self.container_name,
                                 self.object_name, contents=contents)
        self.assertEqual(contents.etag, etag)

        # fix the primary device and sanity GET
        self.revive_drive(primary_device)
        self.assertEqual(etag, self.proxy_get())

        # find a handoff holding the fragment
        for hnode in self.object_ring.get_more_nodes(opart):
            try:
                reverted_fragment_etag = self.direct_get(hnode, opart)
            except direct_client.DirectClientException as err:
                if err.http_status != 404:
                    raise
            else:
                break
        else:
            self.fail('Unable to find handoff fragment!')

        # we'll force the handoff device to revert instead of potentially
        # racing with rebuild by deleting any other fragments that may be on
        # the same server
        handoff_fragment_etag = None
        for node in onodes:
            if self.is_local_to(node, hnode):
                # we'll keep track of the etag of this fragment we're removing
                # in case we need it later (cue foreshadowing music)...
                try:
                    handoff_fragment_etag = self.direct_get(node, opart)
                except direct_client.DirectClientException as err:
                    if err.http_status != 404:
                        raise
                    # this just means our handoff device was on the same
                    # machine as the primary!
                    continue
                # use the primary nodes device - not the hnode device
                part_dir = self.storage_dir('object', node, part=opart)
                shutil.rmtree(part_dir, True)

        # revert from handoff device with reconstructor
        self.reconstructor.once(number=self.config_number(hnode))

        # verify fragment reverted to primary server
        self.assertEqual(reverted_fragment_etag,
                         self.direct_get(primary_node, opart))

        # now we'll remove some data on one of the primary node's partners
        partner = random.choice(reconstructor._get_partners(
            primary_node['index'], onodes))

        try:
            rebuilt_fragment_etag = self.direct_get(partner, opart)
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
            # partner already had its fragment removed
            if (handoff_fragment_etag is not None and
                    self.is_local_to(hnode, partner)):
                # oh, well that makes sense then...
                rebuilt_fragment_etag = handoff_fragment_etag
            else:
                # I wonder what happened?
                self.fail('Partner inexplicably missing fragment!')
        part_dir = self.storage_dir('object', partner, part=opart)
        shutil.rmtree(part_dir, True)

        # sanity, it's gone
        try:
            self.direct_get(partner, opart)
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
        else:
            self.fail('successful GET of removed partner fragment archive!?')

        # and force the primary node to do a rebuild
        self.reconstructor.once(number=self.config_number(primary_node))

        # and validate the partner's rebuilt_fragment_etag
        try:
            self.assertEqual(rebuilt_fragment_etag,
                             self.direct_get(partner, opart))
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
            else:
                self.fail('Did not find rebuilt fragment on partner node')
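
test_revert_object() above derives a config number straight from the node
port: under the standard SAIO layout the object servers listen on ports 6010,
6020, 6030 and 6040, so the arithmetic maps a port to server number 1-4
(assuming that layout; config_number() elsewhere does the same lookup via the
ring). A tiny sketch of the mapping:

def saio_config_number(port):
    # SAIO convention: object servers on ports 6010/6020/6030/6040
    return (port - 6000) // 10

assert [saio_config_number(p) for p in (6010, 6020, 6030, 6040)] == \
    [1, 2, 3, 4]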
Exemplo n.º 13
class TestObjectExpirer(ReplProbeTest):

    def setUp(self):
        if len(ENABLED_POLICIES) < 2:
            raise SkipTest('Need more than one policy')

        self.expirer = Manager(['object-expirer'])
        self.expirer.start()
        err = self.expirer.stop()
        if err:
            raise SkipTest('Unable to verify object-expirer service')

        conf_files = []
        for server in self.expirer.servers:
            conf_files.extend(server.conf_files())
        conf_file = conf_files[0]
        self.client = InternalClient(conf_file, 'probe-test', 3)

        super(TestObjectExpirer, self).setUp()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()
        self.brain = BrainSplitter(self.url, self.token, self.container_name,
                                   self.object_name)

    def test_expirer_object_split_brain(self):
        old_policy = random.choice(ENABLED_POLICIES)
        wrong_policy = random.choice([p for p in ENABLED_POLICIES
                                      if p != old_policy])
        # create an expiring object and a container with the wrong policy
        self.brain.stop_primary_half()
        self.brain.put_container(int(old_policy))
        self.brain.put_object(headers={'X-Delete-After': 2})
        # get the object timestamp
        metadata = self.client.get_object_metadata(
            self.account, self.container_name, self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
        create_timestamp = Timestamp(metadata['x-timestamp'])
        self.brain.start_primary_half()
        # get the expiring object updates in their queue, while we have all
        # the servers up
        Manager(['object-updater']).once()
        self.brain.stop_handoff_half()
        self.brain.put_container(int(wrong_policy))
        # don't start handoff servers, only wrong policy is available

        # make sure auto-created containers get in the account listing
        Manager(['container-updater']).once()
        # this guy should no-op since it's unable to expire the object
        self.expirer.once()

        self.brain.start_handoff_half()
        self.get_to_final_state()

        # validate object is expired
        found_in_policy = None
        metadata = self.client.get_object_metadata(
            self.account, self.container_name, self.object_name,
            acceptable_statuses=(4,),
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
        self.assertIn('x-backend-timestamp', metadata)
        self.assertEqual(Timestamp(metadata['x-backend-timestamp']),
                         create_timestamp)

        # but it is still in the listing
        for obj in self.client.iter_objects(self.account,
                                            self.container_name):
            if self.object_name == obj['name']:
                break
        else:
            self.fail('Did not find listing for %s' % self.object_name)

        # clear proxy cache
        client.post_container(self.url, self.token, self.container_name, {})
        # run the expirer again after replication
        self.expirer.once()

        # object is not in the listing
        for obj in self.client.iter_objects(self.account,
                                            self.container_name):
            if self.object_name == obj['name']:
                self.fail('Found listing for %s' % self.object_name)

        # and validate object is tombstoned
        found_in_policy = None
        for policy in ENABLED_POLICIES:
            metadata = self.client.get_object_metadata(
                self.account, self.container_name, self.object_name,
                acceptable_statuses=(4,),
                headers={'X-Backend-Storage-Policy-Index': int(policy)})
            if 'x-backend-timestamp' in metadata:
                if found_in_policy:
                    self.fail('found object in %s and also %s' %
                              (found_in_policy, policy))
                found_in_policy = policy
                self.assertIn('x-backend-timestamp', metadata)
                self.assertGreater(
                    Timestamp(metadata['x-backend-timestamp']),
                    create_timestamp)

    def test_expirer_object_should_not_be_expired(self):
        obj_brain = BrainSplitter(self.url, self.token, self.container_name,
                                  self.object_name, 'object', self.policy)

        # T(obj_created) < T(obj_deleted with x-delete-at) < T(obj_recreated)
        #   < T(expirer_executed)
        # The recreated obj should appear in any split-brain case

        # T(obj_created)
        first_created_at = time.time()
        # T(obj_deleted with x-delete-at)
        # object-server accepts req only if X-Delete-At is later than 'now'
        delete_at = int(time.time() + 1.5)
        # T(obj_recreated)
        recreated_at = time.time() + 2.0
        # T(expirer_executed) - 'now'
        sleep_for_expirer = 2.01

        obj_brain.put_container(int(self.policy))
        obj_brain.put_object(
            headers={'X-Delete-At': delete_at,
                     'X-Timestamp': Timestamp(first_created_at).internal})

        # some object servers stopped
        obj_brain.stop_primary_half()
        obj_brain.put_object(
            headers={'X-Timestamp': Timestamp(recreated_at).internal,
                     'X-Object-Meta-Expired': 'False'})

        # make sure auto-created containers get in the account listing
        Manager(['container-updater']).once()
        # some object servers recovered
        obj_brain.start_primary_half()
        # sleep to make sure expirer runs at the time after obj is recreated
        time.sleep(sleep_for_expirer)
        self.expirer.once()
        # inconsistent state of objects is recovered
        Manager(['object-replicator']).once()

        # check if you can get recreated object
        metadata = self.client.get_object_metadata(
            self.account, self.container_name, self.object_name)
        self.assertIn('x-object-meta-expired', metadata)
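
Both expirer tests above rely on the two expiration headers: X-Delete-After
is a relative offset that the proxy converts into an absolute X-Delete-At
timestamp. A small sketch of that relationship (the conversion shown is the
documented semantics, not the proxy's code):

import time

def delete_after_to_delete_at(delete_after_seconds, now=None):
    # 'X-Delete-After: N' is equivalent to 'X-Delete-At: int(now + N)'
    now = time.time() if now is None else now
    return int(now + delete_after_seconds)

# e.g. the tests' {'X-Delete-After': 2} is roughly
# {'X-Delete-At': str(delete_after_to_delete_at(2))}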
Exemplo n.º 14
class TestReconstructorRebuild(ECProbeTest):
    def _make_name(self, prefix):
        return '%s%s' % (prefix, uuid.uuid4())

    def setUp(self):
        super(TestReconstructorRebuild, self).setUp()
        self.container_name = self._make_name('container-')
        self.object_name = self._make_name('object-')
        # sanity
        self.assertEqual(self.policy.policy_type, EC_POLICY)
        self.reconstructor = Manager(["object-reconstructor"])

        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url,
                             self.token,
                             self.container_name,
                             headers=headers)

        # PUT object and POST some metadata
        contents = Body()
        headers = {
            self._make_name('x-object-meta-').decode('utf8'):
            self._make_name('meta-foo-').decode('utf8'),
        }
        self.headers_post = {
            self._make_name('x-object-meta-').decode('utf8'):
            self._make_name('meta-bar-').decode('utf8')
        }

        self.etag = client.put_object(self.url,
                                      self.token,
                                      self.container_name,
                                      self.object_name,
                                      contents=contents,
                                      headers=headers)
        client.post_object(self.url,
                           self.token,
                           self.container_name,
                           self.object_name,
                           headers=dict(self.headers_post))

        self.opart, self.onodes = self.object_ring.get_nodes(
            self.account, self.container_name, self.object_name)

        # stash frag etags and metadata for later comparison
        self.frag_headers, self.frag_etags = self._assert_all_nodes_have_frag()
        for node_index, hdrs in self.frag_headers.items():
            # sanity check
            self.assertIn(
                'X-Backend-Durable-Timestamp', hdrs,
                'Missing durable timestamp in %r' % self.frag_headers)

    def proxy_get(self):
        # GET object
        headers, body = client.get_object(self.url,
                                          self.token,
                                          self.container_name,
                                          self.object_name,
                                          resp_chunk_size=64 * 2**10)
        resp_checksum = md5()
        for chunk in body:
            resp_checksum.update(chunk)
        return headers, resp_checksum.hexdigest()

    def direct_get(self, node, part, require_durable=True):
        req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
        if not require_durable:
            req_headers.update(
                {'X-Backend-Fragment-Preferences': json.dumps([])})
        # node dict has unicode values so utf8 decode our path parts too in
        # case they have non-ascii characters
        headers, data = direct_client.direct_get_object(
            node,
            part,
            self.account.decode('utf8'),
            self.container_name.decode('utf8'),
            self.object_name.decode('utf8'),
            headers=req_headers,
            resp_chunk_size=64 * 2**20)
        hasher = md5()
        for chunk in data:
            hasher.update(chunk)
        return headers, hasher.hexdigest()

    def _break_nodes(self, failed, non_durable):
        # delete partitions on the failed nodes and remove durable marker from
        # non-durable nodes
        for i, node in enumerate(self.onodes):
            part_dir = self.storage_dir('object', node, part=self.opart)
            if i in failed:
                shutil.rmtree(part_dir, True)
                try:
                    self.direct_get(node, self.opart)
                except direct_client.DirectClientException as err:
                    self.assertEqual(err.http_status, 404)
            elif i in non_durable:
                for dirs, subdirs, files in os.walk(part_dir):
                    for fname in files:
                        if fname.endswith('.data'):
                            non_durable_fname = fname.replace('#d', '')
                            os.rename(os.path.join(dirs, fname),
                                      os.path.join(dirs, non_durable_fname))
                            break
                headers, etag = self.direct_get(node,
                                                self.opart,
                                                require_durable=False)
                self.assertNotIn('X-Backend-Durable-Timestamp', headers)
            try:
                os.remove(os.path.join(part_dir, 'hashes.pkl'))
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
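
    @staticmethod
    def _strip_durable_marker(fname):
        # Illustrative helper mirroring the rename in _break_nodes above:
        # EC .data filenames encode a timestamp, fragment index and durable
        # marker, e.g. '1585250209.52941#3#d.data' is durable fragment 3
        # while '1585250209.52941#3.data' is the same fragment before it is
        # marked durable (the sample timestamp here is made up). Stripping
        # '#d' simulates a write that never became durable.
        assert fname.endswith('#d.data')
        return fname.replace('#d', '')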

    def _format_node(self, node):
        return '%s#%s' % (node['device'], node['index'])

    def _assert_all_nodes_have_frag(self):
        # check all frags are in place
        failures = []
        frag_etags = {}
        frag_headers = {}
        for node in self.onodes:
            try:
                headers, etag = self.direct_get(node, self.opart)
                frag_etags[node['index']] = etag
                del headers['Date']  # Date header will vary so remove it
                frag_headers[node['index']] = headers
            except direct_client.DirectClientException as err:
                failures.append((node, err))
        if failures:
            self.fail('\n'.join([
                '    Node %r raised %r' % (self._format_node(node), exc)
                for (node, exc) in failures
            ]))
        return frag_headers, frag_etags

    @contextmanager
    def _annotate_failure_with_scenario(self, failed, non_durable):
        try:
            yield
        except (AssertionError, ClientException) as err:
            self.fail(
                'Scenario with failed nodes: %r, non-durable nodes: %r\n'
                ' failed with:\n%s' %
                ([self._format_node(self.onodes[n]) for n in failed
                  ], [self._format_node(self.onodes[n])
                      for n in non_durable], err))

    def _test_rebuild_scenario(self, failed, non_durable,
                               reconstructor_cycles):
        # helper method to test a scenario with some nodes missing their
        # fragment and some nodes having non-durable fragments
        with self._annotate_failure_with_scenario(failed, non_durable):
            self._break_nodes(failed, non_durable)

        # make sure we can still GET the object and it is correct; the
        # proxy decodes the remaining fragments to get the obj
        with self._annotate_failure_with_scenario(failed, non_durable):
            headers, etag = self.proxy_get()
            self.assertEqual(self.etag, etag)
            for key in self.headers_post:
                self.assertIn(key, headers)
                self.assertEqual(self.headers_post[key], headers[key])

        # fire up reconstructor
        for i in range(reconstructor_cycles):
            self.reconstructor.once()

        # check GET via proxy returns expected data and metadata
        with self._annotate_failure_with_scenario(failed, non_durable):
            headers, etag = self.proxy_get()
            self.assertEqual(self.etag, etag)
            for key in self.headers_post:
                self.assertIn(key, headers)
                self.assertEqual(self.headers_post[key], headers[key])
        # check all frags are intact, durable and have expected metadata
        with self._annotate_failure_with_scenario(failed, non_durable):
            frag_headers, frag_etags = self._assert_all_nodes_have_frag()
            self.assertEqual(self.frag_etags, frag_etags)
            # self.frag_headers includes X-Backend-Durable-Timestamp so this
            # assertion confirms that the rebuilt frags are all durable
            self.assertEqual(self.frag_headers, frag_headers)

    def test_rebuild_missing_frags(self):
        # build up a list of node lists to kill data from,
        # first try a single node
        # then adjacent nodes and then nodes >1 node apart
        single_node = (random.randint(0, 5), )
        adj_nodes = (0, 5)
        far_nodes = (0, 4)

        for failed_nodes in [single_node, adj_nodes, far_nodes]:
            self._test_rebuild_scenario(failed_nodes, [], 1)

    def test_rebuild_non_durable_frags(self):
        # build up a list of node lists to make non-durable,
        # first try a single node
        # then adjacent nodes and then nodes >1 node apart
        single_node = (random.randint(0, 5), )
        adj_nodes = (0, 5)
        far_nodes = (0, 4)

        for non_durable_nodes in [single_node, adj_nodes, far_nodes]:
            self._test_rebuild_scenario([], non_durable_nodes, 1)

    def test_rebuild_with_missing_frags_and_non_durable_frags(self):
        # pick some nodes with parts deleted, some with non-durable fragments
        scenarios = [
            # failed, non-durable
            ((0, 2), (4, )),
            ((0, 4), (2, )),
        ]
        for failed, non_durable in scenarios:
            self._test_rebuild_scenario(failed, non_durable, 3)
        scenarios = [
            # failed, non-durable
            ((0, 1), (2, )),
            ((0, 2), (1, )),
        ]
        for failed, non_durable in scenarios:
            # why 2 repeats? consider missing fragment on nodes 0, 1 and
            # missing durable on node 2: first reconstructor cycle on node 3
            # will make node 2 durable, first cycle on node 5 will rebuild on
            # node 0; second cycle on node 0 or 2 will rebuild on node 1. Note
            # that it is possible that reconstructor processes on each node
            # run in an order such that all rebuilds complete in one cycle,
            # but that is not guaranteed; we allow 2 cycles to be sure.
            self._test_rebuild_scenario(failed, non_durable, 2)
        scenarios = [
            # failed, non-durable
            ((0, 2), (1, 3, 5)),
            ((0, ), (1, 2, 4, 5)),
        ]
        for failed, non_durable in scenarios:
            # why 3 repeats? consider a missing fragment on node 0 and a
            # single durable fragment on node 3: the first reconstructor
            # cycle on node 3 will make nodes 2 and 4 durable, the second
            # cycle on nodes 2 and 4 will make nodes 1 and 5 durable, and
            # the third cycle on node 1 or 5 will reconstruct the missing
            # fragment on node 0. (A toy model of this propagation follows
            # this test.)
            self._test_rebuild_scenario(failed, non_durable, 3)
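
    # A toy model (illustrative only, not Swift code) of the cycle-count
    # reasoning in the comments above: assume that on each reconstructor
    # cycle, every node holding a durable fragment makes its two ring
    # partners durable, rebuilding their fragments if they are missing.
    @staticmethod
    def _cycles_until_all_durable(num_nodes, durable_indexes):
        durable = set(durable_indexes)
        cycles = 0
        while len(durable) < num_nodes:
            neighbours = set()
            for i in durable:
                neighbours.add((i - 1) % num_nodes)
                neighbours.add((i + 1) % num_nodes)
            durable |= neighbours
            cycles += 1
        return cycles
    # e.g. _cycles_until_all_durable(6, [3]) == 3, matching the
    # 'why 3 repeats?' reasoning above.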

    def test_rebuild_partner_down(self):
        # find a primary server that only has one of its devices in the
        # primary node list
        group_nodes_by_config = defaultdict(list)
        for n in self.onodes:
            group_nodes_by_config[self.config_number(n)].append(n)
        for config_number, node_list in group_nodes_by_config.items():
            if len(node_list) == 1:
                break
        else:
            self.fail('ring balancing did not use all available nodes')
        primary_node = node_list[0]

        # randomly pick one of its partners to fail (a partner-selection
        # sketch follows this test)
        partner_node = random.choice(
            _get_partners(primary_node['index'], self.onodes))

        # 507 the partner device
        device_path = self.device_dir('object', partner_node)
        self.kill_drive(device_path)

        # select another primary sync_to node to fail
        failed_primary = [
            n for n in self.onodes
            if n['id'] not in (primary_node['id'], partner_node['id'])
        ][0]
        # ... capture its fragment etag
        failed_primary_meta, failed_primary_etag = self.direct_get(
            failed_primary, self.opart)
        # ... and delete it
        part_dir = self.storage_dir('object', failed_primary, part=self.opart)
        shutil.rmtree(part_dir, True)

        # reconstruct from the primary, while one of its partners is 507'd
        self.reconstructor.once(number=self.config_number(primary_node))

        # the other failed primary will get its fragment rebuilt instead
        failed_primary_meta_new, failed_primary_etag_new = self.direct_get(
            failed_primary, self.opart)
        del failed_primary_meta['Date']
        del failed_primary_meta_new['Date']
        self.assertEqual(failed_primary_etag, failed_primary_etag_new)
        self.assertEqual(failed_primary_meta, failed_primary_meta_new)

        # just to be nice
        self.revive_drive(device_path)
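
    # A minimal sketch, assuming partners are simply the ring neighbours of
    # a fragment index; this mirrors what _get_partners is used for above,
    # but is not the real implementation.
    @staticmethod
    def _get_partners_sketch(frag_index, part_nodes):
        return [part_nodes[(frag_index - 1) % len(part_nodes)],
                part_nodes[(frag_index + 1) % len(part_nodes)]]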
Exemplo n.º 15
0
class TestObjectExpirer(ReplProbeTest):
    def setUp(self):
        self.expirer = Manager(['object-expirer'])
        self.expirer.start()
        err = self.expirer.stop()
        if err:
            raise unittest.SkipTest('Unable to verify object-expirer service')

        conf_files = []
        for server in self.expirer.servers:
            conf_files.extend(server.conf_files())
        conf_file = conf_files[0]
        self.client = InternalClient(conf_file, 'probe-test', 3)

        super(TestObjectExpirer, self).setUp()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()
        self.brain = BrainSplitter(self.url, self.token, self.container_name,
                                   self.object_name)

    def _check_obj_in_container_listing(self):
        for obj in self.client.iter_objects(self.account, self.container_name):

            if self.object_name == obj['name']:
                return True

        return False

    @unittest.skipIf(len(ENABLED_POLICIES) < 2, "Need more than one policy")
    def test_expirer_object_split_brain(self):
        old_policy = random.choice(ENABLED_POLICIES)
        wrong_policy = random.choice(
            [p for p in ENABLED_POLICIES if p != old_policy])
        # create an expiring object and a container with the wrong policy
        self.brain.stop_primary_half()
        self.brain.put_container(int(old_policy))
        self.brain.put_object(headers={'X-Delete-After': 2})
        # get the object timestamp
        metadata = self.client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
        create_timestamp = Timestamp(metadata['x-timestamp'])
        self.brain.start_primary_half()
        # get the expiring object updates in their queue, while we have all
        # the servers up
        Manager(['object-updater']).once()
        self.brain.stop_handoff_half()
        self.brain.put_container(int(wrong_policy))
        # don't start handoff servers, only wrong policy is available

        # make sure auto-created containers get in the account listing
        Manager(['container-updater']).once()
        # this guy should no-op since it's unable to expire the object
        self.expirer.once()

        self.brain.start_handoff_half()
        self.get_to_final_state()

        # validate object is expired
        found_in_policy = None
        metadata = self.client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            acceptable_statuses=(4, ),
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
        self.assertIn('x-backend-timestamp', metadata)
        self.assertEqual(Timestamp(metadata['x-backend-timestamp']),
                         create_timestamp)

        # but it is still in the listing
        self.assertTrue(self._check_obj_in_container_listing(),
                        msg='Did not find listing for %s' % self.object_name)

        # clear proxy cache
        client.post_container(self.url, self.token, self.container_name, {})
        # run the expirer again after replication
        self.expirer.once()

        # object is not in the listing
        self.assertFalse(self._check_obj_in_container_listing(),
                         msg='Found listing for %s' % self.object_name)

        # and validate object is tombstoned
        found_in_policy = None
        for policy in ENABLED_POLICIES:
            metadata = self.client.get_object_metadata(
                self.account,
                self.container_name,
                self.object_name,
                acceptable_statuses=(4, ),
                headers={'X-Backend-Storage-Policy-Index': int(policy)})
            if 'x-backend-timestamp' in metadata:
                if found_in_policy:
                    self.fail('found object in %s and also %s' %
                              (found_in_policy, policy))
                found_in_policy = policy
                self.assertIn('x-backend-timestamp', metadata)
                self.assertGreater(Timestamp(metadata['x-backend-timestamp']),
                                   create_timestamp)

    def test_expirer_doesnt_make_async_pendings(self):
        # The object expirer cleans up its own queue. The inner loop
        # basically looks like this:
        #
        #    for obj in stuff_to_delete:
        #        delete_the_object(obj)
        #        remove_the_queue_entry(obj)
        #
        # By default, upon receipt of a DELETE request for an expiring
        # object, the object servers will create async_pending records to
        # clean the expirer queue. Since the expirer cleans its own queue,
        # this is unnecessary. The expirer can make requests in such a way
        # that the object server does not write out any async pendings; this
        # test asserts that this is the case. (A toy sketch of the
        # queue-cleanup loop follows this test.)

        # Make an expiring object in each policy
        for policy in ENABLED_POLICIES:
            container_name = "expirer-test-%d" % policy.idx
            container_headers = {'X-Storage-Policy': policy.name}
            client.put_container(self.url,
                                 self.token,
                                 container_name,
                                 headers=container_headers)

            now = time.time()
            delete_at = int(now + 2.0)
            client.put_object(self.url,
                              self.token,
                              container_name,
                              "some-object",
                              headers={
                                  'X-Delete-At': str(delete_at),
                                  'X-Timestamp': Timestamp(now).normal
                              },
                              contents='dontcare')

        time.sleep(2.0)
        # make sure auto-created expirer-queue containers get in the account
        # listing so the expirer can find them
        Manager(['container-updater']).once()

        # Make sure there's no async_pendings anywhere. Probe tests only run
        # on single-node installs anyway, so this set should be small enough
        # that an exhaustive check doesn't take too long.
        all_obj_nodes = self.get_all_object_nodes()
        pendings_before = self.gather_async_pendings(all_obj_nodes)

        # expire the objects
        Manager(['object-expirer']).once()
        pendings_after = self.gather_async_pendings(all_obj_nodes)
        self.assertEqual(pendings_after, pendings_before)
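
    # A toy sketch (assumed names, not the real expirer code) of the
    # queue-cleanup loop described in the comment at the top of the test
    # above: the expirer deletes each expired object and then removes its
    # own queue entry, so object servers need not write async_pendings to
    # clean the queue.
    @staticmethod
    def _expirer_inner_loop_sketch(stuff_to_delete, delete_the_object,
                                   remove_the_queue_entry):
        for obj in stuff_to_delete:
            delete_the_object(obj)        # DELETE the expired object
            remove_the_queue_entry(obj)   # then clean the expirer queue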

    def test_expirer_object_should_not_be_expired(self):

        # The object-expirer uses the x-if-delete-at header to verify that
        # it may delete an object. If an object either has no x-delete-at
        # header in its metadata, or has an x-delete-at value different
        # from the x-if-delete-at value, the expirer's delete will fail
        # with 412 Precondition Failed. However, if some copies of the
        # object are on handoff nodes, the expirer can write a tombstone
        # whose timestamp equals x-delete-at, and object consistency will
        # be resolved in favor of the newer timestamp (in particular, the
        # case of an overwrite without x-delete-at). This test asserts
        # that, at least, an overwritten object with a later timestamp
        # than the original expiration date stays safe. (A sketch of the
        # x-if-delete-at precondition follows this test.)

        def put_object(headers):
            # use internal client to PUT objects so that X-Timestamp in headers
            # is effective
            headers['Content-Length'] = '0'
            path = self.client.make_path(self.account, self.container_name,
                                         self.object_name)
            try:
                self.client.make_request('PUT', path, headers, (2, ))
            except UnexpectedResponse as e:
                self.fail('Expected 201 for PUT object but got %s' %
                          e.resp.status)

        obj_brain = BrainSplitter(self.url, self.token, self.container_name,
                                  self.object_name, 'object', self.policy)

        # T(obj_created) < T(obj_deleted with x-delete-at) < T(obj_recreated)
        #   < T(expirer_executed)
        # The recreated obj should appear in any split-brain case

        obj_brain.put_container()

        # T(obj_deleted with x-delete-at)
        # object-server accepts req only if X-Delete-At is later than 'now'
        # so here, T(obj_created) < T(obj_deleted with x-delete-at)
        now = time.time()
        delete_at = int(now + 2.0)
        recreate_at = delete_at + 1.0
        put_object(headers={
            'X-Delete-At': str(delete_at),
            'X-Timestamp': Timestamp(now).normal
        })

        # stop some object servers to create a situation where the
        # object-expirer can put tombstones on the primary nodes.
        obj_brain.stop_primary_half()

        # increment the X-Timestamp explicitly
        # (will be T(obj_deleted with x-delete-at) < T(obj_recreated))
        put_object(
            headers={
                'X-Object-Meta-Expired': 'False',
                'X-Timestamp': Timestamp(recreate_at).normal
            })

        # make sure auto-created containers get in the account listing
        Manager(['container-updater']).once()
        # sanity, the newer object is still there
        try:
            metadata = self.client.get_object_metadata(self.account,
                                                       self.container_name,
                                                       self.object_name)
        except UnexpectedResponse as e:
            self.fail('Expected 200 for HEAD object but got %s' %
                      e.resp.status)

        self.assertIn('x-object-meta-expired', metadata)

        # some object servers recovered
        obj_brain.start_primary_half()

        # sleep until after recreate_at
        while time.time() <= recreate_at:
            time.sleep(0.1)
        # now the expirer runs, after the obj has been recreated
        self.expirer.once()

        # verify that original object was deleted by expirer
        obj_brain.stop_handoff_half()
        try:
            metadata = self.client.get_object_metadata(
                self.account,
                self.container_name,
                self.object_name,
                acceptable_statuses=(4, ))
        except UnexpectedResponse as e:
            self.fail('Expected 404 for HEAD object but got %s' %
                      e.resp.status)
        obj_brain.start_handoff_half()

        # and any inconsistent object state is recovered by the replicator
        Manager(['object-replicator']).once()

        # check if you can get recreated object
        try:
            metadata = self.client.get_object_metadata(self.account,
                                                       self.container_name,
                                                       self.object_name)
        except UnexpectedResponse as e:
            self.fail('Expected 200 for HEAD object but got %s' %
                      e.resp.status)

        self.assertIn('x-object-meta-expired', metadata)
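
    def _expirer_style_delete_sketch(self, delete_at):
        # A hedged sketch of the x-if-delete-at precondition described at
        # the top of the test above (helper name is hypothetical; the
        # make_request call mirrors put_object's usage): the object server
        # answers 412 when the stored X-Delete-At does not match.
        path = self.client.make_path(self.account, self.container_name,
                                     self.object_name)
        headers = {'X-If-Delete-At': str(delete_at)}
        # accept 2xx (deleted) and 4xx (404 already gone, 412 mismatch)
        return self.client.make_request('DELETE', path, headers, (2, 4))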

    def _test_expirer_delete_outdated_object_version(self, object_exists):
        # This test simulates a case where the expirer tries to delete an
        # outdated version of an object.
        # One case is where the expirer gets a 404 because the newest
        # version of the object is offline.
        # Another case is where the expirer gets a 412, since the old
        # version of the object mismatches the expiration time sent by the
        # expirer.
        # In either case, the expirer should retry deleting the object
        # later, for as long as a reclaim age has not passed.
        obj_brain = BrainSplitter(self.url, self.token, self.container_name,
                                  self.object_name, 'object', self.policy)

        obj_brain.put_container()

        if object_exists:
            obj_brain.put_object()

        # currently, the object either doesn't exist, or does not have
        # an expiration

        # stop primary servers and put a newer version of the object, this
        # time with an expiration. only the handoff servers will have
        # the new version
        obj_brain.stop_primary_half()
        now = time.time()
        delete_at = int(now + 2.0)
        obj_brain.put_object({'X-Delete-At': str(delete_at)})

        # make sure auto-created containers get in the account listing
        Manager(['container-updater']).once()

        # update object record in the container listing
        Manager(['container-replicator']).once()

        # take handoff servers down, and bring up the outdated primary servers
        obj_brain.start_primary_half()
        obj_brain.stop_handoff_half()

        # wait until object expiration time
        while time.time() <= delete_at:
            time.sleep(0.1)

        # run expirer against the outdated servers. it should fail since
        # the outdated version does not match the expiration time
        self.expirer.once()

        # bring all servers up, and run replicator to update servers
        obj_brain.start_handoff_half()
        Manager(['object-replicator']).once()

        # verify the deletion has failed by checking the container listing
        self.assertTrue(self._check_obj_in_container_listing(),
                        msg='Did not find listing for %s' % self.object_name)

        # run expirer again, delete should now succeed
        self.expirer.once()

        # verify the deletion by checking the container listing
        self.assertFalse(self._check_obj_in_container_listing(),
                         msg='Found listing for %s' % self.object_name)

    def test_expirer_delete_returns_outdated_404(self):
        self._test_expirer_delete_outdated_object_version(object_exists=False)

    def test_expirer_delete_returns_outdated_412(self):
        self._test_expirer_delete_outdated_object_version(object_exists=True)

    def test_slo_async_delete(self):
        if not self.cluster_info.get('slo', {}).get('allow_async_delete'):
            raise unittest.SkipTest('allow_async_delete not enabled')

        segment_container = self.container_name + '_segments'
        client.put_container(self.url, self.token, self.container_name, {})
        client.put_container(self.url, self.token, segment_container, {})
        client.put_object(self.url, self.token, segment_container, 'segment_1',
                          b'1234')
        client.put_object(self.url, self.token, segment_container, 'segment_2',
                          b'5678')
        client.put_object(self.url,
                          self.token,
                          self.container_name,
                          'slo',
                          json.dumps([
                              {
                                  'path': segment_container + '/segment_1'
                              },
                              {
                                  'data': 'Cg=='
                              },
                              {
                                  'path': segment_container + '/segment_2'
                              },
                          ]),
                          query_string='multipart-manifest=put')
        _, body = client.get_object(self.url, self.token, self.container_name,
                                    'slo')
        self.assertEqual(body, b'1234\n5678')

        client.delete_object(
            self.url,
            self.token,
            self.container_name,
            'slo',
            query_string='multipart-manifest=delete&async=true')

        # Object's deleted
        _, objects = client.get_container(self.url, self.token,
                                          self.container_name)
        self.assertEqual(objects, [])
        with self.assertRaises(client.ClientException) as caught:
            client.get_object(self.url, self.token, self.container_name, 'slo')
        self.assertEqual(404, caught.exception.http_status)

        # But segments are still around and accessible
        _, objects = client.get_container(self.url, self.token,
                                          segment_container)
        self.assertEqual([o['name'] for o in objects],
                         ['segment_1', 'segment_2'])
        _, body = client.get_object(self.url, self.token, segment_container,
                                    'segment_1')
        self.assertEqual(body, b'1234')
        _, body = client.get_object(self.url, self.token, segment_container,
                                    'segment_2')
        self.assertEqual(body, b'5678')

        # make sure auto-created expirer-queue containers get in the account
        # listing so the expirer can find them
        Manager(['container-updater']).once()
        self.expirer.once()

        # Now the expirer has cleaned up the segments
        _, objects = client.get_container(self.url, self.token,
                                          segment_container)
        self.assertEqual(objects, [])
        with self.assertRaises(client.ClientException) as caught:
            client.get_object(self.url, self.token, segment_container,
                              'segment_1')
        self.assertEqual(404, caught.exception.http_status)
        with self.assertRaises(client.ClientException) as caught:
            client.get_object(self.url, self.token, segment_container,
                              'segment_2')
        self.assertEqual(404, caught.exception.http_status)
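
    @staticmethod
    def _slo_inline_data_note():
        # Why the assembled SLO body above is b'1234\n5678': the manifest
        # mixes two segment paths with an inline base64 'data' segment,
        # and 'Cg==' decodes to a single newline byte. (Illustrative
        # helper, not part of the original test.)
        import base64
        assert base64.b64decode('Cg==') == b'\n'
        return b'1234' + b'\n' + b'5678'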
Exemplo n.º 16
0
class TestObjectExpirer(ReplProbeTest):
    def setUp(self):
        if len(ENABLED_POLICIES) < 2:
            raise SkipTest("Need more than one policy")

        self.expirer = Manager(["object-expirer"])
        self.expirer.start()
        err = self.expirer.stop()
        if err:
            raise SkipTest("Unable to verify object-expirer service")

        conf_files = []
        for server in self.expirer.servers:
            conf_files.extend(server.conf_files())
        conf_file = conf_files[0]
        self.client = InternalClient(conf_file, "probe-test", 3)

        super(TestObjectExpirer, self).setUp()
        self.container_name = "container-%s" % uuid.uuid4()
        self.object_name = "object-%s" % uuid.uuid4()
        self.brain = BrainSplitter(self.url, self.token, self.container_name, self.object_name)

    def test_expirer_object_split_brain(self):
        old_policy = random.choice(ENABLED_POLICIES)
        wrong_policy = random.choice([p for p in ENABLED_POLICIES if p != old_policy])
        # create an expiring object and a container with the wrong policy
        self.brain.stop_primary_half()
        self.brain.put_container(int(old_policy))
        self.brain.put_object(headers={"X-Delete-After": 2})
        # get the object timestamp
        metadata = self.client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            headers={"X-Backend-Storage-Policy-Index": int(old_policy)},
        )
        create_timestamp = Timestamp(metadata["x-timestamp"])
        self.brain.start_primary_half()
        # get the expiring object updates in their queue, while we have all
        # the servers up
        Manager(["object-updater"]).once()
        self.brain.stop_handoff_half()
        self.brain.put_container(int(wrong_policy))
        # don't start handoff servers, only wrong policy is available

        # make sure auto-created containers get in the account listing
        Manager(["container-updater"]).once()
        # this guy should no-op since it's unable to expire the object
        self.expirer.once()

        self.brain.start_handoff_half()
        self.get_to_final_state()

        # validate object is expired
        found_in_policy = None
        metadata = self.client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            acceptable_statuses=(4,),
            headers={"X-Backend-Storage-Policy-Index": int(old_policy)},
        )
        self.assertTrue("x-backend-timestamp" in metadata)
        self.assertEqual(Timestamp(metadata["x-backend-timestamp"]), create_timestamp)

        # but it is still in the listing
        for obj in self.client.iter_objects(self.account, self.container_name):
            if self.object_name == obj["name"]:
                break
        else:
            self.fail("Did not find listing for %s" % self.object_name)

        # clear proxy cache
        client.post_container(self.url, self.token, self.container_name, {})
        # run the expirer again after replication
        self.expirer.once()

        # object is not in the listing
        for obj in self.client.iter_objects(self.account, self.container_name):
            if self.object_name == obj["name"]:
                self.fail("Found listing for %s" % self.object_name)

        # and validate object is tombstoned
        found_in_policy = None
        for policy in ENABLED_POLICIES:
            metadata = self.client.get_object_metadata(
                self.account,
                self.container_name,
                self.object_name,
                acceptable_statuses=(4,),
                headers={"X-Backend-Storage-Policy-Index": int(policy)},
            )
            if "x-backend-timestamp" in metadata:
                if found_in_policy:
                    self.fail("found object in %s and also %s" % (found_in_policy, policy))
                found_in_policy = policy
                self.assertTrue("x-backend-timestamp" in metadata)
                self.assertTrue(Timestamp(metadata["x-backend-timestamp"]) > create_timestamp)
Exemplo n.º 17
0
class ProbeTest(unittest.TestCase):
    """
    Don't instantiate this directly, use a child class instead.
    """
    def setUp(self):
        p = Popen("resetswift 2>&1", shell=True, stdout=PIPE)
        stdout, _stderr = p.communicate()
        print(stdout)
        Manager(['all']).stop()
        self.pids = {}
        try:
            self.account_ring = get_ring('account',
                                         self.acct_cont_required_replicas,
                                         self.acct_cont_required_devices)
            self.container_ring = get_ring('container',
                                           self.acct_cont_required_replicas,
                                           self.acct_cont_required_devices)
            self.policy = get_policy(**self.policy_requirements)
            self.object_ring = get_ring(self.policy.ring_name,
                                        self.obj_required_replicas,
                                        self.obj_required_devices,
                                        server='object')
            Manager(['main']).start(wait=False)
            self.port2server = {}
            for server, port in [('account', 6002), ('container', 6001),
                                 ('object', 6000)]:
                for number in xrange(1, 9):
                    self.port2server[port + (number * 10)] = \
                        '%s%d' % (server, number)
            for port in self.port2server:
                check_server(port, self.port2server, self.pids)
            self.port2server[8080] = 'proxy'
            self.url, self.token, self.account = \
                check_server(8080, self.port2server, self.pids)
            self.configs = defaultdict(dict)
            for name in ('account', 'container', 'object'):
                for server_name in (name, '%s-replicator' % name):
                    for server in Manager([server_name]):
                        for i, conf in enumerate(server.conf_files(), 1):
                            self.configs[server.server][i] = conf
            self.replicators = Manager([
                'account-replicator', 'container-replicator',
                'object-replicator'
            ])
            self.updaters = Manager(['container-updater', 'object-updater'])
            self.server_port_to_conf = {}
            # get some backend daemon configs loaded up
            for server in ('account', 'container', 'object'):
                self.server_port_to_conf[server] = build_port_to_conf(server)
        except BaseException:
            try:
                raise
            finally:
                try:
                    Manager(['all']).kill()
                except Exception:
                    pass

    def tearDown(self):
        Manager(['all']).kill()

    def device_dir(self, server, node):
        conf = self.server_port_to_conf[server][node['port']]
        return os.path.join(conf['devices'], node['device'])

    def storage_dir(self, server, node, part=None, policy=None):
        policy = policy or self.policy
        device_path = self.device_dir(server, node)
        path_parts = [device_path, get_data_dir(policy)]
        if part is not None:
            path_parts.append(str(part))
        return os.path.join(*path_parts)

    def config_number(self, node):
        _server_type, config_number = get_server_number(
            node['port'], self.port2server)
        return config_number

    def get_to_final_state(self):
        # these .stop()s are probably not strictly necessary,
        # but may prevent race conditions
        self.replicators.stop()
        self.updaters.stop()

        self.replicators.once()
        self.updaters.once()
        self.replicators.once()
Exemplo n.º 18
0
class ProbeTest(unittest.TestCase):
    """
    Don't instantiate this directly, use a child class instead.
    """

    def _load_rings_and_configs(self):
        self.ipport2server = {}
        self.configs = defaultdict(dict)
        self.account_ring = get_ring(
            'account',
            self.acct_cont_required_replicas,
            self.acct_cont_required_devices,
            ipport2server=self.ipport2server,
            config_paths=self.configs)
        self.container_ring = get_ring(
            'container',
            self.acct_cont_required_replicas,
            self.acct_cont_required_devices,
            ipport2server=self.ipport2server,
            config_paths=self.configs)
        self.policy = get_policy(**self.policy_requirements)
        self.object_ring = get_ring(
            self.policy.ring_name,
            self.obj_required_replicas,
            self.obj_required_devices,
            server='object',
            ipport2server=self.ipport2server,
            config_paths=self.configs)

    def setUp(self):
        resetswift()
        kill_orphans()
        self._load_rings_and_configs()
        try:
            self.servers_per_port = any(
                int(readconf(c, section_name='object-replicator').get(
                    'servers_per_port', '0'))
                for c in self.configs['object-replicator'].values())

            Manager(['main']).start(wait=True)
            for ipport in self.ipport2server:
                check_server(ipport, self.ipport2server)
            proxy_ipport = ('127.0.0.1', 8080)
            self.ipport2server[proxy_ipport] = 'proxy'
            self.url, self.token, self.account = check_server(
                proxy_ipport, self.ipport2server)
            self.account_1 = {
                'url': self.url, 'token': self.token, 'account': self.account}

            rv = _retry_timeout(_check_proxy, args=(
                proxy_ipport, 'test2:tester2', 'testing2'))
            self.account_2 = {
                k: v for (k, v) in zip(('url', 'token', 'account'), rv)}

            self.replicators = Manager(
                ['account-replicator', 'container-replicator',
                 'object-replicator'])
            self.updaters = Manager(['container-updater', 'object-updater'])
        except BaseException:
            try:
                raise
            finally:
                try:
                    Manager(['all']).kill()
                except Exception:
                    pass
        info_url = "%s://%s/info" % (urlparse(self.url).scheme,
                                     urlparse(self.url).netloc)
        proxy_conn = client.http_connection(info_url)
        self.cluster_info = client.get_capabilities(proxy_conn)

    def tearDown(self):
        Manager(['all']).kill()

    def device_dir(self, server, node):
        server_type, config_number = get_server_number(
            (node['ip'], node['port']), self.ipport2server)
        repl_server = '%s-replicator' % server_type
        conf = readconf(self.configs[repl_server][config_number],
                        section_name=repl_server)
        return os.path.join(conf['devices'], node['device'])

    def storage_dir(self, server, node, part=None, policy=None):
        policy = policy or self.policy
        device_path = self.device_dir(server, node)
        path_parts = [device_path, get_data_dir(policy)]
        if part is not None:
            path_parts.append(str(part))
        return os.path.join(*path_parts)

    def config_number(self, node):
        _server_type, config_number = get_server_number(
            (node['ip'], node['port']), self.ipport2server)
        return config_number

    def is_local_to(self, node1, node2):
        """
        Return True if both ring devices are "local" to each other (on the
        same "server").
        """
        if self.servers_per_port:
            return node1['ip'] == node2['ip']

        # Without a disambiguating IP, for SAIOs, we have to assume ports
        # uniquely identify "servers".  SAIOs should be configured to *either*
        # have unique IPs per node (e.g. 127.0.0.1, 127.0.0.2, etc.) OR unique
        # ports per server (i.e. sdb1 & sdb5 would have same port numbers in
        # the 8-disk EC ring).
        return node1['port'] == node2['port']

    def get_to_final_state(self):
        # these .stop()s are probably not strictly necessary,
        # but may prevent race conditions
        self.replicators.stop()
        self.updaters.stop()

        self.replicators.once()
        self.updaters.once()
        self.replicators.once()

    def kill_drive(self, device):
        if os.path.ismount(device):
            os.system('sudo umount %s' % device)
        else:
            renamer(device, device + "X")

    def revive_drive(self, device):
        disabled_name = device + "X"
        if os.path.isdir(disabled_name):
            renamer(disabled_name, device)
        else:
            os.system('sudo mount %s' % device)

    def make_internal_client(self):
        tempdir = mkdtemp()
        try:
            conf_path = os.path.join(tempdir, 'internal_client.conf')
            conf_body = """
            [DEFAULT]
            swift_dir = /etc/swift

            [pipeline:main]
            pipeline = catch_errors cache copy proxy-server

            [app:proxy-server]
            use = egg:swift#proxy

            [filter:copy]
            use = egg:swift#copy

            [filter:cache]
            use = egg:swift#memcache

            [filter:catch_errors]
            use = egg:swift#catch_errors
            """
            with open(conf_path, 'w') as f:
                f.write(dedent(conf_body))
            return internal_client.InternalClient(conf_path, 'test', 1)
        finally:
            shutil.rmtree(tempdir)
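
    # Hypothetical usage sketch for make_internal_client(): the returned
    # InternalClient speaks to the backend directly, e.g. to HEAD an
    # object on a specific storage policy (the container and object names
    # here are placeholders).
    def _internal_head_sketch(self):
        ic = self.make_internal_client()
        return ic.get_object_metadata(
            self.account, 'some-container', 'some-object',
            acceptable_statuses=(2, 4),
            headers={'X-Backend-Storage-Policy-Index': int(self.policy)})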

    def get_all_object_nodes(self):
        """
        Returns a list of all nodes in all object storage policies.

        :return: a list of node dicts.
        """
        all_obj_nodes = {}
        for policy in ENABLED_POLICIES:
            for dev in policy.object_ring.devs:
                all_obj_nodes[dev['device']] = dev
        return all_obj_nodes.values()

    def gather_async_pendings(self, onodes):
        """
        Returns a list of paths to async pending files found on given nodes.

        :param onodes: a list of nodes.
        :return: a list of file paths.
        """
        async_pendings = []
        for onode in onodes:
            device_dir = self.device_dir('', onode)
            for ap_pol_dir in os.listdir(device_dir):
                if not ap_pol_dir.startswith('async_pending'):
                    # skip 'objects', 'containers', etc.
                    continue
                async_pending_dir = os.path.join(device_dir, ap_pol_dir)
                try:
                    ap_dirs = os.listdir(async_pending_dir)
                except OSError as err:
                    if err.errno == errno.ENOENT:
                        pass
                    else:
                        raise
                else:
                    for ap_dir in ap_dirs:
                        ap_dir_fullpath = os.path.join(
                            async_pending_dir, ap_dir)
                        async_pendings.extend([
                            os.path.join(ap_dir_fullpath, ent)
                            for ent in os.listdir(ap_dir_fullpath)])
        return async_pendings

    def test_reconciler_move_object_twice(self):
        # select some policies
        old_policy = random.choice(ENABLED_POLICIES)
        new_policy = random.choice(
            [p for p in ENABLED_POLICIES if p != old_policy])

        # setup a split brain
        self.brain.stop_handoff_half()
        # get old_policy on two primaries
        self.brain.put_container(policy_index=int(old_policy))
        self.brain.start_handoff_half()
        self.brain.stop_primary_half()
        # force a recreate on handoffs
        self.brain.put_container(policy_index=int(old_policy))
        self.brain.delete_container()
        self.brain.put_container(policy_index=int(new_policy))
        self.brain.put_object()  # populate memcache with new_policy
        self.brain.start_primary_half()

        # at this point two primaries have old policy
        container_part, container_nodes = self.container_ring.get_nodes(
            self.account, self.container_name)
        head_responses = [
            (node,
             direct_client.direct_head_container(node, container_part,
                                                 self.account,
                                                 self.container_name))
            for node in container_nodes
        ]
        old_container_nodes = [
            node for node, metadata in head_responses if int(old_policy) ==
            int(metadata['X-Backend-Storage-Policy-Index'])
        ]
        self.assertEqual(2, len(old_container_nodes))

        # hopefully memcache still has the new policy cached
        self.brain.put_object(headers={'x-object-meta-test': 'custom-meta'},
                              contents=b'VERIFY')
        # double-check object correctly written to new policy
        conf_files = []
        for server in Manager(['container-reconciler']).servers:
            conf_files.extend(server.conf_files())
        conf_file = conf_files[0]
        int_client = InternalClient(conf_file, 'probe-test', 3)
        int_client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(new_policy)})
        int_client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            acceptable_statuses=(4, ),
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})

        # shutdown the containers that know about the new policy
        self.brain.stop_handoff_half()

        # and get rows enqueued from old nodes
        for server_type in ('container-replicator', 'container-updater'):
            server = Manager([server_type])
            for node in old_container_nodes:
                server.once(number=self.config_number(node))

        # verify entry in the queue for the "misplaced" new_policy
        for container in int_client.iter_containers(MISPLACED_OBJECTS_ACCOUNT):
            for obj in int_client.iter_objects(MISPLACED_OBJECTS_ACCOUNT,
                                               container['name']):
                expected = '%d:/%s/%s/%s' % (new_policy, self.account,
                                             self.container_name,
                                             self.object_name)
                self.assertEqual(obj['name'], expected)

        Manager(['container-reconciler']).once()

        # verify object in old_policy
        int_client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})

        # verify object is *not* in new_policy
        int_client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            acceptable_statuses=(4, ),
            headers={'X-Backend-Storage-Policy-Index': int(new_policy)})

        self.get_to_final_state()

        # verify entry in the queue
        for container in int_client.iter_containers(MISPLACED_OBJECTS_ACCOUNT):
            for obj in int_client.iter_objects(MISPLACED_OBJECTS_ACCOUNT,
                                               container['name']):
                expected = '%d:/%s/%s/%s' % (old_policy, self.account,
                                             self.container_name,
                                             self.object_name)
                self.assertEqual(obj['name'], expected)

        Manager(['container-reconciler']).once()

        # and now it flops back
        int_client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(new_policy)})
        int_client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            acceptable_statuses=(4, ),
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})

        # make sure the queue is settled
        self.get_to_final_state()
        for container in int_client.iter_containers(MISPLACED_OBJECTS_ACCOUNT):
            for obj in int_client.iter_objects(MISPLACED_OBJECTS_ACCOUNT,
                                               container['name']):
                self.fail('Found unexpected object %r in the queue' % obj)

        # verify that the object data read by external client is correct
        headers, data = self._get_object_patiently(int(new_policy))
        self.assertEqual(b'VERIFY', data)
        self.assertEqual('custom-meta', headers['x-object-meta-test'])
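
    # A small sketch of the misplaced-objects queue entry name format
    # asserted above, '<policy_index>:/<account>/<container>/<object>'
    # (helper name is hypothetical):
    @staticmethod
    def _parse_reconciler_entry(name):
        policy_index, path = name.split(':', 1)
        _, account, container, obj = path.split('/', 3)
        return int(policy_index), account, container, obj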
Exemplo n.º 20
0
class ProbeTest(unittest.TestCase):
    """
    Don't instantiate this directly, use a child class instead.
    """

    def setUp(self):
        resetswift()
        self.pids = {}
        try:
            self.ipport2server = {}
            self.configs = defaultdict(dict)
            self.account_ring = get_ring(
                "account",
                self.acct_cont_required_replicas,
                self.acct_cont_required_devices,
                ipport2server=self.ipport2server,
                config_paths=self.configs,
            )
            self.container_ring = get_ring(
                "container",
                self.acct_cont_required_replicas,
                self.acct_cont_required_devices,
                ipport2server=self.ipport2server,
                config_paths=self.configs,
            )
            self.policy = get_policy(**self.policy_requirements)
            self.object_ring = get_ring(
                self.policy.ring_name,
                self.obj_required_replicas,
                self.obj_required_devices,
                server="object",
                ipport2server=self.ipport2server,
                config_paths=self.configs,
            )

            self.servers_per_port = any(
                int(readconf(c, section_name="object-replicator").get("servers_per_port", "0"))
                for c in self.configs["object-replicator"].values()
            )

            Manager(["main"]).start(wait=False)
            for ipport in self.ipport2server:
                check_server(ipport, self.ipport2server, self.pids)
            proxy_ipport = ("127.0.0.1", 8080)
            self.ipport2server[proxy_ipport] = "proxy"
            self.url, self.token, self.account = check_server(proxy_ipport, self.ipport2server, self.pids)
            self.replicators = Manager(["account-replicator", "container-replicator", "object-replicator"])
            self.updaters = Manager(["container-updater", "object-updater"])
        except BaseException:
            try:
                raise
            finally:
                try:
                    Manager(["all"]).kill()
                except Exception:
                    pass

    def tearDown(self):
        Manager(["all"]).kill()

    def device_dir(self, server, node):
        server_type, config_number = get_server_number((node["ip"], node["port"]), self.ipport2server)
        repl_server = "%s-replicator" % server_type
        conf = readconf(self.configs[repl_server][config_number], section_name=repl_server)
        return os.path.join(conf["devices"], node["device"])

    def storage_dir(self, server, node, part=None, policy=None):
        policy = policy or self.policy
        device_path = self.device_dir(server, node)
        path_parts = [device_path, get_data_dir(policy)]
        if part is not None:
            path_parts.append(str(part))
        return os.path.join(*path_parts)

    def config_number(self, node):
        _server_type, config_number = get_server_number((node["ip"], node["port"]), self.ipport2server)
        return config_number

    def is_local_to(self, node1, node2):
        """
        Return True if both ring devices are "local" to each other (on the
        same "server").
        """
        if self.servers_per_port:
            return node1["ip"] == node2["ip"]

        # Without a disambiguating IP, for SAIOs, we have to assume ports
        # uniquely identify "servers".  SAIOs should be configured to *either*
        # have unique IPs per node (e.g. 127.0.0.1, 127.0.0.2, etc.) OR unique
        # ports per server (i.e. sdb1 & sdb5 would have same port numbers in
        # the 8-disk EC ring).
        return node1["port"] == node2["port"]

    def get_to_final_state(self):
        # these .stop()s are probably not strictly necessary,
        # but may prevent race conditions
        self.replicators.stop()
        self.updaters.stop()

        self.replicators.once()
        self.updaters.once()
        self.replicators.once()

    def kill_drive(self, device):
        if os.path.ismount(device):
            os.system("sudo umount %s" % device)
        else:
            renamer(device, device + "X")

    def revive_drive(self, device):
        disabled_name = device + "X"
        if os.path.isdir(disabled_name):
            renamer(disabled_name, device)
        else:
            os.system("sudo mount %s" % device)
Exemplo n.º 21
0
class TestObjectExpirer(unittest.TestCase):

    def setUp(self):
        if len(POLICIES) < 2:
            raise SkipTest('Need more than one policy')

        self.expirer = Manager(['object-expirer'])
        self.expirer.start()
        err = self.expirer.stop()
        if err:
            raise SkipTest('Unable to verify object-expirer service')

        conf_files = []
        for server in self.expirer.servers:
            conf_files.extend(server.conf_files())
        conf_file = conf_files[0]
        self.client = InternalClient(conf_file, 'probe-test', 3)

        (self.pids, self.port2server, self.account_ring, self.container_ring,
         self.object_ring, self.policy, self.url, self.token,
         self.account, self.configs) = reset_environment()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()
        self.brain = BrainSplitter(self.url, self.token, self.container_name,
                                   self.object_name)

    def test_expirer_object_split_brain(self):
        old_policy = random.choice(list(POLICIES))
        wrong_policy = random.choice([p for p in POLICIES if p != old_policy])
        # create an expiring object and a container with the wrong policy
        self.brain.stop_primary_half()
        self.brain.put_container(int(old_policy))
        self.brain.put_object(headers={'X-Delete-After': 2})
        # get the object timestamp
        metadata = self.client.get_object_metadata(
            self.account, self.container_name, self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
        create_timestamp = Timestamp(metadata['x-timestamp'])
        self.brain.start_primary_half()
        # get the expiring object updates in their queue, while we have all
        # the servers up
        Manager(['object-updater']).once()
        self.brain.stop_handoff_half()
        self.brain.put_container(int(wrong_policy))
        # don't start handoff servers, only wrong policy is available

        # make sure auto-created containers get in the account listing
        Manager(['container-updater']).once()
        # this guy should no-op since it's unable to expire the object
        self.expirer.once()

        self.brain.start_handoff_half()
        get_to_final_state()

        # validate object is expired
        found_in_policy = None
        metadata = self.client.get_object_metadata(
            self.account, self.container_name, self.object_name,
            acceptable_statuses=(4,),
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
        self.assert_('x-backend-timestamp' in metadata)
        self.assertEqual(Timestamp(metadata['x-backend-timestamp']),
                         create_timestamp)

        # but it is still in the listing
        for obj in self.client.iter_objects(self.account,
                                            self.container_name):
            if self.object_name == obj['name']:
                break
        else:
            self.fail('Did not find listing for %s' % self.object_name)

        # clear proxy cache
        client.post_container(self.url, self.token, self.container_name, {})
        # run the expirer again after replication
        self.expirer.once()

        # object is not in the listing
        for obj in self.client.iter_objects(self.account,
                                            self.container_name):
            if self.object_name == obj['name']:
                self.fail('Found listing for %s' % self.object_name)

        # and validate object is tombstoned
        found_in_policy = None
        for policy in POLICIES:
            metadata = self.client.get_object_metadata(
                self.account, self.container_name, self.object_name,
                acceptable_statuses=(4,),
                headers={'X-Backend-Storage-Policy-Index': int(policy)})
            if 'x-backend-timestamp' in metadata:
                if found_in_policy:
                    self.fail('found object in %s and also %s' %
                              (found_in_policy, policy))
                found_in_policy = policy
                self.assert_('x-backend-timestamp' in metadata)
                self.assert_(Timestamp(metadata['x-backend-timestamp']) >
                             create_timestamp)
Exemplo n.º 22
0
class TestObjectExpirer(unittest.TestCase):
    def setUp(self):
        if len(POLICIES) < 2:
            raise SkipTest('Need more than one policy')

        self.expirer = Manager(['object-expirer'])
        self.expirer.start()
        err = self.expirer.stop()
        if err:
            raise SkipTest('Unable to verify object-expirer service')

        conf_files = []
        for server in self.expirer.servers:
            conf_files.extend(server.conf_files())
        conf_file = conf_files[0]
        self.client = InternalClient(conf_file, 'probe-test', 3)

        (self.pids, self.port2server, self.account_ring, self.container_ring,
         self.object_ring, self.policy, self.url, self.token, self.account,
         self.configs) = reset_environment()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()
        self.brain = BrainSplitter(self.url, self.token, self.container_name,
                                   self.object_name)

    def test_expirer_object_split_brain(self):
        old_policy = random.choice(list(POLICIES))
        wrong_policy = random.choice([p for p in POLICIES if p != old_policy])
        # create an expiring object and a container with the wrong policy
        self.brain.stop_primary_half()
        self.brain.put_container(int(old_policy))
        self.brain.put_object(headers={'X-Delete-After': 2})
        # get the object timestamp
        metadata = self.client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
        create_timestamp = Timestamp(metadata['x-timestamp'])
        self.brain.start_primary_half()
        # get the expiring object updates in their queue, while we have all
        # the servers up
        Manager(['object-updater']).once()
        self.brain.stop_handoff_half()
        self.brain.put_container(int(wrong_policy))
        # don't start handoff servers, only wrong policy is available

        # make sure auto-created containers get in the account listing
        Manager(['container-updater']).once()
        # this run should no-op since the expirer can't expire the object
        self.expirer.once()

        self.brain.start_handoff_half()
        get_to_final_state()

        # validate object is expired
        found_in_policy = None
        metadata = self.client.get_object_metadata(
            self.account,
            self.container_name,
            self.object_name,
            acceptable_statuses=(4,),
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
        self.assertIn('x-backend-timestamp', metadata)
        self.assertEqual(Timestamp(metadata['x-backend-timestamp']),
                         create_timestamp)

        # but it is still in the listing
        for obj in self.client.iter_objects(self.account, self.container_name):
            if self.object_name == obj['name']:
                break
        else:
            self.fail('Did not find listing for %s' % self.object_name)

        # clear proxy cache
        client.post_container(self.url, self.token, self.container_name, {})
        # run the expirer again after replication
        self.expirer.once()

        # object is not in the listing
        for obj in self.client.iter_objects(self.account, self.container_name):
            if self.object_name == obj['name']:
                self.fail('Found listing for %s' % self.object_name)

        # and validate object is tombstoned
        found_in_policy = None
        for policy in POLICIES:
            metadata = self.client.get_object_metadata(
                self.account,
                self.container_name,
                self.object_name,
                acceptable_statuses=(4,),
                headers={'X-Backend-Storage-Policy-Index': int(policy)})
            if 'x-backend-timestamp' in metadata:
                if found_in_policy:
                    self.fail('found object in %s and also %s' %
                              (found_in_policy, policy))
                found_in_policy = policy
                self.assertIn('x-backend-timestamp', metadata)
                self.assertTrue(
                    Timestamp(metadata['x-backend-timestamp']) >
                    create_timestamp)
Exemplo n.º 23
0
class ProbeTest(unittest.TestCase):
    """
    Don't instantiate this directly, use a child class instead.
    """
    def setUp(self):
        p = Popen("resetswift 2>&1", shell=True, stdout=PIPE)
        stdout, _stderr = p.communicate()
        print stdout
        Manager(['all']).stop()
        self.pids = {}
        try:
            self.ipport2server = {}
            self.configs = defaultdict(dict)
            self.account_ring = get_ring('account',
                                         self.acct_cont_required_replicas,
                                         self.acct_cont_required_devices,
                                         ipport2server=self.ipport2server,
                                         config_paths=self.configs)
            self.container_ring = get_ring('container',
                                           self.acct_cont_required_replicas,
                                           self.acct_cont_required_devices,
                                           ipport2server=self.ipport2server,
                                           config_paths=self.configs)
            self.policy = get_policy(**self.policy_requirements)
            self.object_ring = get_ring(self.policy.ring_name,
                                        self.obj_required_replicas,
                                        self.obj_required_devices,
                                        server='object',
                                        ipport2server=self.ipport2server,
                                        config_paths=self.configs)

            self.servers_per_port = any(
                int(
                    readconf(c, section_name='object-replicator').get(
                        'servers_per_port', '0'))
                for c in self.configs['object-replicator'].values())

            Manager(['main']).start(wait=False)
            for ipport in self.ipport2server:
                check_server(ipport, self.ipport2server, self.pids)
            proxy_ipport = ('127.0.0.1', 8080)
            self.ipport2server[proxy_ipport] = 'proxy'
            self.url, self.token, self.account = check_server(
                proxy_ipport, self.ipport2server, self.pids)
            self.replicators = Manager([
                'account-replicator', 'container-replicator',
                'object-replicator'
            ])
            self.updaters = Manager(['container-updater', 'object-updater'])
        except BaseException:
            try:
                raise
            finally:
                try:
                    Manager(['all']).kill()
                except Exception:
                    pass

    def tearDown(self):
        Manager(['all']).kill()

    def device_dir(self, server, node):
        server_type, config_number = get_server_number(
            (node['ip'], node['port']), self.ipport2server)
        repl_server = '%s-replicator' % server_type
        conf = readconf(self.configs[repl_server][config_number],
                        section_name=repl_server)
        return os.path.join(conf['devices'], node['device'])

    def storage_dir(self, server, node, part=None, policy=None):
        policy = policy or self.policy
        device_path = self.device_dir(server, node)
        path_parts = [device_path, get_data_dir(policy)]
        if part is not None:
            path_parts.append(str(part))
        return os.path.join(*path_parts)

    def config_number(self, node):
        _server_type, config_number = get_server_number(
            (node['ip'], node['port']), self.ipport2server)
        return config_number

    def is_local_to(self, node1, node2):
        """
        Return True if both ring devices are "local" to each other (on the
        same "server").
        """
        if self.servers_per_port:
            return node1['ip'] == node2['ip']

        # Without a disambiguating IP, for SAIOs, we have to assume ports
        # uniquely identify "servers".  SAIOs should be configured to *either*
        # have unique IPs per node (e.g. 127.0.0.1, 127.0.0.2, etc.) OR unique
        # ports per server (i.e. sdb1 & sdb5 would have same port numbers in
        # the 8-disk EC ring).
        return node1['port'] == node2['port']

    def get_to_final_state(self):
        # these .stop()s are probably not strictly necessary,
        # but may prevent race conditions
        self.replicators.stop()
        self.updaters.stop()

        self.replicators.once()
        self.updaters.once()
        self.replicators.once()

    def kill_drive(self, device):
        if os.path.ismount(device):
            os.system('sudo umount %s' % device)
        else:
            renamer(device, device + "X")

    def revive_drive(self, device):
        disabled_name = device + "X"
        if os.path.isdir(disabled_name):
            renamer(device + "X", device)
        else:
            os.system('sudo mount %s' % device)
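
A hypothetical usage sketch for the kill_drive()/revive_drive() helpers above: fail one device for the duration of a reconstructor pass, then restore it. The subclass and test names are illustrative only, and a real subclass would also need the usual replica/device-count class attributes that ProbeTest.setUp() reads:

from swift.common.manager import Manager

class DriveFailureExample(ProbeTest):  # hypothetical subclass
    def test_cycle_with_failed_drive(self):
        node = self.object_ring.devs[0]  # any device in the object ring
        device_path = self.device_dir('object', node)
        self.kill_drive(device_path)  # unmounted/renamed devices give 507s
        try:
            Manager(['object-reconstructor']).once()
        finally:
            self.revive_drive(device_path)  # always restore the device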
Exemplo n.º 24
0
class TestReconstructorPropDurable(ECProbeTest):
    def setUp(self):
        super(TestReconstructorPropDurable, self).setUp()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()
        # sanity
        self.assertEqual(self.policy.policy_type, EC_POLICY)
        self.reconstructor = Manager(["object-reconstructor"])

    def direct_get(self, node, part):
        req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
        headers, data = direct_client.direct_get_object(node,
                                                        part,
                                                        self.account,
                                                        self.container_name,
                                                        self.object_name,
                                                        headers=req_headers,
                                                        resp_chunk_size=64 *
                                                        2**20)
        hasher = md5()
        for chunk in data:
            hasher.update(chunk)
        return hasher.hexdigest()

    def _check_node(self, node, part, etag, headers_post):
        # get fragment archive etag
        fragment_archive_etag = self.direct_get(node, part)

        # remove the .durable from the selected node
        part_dir = self.storage_dir('object', node, part=part)
        for dirs, subdirs, files in os.walk(part_dir):
            for fname in files:
                if fname.endswith('.durable'):
                    durable = os.path.join(dirs, fname)
                    os.remove(durable)
                    break
        try:
            os.remove(os.path.join(part_dir, 'hashes.pkl'))
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

        # fire up reconstructor to propagate the .durable
        self.reconstructor.once()

        # fragment is still exactly as it was before!
        self.assertEqual(fragment_archive_etag, self.direct_get(node, part))

        # check meta
        meta = client.head_object(self.url, self.token, self.container_name,
                                  self.object_name)
        for key in headers_post:
            self.assertIn(key, meta)
            self.assertEqual(meta[key], headers_post[key])

    def _format_node(self, node):
        return '%s#%s' % (node['device'], node['index'])

    def test_main(self):
        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url,
                             self.token,
                             self.container_name,
                             headers=headers)

        # PUT object
        contents = Body()
        headers = {'x-object-meta-foo': 'meta-foo'}
        headers_post = {'x-object-meta-bar': 'meta-bar'}

        etag = client.put_object(self.url,
                                 self.token,
                                 self.container_name,
                                 self.object_name,
                                 contents=contents,
                                 headers=headers)
        client.post_object(self.url,
                           self.token,
                           self.container_name,
                           self.object_name,
                           headers=headers_post)
        # swiftclient's post_object mutates the passed-in headers dict,
        # adding X-Auth-Token; drop it before later comparisons
        del headers_post['X-Auth-Token']

        # build up a list of node lists to kill a .durable from,
        # first try a single node
        # then adjacent nodes and then nodes >1 node apart
        opart, onodes = self.object_ring.get_nodes(self.account,
                                                   self.container_name,
                                                   self.object_name)
        single_node = [random.choice(onodes)]
        adj_nodes = [onodes[0], onodes[-1]]
        far_nodes = [onodes[0], onodes[-2]]
        test_list = [single_node, adj_nodes, far_nodes]

        for node_list in test_list:
            for onode in node_list:
                try:
                    self._check_node(onode, opart, etag, headers_post)
                except AssertionError as e:
                    self.fail(
                        str(e) + '\n... for node %r of scenario %r' %
                        (self._format_node(onode),
                         [self._format_node(n) for n in node_list]))
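
Both reconstructor tests in this file delete hashes.pkl after tampering with a partition: removing the cached suffix hashes forces the next reconstructor (or replicator) pass to rehash the partition instead of trusting the cache. A minimal standalone sketch of that step, with a hypothetical helper name:

import errno
import os

def invalidate_hashes(part_dir):
    # drop the cached suffix hashes so the consistency engine re-walks
    # this partition on its next cycle
    try:
        os.remove(os.path.join(part_dir, 'hashes.pkl'))
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise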
Exemplo n.º 25
0
class TestReconstructorRebuild(ECProbeTest):

    def setUp(self):
        super(TestReconstructorRebuild, self).setUp()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()
        # sanity
        self.assertEqual(self.policy.policy_type, EC_POLICY)
        self.reconstructor = Manager(["object-reconstructor"])

        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url, self.token, self.container_name,
                             headers=headers)

        # PUT object and POST some metadata
        contents = Body()
        headers = {'x-object-meta-foo': 'meta-foo'}
        self.headers_post = {'x-object-meta-bar': 'meta-bar'}

        self.etag = client.put_object(self.url, self.token,
                                      self.container_name,
                                      self.object_name,
                                      contents=contents, headers=headers)
        client.post_object(self.url, self.token, self.container_name,
                           self.object_name, headers=dict(self.headers_post))

        self.opart, self.onodes = self.object_ring.get_nodes(
            self.account, self.container_name, self.object_name)

        # stash frag etags and metadata for later comparison
        self.frag_headers, self.frag_etags = self._assert_all_nodes_have_frag()
        for node_index, hdrs in self.frag_headers.items():
            # sanity check
            self.assertIn(
                'X-Backend-Durable-Timestamp', hdrs,
                'Missing durable timestamp in %r' % self.frag_headers)

    def proxy_get(self):
        # GET object
        headers, body = client.get_object(self.url, self.token,
                                          self.container_name,
                                          self.object_name,
                                          resp_chunk_size=64 * 2 ** 10)
        resp_checksum = md5()
        for chunk in body:
            resp_checksum.update(chunk)
        return headers, resp_checksum.hexdigest()

    def direct_get(self, node, part, require_durable=True):
        req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
        if not require_durable:
            req_headers.update(
                {'X-Backend-Fragment-Preferences': json.dumps([])})
        headers, data = direct_client.direct_get_object(
            node, part, self.account, self.container_name,
            self.object_name, headers=req_headers,
            resp_chunk_size=64 * 2 ** 20)
        hasher = md5()
        for chunk in data:
            hasher.update(chunk)
        return headers, hasher.hexdigest()

    def _break_nodes(self, failed, non_durable):
        # delete partitions on the failed nodes and remove durable marker from
        # non-durable nodes
        for i, node in enumerate(self.onodes):
            part_dir = self.storage_dir('object', node, part=self.opart)
            if i in failed:
                shutil.rmtree(part_dir, True)
                try:
                    self.direct_get(node, self.opart)
                except direct_client.DirectClientException as err:
                    self.assertEqual(err.http_status, 404)
            elif i in non_durable:
                for dirs, subdirs, files in os.walk(part_dir):
                    for fname in files:
                        if fname.endswith('.data'):
                            non_durable_fname = fname.replace('#d', '')
                            os.rename(os.path.join(dirs, fname),
                                      os.path.join(dirs, non_durable_fname))
                            break
                headers, etag = self.direct_get(node, self.opart,
                                                require_durable=False)
                self.assertNotIn('X-Backend-Durable-Timestamp', headers)
            try:
                os.remove(os.path.join(part_dir, 'hashes.pkl'))
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise

    def _format_node(self, node):
        return '%s#%s' % (node['device'], node['index'])

    def _assert_all_nodes_have_frag(self):
        # check all frags are in place
        failures = []
        frag_etags = {}
        frag_headers = {}
        for node in self.onodes:
            try:
                headers, etag = self.direct_get(node, self.opart)
                frag_etags[node['index']] = etag
                del headers['Date']  # Date header will vary so remove it
                frag_headers[node['index']] = headers
            except direct_client.DirectClientException as err:
                failures.append((node, err))
        if failures:
            self.fail('\n'.join(['    Node %r raised %r' %
                                 (self._format_node(node), exc)
                                 for (node, exc) in failures]))
        return frag_headers, frag_etags

    @contextmanager
    def _annotate_failure_with_scenario(self, failed, non_durable):
        try:
            yield
        except (AssertionError, ClientException) as err:
            self.fail(
                'Scenario with failed nodes: %r, non-durable nodes: %r\n'
                ' failed with:\n%s' %
                ([self._format_node(self.onodes[n]) for n in failed],
                 [self._format_node(self.onodes[n]) for n in non_durable], err)
            )

    def _test_rebuild_scenario(self, failed, non_durable,
                               reconstructor_cycles):
        # helper method to test a scenario with some nodes missing their
        # fragment and some nodes having non-durable fragments
        with self._annotate_failure_with_scenario(failed, non_durable):
            self._break_nodes(failed, non_durable)

        # make sure we can still GET the object and it is correct; the
        # proxy is doing decode on remaining fragments to get the obj
        with self._annotate_failure_with_scenario(failed, non_durable):
            headers, etag = self.proxy_get()
            self.assertEqual(self.etag, etag)
            for key in self.headers_post:
                self.assertIn(key, headers)
                self.assertEqual(self.headers_post[key], headers[key])

        # fire up reconstructor
        for i in range(reconstructor_cycles):
            self.reconstructor.once()

        # check GET via proxy returns expected data and metadata
        with self._annotate_failure_with_scenario(failed, non_durable):
            headers, etag = self.proxy_get()
            self.assertEqual(self.etag, etag)
            for key in self.headers_post:
                self.assertIn(key, headers)
                self.assertEqual(self.headers_post[key], headers[key])
        # check all frags are intact, durable and have expected metadata
        with self._annotate_failure_with_scenario(failed, non_durable):
            frag_headers, frag_etags = self._assert_all_nodes_have_frag()
            self.assertEqual(self.frag_etags, frag_etags)
            # self.frag_headers includes X-Backend-Durable-Timestamp so this
            # assertion confirms that the rebuilt frags are all durable
            self.assertEqual(self.frag_headers, frag_headers)

    def test_rebuild_missing_frags(self):
        # build up a list of node lists to kill data from,
        # first try a single node
        # then adjacent nodes and then nodes >1 node apart
        single_node = (random.randint(0, 5),)
        adj_nodes = (0, 5)
        far_nodes = (0, 4)

        for failed_nodes in [single_node, adj_nodes, far_nodes]:
            self._test_rebuild_scenario(failed_nodes, [], 1)

    def test_rebuild_non_durable_frags(self):
        # build up a list of node lists to make non-durable,
        # first try a single node
        # then adjacent nodes and then nodes >1 node apart
        single_node = (random.randint(0, 5),)
        adj_nodes = (0, 5)
        far_nodes = (0, 4)

        for non_durable_nodes in [single_node, adj_nodes, far_nodes]:
            self._test_rebuild_scenario([], non_durable_nodes, 1)

    def test_rebuild_with_missing_frags_and_non_durable_frags(self):
        # pick some nodes with parts deleted, some with non-durable fragments
        scenarios = [
            # failed, non-durable
            ((0, 2), (4,)),
            ((0, 4), (2,)),
        ]
        for failed, non_durable in scenarios:
            self._test_rebuild_scenario(failed, non_durable, 3)
        scenarios = [
            # failed, non-durable
            ((0, 1), (2,)),
            ((0, 2), (1,)),
        ]
        for failed, non_durable in scenarios:
            # why 2 repeats? consider missing fragments on nodes 0 and 1 and
            # a missing durable on node 2: the first reconstructor cycle on
            # node 3 will make node 2 durable, the first cycle on node 5 will
            # rebuild on node 0; a second cycle on node 0 or 2 will rebuild
            # on node 1. Note that it is possible that the reconstructor
            # processes on each node run in an order such that all rebuilds
            # complete in one cycle, but that is not guaranteed; we allow 2
            # cycles to be sure.
            self._test_rebuild_scenario(failed, non_durable, 2)
        scenarios = [
            # failed, non-durable
            ((0, 2), (1, 3, 5)),
            ((0,), (1, 2, 4, 5)),
        ]
        for failed, non_durable in scenarios:
            # why 3 repeats? consider a missing fragment on node 0 and a
            # single durable on node 3: the first reconstructor cycle on node
            # 3 will make nodes 2 and 4 durable, the second cycle on nodes 2
            # and 4 will make nodes 1 and 5 durable, and the third cycle on
            # node 1 or 5 will reconstruct the missing fragment on node 0.
            self._test_rebuild_scenario(failed, non_durable, 3)

    def test_rebuild_partner_down(self):
        # find a primary server that only has one of its devices in the
        # primary node list
        group_nodes_by_config = defaultdict(list)
        for n in self.onodes:
            group_nodes_by_config[self.config_number(n)].append(n)
        for config_number, node_list in group_nodes_by_config.items():
            if len(node_list) == 1:
                break
        else:
            self.fail('ring balancing did not use all available nodes')
        primary_node = node_list[0]

        # pick one of its partners to fail at random
        partner_node = random.choice(_get_partners(
            primary_node['index'], self.onodes))

        # 507 the partner device
        device_path = self.device_dir('object', partner_node)
        self.kill_drive(device_path)

        # select another primary sync_to node to fail
        failed_primary = [n for n in self.onodes if n['id'] not in
                          (primary_node['id'], partner_node['id'])][0]
        # ... capture its fragment etag
        failed_primary_meta, failed_primary_etag = self.direct_get(
            failed_primary, self.opart)
        # ... and delete it
        part_dir = self.storage_dir('object', failed_primary, part=self.opart)
        shutil.rmtree(part_dir, True)

        # reconstruct from the primary, while one of its partners is 507'd
        self.reconstructor.once(number=self.config_number(primary_node))

        # the other failed primary will get its fragment rebuilt instead
        failed_primary_meta_new, failed_primary_etag_new = self.direct_get(
            failed_primary, self.opart)
        del failed_primary_meta['Date']
        del failed_primary_meta_new['Date']
        self.assertEqual(failed_primary_etag, failed_primary_etag_new)
        self.assertEqual(failed_primary_meta, failed_primary_meta_new)

        # just to be nice
        self.revive_drive(device_path)
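
test_rebuild_partner_down above leans on _get_partners() to pick the nodes that sync with a given primary. A minimal sketch of the idea, assuming (as these tests do) that a partner is an immediate neighbour of the fragment index in the primary node list:

def _get_partners(frag_index, part_nodes):
    # the left and right neighbours of this fragment index in the
    # primary node list, wrapping around at the ends
    return [
        part_nodes[(frag_index - 1) % len(part_nodes)],
        part_nodes[(frag_index + 1) % len(part_nodes)],
    ]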
Exemplo n.º 26
0
class ProbeTest(unittest.TestCase):
    """
    Don't instantiate this directly, use a child class instead.
    """

    def setUp(self):
        resetswift()
        self.pids = {}
        try:
            self.ipport2server = {}
            self.configs = defaultdict(dict)
            self.account_ring = get_ring(
                'account',
                self.acct_cont_required_replicas,
                self.acct_cont_required_devices,
                ipport2server=self.ipport2server,
                config_paths=self.configs)
            self.container_ring = get_ring(
                'container',
                self.acct_cont_required_replicas,
                self.acct_cont_required_devices,
                ipport2server=self.ipport2server,
                config_paths=self.configs)
            self.policy = get_policy(**self.policy_requirements)
            self.object_ring = get_ring(
                self.policy.ring_name,
                self.obj_required_replicas,
                self.obj_required_devices,
                server='object',
                ipport2server=self.ipport2server,
                config_paths=self.configs)

            self.servers_per_port = any(
                int(readconf(c, section_name='object-replicator').get(
                    'servers_per_port', '0'))
                for c in self.configs['object-replicator'].values())

            Manager(['main']).start(wait=False)
            for ipport in self.ipport2server:
                check_server(ipport, self.ipport2server, self.pids)
            proxy_ipport = ('127.0.0.1', 8080)
            self.ipport2server[proxy_ipport] = 'proxy'
            self.url, self.token, self.account = check_server(
                proxy_ipport, self.ipport2server, self.pids)
            self.replicators = Manager(
                ['account-replicator', 'container-replicator',
                 'object-replicator'])
            self.updaters = Manager(['container-updater', 'object-updater'])
        except BaseException:
            try:
                raise
            finally:
                try:
                    Manager(['all']).kill()
                except Exception:
                    pass

    def tearDown(self):
        Manager(['all']).kill()

    def device_dir(self, server, node):
        server_type, config_number = get_server_number(
            (node['ip'], node['port']), self.ipport2server)
        repl_server = '%s-replicator' % server_type
        conf = readconf(self.configs[repl_server][config_number],
                        section_name=repl_server)
        return os.path.join(conf['devices'], node['device'])

    def storage_dir(self, server, node, part=None, policy=None):
        policy = policy or self.policy
        device_path = self.device_dir(server, node)
        path_parts = [device_path, get_data_dir(policy)]
        if part is not None:
            path_parts.append(str(part))
        return os.path.join(*path_parts)

    def config_number(self, node):
        _server_type, config_number = get_server_number(
            (node['ip'], node['port']), self.ipport2server)
        return config_number

    def is_local_to(self, node1, node2):
        """
        Return True if both ring devices are "local" to each other (on the
        same "server").
        """
        if self.servers_per_port:
            return node1['ip'] == node2['ip']

        # Without a disambiguating IP, for SAIOs, we have to assume ports
        # uniquely identify "servers".  SAIOs should be configured to *either*
        # have unique IPs per node (e.g. 127.0.0.1, 127.0.0.2, etc.) OR unique
        # ports per server (i.e. sdb1 & sdb5 would have same port numbers in
        # the 8-disk EC ring).
        return node1['port'] == node2['port']

    def get_to_final_state(self):
        # these .stop()s are probably not strictly necessary,
        # but may prevent race conditions
        self.replicators.stop()
        self.updaters.stop()

        self.replicators.once()
        self.updaters.once()
        self.replicators.once()

    def kill_drive(self, device):
        if os.path.ismount(device):
            os.system('sudo umount %s' % device)
        else:
            renamer(device, device + "X")

    def revive_drive(self, device):
        disabled_name = device + "X"
        if os.path.isdir(disabled_name):
            renamer(device + "X", device)
        else:
            os.system('sudo mount %s' % device)

    def make_internal_client(self, object_post_as_copy=True):
        tempdir = mkdtemp()
        try:
            conf_path = os.path.join(tempdir, 'internal_client.conf')
            conf_body = """
            [DEFAULT]
            swift_dir = /etc/swift

            [pipeline:main]
            pipeline = catch_errors cache proxy-server

            [app:proxy-server]
            use = egg:swift#proxy
            object_post_as_copy = %s

            [filter:cache]
            use = egg:swift#memcache

            [filter:catch_errors]
            use = egg:swift#catch_errors
            """ % object_post_as_copy
            with open(conf_path, 'w') as f:
                f.write(dedent(conf_body))
            return internal_client.InternalClient(conf_path, 'test', 1)
        finally:
            shutil.rmtree(tempdir)
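
A hypothetical usage sketch for make_internal_client() above, written as an extra method on a ProbeTest subclass; it assumes the test has set self.container_name and self.object_name the way the other probe tests here do:

    def _head_object_via_internal_client(self):
        # read object metadata through a specific storage policy,
        # bypassing the externally configured proxy pipeline
        int_client = self.make_internal_client(object_post_as_copy=False)
        return int_client.get_object_metadata(
            self.account, self.container_name, self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(self.policy)})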
Exemplo n.º 27
0
class TestReconstructorRebuild(ECProbeTest):
    def setUp(self):
        super(TestReconstructorRebuild, self).setUp()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()
        # sanity
        self.assertEqual(self.policy.policy_type, EC_POLICY)
        self.reconstructor = Manager(["object-reconstructor"])

    def proxy_get(self):
        # GET object
        headers, body = client.get_object(self.url,
                                          self.token,
                                          self.container_name,
                                          self.object_name,
                                          resp_chunk_size=64 * 2**10)
        resp_checksum = md5()
        for chunk in body:
            resp_checksum.update(chunk)
        return resp_checksum.hexdigest()

    def direct_get(self, node, part):
        req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
        headers, data = direct_client.direct_get_object(node,
                                                        part,
                                                        self.account,
                                                        self.container_name,
                                                        self.object_name,
                                                        headers=req_headers,
                                                        resp_chunk_size=64 *
                                                        2**20)
        hasher = md5()
        for chunk in data:
            hasher.update(chunk)
        return hasher.hexdigest()

    def _check_node(self, node, part, etag, headers_post):
        # get fragment archive etag
        fragment_archive_etag = self.direct_get(node, part)

        # remove data from the selected node
        part_dir = self.storage_dir('object', node, part=part)
        shutil.rmtree(part_dir, True)

        # this node can't serve the data any more
        try:
            self.direct_get(node, part)
        except direct_client.DirectClientException as err:
            self.assertEqual(err.http_status, 404)
        else:
            self.fail('Node data on %r was not fully destroyed!' % (node,))

        # make sure we can still GET the object and it is correct; the
        # proxy is doing decode on remaining fragments to get the obj
        self.assertEqual(etag, self.proxy_get())

        # fire up reconstructor
        self.reconstructor.once()

        # fragment is rebuilt exactly as it was before!
        self.assertEqual(fragment_archive_etag, self.direct_get(node, part))

        # check meta
        meta = client.head_object(self.url, self.token, self.container_name,
                                  self.object_name)
        for key in headers_post:
            self.assertIn(key, meta)
            self.assertEqual(meta[key], headers_post[key])

    def _format_node(self, node):
        return '%s#%s' % (node['device'], node['index'])

    def test_main(self):
        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url,
                             self.token,
                             self.container_name,
                             headers=headers)

        # PUT object
        contents = Body()
        headers = {'x-object-meta-foo': 'meta-foo'}
        headers_post = {'x-object-meta-bar': 'meta-bar'}

        etag = client.put_object(self.url,
                                 self.token,
                                 self.container_name,
                                 self.object_name,
                                 contents=contents,
                                 headers=headers)
        client.post_object(self.url,
                           self.token,
                           self.container_name,
                           self.object_name,
                           headers=headers_post)
        # swiftclient's post_object mutates the passed-in headers dict,
        # adding X-Auth-Token; drop it before later comparisons
        del headers_post['X-Auth-Token']

        # build up a list of node lists to kill data from,
        # first try a single node
        # then adjacent nodes and then nodes >1 node apart
        opart, onodes = self.object_ring.get_nodes(self.account,
                                                   self.container_name,
                                                   self.object_name)
        single_node = [random.choice(onodes)]
        adj_nodes = [onodes[0], onodes[-1]]
        far_nodes = [onodes[0], onodes[-2]]
        test_list = [single_node, adj_nodes, far_nodes]

        for node_list in test_list:
            for onode in node_list:
                try:
                    self._check_node(onode, opart, etag, headers_post)
                except AssertionError as e:
                    self.fail(
                        str(e) + '\n... for node %r of scenario %r' %
                        (self._format_node(onode),
                         [self._format_node(n) for n in node_list]))

    def test_rebuild_partner_down(self):
        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url,
                             self.token,
                             self.container_name,
                             headers=headers)

        # PUT object
        contents = Body()
        client.put_object(self.url,
                          self.token,
                          self.container_name,
                          self.object_name,
                          contents=contents)

        opart, onodes = self.object_ring.get_nodes(self.account,
                                                   self.container_name,
                                                   self.object_name)

        # find a primary server that only has one of its devices in the
        # primary node list
        group_nodes_by_config = defaultdict(list)
        for n in onodes:
            group_nodes_by_config[self.config_number(n)].append(n)
        for config_number, node_list in group_nodes_by_config.items():
            if len(node_list) == 1:
                break
        else:
            self.fail('ring balancing did not use all available nodes')
        primary_node = node_list[0]

        # pick one of its partners to fail at random
        partner_node = random.choice(
            _get_partners(primary_node['index'], onodes))

        # 507 the partner device
        device_path = self.device_dir('object', partner_node)
        self.kill_drive(device_path)

        # select another primary sync_to node to fail
        failed_primary = [
            n for n in onodes
            if n['id'] not in (primary_node['id'], partner_node['id'])
        ][0]
        # ... capture its fragment etag
        failed_primary_etag = self.direct_get(failed_primary, opart)
        # ... and delete it
        part_dir = self.storage_dir('object', failed_primary, part=opart)
        shutil.rmtree(part_dir, True)

        # reconstruct from the primary, while one of its partners is 507'd
        self.reconstructor.once(number=self.config_number(primary_node))

        # the other failed primary will get its fragment rebuilt instead
        self.assertEqual(failed_primary_etag,
                         self.direct_get(failed_primary, opart))

        # just to be nice
        self.revive_drive(device_path)
Exemplo n.º 28
0
class TestReconstructorPropDurable(ECProbeTest):

    def setUp(self):
        super(TestReconstructorPropDurable, self).setUp()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()
        # sanity
        self.assertEqual(self.policy.policy_type, EC_POLICY)
        self.reconstructor = Manager(["object-reconstructor"])

    def direct_get(self, node, part):
        req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
        headers, data = direct_client.direct_get_object(
            node, part, self.account, self.container_name,
            self.object_name, headers=req_headers,
            resp_chunk_size=64 * 2 ** 20)
        hasher = md5()
        for chunk in data:
            hasher.update(chunk)
        return hasher.hexdigest()

    def _check_node(self, node, part, etag, headers_post):
        # get fragment archive etag
        fragment_archive_etag = self.direct_get(node, part)

        # remove the .durable from the selected node
        part_dir = self.storage_dir('object', node, part=part)
        for dirs, subdirs, files in os.walk(part_dir):
            for fname in files:
                if fname.endswith('.durable'):
                    durable = os.path.join(dirs, fname)
                    os.remove(durable)
                    break
        try:
            os.remove(os.path.join(part_dir, 'hashes.pkl'))
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

        # fire up reconstructor to propagate the .durable
        self.reconstructor.once()

        # fragment is still exactly as it was before!
        self.assertEqual(fragment_archive_etag,
                         self.direct_get(node, part))

        # check meta
        meta = client.head_object(self.url, self.token,
                                  self.container_name,
                                  self.object_name)
        for key in headers_post:
            self.assertIn(key, meta)
            self.assertEqual(meta[key], headers_post[key])

    def _format_node(self, node):
        return '%s#%s' % (node['device'], node['index'])

    def test_main(self):
        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url, self.token, self.container_name,
                             headers=headers)

        # PUT object
        contents = Body()
        headers = {'x-object-meta-foo': 'meta-foo'}
        headers_post = {'x-object-meta-bar': 'meta-bar'}

        etag = client.put_object(self.url, self.token,
                                 self.container_name,
                                 self.object_name,
                                 contents=contents, headers=headers)
        client.post_object(self.url, self.token, self.container_name,
                           self.object_name, headers=headers_post)
        # swiftclient's post_object mutates the passed-in headers dict,
        # adding X-Auth-Token; drop it before later comparisons
        del headers_post['X-Auth-Token']

        # build up a list of node lists to kill a .durable from,
        # first try a single node
        # then adjacent nodes and then nodes >1 node apart
        opart, onodes = self.object_ring.get_nodes(
            self.account, self.container_name, self.object_name)
        single_node = [random.choice(onodes)]
        adj_nodes = [onodes[0], onodes[-1]]
        far_nodes = [onodes[0], onodes[-2]]
        test_list = [single_node, adj_nodes, far_nodes]

        for node_list in test_list:
            for onode in node_list:
                try:
                    self._check_node(onode, opart, etag, headers_post)
                except AssertionError as e:
                    self.fail(
                        str(e) + '\n... for node %r of scenario %r' % (
                            self._format_node(onode),
                            [self._format_node(n) for n in node_list]))
Exemplo n.º 29
0
    def test_reconciler_move_object_twice(self):
        # select some policies
        old_policy = random.choice(ENABLED_POLICIES)
        new_policy = random.choice([p for p in ENABLED_POLICIES
                                    if p != old_policy])

        # setup a split brain
        self.brain.stop_handoff_half()
        # get old_policy on two primaries
        self.brain.put_container(policy_index=int(old_policy))
        self.brain.start_handoff_half()
        self.brain.stop_primary_half()
        # force a recreate on handoffs
        self.brain.put_container(policy_index=int(old_policy))
        self.brain.delete_container()
        self.brain.put_container(policy_index=int(new_policy))
        self.brain.put_object()  # populate memcache with new_policy
        self.brain.start_primary_half()

        # at this point two primaries have old policy
        container_part, container_nodes = self.container_ring.get_nodes(
            self.account, self.container_name)
        head_responses = []
        for node in container_nodes:
            metadata = direct_client.direct_head_container(
                node, container_part, self.account, self.container_name)
            head_responses.append((node, metadata))
        old_container_node_ids = [
            node['id'] for node, metadata in head_responses
            if int(old_policy) ==
            int(metadata['X-Backend-Storage-Policy-Index'])]
        self.assertEqual(2, len(old_container_node_ids))

        # hopefully memcache still has the new policy cached
        self.brain.put_object(headers={'x-object-meta-test': 'custom-meta'},
                              contents='VERIFY')
        # double-check object correctly written to new policy
        conf_files = []
        for server in Manager(['container-reconciler']).servers:
            conf_files.extend(server.conf_files())
        conf_file = conf_files[0]
        int_client = InternalClient(conf_file, 'probe-test', 3)
        int_client.get_object_metadata(
            self.account, self.container_name, self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(new_policy)})
        int_client.get_object_metadata(
            self.account, self.container_name, self.object_name,
            acceptable_statuses=(4,),
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})

        # shutdown the containers that know about the new policy
        self.brain.stop_handoff_half()

        # and get rows enqueued from old nodes
        for server_type in ('container-replicator', 'container-updater'):
            server = Manager([server_type])
            tuple(server.once(number=n + 1) for n in old_container_node_ids)

        # verify entry in the queue for the "misplaced" new_policy
        for container in int_client.iter_containers('.misplaced_objects'):
            for obj in int_client.iter_objects('.misplaced_objects',
                                               container['name']):
                expected = '%d:/%s/%s/%s' % (new_policy, self.account,
                                             self.container_name,
                                             self.object_name)
                self.assertEqual(obj['name'], expected)

        Manager(['container-reconciler']).once()

        # verify object in old_policy
        int_client.get_object_metadata(
            self.account, self.container_name, self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})

        # verify object is *not* in new_policy
        int_client.get_object_metadata(
            self.account, self.container_name, self.object_name,
            acceptable_statuses=(4,),
            headers={'X-Backend-Storage-Policy-Index': int(new_policy)})

        self.get_to_final_state()

        # verify entry in the queue
        for container in int_client.iter_containers('.misplaced_objects'):
            for obj in int_client.iter_objects('.misplaced_objects',
                                               container['name']):
                expected = '%d:/%s/%s/%s' % (old_policy, self.account,
                                             self.container_name,
                                             self.object_name)
                self.assertEqual(obj['name'], expected)

        Manager(['container-reconciler']).once()

        # and now it flops back
        int_client.get_object_metadata(
            self.account, self.container_name, self.object_name,
            headers={'X-Backend-Storage-Policy-Index': int(new_policy)})
        int_client.get_object_metadata(
            self.account, self.container_name, self.object_name,
            acceptable_statuses=(4,),
            headers={'X-Backend-Storage-Policy-Index': int(old_policy)})

        # make sure the queue is settled
        self.get_to_final_state()
        for container in int_client.iter_containers('.misplaced_objects'):
            for obj in int_client.iter_objects('.misplaced_objects',
                                               container['name']):
                self.fail('Found unexpected object %r in the queue' % obj)

        # verify that the object data read by external client is correct
        headers, data = self._get_object_patiently(int(new_policy))
        self.assertEqual('VERIFY', data)
        self.assertEqual('custom-meta', headers['x-object-meta-test'])
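
The queue checks above expect reconciler entries named '<policy_index>:/<account>/<container>/<object>'. A minimal parser sketch for that format (hypothetical helper, mirroring only what the test asserts):

def parse_misplaced_name(name):
    # '2:/AUTH_test/container/object' -> (2, 'AUTH_test', 'container',
    # 'object')
    policy_index, path = name.split(':', 1)
    account, container, obj = path.lstrip('/').split('/', 2)
    return int(policy_index), account, container, obj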
Exemplo n.º 30
0
class ProbeTest(unittest.TestCase):
    """
    Don't instantiate this directly, use a child class instead.
    """

    def setUp(self):
        p = Popen("resetswift 2>&1", shell=True, stdout=PIPE)
        stdout, _stderr = p.communicate()
        print stdout
        Manager(['all']).stop()
        self.pids = {}
        try:
            self.account_ring = get_ring(
                'account',
                self.acct_cont_required_replicas,
                self.acct_cont_required_devices)
            self.container_ring = get_ring(
                'container',
                self.acct_cont_required_replicas,
                self.acct_cont_required_devices)
            self.policy = get_policy(**self.policy_requirements)
            self.object_ring = get_ring(
                self.policy.ring_name,
                self.obj_required_replicas,
                self.obj_required_devices,
                server='object')
            Manager(['main']).start(wait=False)
            self.port2server = {}
            for server, port in [('account', 6002), ('container', 6001),
                                 ('object', 6000)]:
                for number in xrange(1, 9):
                    self.port2server[port + (number * 10)] = \
                        '%s%d' % (server, number)
            for port in self.port2server:
                check_server(port, self.port2server, self.pids)
            self.port2server[8080] = 'proxy'
            self.url, self.token, self.account = \
                check_server(8080, self.port2server, self.pids)
            self.configs = defaultdict(dict)
            for name in ('account', 'container', 'object'):
                for server_name in (name, '%s-replicator' % name):
                    for server in Manager([server_name]):
                        for i, conf in enumerate(server.conf_files(), 1):
                            self.configs[server.server][i] = conf
            self.replicators = Manager(
                ['account-replicator', 'container-replicator',
                 'object-replicator'])
            self.updaters = Manager(['container-updater', 'object-updater'])
        except BaseException:
            try:
                raise
            finally:
                try:
                    Manager(['all']).kill()
                except Exception:
                    pass

    def tearDown(self):
        Manager(['all']).kill()

    def get_to_final_state(self):
        # these .stop()s are probably not strictly necessary,
        # but may prevent race conditions
        self.replicators.stop()
        self.updaters.stop()

        self.replicators.once()
        self.updaters.once()
        self.replicators.once()
Exemplo n.º 31
0
class TestReconstructorRebuild(ECProbeTest):

    def _make_name(self, prefix):
        return '%s%s' % (prefix, uuid.uuid4())

    def setUp(self):
        super(TestReconstructorRebuild, self).setUp()
        self.container_name = self._make_name('container-')
        self.object_name = self._make_name('object-')
        # sanity
        self.assertEqual(self.policy.policy_type, EC_POLICY)
        self.reconstructor = Manager(["object-reconstructor"])

        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url, self.token, self.container_name,
                             headers=headers)

        # PUT object and POST some metadata
        self.proxy_put()
        self.headers_post = {
            self._make_name('x-object-meta-').decode('utf8'):
                self._make_name('meta-bar-').decode('utf8')}
        client.post_object(self.url, self.token, self.container_name,
                           self.object_name, headers=dict(self.headers_post))

        self.opart, self.onodes = self.object_ring.get_nodes(
            self.account, self.container_name, self.object_name)

        # stash frag etags and metadata for later comparison
        self.frag_headers, self.frag_etags = self._assert_all_nodes_have_frag()
        for node_index, hdrs in self.frag_headers.items():
            # sanity check
            self.assertIn(
                'X-Backend-Durable-Timestamp', hdrs,
                'Missing durable timestamp in %r' % self.frag_headers)

    def proxy_put(self, extra_headers=None):
        contents = Body()
        headers = {
            self._make_name('x-object-meta-').decode('utf8'):
                self._make_name('meta-foo-').decode('utf8'),
        }
        if extra_headers:
            headers.update(extra_headers)
        self.etag = client.put_object(self.url, self.token,
                                      self.container_name,
                                      self.object_name,
                                      contents=contents, headers=headers)

    def proxy_get(self):
        # GET object
        headers, body = client.get_object(self.url, self.token,
                                          self.container_name,
                                          self.object_name,
                                          resp_chunk_size=64 * 2 ** 10)
        resp_checksum = md5()
        for chunk in body:
            resp_checksum.update(chunk)
        return headers, resp_checksum.hexdigest()

    def direct_get(self, node, part, require_durable=True, extra_headers=None):
        req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
        if extra_headers:
            req_headers.update(extra_headers)
        if not require_durable:
            req_headers.update(
                {'X-Backend-Fragment-Preferences': json.dumps([])})
        # node dict has unicode values so utf8 decode our path parts too in
        # case they have non-ascii characters
        headers, data = direct_client.direct_get_object(
            node, part, self.account.decode('utf8'),
            self.container_name.decode('utf8'),
            self.object_name.decode('utf8'), headers=req_headers,
            resp_chunk_size=64 * 2 ** 20)
        hasher = md5()
        for chunk in data:
            hasher.update(chunk)
        return headers, hasher.hexdigest()

    def _break_nodes(self, failed, non_durable):
        # delete partitions on the failed nodes and remove durable marker from
        # non-durable nodes
        for i, node in enumerate(self.onodes):
            part_dir = self.storage_dir('object', node, part=self.opart)
            if i in failed:
                shutil.rmtree(part_dir, True)
                try:
                    self.direct_get(node, self.opart)
                except direct_client.DirectClientException as err:
                    self.assertEqual(err.http_status, 404)
            elif i in non_durable:
                for dirs, subdirs, files in os.walk(part_dir):
                    for fname in files:
                        if fname.endswith('.data'):
                            non_durable_fname = fname.replace('#d', '')
                            os.rename(os.path.join(dirs, fname),
                                      os.path.join(dirs, non_durable_fname))
                            break
                headers, etag = self.direct_get(node, self.opart,
                                                require_durable=False)
                self.assertNotIn('X-Backend-Durable-Timestamp', headers)
            try:
                os.remove(os.path.join(part_dir, 'hashes.pkl'))
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise

    def _format_node(self, node):
        return '%s#%s' % (node['device'], node['index'])

    def _assert_all_nodes_have_frag(self, extra_headers=None):
        # check all frags are in place
        failures = []
        frag_etags = {}
        frag_headers = {}
        for node in self.onodes:
            try:
                headers, etag = self.direct_get(node, self.opart,
                                                extra_headers=extra_headers)
                frag_etags[node['index']] = etag
                del headers['Date']  # Date header will vary so remove it
                frag_headers[node['index']] = headers
            except direct_client.DirectClientException as err:
                failures.append((node, err))
        if failures:
            self.fail('\n'.join(['    Node %r raised %r' %
                                 (self._format_node(node), exc)
                                 for (node, exc) in failures]))
        return frag_headers, frag_etags

    @contextmanager
    def _annotate_failure_with_scenario(self, failed, non_durable):
        try:
            yield
        except (AssertionError, ClientException) as err:
            self.fail(
                'Scenario with failed nodes: %r, non-durable nodes: %r\n'
                ' failed with:\n%s' %
                ([self._format_node(self.onodes[n]) for n in failed],
                 [self._format_node(self.onodes[n]) for n in non_durable], err)
            )

    def _test_rebuild_scenario(self, failed, non_durable,
                               reconstructor_cycles):
        # helper method to test a scenario with some nodes missing their
        # fragment and some nodes having non-durable fragments
        with self._annotate_failure_with_scenario(failed, non_durable):
            self._break_nodes(failed, non_durable)

        # make sure we can still GET the object and it is correct; the
        # proxy is doing decode on remaining fragments to get the obj
        with self._annotate_failure_with_scenario(failed, non_durable):
            headers, etag = self.proxy_get()
            self.assertEqual(self.etag, etag)
            for key in self.headers_post:
                self.assertIn(key, headers)
                self.assertEqual(self.headers_post[key], headers[key])

        # fire up reconstructor
        for i in range(reconstructor_cycles):
            self.reconstructor.once()

        # check GET via proxy returns expected data and metadata
        with self._annotate_failure_with_scenario(failed, non_durable):
            headers, etag = self.proxy_get()
            self.assertEqual(self.etag, etag)
            for key in self.headers_post:
                self.assertIn(key, headers)
                self.assertEqual(self.headers_post[key], headers[key])
        # check all frags are intact, durable and have expected metadata
        with self._annotate_failure_with_scenario(failed, non_durable):
            frag_headers, frag_etags = self._assert_all_nodes_have_frag()
            self.assertEqual(self.frag_etags, frag_etags)
            # self.frag_headers includes X-Backend-Durable-Timestamp, so this
            # assertion confirms that the rebuilt frags are all durable
            self.assertEqual(self.frag_headers, frag_headers)

    def test_rebuild_missing_frags(self):
        # build up a list of node lists to kill data from,
        # first try a single node
        # then adjacent nodes and then nodes >1 node apart
        single_node = (random.randint(0, 5),)
        adj_nodes = (0, 5)
        far_nodes = (0, 4)

        for failed_nodes in [single_node, adj_nodes, far_nodes]:
            self._test_rebuild_scenario(failed_nodes, [], 1)

    def test_rebuild_non_durable_frags(self):
        # build up a list of node lists to make non-durable,
        # first try a single node
        # then adjacent nodes and then nodes >1 node apart
        single_node = (random.randint(0, 5),)
        adj_nodes = (0, 5)
        far_nodes = (0, 4)

        for non_durable_nodes in [single_node, adj_nodes, far_nodes]:
            self._test_rebuild_scenario([], non_durable_nodes, 1)

    def test_rebuild_with_missing_frags_and_non_durable_frags(self):
        # pick some nodes with parts deleted, some with non-durable fragments
        scenarios = [
            # failed, non-durable
            ((0, 2), (4,)),
            ((0, 4), (2,)),
        ]
        for failed, non_durable in scenarios:
            self._test_rebuild_scenario(failed, non_durable, 3)
        scenarios = [
            # failed, non-durable
            ((0, 1), (2,)),
            ((0, 2), (1,)),
        ]
        for failed, non_durable in scenarios:
            # why 2 repeats? consider missing fragments on nodes 0 and 1 and
            # a missing durable on node 2: the first reconstructor cycle on
            # node 3 will make node 2 durable, and the first cycle on node 5
            # will rebuild on node 0; a second cycle on node 0 or 2 will then
            # rebuild on node 1. Note that it is possible for the
            # reconstructor processes on each node to run in an order such
            # that all rebuilds complete in one cycle, but that is not
            # guaranteed, so we allow 2 cycles to be sure.
            self._test_rebuild_scenario(failed, non_durable, 2)
        scenarios = [
            # failed, non-durable
            ((0, 2), (1, 3, 5)),
            ((0,), (1, 2, 4, 5)),
        ]
        for failed, non_durable in scenarios:
            # why 3 repeats? consider a missing fragment on node 0 and a
            # single durable on node 3: the first reconstructor cycle on node
            # 3 will make nodes 2 and 4 durable, a second cycle on nodes 2
            # and 4 will make nodes 1 and 5 durable, and a third cycle on
            # node 1 or 5 will reconstruct the missing fragment on node 0.
            self._test_rebuild_scenario(failed, non_durable, 3)

    def test_rebuild_partner_down(self):
        # we have to pick a lower index because we have few handoffs
        nodes = self.onodes[:2]
        random.shuffle(nodes)  # left or right is fine
        primary_node, partner_node = nodes

        # capture fragment etag from partner
        failed_partner_meta, failed_partner_etag = self.direct_get(
            partner_node, self.opart)

        # and 507 the failed partner device
        device_path = self.device_dir('object', partner_node)
        self.kill_drive(device_path)

        # reconstruct from the primary, while one of its partners is 507'd
        self.reconstructor.once(number=self.config_number(primary_node))

        # a handoff will pick up the rebuild
        hnodes = list(self.object_ring.get_more_nodes(self.opart))
        for node in hnodes:
            try:
                found_meta, found_etag = self.direct_get(
                    node, self.opart)
            except DirectClientException as e:
                if e.http_status != 404:
                    raise
            else:
                break
        else:
            self.fail('Unable to fetch rebuilt frag from handoffs %r '
                      'given primary nodes %r with %s unmounted, '
                      'trying to rebuild from %s' % (
                          [h['device'] for h in hnodes],
                          [n['device'] for n in self.onodes],
                          partner_node['device'],
                          primary_node['device'],
                      ))
        self.assertEqual(failed_partner_etag, found_etag)
        del failed_partner_meta['Date']
        del found_meta['Date']
        self.assertEqual(failed_partner_meta, found_meta)

        # just to be nice
        self.revive_drive(device_path)

    def test_sync_expired_object(self):
        # verify that a missing frag can be rebuilt for an expired object
        delete_after = 2
        self.proxy_put(extra_headers={'x-delete-after': delete_after})
        self.proxy_get()  # sanity check
        orig_frag_headers, orig_frag_etags = self._assert_all_nodes_have_frag(
            extra_headers={'X-Backend-Replication': 'True'})

        # wait for object to expire
        timeout = time.time() + delete_after + 1
        while time.time() < timeout:
            try:
                self.proxy_get()
            except ClientException as e:
                if e.http_status == 404:
                    break
                else:
                    raise
        else:
            self.fail('Timed out waiting for %s/%s to expire after %ss' % (
                self.container_name, self.object_name, delete_after))

        # sanity check - X-Backend-Replication lets us get the expired frag...
        fail_node = random.choice(self.onodes)
        self.direct_get(fail_node, self.opart,
                        extra_headers={'X-Backend-Replication': 'True'})
        # ...until we remove the frag from fail_node
        self._break_nodes([self.onodes.index(fail_node)], [])
        # ...now it's really gone
        with self.assertRaises(DirectClientException) as cm:
            self.direct_get(fail_node, self.opart,
                            extra_headers={'X-Backend-Replication': 'True'})
        self.assertEqual(404, cm.exception.http_status)
        self.assertNotIn('X-Backend-Timestamp', cm.exception.http_headers)

        # run the reconstructor
        self.reconstructor.once()

        # the missing frag is now in place but expired
        with self.assertRaises(DirectClientException) as cm:
            self.direct_get(fail_node, self.opart)
        self.assertEqual(404, cm.exception.http_status)
        self.assertIn('X-Backend-Timestamp', cm.exception.http_headers)

        # check all frags are intact, durable and have expected metadata
        frag_headers, frag_etags = self._assert_all_nodes_have_frag(
            extra_headers={'X-Backend-Replication': 'True'})
        self.assertEqual(orig_frag_etags, frag_etags)
        self.maxDiff = None
        self.assertEqual(orig_frag_headers, frag_headers)

    def test_sync_unexpired_object_metadata(self):
        # verify that metadata can be sync'd to a frag that has missed a POST
        # and consequently appears to be expired, when in fact the POST
        # removed the x-delete-at header
        client.put_container(self.url, self.token, self.container_name,
                             headers={'x-storage-policy': self.policy.name})
        opart, onodes = self.object_ring.get_nodes(
            self.account, self.container_name, self.object_name)
        delete_at = int(time.time() + 3)
        contents = 'body-%s' % uuid.uuid4()
        headers = {'x-delete-at': delete_at}
        client.put_object(self.url, self.token, self.container_name,
                          self.object_name, headers=headers, contents=contents)
        # fail a primary
        post_fail_node = random.choice(onodes)
        post_fail_path = self.device_dir('object', post_fail_node)
        self.kill_drive(post_fail_path)
        # post over w/o x-delete-at
        client.post_object(self.url, self.token, self.container_name,
                           self.object_name, {'content-type': 'something-new'})
        # revive failed primary
        self.revive_drive(post_fail_path)
        # wait for the delete_at time to pass, and check that the revived
        # node thinks the object is expired
        timeout = time.time() + 5
        while time.time() < timeout:
            try:
                direct_client.direct_head_object(
                    post_fail_node, opart, self.account, self.container_name,
                    self.object_name, headers={
                        'X-Backend-Storage-Policy-Index': int(self.policy)})
            except direct_client.ClientException as err:
                if err.http_status != 404:
                    raise
                # keep a reference: 'err' is unbound once the except block
                # exits on py3
                head_err = err
                break
            else:
                time.sleep(0.1)
        else:
            self.fail('Failed to get a 404 from node with expired object')
        self.assertEqual(head_err.http_status, 404)
        self.assertIn('X-Backend-Timestamp', head_err.http_headers)

        # but from the proxy we've got the whole story
        headers, body = client.get_object(self.url, self.token,
                                          self.container_name,
                                          self.object_name)
        self.assertNotIn('X-Delete-At', headers)
        self.reconstructor.once()

        # ... and all the nodes have the final unexpired state
        for node in onodes:
            headers = direct_client.direct_head_object(
                node, opart, self.account, self.container_name,
                self.object_name, headers={
                    'X-Backend-Storage-Policy-Index': int(self.policy)})
            self.assertNotIn('X-Delete-At', headers)
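
The two polling loops in this example share a wait-until-404 shape. A minimal
standalone sketch of that pattern, assuming only the stdlib (the wait_for_404
name and its arguments are hypothetical, not part of Swift):

import time


def wait_for_404(fetch, timeout=5, interval=0.1):
    # Poll fetch() until it raises an exception carrying http_status == 404,
    # and return that exception; re-raise anything else; give up on timeout.
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            fetch()
        except Exception as err:
            if getattr(err, 'http_status', None) == 404:
                return err
            raise
        time.sleep(interval)
    raise AssertionError('timed out waiting for a 404')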
Example No. 32
0
class TestReconstructorRebuild(ECProbeTest):

    def setUp(self):
        super(TestReconstructorRebuild, self).setUp()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()
        # sanity
        self.assertEqual(self.policy.policy_type, EC_POLICY)
        self.reconstructor = Manager(["object-reconstructor"])

    def proxy_get(self):
        # GET object
        headers, body = client.get_object(self.url, self.token,
                                          self.container_name,
                                          self.object_name,
                                          resp_chunk_size=64 * 2 ** 10)
        resp_checksum = md5()
        for chunk in body:
            resp_checksum.update(chunk)
        return resp_checksum.hexdigest()

    def direct_get(self, node, part):
        req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
        headers, data = direct_client.direct_get_object(
            node, part, self.account, self.container_name,
            self.object_name, headers=req_headers,
            resp_chunk_size=64 * 2 ** 20)
        hasher = md5()
        for chunk in data:
            hasher.update(chunk)
        return hasher.hexdigest()

    def _check_node(self, node, part, etag, headers_post):
        # get fragment archive etag
        fragment_archive_etag = self.direct_get(node, part)

        # remove data from the selected node
        part_dir = self.storage_dir('object', node, part=part)
        shutil.rmtree(part_dir, True)

        # this node can't serve the data any more
        try:
            self.direct_get(node, part)
        except direct_client.DirectClientException as err:
            self.assertEqual(err.http_status, 404)
        else:
            self.fail('Node data on %r was not fully destroyed!' %
                      (node,))

        # make sure we can still GET the object and it's correct; the
        # proxy is doing decode on remaining fragments to get the obj
        self.assertEqual(etag, self.proxy_get())

        # fire up reconstructor
        self.reconstructor.once()

        # fragment is rebuilt exactly as it was before!
        self.assertEqual(fragment_archive_etag,
                         self.direct_get(node, part))

        # check meta
        meta = client.head_object(self.url, self.token,
                                  self.container_name,
                                  self.object_name)
        for key in headers_post:
            self.assertTrue(key in meta)
            self.assertEqual(meta[key], headers_post[key])

    def _format_node(self, node):
        return '%s#%s' % (node['device'], node['index'])

    def test_main(self):
        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url, self.token, self.container_name,
                             headers=headers)

        # PUT object
        contents = Body()
        headers = {'x-object-meta-foo': 'meta-foo'}
        headers_post = {'x-object-meta-bar': 'meta-bar'}

        etag = client.put_object(self.url, self.token,
                                 self.container_name,
                                 self.object_name,
                                 contents=contents, headers=headers)
        client.post_object(self.url, self.token, self.container_name,
                           self.object_name, headers=headers_post)
        # swiftclient adds X-Auth-Token to the dict we pass in; remove it so
        # the metadata comparison in _check_node only sees our own headers
        del headers_post['X-Auth-Token']

        # build up a list of node lists to kill data from,
        # first try a single node
        # then adjacent nodes and then nodes >1 node apart
        opart, onodes = self.object_ring.get_nodes(
            self.account, self.container_name, self.object_name)
        single_node = [random.choice(onodes)]
        adj_nodes = [onodes[0], onodes[-1]]
        far_nodes = [onodes[0], onodes[-2]]
        test_list = [single_node, adj_nodes, far_nodes]

        for node_list in test_list:
            for onode in node_list:
                try:
                    self._check_node(onode, opart, etag, headers_post)
                except AssertionError as e:
                    self.fail(
                        str(e) + '\n... for node %r of scenario %r' % (
                            self._format_node(onode),
                            [self._format_node(n) for n in node_list]))
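
Body() above is not shown in this example; a minimal sketch of an iterable
upload body that client.put_object can stream, with illustrative sizes and
names (an assumption about its shape, not Swift's actual helper):

class Body(object):
    # Yield deterministic data one chunk at a time so the PUT is streamed
    # rather than buffered in memory.
    def __init__(self, total=3 * 2 ** 20, chunk_size=2 ** 16):
        self.total = total
        self.chunk_size = chunk_size
        self.sent = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self.sent >= self.total:
            raise StopIteration
        chunk = b'x' * min(self.chunk_size, self.total - self.sent)
        self.sent += len(chunk)
        return chunk

    next = __next__  # py2 spelling, matching the idioms used above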
Example No. 33
0
class ProbeTest(unittest.TestCase):
    """
    Don't instantiate this directly, use a child class instead.
    """

    def setUp(self):
        resetswift()
        try:
            self.ipport2server = {}
            self.configs = defaultdict(dict)
            self.account_ring = get_ring(
                'account',
                self.acct_cont_required_replicas,
                self.acct_cont_required_devices,
                ipport2server=self.ipport2server,
                config_paths=self.configs)
            self.container_ring = get_ring(
                'container',
                self.acct_cont_required_replicas,
                self.acct_cont_required_devices,
                ipport2server=self.ipport2server,
                config_paths=self.configs)
            self.policy = get_policy(**self.policy_requirements)
            self.object_ring = get_ring(
                self.policy.ring_name,
                self.obj_required_replicas,
                self.obj_required_devices,
                server='object',
                ipport2server=self.ipport2server,
                config_paths=self.configs)

            self.servers_per_port = any(
                int(readconf(c, section_name='object-replicator').get(
                    'servers_per_port', '0'))
                for c in self.configs['object-replicator'].values())

            Manager(['main']).start(wait=True)
            for ipport in self.ipport2server:
                check_server(ipport, self.ipport2server)
            proxy_ipport = ('127.0.0.1', 8080)
            self.ipport2server[proxy_ipport] = 'proxy'
            self.url, self.token, self.account = check_server(
                proxy_ipport, self.ipport2server)
            self.account_1 = {
                'url': self.url, 'token': self.token, 'account': self.account}

            rv = _retry_timeout(_check_proxy, args=(
                proxy_ipport, 'test2:tester2', 'testing2'))
            self.account_2 = {
                k: v for (k, v) in zip(('url', 'token', 'account'), rv)}

            self.replicators = Manager(
                ['account-replicator', 'container-replicator',
                 'object-replicator'])
            self.updaters = Manager(['container-updater', 'object-updater'])
        except BaseException:
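            # re-raise the original failure, but make sure any services that
            # did start are killed first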
            try:
                raise
            finally:
                try:
                    Manager(['all']).kill()
                except Exception:
                    pass

    def tearDown(self):
        Manager(['all']).kill()

    def device_dir(self, server, node):
        server_type, config_number = get_server_number(
            (node['ip'], node['port']), self.ipport2server)
        repl_server = '%s-replicator' % server_type
        conf = readconf(self.configs[repl_server][config_number],
                        section_name=repl_server)
        return os.path.join(conf['devices'], node['device'])

    def storage_dir(self, server, node, part=None, policy=None):
        policy = policy or self.policy
        device_path = self.device_dir(server, node)
        path_parts = [device_path, get_data_dir(policy)]
        if part is not None:
            path_parts.append(str(part))
        return os.path.join(*path_parts)

    def config_number(self, node):
        _server_type, config_number = get_server_number(
            (node['ip'], node['port']), self.ipport2server)
        return config_number

    def is_local_to(self, node1, node2):
        """
        Return True if both ring devices are "local" to each other (on the same
        "server".
        """
        if self.servers_per_port:
            return node1['ip'] == node2['ip']

        # Without a disambiguating IP, for SAIOs, we have to assume ports
        # uniquely identify "servers".  SAIOs should be configured to *either*
        # have unique IPs per node (e.g. 127.0.0.1, 127.0.0.2, etc.) OR unique
        # ports per server (i.e. sdb1 & sdb5 would have the same port
        # numbers in the 8-disk EC ring).
        return node1['port'] == node2['port']

    def get_to_final_state(self):
        # these .stop()s are probably not strictly necessary,
        # but may prevent race conditions
        self.replicators.stop()
        self.updaters.stop()

        self.replicators.once()
        self.updaters.once()
        self.replicators.once()

    def kill_drive(self, device):
        if os.path.ismount(device):
            os.system('sudo umount %s' % device)
        else:
            renamer(device, device + "X")

    def revive_drive(self, device):
        disabled_name = device + "X"
        if os.path.isdir(disabled_name):
            renamer(disabled_name, device)
        else:
            os.system('sudo mount %s' % device)

    def make_internal_client(self):
        tempdir = mkdtemp()
        try:
            conf_path = os.path.join(tempdir, 'internal_client.conf')
            conf_body = """
            [DEFAULT]
            swift_dir = /etc/swift

            [pipeline:main]
            pipeline = catch_errors cache copy proxy-server

            [app:proxy-server]
            use = egg:swift#proxy

            [filter:copy]
            use = egg:swift#copy

            [filter:cache]
            use = egg:swift#memcache

            [filter:catch_errors]
            use = egg:swift#catch_errors
            """
            with open(conf_path, 'w') as f:
                f.write(dedent(conf_body))
            return internal_client.InternalClient(conf_path, 'test', 1)
        finally:
            shutil.rmtree(tempdir)
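
ProbeTest.setUp() reads several class attributes (acct_cont_required_replicas,
acct_cont_required_devices, obj_required_replicas, obj_required_devices and
policy_requirements) that child classes must pin down. A sketch of such a
child class, modeled on Swift's probe tests (the values are illustrative, and
REPL_POLICY is assumed to come from swift.common.storage_policy):

from swift.common.storage_policy import REPL_POLICY


class ReplProbeTest(ProbeTest):
    # ring and policy requirements consumed by ProbeTest.setUp() via
    # get_ring() and get_policy()
    acct_cont_required_replicas = 3
    acct_cont_required_devices = 4
    obj_required_replicas = 3
    obj_required_devices = 4
    policy_requirements = {'policy_type': REPL_POLICY}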
Example No. 34
0
class TestReconstructorRebuild(ECProbeTest):

    def setUp(self):
        super(TestReconstructorRebuild, self).setUp()
        self.container_name = 'container-%s' % uuid.uuid4()
        self.object_name = 'object-%s' % uuid.uuid4()
        # sanity
        self.assertEqual(self.policy.policy_type, EC_POLICY)
        self.reconstructor = Manager(["object-reconstructor"])

    def proxy_get(self):
        # GET object
        headers, body = client.get_object(self.url, self.token,
                                          self.container_name,
                                          self.object_name,
                                          resp_chunk_size=64 * 2 ** 10)
        resp_checksum = md5()
        for chunk in body:
            resp_checksum.update(chunk)
        return resp_checksum.hexdigest()

    def direct_get(self, node, part):
        req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
        headers, data = direct_client.direct_get_object(
            node, part, self.account, self.container_name,
            self.object_name, headers=req_headers,
            resp_chunk_size=64 * 2 ** 20)
        hasher = md5()
        for chunk in data:
            hasher.update(chunk)
        return hasher.hexdigest()

    def _check_node(self, node, part, etag, headers_post):
        # get fragment archive etag
        fragment_archive_etag = self.direct_get(node, part)

        # remove data from the selected node
        part_dir = self.storage_dir('object', node, part=part)
        shutil.rmtree(part_dir, True)

        # this node can't serve the data any more
        try:
            self.direct_get(node, part)
        except direct_client.DirectClientException as err:
            self.assertEqual(err.http_status, 404)
        else:
            self.fail('Node data on %r was not fully destroyed!' %
                      (node,))

        # make sure we can still GET the object and it's correct; the
        # proxy is doing decode on remaining fragments to get the obj
        self.assertEqual(etag, self.proxy_get())

        # fire up reconstructor
        self.reconstructor.once()

        # fragment is rebuilt exactly as it was before!
        self.assertEqual(fragment_archive_etag,
                         self.direct_get(node, part))

        # check meta
        meta = client.head_object(self.url, self.token,
                                  self.container_name,
                                  self.object_name)
        for key in headers_post:
            self.assertTrue(key in meta)
            self.assertEqual(meta[key], headers_post[key])

    def _format_node(self, node):
        return '%s#%s' % (node['device'], node['index'])

    def test_main(self):
        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url, self.token, self.container_name,
                             headers=headers)

        # PUT object
        contents = Body()
        headers = {'x-object-meta-foo': 'meta-foo'}
        headers_post = {'x-object-meta-bar': 'meta-bar'}

        etag = client.put_object(self.url, self.token,
                                 self.container_name,
                                 self.object_name,
                                 contents=contents, headers=headers)
        client.post_object(self.url, self.token, self.container_name,
                           self.object_name, headers=headers_post)
        # swiftclient adds X-Auth-Token to the dict we pass in; remove it so
        # the metadata comparison in _check_node only sees our own headers
        del headers_post['X-Auth-Token']

        # build up a list of node lists to kill data from,
        # first try a single node
        # then adjacent nodes and then nodes >1 node apart
        opart, onodes = self.object_ring.get_nodes(
            self.account, self.container_name, self.object_name)
        single_node = [random.choice(onodes)]
        adj_nodes = [onodes[0], onodes[-1]]
        far_nodes = [onodes[0], onodes[-2]]
        test_list = [single_node, adj_nodes, far_nodes]

        for node_list in test_list:
            for onode in node_list:
                try:
                    self._check_node(onode, opart, etag, headers_post)
                except AssertionError as e:
                    self.fail(
                        str(e) + '\n... for node %r of scenario %r' % (
                            self._format_node(onode),
                            [self._format_node(n) for n in node_list]))

    def test_rebuild_partner_down(self):
        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url, self.token, self.container_name,
                             headers=headers)

        # PUT object
        contents = Body()
        client.put_object(self.url, self.token,
                          self.container_name,
                          self.object_name,
                          contents=contents)

        opart, onodes = self.object_ring.get_nodes(
            self.account, self.container_name, self.object_name)

        # find a primary server that only has one of its devices in the
        # primary node list
        group_nodes_by_config = defaultdict(list)
        for n in onodes:
            group_nodes_by_config[self.config_number(n)].append(n)
        for config_number, node_list in group_nodes_by_config.items():
            if len(node_list) == 1:
                break
        else:
            self.fail('ring balancing did not use all available nodes')
        primary_node = node_list[0]

        # pick one of its partners to fail at random
        partner_node = random.choice(_get_partners(
            primary_node['index'], onodes))

        # 507 the partner device
        device_path = self.device_dir('object', partner_node)
        self.kill_drive(device_path)

        # select another primary sync_to node to fail
        failed_primary = [n for n in onodes if n['id'] not in
                          (primary_node['id'], partner_node['id'])][0]
        # ... capture its fragment etag
        failed_primary_etag = self.direct_get(failed_primary, opart)
        # ... and delete it
        part_dir = self.storage_dir('object', failed_primary, part=opart)
        shutil.rmtree(part_dir, True)

        # reconstruct from the primary, while one of its partners is 507'd
        self.reconstructor.once(number=self.config_number(primary_node))

        # the other failed primary will get its fragment rebuilt instead
        self.assertEqual(failed_primary_etag,
                         self.direct_get(failed_primary, opart))

        # just to be nice
        self.revive_drive(device_path)
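
_get_partners() used above is imported from Swift's reconstructor module; for
orientation, a minimal sketch of the neighbour computation it performs (based
on the frag_index-based call seen here; the real implementation in
swift.obj.reconstructor may differ):

def _get_partners(frag_index, part_nodes):
    # An EC fragment's "partners" are its ring neighbours: the primary
    # nodes immediately before and after it in the node list, which are
    # the nodes it syncs with during reconstruction.
    return [
        part_nodes[(frag_index - 1) % len(part_nodes)],
        part_nodes[(frag_index + 1) % len(part_nodes)],
    ]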