class TestObjectStorageApiPerformance(BaseTestCase):
    def setUp(self):
        super(TestObjectStorageApiPerformance, self).setUp()
        self.api = ObjectStorageApi(self.ns,
                                    endpoint=self.uri,
                                    source_address=('127.0.0.8', 0))
        self.created = list()
        self.containers = set()

    def tearDown(self):
        super(TestObjectStorageApiPerformance, self).tearDown()
        for ct, name in self.created:
            try:
                self.api.object_delete(self.account, ct, name)
                self.containers.add(ct)
            except Exception:
                logging.exception("Failed to delete %s/%s/%s//%s", self.ns,
                                  self.account, ct, name)
        for ct in self.containers:
            try:
                self.api.container_delete(self.account, ct)
            except Exception:
                logging.exception('Failed to delete %s/%s/%s', self.ns,
                                  self.account, ct)

    def test_object_create_32_md5_checksum(self):
        container = self.__class__.__name__ + random_str(8)
        for i in range(32):
            obj = "obj-%03d" % i
            self.api.object_create(self.account,
                                   container,
                                   obj_name=obj,
                                   data=obj,
                                   chunk_checksum_algo='md5')
            self.created.append((container, obj))

    def test_object_create_32_no_checksum(self):
        container = self.__class__.__name__ + random_str(8)
        for i in range(32):
            obj = "obj-%03d" % i
            self.api.object_create(self.account,
                                   container,
                                   obj_name=obj,
                                   data=obj,
                                   chunk_checksum_algo=None)
            self.created.append((container, obj))

    def test_object_list_empty_container(self):
        """
        Ensure object listing of an empty container takes less than 35ms.
        """
        container = self.__class__.__name__ + random_str(8)
        self.api.container_create(self.account, container)
        self.containers.add(container)
        for _ in range(8):
            start = monotonic_time()
            self.api.object_list(self.account, container)
            duration = monotonic_time() - start
            logging.info("Object list took %.6fs", duration)
            self.assertLess(duration, 0.035)
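These tests rely on a `random_str` helper (imported from the test utilities, not shown in this listing) to avoid container-name collisions between runs. A minimal sketch of what such a helper is assumed to do:

# Hypothetical stand-in for the test-suite helper `random_str`:
# returns a random alphanumeric suffix of the requested length.
import random
import string

def random_str(size, chars=string.ascii_lowercase + string.digits):
    return ''.join(random.choice(chars) for _ in range(size))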
Example 2
def worker_objects():
    proxy = ObjectStorageApi(NS)
    while True:
        try:
            name = QUEUE.get(timeout=TIMEOUT)
        except eventlet.queue.Empty:
            if VERBOSE:
                print("Leaving worker")
            break

        while True:
            try:
                items = proxy.object_list(ACCOUNT, name)
                objs = [_item['name'] for _item in items['objects']]
                size = sum([_item['size'] for _item in items['objects']])
                if len(objs) == 0:
                    break
                if VERBOSE:
                    print("Deleting", len(objs), "objects")
                proxy.object_delete_many(ACCOUNT, name, objs=objs)
                COUNTERS.add(len(objs), size)
                break
            except Exception as ex:
                if "Election failed" in str(ex):
                    # Wait for the default election retry delay.
                    ELECTIONS.add(1, 0)
                    time.sleep(20)
                    continue
                print("Objs %s: %s" % (name, str(ex)), file=sys.stderr)
                break

        QUEUE.task_done()
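`worker_objects` consumes container names from a module-level `QUEUE` and relies on globals (`NS`, `ACCOUNT`, `TIMEOUT`, `VERBOSE`, `COUNTERS`, `ELECTIONS`) defined elsewhere in the script. A minimal driver sketch, with assumed stand-ins for those globals:

# Assumed setup, not part of the original script: fill the queue with
# container names, then let a pool of green threads drain it.
import eventlet
eventlet.monkey_patch()

NS = "OPENIO"
ACCOUNT = "my_account"
TIMEOUT = 5
VERBOSE = True
QUEUE = eventlet.queue.Queue()

class Accumulator(object):
    """Naive stand-in for the COUNTERS/ELECTIONS accumulators."""
    def __init__(self):
        self.count, self.size = 0, 0

    def add(self, count, size):
        self.count += count
        self.size += size

COUNTERS = Accumulator()
ELECTIONS = Accumulator()

pool = eventlet.GreenPool(10)
for container in ("bucket-1", "bucket-2"):
    QUEUE.put(container)
for _ in range(pool.size):
    pool.spawn(worker_objects)
pool.waitall()
Example 3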
class TestContentVersioning(BaseTestCase):

    def setUp(self):
        super(TestContentVersioning, self).setUp()
        self.api = ObjectStorageApi(self.conf['namespace'])
        self.container = random_str(8)
        system = {'sys.m2.policy.version': '3'}
        self.api.container_create(self.account, self.container, system=system)

    def test_versioning_enabled(self):
        props = self.api.container_get_properties(
            self.account, self.container)
        self.assertEqual('3', props['system']['sys.m2.policy.version'])

    def test_list_versions(self):
        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content0")
        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content1")
        listing = self.api.object_list(self.account, self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(2, len(objects))
        self.assertNotEqual(objects[0]['version'], objects[1]['version'])

    def test_purge(self):
        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content0")
        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content1")
        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content2")
        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content3")
        listing = self.api.object_list(self.account, self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(4, len(objects))
        oldest_version = min(obj['version'] for obj in objects)

        self.api.container.container_purge(self.account, self.container)
        listing = self.api.object_list(self.account, self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(3, len(objects))
        self.assertNotIn(oldest_version, [x['version'] for x in objects])
Example 4
def main():
    args = options()

    global ACCOUNT, PROXY
    ACCOUNT = args.account
    PROXY = ObjectStorageApi("OPENIO")

    args.path = args.path.rstrip('/')
    if '/' in args.path:
        bucket, path = args.path.split('/', 1)
    else:
        bucket = args.path
        path = ""

    containers = []

    _bucket = container_hierarchy(bucket, path)
    # We do not use placeholders: the hierarchy path itself is the prefix.
    for entry in full_list(prefix=_bucket):
        name, _files, _size, _ = entry
        if name != _bucket and not name.startswith(_bucket + '%2F'):
            continue

        if _files:
            items = PROXY.object_list(ACCOUNT, name)
            objs = [_item['name'] for _item in items['objects']]
            PROXY.object_delete_many(ACCOUNT, name, objs=objs)
            print("Deleting", len(objs), "objects")

        containers.append(name)

    print("We have to delete", len(containers), "containers")

    for container in containers:
        print("Deleting", container)
        PROXY.container_delete(ACCOUNT, container)
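This script depends on `options()`, `full_list()` and `container_hierarchy()` defined elsewhere. Judging from the `'%2F'` prefix test above, `container_hierarchy` flattens a bucket/path pair into a container name whose separators are percent-encoded slashes; a plausible sketch, assuming exactly that behavior:

# Hypothetical reconstruction, based only on how the result is used above.
def container_hierarchy(bucket, path):
    if not path:
        return bucket
    return '%2F'.join([bucket] + path.split('/'))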
Example 5
class ContainerBackup(RedisConnection, WerkzeugApp):
    """WSGI Application to dump or restore a container."""

    REDIS_TIMEOUT = 3600 * 24  # Redis keys will expire after one day
    STREAMING = 52428800  # 50 MB

    # Block alignment used to avoid splitting tar headers (2048 blocks = 1 MiB)
    BLOCK_ALIGNMENT = 2048

    def __init__(self, conf):
        if conf:
            self.conf = read_conf(conf['key_file'],
                                  section_name="admin-server")
        else:
            self.conf = {}
        self.logger = get_logger(self.conf, name="ContainerBackup")

        self.proxy = ObjectStorageApi(self.conf.get("namespace", NS),
                                      logger=self.logger)
        self.url_map = Map([
            Rule('/v1.0/container/dump', endpoint='dump'),
            Rule('/v1.0/container/restore', endpoint='restore'),
        ])
        self.REDIS_TIMEOUT = int(self.conf.get("redis_cache_timeout",
                                               self.REDIS_TIMEOUT))

        redis_conf = {
            k[6:]: v
            for k, v in self.conf.items() if k.startswith("redis_")
        }
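        # For example (assumed input), {'redis_host': '127.0.0.1',
        # 'redis_port': '6380'} becomes {'host': '127.0.0.1',
        # 'port': '6380'}: k[6:] strips the 'redis_' prefix.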
        redis_host = redis_conf.pop('host', None)
        if redis_host:
            parsed = urlparse('http://' + redis_host)
            if parsed.port is None:
                redis_host = '%s:%s' % (redis_host,
                                        redis_conf.pop('port', '6379'))
        redis_sentinel_hosts = redis_conf.pop(
            'sentinel_hosts',
            # TODO(adu): Delete when it is no longer used
            self.conf.get('sentinel_hosts'))
        redis_sentinel_name = redis_conf.pop(
            'sentinel_name',
            # TODO(adu): Delete when it is no longer used
            self.conf.get('sentinel_master_name'))
        RedisConnection.__init__(self,
                                 host=redis_host,
                                 sentinel_hosts=redis_sentinel_hosts,
                                 sentinel_name=redis_sentinel_name,
                                 **redis_conf)
        WerkzeugApp.__init__(self, self.url_map, self.logger)

    @property
    def redis(self):
        """Redis connection object"""
        return self.conn

    @redis_cnx
    def generate_manifest(self, account, container):
        """
        Generate a static manifest of a container.
        It will help to find quickly which part of object app have to serve
        Manifest is cached into Redis with REDIS_TIMEOUT delay
        """
        if not container:
            raise exc.NoSuchContainer()

        # TODO: hash_map should record whether the deleted or version flags are set
        hash_map = "container_streaming:{0}/{1}".format(account, container)
        cache = self.redis.get(hash_map)
        if cache:
            self.logger.debug("using cache")
            return json.loads(cache, object_pairs_hook=OrderedDict)

        map_objs = []
        start_block = 0

        meta = self.proxy.container_get_properties(account, container)
        if meta['properties']:
            # create special file to save properties of container
            tar = OioTarEntry(self.proxy,
                              account,
                              container,
                              CONTAINER_PROPERTIES,
                              data=meta)
            entry = {
                'name': CONTAINER_PROPERTIES,
                'size': tar.filesize,
                'hdr_blocks': tar.header_blocks,
                'blocks': tar.header_blocks + tar.data_blocks,
                'start_block': start_block,
            }
            start_block += entry['blocks']
            entry['end_block'] = start_block - 1
            map_objs.append(entry)

        objs = self.proxy.object_list(account, container)
        for obj in sorted(objs['objects'], key=lambda x: x['name']):
            # FIXME: should we backup deleted objects?
            if obj['deleted']:
                continue
            tar = OioTarEntry(self.proxy, account, container, obj['name'])
            if (start_block // self.BLOCK_ALIGNMENT) != \
                    ((start_block + tar.header_blocks) //
                     self.BLOCK_ALIGNMENT):
                # The header would cross an alignment boundary:
                # add padding blocks.
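                # Worked example with BLOCK_ALIGNMENT = 2048: a 16-block
                # header starting at block 2040 would cross the 2048
                # boundary, so we insert 2048 - 2040 = 8 padding blocks
                # and the header starts exactly on the boundary.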
                padding = (self.BLOCK_ALIGNMENT -
                           divmod(start_block, self.BLOCK_ALIGNMENT)[1])
                map_objs.append({
                    'blocks': padding,
                    'size': padding * BLOCKSIZE,
                    'start_block': start_block,
                    'slo': None,
                    'hdr_blocks': padding,
                    'end_block': start_block + padding - 1
                })
                start_block += padding
            entry = {
                'name': obj['name'],
                'size': tar.filesize,
                'hdr_blocks': tar.header_blocks,
                'blocks': tar.header_blocks + tar.data_blocks,
                'start_block': start_block,
                'slo': tar.slo,
                'checksums': tar.checksums,
            }
            start_block += entry['blocks']
            entry['end_block'] = start_block - 1
            map_objs.append(entry)

        if not map_objs:
            return map_objs

        entry = {
            'name': CONTAINER_MANIFEST,
            'size': 0,
            'hdr_blocks': 1,  # a simple PAX header consumes only 1 block
            'blocks': 0,
            'start_block': 0,
            'slo': None,
        }
        map_objs.insert(0, entry)

        entry['size'] = len(json.dumps(map_objs, sort_keys=True))
        # Ensure we reserve enough blocks, even after offsets are recomputed.
        entry['blocks'] = \
            1 + int(math.ceil(entry['size'] / float(BLOCKSIZE))) * 2

        tar = OioTarEntry(self.proxy,
                          account,
                          container,
                          CONTAINER_MANIFEST,
                          data=map_objs)

        assert tar.header_blocks == 1, "Incorrect size for hdr_blocks"
        assert tar.data_blocks <= entry['blocks']

        # fix start_block and end_block
        start = 0
        for _entry in map_objs:
            _entry['start_block'] = start
            start += _entry['blocks']
            _entry['end_block'] = start - 1

        tar2 = OioTarEntry(self.proxy,
                           account,
                           container,
                           CONTAINER_MANIFEST,
                           data=map_objs)
        entry['size'] = tar2.filesize

        assert tar2.header_blocks == tar.header_blocks
        assert tar2.data_blocks <= entry['blocks'], \
            "got %d instead of %d" % (tar2.data_blocks, tar.data_blocks)

        self.logger.debug("add entry to cache")
        self.redis.set(hash_map,
                       json.dumps(map_objs, sort_keys=True),
                       ex=self.REDIS_TIMEOUT)
        return map_objs

    def _do_head(self, _, account, container):
        """
        Manage HEAD method and response number of block
        Note: Range header is unmanaged
        """
        try:
            results = self.generate_manifest(account, container)
        except exc.NoSuchContainer:
            self.logger.info("%s %s not found", account, container)
            return Response(status=404)

        if not results:
            self.logger.info("no data for %s %s", account, container)
            return Response(status=204)

        hdrs = {
            'X-Blocks': sum([i['blocks'] for i in results]),
            'Content-Length': sum([i['blocks'] for i in results]) * BLOCKSIZE,
            'Accept-Ranges': 'bytes',
            'Content-Type': 'application/tar',
        }
        return Response(headers=hdrs, status=200)

    @classmethod
    def _extract_range(cls, req, blocks):
        """Convert byte range into block an performs validity check"""
        # accept only single part range
        val = req.headers['Range']
        match = RANGE_RE.match(val)
        if match is None:
            raise RequestedRangeNotSatisfiable()
        start = int(match.group(1))
        end = int(match.group(2))
        if start >= end:
            raise RequestedRangeNotSatisfiable()

        def check_range(value):
            block, remainder = divmod(value, BLOCKSIZE)
            if remainder or block < 0 or (blocks and block > blocks):
                raise RequestedRangeNotSatisfiable()
            return block

        block_start = check_range(start)
        block_end = check_range(end + 1)  # Range ends are inclusive (RFC 7233)

        return start, end, block_start, block_end
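    # Worked example for _extract_range, with the tar BLOCKSIZE of 512:
    # 'Range: bytes=0-1048575' yields start=0, end=1048575,
    # block_start=0, block_end=2048. Both bounds must fall on a 512-byte
    # boundary, otherwise RequestedRangeNotSatisfiable is raised.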

    def _do_get(self, req, account, container):
        """Manage GET method to dump a container"""
        try:
            results = self.generate_manifest(account, container)
        except exc.NoSuchContainer:
            self.logger.info("%s %s not found", account, container)
            return Response(status=404)

        if not results:
            self.logger.info("no data for %s %s", account, container)
            return Response(status=204)

        blocks = sum([i['blocks'] for i in results])
        length = blocks * BLOCKSIZE

        if 'Range' not in req.headers:
            tar = ContainerTarFile(self.proxy, account, container,
                                   (0, blocks - 1), results, self.logger)
            return Response(wrap_file(req.environ,
                                      tar,
                                      buffer_size=self.STREAMING),
                            headers={
                                'Accept-Ranges': 'bytes',
                                'Content-Type': 'application/tar',
                                'Content-Length': length,
                            },
                            status=200)

        start, end, block_start, block_end = self._extract_range(req, blocks)

        tar = ContainerTarFile(self.proxy, account, container,
                               (block_start, block_end - 1), results,
                               self.logger)
        return Response(wrap_file(req.environ, tar,
                                  buffer_size=self.STREAMING),
                        headers={
                            'Accept-Ranges': 'bytes',
                            'Content-Type': 'application/tar',
                            'Content-Range': 'bytes %d-%d/%d' % (
                                start, end, length),
                            'Content-Length': end - start + 1,
                        },
                        status=206)

    def on_dump(self, req):
        """Entry point for dump rule"""
        # extract account and container
        account = req.args.get('acct')
        container = req.args.get('ref')

        if not account:
            raise BadRequest('Missing Account name')
        if not container:
            raise BadRequest('Missing Container name')

        if req.method == 'HEAD':
            return self._do_head(req, account, container)

        if req.method == 'GET':
            return self._do_get(req, account, container)

        return Response("Not supported", 405)

    @redis_cnx
    def _do_put_head(self, req, account, container):
        results = self.redis.get("restore:%s:%s" % (account, container))
        if not results:
            return UnprocessableEntity("No restoration in progress")
        in_progress = self.redis.get('restore:%s:%s:lock' %
                                     (account, container)) or '0'
        results = json.loads(results)
        blocks = sum(i['blocks'] for i in results['manifest'])
        return Response(headers={
            'X-Tar-Size': blocks * BLOCKSIZE,
            'X-Consumed-Size': results['end'] * BLOCKSIZE,
            'X-Upload-In-Progress': in_progress
        },
                        status=200)

    @redis_cnx
    def _do_put(self, req, account, container):
        """Manage PUT method for restoring a container"""
        obj = ContainerRestore(self.redis, self.proxy, self.logger)
        key = "restore:%s:%s:lock" % (account, container)
        if not self.redis.set(key, 1, nx=True):
            raise UnprocessableEntity("A restore is already in progress")

        try:
            return obj.restore(req, account, container)
        finally:
            self.redis.delete(key)

    def on_restore(self, req):
        """Entry point for restore rule"""
        account = req.args.get('acct')
        container = req.args.get('ref')

        if not account:
            raise BadRequest('Missing Account name')
        if not container:
            raise BadRequest('Missing Container name')

        if req.method not in ('PUT', 'HEAD'):
            return Response("Not supported", 405)

        try:
            self.proxy.container_get_properties(account, container)
            if not req.headers.get('range') and req.method == 'PUT':
                raise Conflict('Container already exists')
        except exc.NoSuchContainer:
            pass
        except Conflict:
            raise
        except Exception:
            raise BadRequest('Failed to verify container')

        if req.method == 'HEAD':
            return self._do_put_head(req, account, container)

        return self._do_put(req, account, container)
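The two routes declared in `url_map` can be exercised with any HTTP client. A sketch using `requests`, assuming the application is served on 127.0.0.1:6002 (host and port are deployment-specific) and using the `acct`/`ref` query parameters read by `on_dump()` and `on_restore()`:

# Usage sketch; host, port, account and container names are assumptions.
import requests

BASE = 'http://127.0.0.1:6002/v1.0/container'
params = {'acct': 'my_account', 'ref': 'my_container'}

# Ask for the size of the dump without fetching it.
head = requests.head(BASE + '/dump', params=params)
print(head.headers.get('X-Blocks'), head.headers.get('Content-Length'))

# Stream the tarball to a local file.
with requests.get(BASE + '/dump', params=params, stream=True) as resp:
    with open('my_container.tar', 'wb') as out:
        for chunk in resp.iter_content(chunk_size=65536):
            out.write(chunk)

# Restore the tarball into another container.
with open('my_container.tar', 'rb') as data:
    requests.put(BASE + '/restore',
                 params={'acct': 'my_account', 'ref': 'restored'},
                 data=data)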
Example 6
class TestContentVersioning(BaseTestCase):

    def setUp(self):
        super(TestContentVersioning, self).setUp()
        self.api = ObjectStorageApi(self.conf['namespace'])
        self.container = random_str(8)
        system = {'sys.m2.policy.version': '3'}
        self.api.container_create(self.account, self.container, system=system)

    def test_versioning_enabled(self):
        props = self.api.container_get_properties(
            self.account, self.container)
        self.assertEqual('3', props['system']['sys.m2.policy.version'])

    def test_list_versions(self):
        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content0")
        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content1")
        listing = self.api.object_list(self.account, self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(2, len(objects))
        self.assertNotEqual(objects[0]['version'], objects[1]['version'])

    def test_container_purge(self):
        # many contents
        for i in range(0, 4):
            self.api.object_create(self.account, self.container,
                                   obj_name="versioned", data="content")
        listing = self.api.object_list(self.account, self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(4, len(objects))
        oldest_version = min(obj['version'] for obj in objects)

        # use the maxvers of the container configuration
        self.api.container_purge(self.account, self.container)
        listing = self.api.object_list(self.account, self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(3, len(objects))
        self.assertNotIn(oldest_version, [x['version'] for x in objects])
        oldest_version = min(obj['version'] for obj in objects)

        # use the maxvers of the request
        self.api.container_purge(self.account, self.container, maxvers=1)
        listing = self.api.object_list(self.account, self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(1, len(objects))
        self.assertNotIn(oldest_version, [x['version'] for x in objects])

    def test_content_purge(self):
        # many contents
        for i in range(0, 4):
            self.api.object_create(self.account, self.container,
                                   obj_name="versioned", data="content")
        listing = self.api.object_list(self.account, self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(4, len(objects))
        oldest_version = min(obj['version'] for obj in objects)

        # use the maxvers of the container configuration
        self.api.container.content_purge(self.account, self.container,
                                         "versioned")
        listing = self.api.object_list(self.account, self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(3, len(objects))
        self.assertNotIn(oldest_version, [x['version'] for x in objects])
        oldest_version = min(obj['version'] for obj in objects)

        # use the maxvers of the request
        self.api.container.content_purge(self.account, self.container,
                                         "versioned", maxvers=1)
        listing = self.api.object_list(self.account, self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(1, len(objects))
        self.assertNotIn(oldest_version, [x['version'] for x in objects])

        # other contents
        for i in range(0, 4):
            self.api.object_create(self.account, self.container,
                                   obj_name="versioned2",
                                   data="content"+str(i))
        listing = self.api.object_list(self.account, self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(5, len(objects))

        # use the maxvers of the container configuration
        self.api.container.content_purge(self.account, self.container,
                                         "versioned")
        listing = self.api.object_list(self.account, self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(5, len(objects))

    def test_delete_exceeding_version(self):
        def check_num_objects_and_get_oldest_version(expected):
            listing = self.api.object_list(self.account, self.container,
                                           versions=True)
            objects = listing['objects']
            self.assertEqual(expected, len(objects))
            return min(objects, key=lambda x: x['version'])

        system = {'sys.m2.policy.version.delete_exceeding': '1'}
        self.api.container_set_properties(self.account, self.container,
                                          system=system)
        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content0")
        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content1")
        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content2")
        oldest_version = check_num_objects_and_get_oldest_version(3)

        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content3")
        new_oldest_version = check_num_objects_and_get_oldest_version(3)
        self.assertLess(oldest_version['version'],
                        new_oldest_version['version'])

    def test_change_flag_delete_exceeding_versions(self):
        def check_num_objects(expected):
            listing = self.api.object_list(self.account, self.container,
                                           versions=True)
            objects = listing['objects']
            self.assertEqual(expected, len(objects))

        for i in range(5):
            self.api.object_create(self.account, self.container,
                                   obj_name="versioned", data="content"+str(i))
        check_num_objects(5)

        system = {'sys.m2.policy.version.delete_exceeding': '1'}
        self.api.container_set_properties(self.account, self.container,
                                          system=system)
        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content5")
        check_num_objects(3)
        for i in range(6, 10):
            self.api.object_create(self.account, self.container,
                                   obj_name="versioned", data="content"+str(i))
        check_num_objects(3)

        system['sys.m2.policy.version.delete_exceeding'] = '0'
        self.api.container_set_properties(self.account, self.container,
                                          system=system)
        self.api.object_create(self.account, self.container,
                               obj_name="versioned", data="content11")
        check_num_objects(4)
Example 7
class Checker(object):
    def __init__(self,
                 namespace,
                 concurrency=50,
                 error_file=None,
                 rebuild_file=None,
                 check_xattr=True,
                 limit_listings=0,
                 request_attempts=1,
                 logger=None,
                 verbose=False,
                 check_hash=False,
                 min_time_in_error=0.0,
                 required_confirmations=0,
                 beanstalkd_addr=None,
                 beanstalkd_tube=BlobRebuilder.DEFAULT_BEANSTALKD_WORKER_TUBE,
                 cache_size=2**24,
                 **_kwargs):
        self.pool = GreenPool(concurrency)
        self.error_file = error_file
        self.error_sender = None
        self.check_xattr = bool(check_xattr)
        self.check_hash = bool(check_hash)
        self.logger = logger or get_logger(
            {'namespace': namespace}, name='integrity', verbose=verbose)
        # Optimisation for when we are only checking one object
        # or one container.
        # 0 -> do not limit
        # 1 -> limit account listings (list of containers)
        # 2 -> limit container listings (list of objects)
        self.limit_listings = limit_listings
        if self.error_file:
            outfile = open(self.error_file, 'a')
            self.error_writer = csv.writer(outfile, delimiter=' ')

        self.rebuild_file = rebuild_file
        if self.rebuild_file:
            self.fd = open(self.rebuild_file, 'a')
            self.rebuild_writer = csv.writer(self.fd, delimiter='|')

        if beanstalkd_addr:
            self.error_sender = BeanstalkdSender(beanstalkd_addr,
                                                 beanstalkd_tube, self.logger)

        self.api = ObjectStorageApi(namespace,
                                    logger=self.logger,
                                    max_retries=request_attempts - 1,
                                    request_attempts=request_attempts)
        self.rdir_client = RdirClient({"namespace": namespace},
                                      logger=self.logger)

        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0
        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0
        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

        self.list_cache = CacheDict(cache_size)
        self.running_tasks = {}
        self.running_lock = Semaphore(1)
        self.result_queue = LightQueue(concurrency)

        self.running = True
        self.run_time = 0

        # Set of targets which must be checked again, to confirm
        # or deny the issues reported by previous passes.
        self.delayed_targets = dict()
        # Minimum time in error and number of confirmations of the error
        # before triggering a reconstruction action.
        self.min_time_in_error = min_time_in_error
        self.required_confirmations = required_confirmations

    def reset_stats(self):
        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0
        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0
        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

    def _spawn(self, func, target, *args, **kwargs):
        """
        Spawn a task on the internal GreenPool.
        Discard the task if the pool is no longer running.
        """
        if self.running:
            return self.pool.spawn(func, target, *args, **kwargs)
        self.logger.info("Discarding %s", target)
        return None

    def _spawn_n(self, func, target, *args, **kwargs):
        """
        Spawn a task on the internal GreenPool, do not wait for the result.
        Discard the task if the pool is no longer running.
        """
        if self.running:
            return self.pool.spawn_n(func, target, *args, **kwargs)
        self.logger.info("Discarding %s", target)
        return None

    def complete_target_from_chunk_metadata(self, target, xattr_meta):
        """
        Complete a Target object from metadata found in chunk's extended
        attributes. In case the "fullpath" is not available, try to read
        legacy metadata, and maybe ask meta1 to resolve the CID into
        account and container names.
        """
        # pylint: disable=unbalanced-tuple-unpacking
        try:
            acct, ct, path, vers, content_id = \
                decode_fullpath(xattr_meta['full_path'])
            target.account = acct
            target.container = ct
            target.obj = path
            target.content_id = content_id
            target.version = vers
        except KeyError:
            # No fullpath header, try legacy headers
            if 'content_path' in xattr_meta:
                target.obj = xattr_meta['content_path']
            if 'content_id' in xattr_meta:
                target.content_id = xattr_meta['content_id']
            if 'content_version' in xattr_meta:
                target.version = xattr_meta['content_version']
            cid = xattr_meta.get('container_id')
            if cid:
                try:
                    md = self.api.directory.show(cid=cid)
                    acct = md.get('account')
                    ct = md.get('name')
                    if acct:
                        target.account = acct
                    if ct:
                        target.container = ct
                except Exception as err:
                    self.logger.warn(
                        "Failed to resolve CID %s into account "
                        "and container names: %s", cid, err)

    def recover_and_complete_object_meta(self, target, chunk):
        _, rawx_service, chunk_id = chunk.rsplit('/', 2)
        # 1. Fetch chunk list from rdir (could be cached).
        # Unfortunately we cannot seek to a specific chunk ID.
        entries = [
            x for x in self.rdir_client.chunk_fetch(rawx_service, limit=-1)
            if x[2] == chunk_id
        ]
        if not entries:
            self.logger.warn('Chunk %s not found in rdir', chunk_id)
            return
        elif len(entries) > 1:
            self.logger.info('Chunk %s appears in %d objects', chunk_id,
                             len(entries))
        # 2. Find content and container IDs
        target.cid, target.content_id = entries[0][0:2]
        meta = self.api.object_get_properties(None,
                                              None,
                                              None,
                                              cid=target.cid,
                                              content=target.content_id)
        target.obj = meta['name']
        target.version = meta['version']
        target.account, target.container = self.api.resolve_cid(target.cid)

    def send_result(self, target, errors=None, irreparable=False):
        """
        Put an item in the result queue.
        """
        # TODO(FVE): send to an external queue.
        target.append_result(ItemResult(errors, irreparable))
        self.result_queue.put(target)

    def send_chunk_job(self, target, irreparable=False):
        """
        Send a "content broken" event, to trigger the
        reconstruction of the chunk.
        """
        item = (self.api.namespace, target.cid, target.content_id,
                target.chunk)
        ev_dict = BlobRebuilder.task_event_from_item(item)
        if irreparable:
            ev_dict['data']['irreparable'] = irreparable
        job = json.dumps(ev_dict)
        self.error_sender.send_job(job)
        self.error_sender.job_done()  # Don't expect any response

    def write_error(self, target, irreparable=False):
        if not self.error_file:
            return
        error = list()
        if irreparable:
            error.append(IRREPARABLE_PREFIX)
        error.append(target.account)
        if target.container:
            error.append(target.container)
        if target.obj:
            error.append(target.obj)
        if target.chunk:
            error.append(target.chunk)
        self.error_writer.writerow(error)

    def write_rebuilder_input(self, target, irreparable=False):
        error = list()
        if irreparable:
            error.append(IRREPARABLE_PREFIX)
        error.append(target.cid)
        # FIXME(FVE): ensure we always resolve content_id,
        # or pass object version along with object name.
        error.append(target.content_id or target.obj)
        error.append(target.chunk)
        self.rebuild_writer.writerow(error)

    def write_chunk_error(self, target, chunk=None, irreparable=False):
        if chunk is not None:
            target = target.copy()
            target.chunk = chunk
        self.write_error(target, irreparable=irreparable)
        if self.rebuild_file:
            self.write_rebuilder_input(target, irreparable=irreparable)
        if self.error_sender:
            self.send_chunk_job(target, irreparable=irreparable)

    def _check_chunk_xattr(self, target, obj_meta, xattr_meta):
        """
        Check coherency of chunk extended attributes with object metadata.

        :returns: a list of errors
        """
        errors = list()
        # Composed position -> erasure coding
        attr_prefix = 'meta' if '.' in obj_meta['pos'] else ''

        attr_key = attr_prefix + 'chunk_size'
        if str(obj_meta['size']) != xattr_meta.get(attr_key):
            errors.append(
                "'%s' xattr (%s) differs from size in meta2 (%s)" %
                (attr_key, xattr_meta.get(attr_key), obj_meta['size']))

        attr_key = attr_prefix + 'chunk_hash'
        if obj_meta['hash'] != xattr_meta.get(attr_key):
            errors.append(
                "'%s' xattr (%s) differs from hash in meta2 (%s)" %
                (attr_key, xattr_meta.get(attr_key), obj_meta['hash']))
        return errors

    def _check_chunk(self, target):
        """
        Execute various checks on a chunk:
        - does it appear in object's chunk list?
        - is it reachable?
        - are its extended attributes coherent?

        :returns: the list of errors encountered,
            and the chunk's owner object metadata.
        """
        chunk = target.chunk
        errors = list()
        obj_meta = None
        xattr_meta = None

        cached = self._get_cached_or_lock(chunk)
        if cached is not None:
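            # The cached tuple is (errors, obj_meta); the appended True
            # tells the caller the result was already reported once.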
            return cached + (True, )

        self.logger.debug('Checking chunk "%s"', target)
        try:
            xattr_meta = self.api.blob_client.chunk_head(
                chunk, xattr=self.check_xattr, check_hash=self.check_hash)
        except exc.NotFound as err:
            self.chunk_not_found += 1
            errors.append('Not found: %s' % (err, ))
        except exc.FaultyChunk as err:
            self.chunk_exceptions += 1
            errors.append('Faulty: %r' % (err, ))
        except Exception as err:
            self.chunk_exceptions += 1
            errors.append('Check failed: %s' % (err, ))

        if not target.obj:
            if xattr_meta:
                self.complete_target_from_chunk_metadata(target, xattr_meta)
            else:
                self.recover_and_complete_object_meta(target, chunk)

        if target.obj:
            obj_listing, obj_meta = self.check_obj(target.copy_object())
            if chunk not in obj_listing:
                errors.append('Missing from object listing')
                db_meta = dict()
            else:
                db_meta = obj_listing[chunk]

            if db_meta and xattr_meta and self.check_xattr:
                errors.extend(
                    self._check_chunk_xattr(target, db_meta, xattr_meta))

        self.list_cache[chunk] = errors, obj_meta
        self._unlock(chunk)

        # Do not send errors directly, let the caller do it.
        # Indeed, it may want to check if the chunks can be repaired or not.
        self.chunks_checked += 1
        return errors, obj_meta, False

    def check_chunk(self, target):
        errors, _obj_meta, from_cache = self._check_chunk(target)
        # If the result comes from the cache, we already reported it.
        if not from_cache:
            self.send_result(target, errors, target.irreparable)
        return errors

    def _check_metachunk(self, target, stg_met, pos, chunks, recurse=0):
        """
        Check that a metachunk has the right number of chunks.

        :returns: the list of errors
        """
        required = stg_met.expected_chunks
        errors = list()
        chunk_results = list()

        if len(chunks) < required:
            missing_chunks = required - len(chunks)
            if stg_met.ec:
                subs = {x['num'] for x in chunks}
                for sub in range(required):
                    if sub not in subs:
                        chkt = target.copy()
                        chkt.chunk = '%d.%d' % (pos, sub)
                        err = "Missing chunk at position %s" % chkt.chunk
                        chunk_results.append((chkt, [err], False))
                        errors.append(err)
            else:
                for _ in range(missing_chunks):
                    chkt = target.copy()
                    # Replicated chunks have no subposition.
                    chkt.chunk = '%d' % pos
                    err = "Missing chunk at position %d" % pos
                    chunk_results.append((chkt, [err], False))
                    errors.append(err)

        if recurse > 0:
            for chunk in chunks:
                tcopy = target.copy()
                tcopy.chunk = chunk['url']
                chunk_errors, _, from_cache = self._check_chunk(tcopy)
                chunk_results.append((tcopy, chunk_errors, from_cache))
                if chunk_errors:
                    errors.append("Unusable chunk %s at position %s" %
                                  (chunk['url'], chunk['pos']))

        irreparable = required - len(errors) < stg_met.min_chunks_to_read
        if irreparable:
            errors.append(
                "Unavailable metachunk at position %s "
                "(%d/%d chunks available, %d/%d required)" %
                (pos, required - len(errors), stg_met.expected_chunks,
                 stg_met.min_chunks_to_read, stg_met.expected_chunks))
        for tgt, errs, from_cache in chunk_results:
            # If the result comes from the cache, we already reported it.
            if not from_cache:
                self.send_result(tgt, errs, irreparable)
        # Since the "metachunk" is not an official item type,
        # this method does not report errors itself. Errors will
        # be reported as object errors.
        return errors

    def _check_obj_policy(self, target, obj_meta, chunks, recurse=0):
        """
        Check that the list of chunks of an object matches
        the object's storage policy.

        :returns: the list of errors encountered
        """
        stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
        chunks_by_pos = _sort_chunks(chunks, stg_met.ec)
        tasks = list()
        for pos, pchunks in iteritems(chunks_by_pos):
            tasks.append((pos,
                          self._spawn(self._check_metachunk,
                                      target.copy(),
                                      stg_met,
                                      pos,
                                      pchunks,
                                      recurse=recurse)))
        errors = list()
        for pos, task in tasks:
            if not task and not self.running:
                errors.append("Pos %d skipped: checker is exiting" % pos)
                continue
            try:
                errors.extend(task.wait())
            except Exception as err:
                errors.append("Check failed: pos %d: %s" % (pos, err))
        return errors

    def check_obj_versions(self, target, versions, recurse=0):
        """
        Run checks of all versions of the targeted object in parallel.
        """
        tasks = list()
        for ov in versions:
            tcopy = target.copy_object()
            tcopy.content_id = ov['id']
            tcopy.version = str(ov['version'])
            tasks.append((tcopy.version,
                          self._spawn(self.check_obj, tcopy, recurse=recurse)))
        errors = list()
        for version, task in tasks:
            if not task and not self.running:
                errors.append("Version %s skipped: checker is exiting" %
                              version)
                continue
            try:
                task.wait()
            except Exception as err:
                errors.append("Check failed: version %s: %s" % (version, err))
        if errors:
            # Send a result with the target without version to tell
            # we were not able to check all versions of the object.
            self.send_result(target, errors)

    def _load_obj_meta(self, target, errors):
        """
        Load object metadata and chunks.

        :param target: which object to check.
        :param errors: list of errors that will be appended
            in case any error occurs.
        :returns: a tuple with object metadata and a list of chunks.
        """
        try:
            return self.api.object_locate(target.account,
                                          target.container,
                                          target.obj,
                                          version=target.version,
                                          properties=False)
        except exc.NoSuchObject as err:
            self.object_not_found += 1
            errors.append('Not found: %s' % (err, ))
        except Exception as err:
            self.object_exceptions += 1
            errors.append('Check failed: %s' % (err, ))
        return None, []

    def _get_cached_or_lock(self, lock_key):
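        """
        Return the cached result for `lock_key` if available.
        Otherwise register an Event under `lock_key` and return None:
        the caller is then expected to compute the result, cache it,
        and call _unlock(). Concurrent callers asking for the same key
        wait on the Event instead of repeating the work.
        """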
        # If something is running, wait for it
        with self.running_lock:
            event = self.running_tasks.get(lock_key)
        if event:
            event.wait()
            event = None

        # Maybe get a cached result
        if lock_key in self.list_cache:
            return self.list_cache[lock_key]

        # No cached result, try to compute the thing ourselves
        while True:
            with self.running_lock:
                # Another check while locked
                if lock_key in self.list_cache:
                    return self.list_cache[lock_key]
                # Still nothing cached
                event = self.running_tasks.get(lock_key)
                if event is None:
                    self.running_tasks[lock_key] = Event()
                    return None
            event.wait()

    def _unlock(self, lock_key):
        with self.running_lock:
            event = self.running_tasks[lock_key]
            del self.running_tasks[lock_key]
            event.send(True)

    def check_obj(self, target, recurse=0):
        """
        Check one object version.
        If no version is specified, all versions of the object will
        be checked.

        :returns: the result of the check of the most recent version,
            or the one that is explicitly targeted.
        """
        account = target.account
        container = target.container
        obj = target.obj
        vers = target.version  # can be None

        cached = self._get_cached_or_lock((account, container, obj, vers))
        if cached is not None:
            return cached

        self.logger.info('Checking object "%s"', target)
        container_listing, _ = self.check_container(target.copy_container())
        errors = list()
        if obj not in container_listing:
            errors.append('Missing from container listing')
            # checksum = None
        else:
            versions = container_listing[obj]
            if vers is None:
                if target.content_id is None:
                    # No version specified, check all versions
                    self.check_obj_versions(target.copy_object(),
                                            versions,
                                            recurse=recurse)
                    # Now return the cached result of the most recent version
                    target.content_id = versions[0]['id']
                    target.version = str(versions[0]['version'])
                    res = self.check_obj(target, recurse=0)
                    self._unlock((account, container, obj, vers))
                    return res
                else:
                    for ov in versions:
                        if ov['id'] == target.content_id:
                            vers = str(ov['version'])
                            target.version = vers
                            break
                    else:
                        errors.append('Missing from container listing')

            # TODO: check that checksums match
            # checksum = container_listing[obj]['hash']

        meta, chunks = self._load_obj_meta(target, errors)

        chunk_listing = {c['url']: c for c in chunks}
        if meta:
            if target.content_id is None:
                target.content_id = meta['id']
            if target.version is None:
                target.version = str(meta['version'])
            self.list_cache[(account, container, obj, vers)] = \
                (chunk_listing, meta)
        self.objects_checked += 1
        self._unlock((account, container, obj, vers))

        # Skip the check if we could not locate the object
        if meta:
            errors.extend(
                self._check_obj_policy(target, meta, chunks, recurse=recurse))

        self.send_result(target, errors)
        return chunk_listing, meta

    def check_container(self, target, recurse=0):
        account = target.account
        container = target.container

        cached = self._get_cached_or_lock((account, container))
        if cached is not None:
            return cached

        self.logger.info('Checking container "%s"', target)
        account_listing = self.check_account(target.copy_account())
        errors = list()
        if container not in account_listing:
            errors.append('Missing from account listing')

        marker = None
        results = []
        ct_meta = dict()
        extra_args = dict()
        if self.limit_listings > 1 and target.obj:
            # When we are explicitly checking one object, start the listing
            # where this object is supposed to be. Do not use a limit,
            # but an end marker, in order to fetch all versions of the object.
            extra_args['prefix'] = target.obj
            extra_args['end_marker'] = target.obj + '\x00'  # HACK
        while True:
            try:
                resp = self.api.object_list(account,
                                            container,
                                            marker=marker,
                                            versions=True,
                                            **extra_args)
            except exc.NoSuchContainer as err:
                self.container_not_found += 1
                errors.append('Not found: %s' % (err, ))
                break
            except Exception as err:
                self.container_exceptions += 1
                errors.append('Check failed: %s' % (err, ))
                break

            truncated = resp.get('truncated', False)
            if truncated:
                marker = resp['next_marker']

            if resp['objects']:
                # safeguard, probably useless
                if not marker:
                    marker = resp['objects'][-1]['name']
                results.extend(resp['objects'])
                if not truncated or self.limit_listings > 1:
                    break
            else:
                ct_meta = resp
                ct_meta.pop('objects')
                break

        container_listing = dict()
        # Save all object versions, with the most recent first
        for obj in results:
            container_listing.setdefault(obj['name'], list()).append(obj)
        for versions in container_listing.values():
            versions.sort(key=lambda o: o['version'], reverse=True)

        if self.limit_listings <= 1:
            # We just listed the whole container, keep the result in a cache
            self.containers_checked += 1
            self.list_cache[(account, container)] = container_listing, ct_meta
        self._unlock((account, container))

        if recurse > 0:
            for obj_vers in container_listing.values():
                for obj in obj_vers:
                    tcopy = target.copy_object()
                    tcopy.obj = obj['name']
                    tcopy.content_id = obj['id']
                    tcopy.version = str(obj['version'])
                    self._spawn_n(self.check_obj, tcopy, recurse - 1)
        self.send_result(target, errors)
        return container_listing, ct_meta

    def check_account(self, target, recurse=0):
        account = target.account

        cached = self._get_cached_or_lock(account)
        if cached is not None:
            return cached

        self.logger.info('Checking account "%s"', target)
        errors = list()
        marker = None
        results = []
        extra_args = dict()
        if self.limit_listings > 0 and target.container:
            # When we are explicitly checking one container, start the listing
            # where this container is supposed to be, and list only one
            # container.
            extra_args['prefix'] = target.container
            extra_args['limit'] = 1
        while True:
            try:
                resp = self.api.container_list(account,
                                               marker=marker,
                                               **extra_args)
            except Exception as err:
                self.account_exceptions += 1
                errors.append('Check failed: %s' % (err, ))
                break
            if resp:
                marker = resp[-1][0]
                results.extend(resp)
                if self.limit_listings > 0:
                    break
            else:
                break

        containers = dict()
        for container in results:
            # Name, number of objects, number of bytes
            containers[container[0]] = (container[1], container[2])

        if self.limit_listings <= 0:
            # We just listed the whole account, keep the result in a cache
            self.accounts_checked += 1
            self.list_cache[account] = containers
        self._unlock(account)

        if recurse > 0:
            for container in containers:
                tcopy = target.copy_account()
                tcopy.container = container
                self._spawn_n(self.check_container, tcopy, recurse - 1)

        self.send_result(target, errors)
        return containers

    def check(self, target, recurse=0):
        if target.type == 'chunk':
            self._spawn_n(self.check_chunk, target)
        elif target.type == 'object':
            self._spawn_n(self.check_obj, target, recurse)
        elif target.type == 'container':
            self._spawn_n(self.check_container, target, recurse)
        else:
            self._spawn_n(self.check_account, target, recurse)

    def check_all_accounts(self, recurse=0):
        all_accounts = self.api.account_list()
        for acct in all_accounts:
            self.check(Target(acct), recurse=recurse)

    def fetch_results(self, rate_limiter=None):
        while self.running and not self.result_queue.empty():
            res = self.result_queue.get(True)
            yield res
            # Rate limiting is done on the result queue for now.
            # Someday we could implement a submission queue instead of
            # letting each worker submit tasks to the pool, and do
            # the rate limiting on this queue.
            if rate_limiter is not None:
                self.run_time = rate_limiter(self.run_time)

    def merge_with_delayed_target(self, target):
        """
        Merge the specified target with a delayed one.

        :returns: the delayed target, if there is one, with an error log
            including the errors of the new target. Return the new target
            otherwise.
        """
        tkey = repr(target)
        prev_target = self.delayed_targets.get(tkey, target)
        if prev_target is not target:
            errors = dict(prev_target.error_log)
            errors.update(target.error_log)
            prev_target.error_log = sorted(errors.items())
        return prev_target

    def log_result(self, target):
        """
        Log a check result, if it shows errors. Dispatch the errors to the
        appropriate destinations (log files, queues, etc.).
        """
        # The result may come from a new target, or from an old target
        # we checked another time, or both.
        target = self.merge_with_delayed_target(target)
        if target.has_errors:
            time_in_error, confirmations = target.time_in_error()
            if (time_in_error < self.min_time_in_error
                    or confirmations < self.required_confirmations):
                self.logger.info("Delaying check for %s, %d/%d confirmations",
                                 target, confirmations,
                                 self.required_confirmations)
                self.delayed_targets[repr(target)] = target
            else:
                if target.type == 'chunk':
                    self.logger.info(
                        "Writing error for %s, %d/%d confirmations", target,
                        confirmations, self.required_confirmations)
                    self.write_chunk_error(target,
                                           irreparable=target.irreparable)
                else:
                    self.write_error(target, irreparable=target.irreparable)
                self.delayed_targets.pop(repr(target), None)
            self.logger.warn(
                '%s:%s\n%s', target,
                ' irreparable' if target.irreparable else '',
                target.latest_error_result().errors_to_str(err_format='  %s'))

    def run(self, rate_limiter=None):
        """
        Fetch results and write logs until all jobs have finished.

        :returns: a generator yielding check results.
        """
        while self.running and (self.pool.running() + self.pool.waiting()):
            for result in self.fetch_results(rate_limiter):
                self.log_result(result)
                yield result
            sleep(0.1)
        if self.running:
            self.pool.waitall()
        # No rate limiting
        for result in self.fetch_results():
            self.log_result(result)
            yield result
        self.list_cache = CacheDict(self.list_cache.size)

    def stop(self):
        self.logger.info("Stopping")
        self.running = False

    def report(self):
        success = True

        def _report_stat(name, stat):
            print("{0:18}: {1}".format(name, stat))

        print()
        print('Report')
        _report_stat("Accounts checked", self.accounts_checked)
        if self.account_not_found:
            success = False
            _report_stat("Missing accounts", self.account_not_found)
        if self.account_exceptions:
            success = False
            _report_stat("Exceptions", self.account_exceptions)
        print()
        _report_stat("Containers checked", self.containers_checked)
        if self.container_not_found:
            success = False
            _report_stat("Missing containers", self.container_not_found)
        if self.container_exceptions:
            success = False
            _report_stat("Exceptions", self.container_exceptions)
        print()
        _report_stat("Objects checked", self.objects_checked)
        if self.object_not_found:
            success = False
            _report_stat("Missing objects", self.object_not_found)
        if self.object_exceptions:
            success = False
            _report_stat("Exceptions", self.object_exceptions)
        print()
        _report_stat("Chunks checked", self.chunks_checked)
        if self.chunk_not_found:
            success = False
            _report_stat("Missing chunks", self.chunk_not_found)
        if self.chunk_exceptions:
            success = False
            _report_stat("Exceptions", self.chunk_exceptions)
        return success
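
A minimal driving sketch for this checker (hedged: the import path, the
namespace name 'OPENIO' and the constructor arguments are assumptions,
mirroring the variant shown in Example #9; Target is used as in
check_all_accounts above):

# Hypothetical usage sketch, not part of the example above.
from oio.crawler.integrity import Checker, Target

checker = Checker('OPENIO', concurrency=10, error_file='/tmp/errors.csv')
# recurse=3 descends account -> containers -> objects -> chunks
checker.check(Target('my_account'), recurse=3)
for result in checker.run():
    pass  # errors are also dispatched by log_result()
if not checker.report():
    print("Some checks failed")
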
Example #8
class TestContainerDownload(BaseTestCase):
    def setUp(self):
        super(TestContainerDownload, self).setUp()
        # FIXME: should we use direct API from BaseTestCase
        #        or still container.client ?
        self.conn = ObjectStorageApi(self.ns)
        self._streaming = 'http://' + self.get_service_url('container')[2]
        self._cnt = random_container()
        self._uri = self.make_uri('dump')
        self._data = {}
        self.conn.container_create(self.account, self._cnt)
        self.raw = ""
        self._slo = []

    def make_uri(self, action, account=None, container=None):
        account = account or self.account
        container = container or self._cnt
        return '%s/v1.0/container/%s?acct=%s&ref=%s' % (
            self._streaming, action, account, container)

    def tearDown(self):
        for name in self._data:
            self.conn.object_delete(self.account, self._cnt, name)
        self.conn.container_delete(self.account, self._cnt)
        super(TestContainerDownload, self).tearDown()

    def _create_data(self,
                     name=gen_names,
                     metadata=None,
                     size=513,
                     append=False):
        for idx, _name in itertools.islice(name(), 5):
            mime = random.choice(MIMETYPE)
            if append and size > 0:
                data = gen_data(size / 2 * idx)
                entry = {'data': data, 'meta': None, 'mime': mime}
                self.conn.object_create(self.account,
                                        self._cnt,
                                        obj_name=_name,
                                        data=data,
                                        mime_type=mime)
                data = gen_data(size / 2 * idx)
                self.conn.object_create(self.account,
                                        self._cnt,
                                        obj_name=_name,
                                        data=data,
                                        mime_type=mime,
                                        append=True)
                entry['data'] += data
            else:
                data = gen_data(size * idx)
                entry = {'data': data, 'meta': None, 'mime': mime}
                self.conn.object_create(self.account,
                                        self._cnt,
                                        obj_name=_name,
                                        data=data,
                                        mime_type=mime)
            if metadata:
                entry['meta'] = {}
                for _ in xrange(10):
                    key, val = metadata()
                    entry['meta'][key] = val
                    self.conn.object_update(self.account, self._cnt, _name,
                                            entry['meta'])
            self._data[_name] = entry

    def _create_s3_slo(self, name=gen_names, metadata=None):
        # create a fake S3 bucket with a SLO object
        chunksize = 10000
        parts = 5
        res = []
        full_data = ""
        self.conn.container_create(self.account, self._cnt + '+segments')
        _name = "toto"
        etag = rand_str(50)
        part_number = 1
        for size in [chunksize] * parts + [444]:
            data = gen_data(size)
            res.append({
                'bytes': size,
                'content_type': 'application/octet-stream',
                'hash': md5(data).hexdigest().upper(),
                'last_modified': '2017-06-21T12:42:47.000000',
                'name': '/%s+segments/%s/%s/%d' % (self._cnt, _name, etag,
                                                   part_number)
            })
            self.conn.object_create(self.account,
                                    "%s+segments" % self._cnt,
                                    obj_name='%s/%s/%d' %
                                    (_name, etag, part_number),
                                    data=data)
            full_data += data
            part_number += 1

        self._data[_name] = {
            'data': full_data,
            'meta': {
                'x-static-large-object': 'true',
                'x-object-sysmeta-slo-etag': etag,
                'x-object-sysmeta-slo-size': str(len(full_data))
            }
        }
        self._slo.append(_name)
        data = json.dumps(res)
        self.conn.object_create(self.account,
                                self._cnt,
                                obj_name=_name,
                                data=data)
        self.conn.object_update(self.account, self._cnt, _name,
                                self._data[_name]['meta'])

    def _check_tar(self, data):
        raw = BytesIO(data)
        tar = tarfile.open(fileobj=raw, ignore_zeros=True)
        info = self._data.keys()
        for entry in tar.getnames():
            if entry == CONTAINER_MANIFEST:
                # skip special entry
                continue

            self.assertIn(entry, info)

            tmp = tar.extractfile(entry)
            self.assertEqual(self._data[entry]['data'], tmp.read())
            info.remove(entry)

        self.assertEqual(info, [])
        return tar

    def _check_container(self, cnt):
        ret = self.conn.object_list(account=self.account, container=cnt)
        names = self._data.keys()
        for obj in ret['objects']:
            name = obj['name']
            self.assertIn(name, self._data)
            self.assertEqual(obj['size'], len(self._data[name]['data']))
            _, data = self.conn.object_fetch(self.account, cnt, name)
            raw = "".join(data)

            self.assertEqual(
                md5(raw).hexdigest(),
                md5(self._data[name]['data']).hexdigest())
            meta = self.conn.object_get_properties(self.account, cnt, name)
            self.assertEqual(meta['properties'], self._data[name]['meta'])
            names.remove(name)
        self.assertEqual(len(names), 0)

    def _simple_download(self,
                         name=gen_names,
                         metadata=None,
                         size=513,
                         append=False):
        self._create_data(name=name,
                          metadata=metadata,
                          size=size,
                          append=append)

        ret = requests.get(self._uri)
        self.assertGreater(len(ret.content), 0)
        self.assertEqual(ret.status_code, 200)
        self.raw = ret.content

        return self._check_tar(ret.content)

    def _check_metadata(self, tar):
        for entry in tar.getnames():
            if entry == CONTAINER_MANIFEST:
                # skip special entry
                continue
            headers = tar.getmember(entry).pax_headers
            keys = headers.keys()[:]
            for key, val in self._data[entry]['meta'].items():
                key = u"SCHILY.xattr.user." + key.decode('utf-8')
                self.assertIn(key, headers)
                self.assertEqual(val.decode('utf-8'), headers[key])
                keys.remove(key)
            #
            self.assertEqual(self._data[entry]['mime'], headers['mime_type'])
            keys.remove('mime_type')
            #
            self.assertEqual(keys, [])

    def test_missing_container(self):
        ret = requests.get(self._streaming + '/' + random_container("ms-"))
        self.assertEqual(ret.status_code, 404)

    def test_invalid_url(self):
        ret = requests.get(self._streaming)
        self.assertEqual(ret.status_code, 404)

        ret = requests.head(self._streaming + '/' + random_container('inv') +
                            '/' + random_container('inv'))
        self.assertEqual(ret.status_code, 404)

    def test_download_empty_container(self):
        ret = requests.get(self._uri)
        self.assertEqual(ret.status_code, 204)

    def test_simple_download(self):
        self._simple_download()

    def test_check_head(self):
        self._create_data()

        get = requests.get(self._uri)
        head = requests.head(self._uri)

        self.assertEqual(get.headers['content-length'],
                         head.headers['content-length'])

    def test_download_per_range(self):
        self._create_data()

        org = requests.get(self._uri)

        data = []
        for idx in xrange(0, int(org.headers['content-length']), 512):
            ret = requests.get(
                self._uri, headers={'Range': 'bytes=%d-%d' % (idx, idx + 511)})
            self.assertEqual(ret.status_code, 206)
            self.assertEqual(len(ret.content), 512)
            self.assertEqual(ret.content, org.content[idx:idx + 512])
            data.append(ret.content)

        data = "".join(data)
        self.assertGreater(len(data), 0)
        self.assertEqual(md5(data).hexdigest(), md5(org.content).hexdigest())

    def test_invalid_range(self):
        self._create_data()

        ranges = ((-512, 511), (512, 0), (1, 3), (98888, 99999))
        for start, end in ranges:
            ret = requests.get(self._uri,
                               headers={'Range': 'bytes=%d-%d' % (start, end)})
            self.assertEqual(
                ret.status_code, 416,
                "Invalid error code for range %d-%d" % (start, end))

        ret = requests.get(self._uri,
                           headers={'Range': 'bytes=0-511, 512-1023'})
        self.assertEqual(ret.status_code, 416)

    def test_file_metadata(self):
        tar = self._simple_download(metadata=gen_metadata)
        self._check_metadata(tar)

    def test_container_metadata(self):
        key, val = gen_metadata()
        ret = self.conn.container_update(self.account, self._cnt, {key: val})
        ret = self.conn.container_show(self.account, self._cnt)
        ret = requests.get(self._uri)
        self.assertEqual(ret.status_code, 200)

        raw = BytesIO(ret.content)
        tar = tarfile.open(fileobj=raw, ignore_zeros=True)
        self.assertIn(CONTAINER_PROPERTIES, tar.getnames())

        data = json.load(tar.extractfile(CONTAINER_PROPERTIES))
        self.assertIn(key, data)
        self.assertEqual(val, data[key])

    def test_charset_file(self):
        self._simple_download(name=gen_charset_names)

    @unittest.skip("wip")
    def test_byte_metadata(self):
        tar = self._simple_download(metadata=gen_byte_metadata)
        self._check_metadata(tar)

    def test_charset_metadata(self):
        tar = self._simple_download(metadata=gen_charset_metadata)
        self._check_metadata(tar)

    @attr('s3')
    def test_s3_simple_download(self):
        self._create_s3_slo()
        ret = requests.get(self._uri)
        self.assertGreater(len(ret.content), 0)
        self.assertEqual(ret.status_code, 200)
        self.raw = ret.content

        raw = BytesIO(ret.content)
        tar = tarfile.open(fileobj=raw, ignore_zeros=True)
        info = self._data.keys()
        for entry in tar.getnames():
            if entry == CONTAINER_MANIFEST:
                # skip special entry
                continue
            self.assertIn(entry, info)

            tmp = tar.extractfile(entry)
            self.assertEqual(self._data[entry]['data'], tmp.read())
            info.remove(entry)

        self.assertEqual(len(info), 0)
        return tar

    @attr('s3')
    def test_s3_range_download(self):
        self._create_s3_slo()
        org = requests.get(self._uri)
        self.assertEqual(org.status_code, 200)

        data = []
        for idx in xrange(0, int(org.headers['content-length']), 512):
            ret = requests.get(
                self._uri, headers={'Range': 'bytes=%d-%d' % (idx, idx + 511)})
            self.assertEqual(ret.status_code, 206)
            self.assertEqual(len(ret.content), 512)
            self.assertEqual(ret.content, org.content[idx:idx + 512])
            data.append(ret.content)

        data = "".join(data)
        self.assertGreater(len(data), 0)
        self.assertEqual(md5(data).hexdigest(), md5(org.content).hexdigest())

    @attr('s3')
    def test_s3_check_slo_metadata_download(self):
        self._create_s3_slo()

        org = requests.get(self.make_uri('dump'))
        self.assertEqual(org.status_code, 200)

        cnt = rand_str(20)
        res = requests.put(self.make_uri('restore', container=cnt),
                           data=org.content)
        self.assertEqual(res.status_code, 201)

        res = self.conn.object_get_properties(self.account, cnt, self._slo[0])
        props = res['properties']
        self.assertNotIn('x-static-large-object', props)
        self.assertNotIn('x-object-sysmeta-slo-size', props)
        self.assertNotIn('x-object-sysmeta-slo-etag', props)

    @attr('simple')
    def test_simple_restore(self):
        self._create_data(metadata=gen_metadata)
        org = requests.get(self.make_uri('dump'))
        cnt = rand_str(20)

        res = requests.put(self.make_uri('restore', container=cnt),
                           data=org.content)
        self.assertEqual(res.status_code, 201)
        self._check_container(cnt)

    @attr('restore')
    def test_multipart_restore(self):
        self._create_data(metadata=gen_metadata, size=1025 * 1024)
        org = requests.get(self.make_uri('dump'))
        cnt = rand_str(20)
        size = 1014 * 1024
        parts = [
            org.content[x:x + size] for x in xrange(0, len(org.content), size)
        ]
        uri = self.make_uri('restore', container=cnt)
        start = 0
        for part in parts:
            hdrs = {'Range': 'bytes=%d-%d' % (start, start + len(part) - 1)}
            res = requests.put(uri, data=part, headers=hdrs)
            start += len(part)
            self.assertIn(res.status_code, [201, 206])

        self._check_container(cnt)

    @attr('restore')
    def test_multipart_invalid_restore(self):
        self._create_data(metadata=gen_metadata, size=1025 * 1024)
        org = requests.get(self.make_uri('dump'))
        cnt = rand_str(20)
        uri = self.make_uri('restore', container=cnt)
        size = 1014 * 1024
        parts = [
            org.content[x:x + size] for x in xrange(0, len(org.content), size)
        ]
        start = 0

        for part in parts:
            hdrs = {'Range': 'bytes=%d-%d' % (start, start + len(part) - 1)}
            res = requests.put(uri, data=part, headers=hdrs)
            self.assertIn(res.status_code, [201, 206])
            start += len(part)

            # only an unfinished restoration exposes X-Consumed-Size
            if res.status_code == 206:
                res = requests.head(uri)
                self.assertEqual(int(res.headers['X-Consumed-Size']), start)

            inv = requests.put(uri, data=part, headers=hdrs)
            self.assertEqual(inv.status_code, 422)

            if res.status_code == 206:
                res = requests.head(uri)
                self.assertEqual(int(res.headers['X-Consumed-Size']), start)

        uri = self.make_uri('restore', container=rand_str(20))
        hdrs = {'Range': 'bytes=%d-%d' % (size, size + len(parts[1]) - 1)}
        res = requests.put(uri, data=parts[1], headers=hdrs)
        self.assertEqual(res.status_code, 422)

        self._check_container(cnt)

    @attr('concurrency')
    def test_multipart_concurrency(self):
        self._create_data(metadata=gen_metadata, size=1025 * 1024)
        org = requests.get(self.make_uri('dump'))
        cnt = rand_str(20)
        uri = self.make_uri('restore', container=cnt)
        size = divmod(len(org.content) / 3, 512)[0] * 512
        parts = [
            org.content[x:x + size] for x in xrange(0, len(org.content), size)
        ]
        start = 0

        class StreamWithContentLength(Thread):
            """Thread to send data with delays to restore API"""
            def __init__(self, data, headers):
                self._count = 0
                self._data = data
                self._hdrs = headers
                super(StreamWithContentLength, self).__init__()

            def __len__(self):
                return len(self._data)

            def read(self, *args):
                if self._count < len(self._data):
                    time.sleep(0.5)
                    data = self._data[self._count:self._count + size / 3]
                    self._count += len(data)
                    return data
                return ""

            def run(self):
                self._ret = requests.put(uri, data=self, headers=self._hdrs)

        for idx, part in enumerate(parts):
            hdrs = {'Range': 'bytes=%d-%d' % (start, start + len(part) - 1)}
            if idx == 0:
                res = requests.put(uri, data=part, headers=hdrs)
                self.assertIn(res.status_code, [201, 206])
            else:
                # launch Thread and simulate slow bandwidth
                thr = StreamWithContentLength(part, hdrs)
                thr.start()
                # send data on same range
                time.sleep(0.5)
                res = requests.put(uri, data=part, headers=hdrs)
                self.assertEqual(res.status_code, 422)

                thr.join()
                self.assertIn(thr._ret.status_code, [201, 206])
            start += len(part)

        self._check_container(cnt)

    @attr('disconnected')
    def test_broken_connectivity(self):
        self._create_data(metadata=gen_metadata, size=1025 * 1024)
        org = requests.get(self.make_uri('dump'))
        cnt = rand_str(20)

        class FakeStream(object):
            """Send data and simulate a connectivity issue"""
            def __init__(self, data, size):
                self._count = 0
                self._data = data
                self._size = size

            def __len__(self):
                return len(self._data)

            def read(self, *args):
                if self._count < self._size:
                    data = self._data[self._count:self._count + size / 3]
                    self._count += len(data)
                    return data
                if self._count == len(self._data):
                    return ""
                raise Exception("break connection")

        def wait_lock():
            """When the lock is gone, return current consumed size"""
            nb = 0
            while True:
                time.sleep(0.1)
                req = requests.head(uri)
                if (req.status_code == 200 and req.headers.get(
                        'X-Upload-In-Progress', '1') == '0'):
                    print("Tried before lock free", nb)
                    print("Got consumed-size", req.headers['X-Consumed-Size'])
                    return int(req.headers['X-Consumed-Size'])
                nb += 1
                self.assertLess(nb, 10)

        uri = self.make_uri('restore', container=cnt)
        block = 1000 * 512
        start = 0
        cut = False
        while True:
            if start:
                start = wait_lock()

            stop = min(len(org.content), start + block)
            hdrs = {'Range': 'bytes=%d-%d' % (start, stop - 1)}
            size = stop - start
            if cut:
                size = block / 2
            cut = not cut

            try:
                ret = requests.put(uri,
                                   headers=hdrs,
                                   data=FakeStream(org.content[start:stop],
                                                   size))
            except Exception:
                pass
            else:
                self.assertIn(
                    ret.status_code, (201, 206),
                    "Unexpected %d HTTP response: %s" %
                    (ret.status_code, ret.content))
                start += size
                if ret.status_code == 201:
                    break

        result = requests.get(self.make_uri('dump', container=cnt))
        self._check_tar(result.content)

    @attr('rawtar')
    def test_rawtar(self):
        """Create a normal tar archive and restore it"""
        raw = BytesIO()
        tar = TarFile(mode='w', fileobj=raw)

        testdata = rand_str(20) * 5000

        inf = TarInfo("simpletar")
        fileraw = BytesIO()
        fileraw.write(testdata)
        inf.size = len(testdata)
        fileraw.seek(0)

        tar.addfile(inf, fileobj=fileraw)
        tar.close()

        raw.seek(0)
        data = raw.read()

        cnt = rand_str(20)
        ret = requests.put(self.make_uri("restore", container=cnt), data=data)

        self.assertEqual(ret.status_code, 201)
        meta, stream = self.conn.object_fetch(self.account, cnt, "simpletar")
        self.assertEqual(
            md5("".join(stream)).hexdigest(),
            md5(testdata).hexdigest())

    @attr('invalid')
    def test_checksums(self):
        """Check restore operation with invalid tar"""
        tar = self._simple_download(append=True)

        manifest = json.load(tar.extractfile(CONTAINER_MANIFEST),
                             object_pairs_hook=OrderedDict)
        # => add random bytes inside each file (either header or data)
        for entry in manifest:
            if entry['name'] == CONTAINER_MANIFEST:
                # CONTAINER_MANIFEST does not have a checksum at this time
                continue
            inv = self.raw

            # Test with tar entry
            # tar checksum verification does not cover SCHILY attributes
            # well, so only apply changes to the regular block of the entry
            idx = entry['start_block'] * BLOCKSIZE \
                + random.randint(0, BLOCKSIZE - 1)
            # + random.randint(0, entry['hdr_blocks'] * BLOCKSIZE)
            while self.raw[idx] == inv[idx]:
                inv = inv[:idx] + chr(random.randint(0, 255)) + inv[idx + 1:]

            cnt = rand_str(20)
            res = requests.put(self.make_uri('restore', container=cnt),
                               data=inv)
            self.assertEqual(res.status_code, 400)

            # skip empty files
            if entry['size'] == 0:
                continue

            # Test with data blocks
            inv = self.raw
            idx = (entry['start_block'] + entry['hdr_blocks']) * BLOCKSIZE \
                + random.randint(0, entry['size'] - 1)
            while self.raw[idx] == inv[idx]:
                inv = inv[:idx] + chr(random.randint(0, 255)) + inv[idx + 1:]
            cnt = rand_str(20)
            res = requests.put(self.make_uri('restore', container=cnt),
                               data=inv)

            self.assertEqual(res.status_code, 400)
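
Condensed, the dump/restore protocol exercised above looks like the
following sketch (the URL layout comes from make_uri(); the host,
account and container names are placeholders):

# Hypothetical round trip against the container streaming service.
import requests

base = 'http://127.0.0.1:6002/v1.0/container'
src = base + '/dump?acct=AUTH_demo&ref=source-container'
dst = base + '/restore?acct=AUTH_demo&ref=target-container'

dump = requests.get(src)
assert dump.status_code == 200  # 204 when the container is empty

# Whole-archive restore in a single request...
ret = requests.put(dst, data=dump.content)
assert ret.status_code == 201

# ...or in 512-byte-aligned parts with Range headers, as in
# test_multipart_restore: every part but the last returns 206,
# the last one returns 201.
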
Example #9
class Checker(object):
    def __init__(self,
                 namespace,
                 concurrency=50,
                 error_file=None,
                 rebuild_file=None,
                 full=True,
                 limit_listings=0,
                 request_attempts=1,
                 logger=None,
                 verbose=False,
                 integrity=False):
        self.pool = GreenPool(concurrency)
        self.error_file = error_file
        self.full = bool(full)
        self.integrity = bool(integrity)
        # Optimisation for when we are only checking one object
        # or one container.
        # 0 -> do not limit
        # 1 -> limit account listings (list of containers)
        # 2 -> limit container listings (list of objects)
        self.limit_listings = limit_listings
        if self.error_file:
            outfile = open(self.error_file, 'a')
            self.error_writer = csv.writer(outfile, delimiter=' ')

        self.rebuild_file = rebuild_file
        if self.rebuild_file:
            fd = open(self.rebuild_file, 'a')
            self.rebuild_writer = csv.writer(fd, delimiter='|')

        self.logger = logger or get_logger(
            {'namespace': namespace}, name='integrity', verbose=verbose)
        self.api = ObjectStorageApi(namespace,
                                    logger=self.logger,
                                    max_retries=request_attempts - 1,
                                    request_attempts=request_attempts)

        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0
        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0
        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

        self.list_cache = {}
        self.running = {}
        self.result_queue = Queue()

    def complete_target_from_chunk_metadata(self, target, xattr_meta):
        """
        Complete a Target object from metadata found in chunk's extended
        attributes. In case the "fullpath" is not available, try to read
        legacy metadata, and maybe ask meta1 to resolve the CID into
        account and container names.
        """
        # pylint: disable=unbalanced-tuple-unpacking
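        # A "full_path" packs five fields joined by '/', for example
        # (hypothetical values):
        #   myaccount/mycontainer/path/to/obj/1533558228396169/0123456789AB
        # decode_fullpath() splits it into account, container, object
        # path, version and content ID.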
        try:
            acct, ct, path, vers, content_id = \
                decode_fullpath(xattr_meta['full_path'])
            target.account = acct
            target.container = ct
            target.obj = path
            target.content_id = content_id
            target.version = vers
        except KeyError:
            # No fullpath header, try legacy headers
            if 'content_path' in xattr_meta:
                target.obj = xattr_meta['content_path']
            if 'content_id' in xattr_meta:
                target.content_id = xattr_meta['content_id']
            if 'content_version' in xattr_meta:
                target.version = xattr_meta['content_version']
            cid = xattr_meta.get('container_id')
            if cid:
                try:
                    md = self.api.directory.show(cid=cid)
                    acct = md.get('account')
                    ct = md.get('name')
                    if acct:
                        target.account = acct
                    if ct:
                        target.container = ct
                except Exception as err:
                    self.logger.warn(
                        "Failed to resolve CID %s into account "
                        "and container names: %s", cid, err)

    def send_result(self, target, errors=None):
        """
        Put an item in the result queue.
        """
        # TODO(FVE): send to an external queue.
        self.result_queue.put(ItemResult(target, errors))

    def write_error(self, target, irreparable=False):
        if not self.error_file:
            return
        error = list()
        if irreparable:
            error.append('#IRREPARABLE')
        error.append(target.account)
        if target.container:
            error.append(target.container)
        if target.obj:
            error.append(target.obj)
        if target.chunk:
            error.append(target.chunk)
        self.error_writer.writerow(error)

    def write_rebuilder_input(self, target, irreparable=False):
        # FIXME(FVE): cid can be computed from account and container names
        ct_meta = self.list_cache[(target.account, target.container)][1]
        try:
            cid = ct_meta['system']['sys.name'].split('.', 1)[0]
        except KeyError:
            cid = ct_meta['properties']['sys.name'].split('.', 1)[0]
        error = list()
        if irreparable:
            error.append('#IRREPARABLE')
        error.append(cid)
        # FIXME(FVE): ensure we always resolve content_id,
        # or pass object version along with object name.
        error.append(target.content_id or target.obj)
        error.append(target.chunk)
        self.rebuild_writer.writerow(error)

    def write_chunk_error(self, target, chunk=None, irreparable=False):
        if chunk is not None:
            target = target.copy()
            target.chunk = chunk
        self.write_error(target, irreparable=irreparable)
        if self.rebuild_file:
            self.write_rebuilder_input(target, irreparable=irreparable)

    def _check_chunk_xattr(self, target, obj_meta, xattr_meta):
        """
        Check coherency of chunk extended attributes with object metadata.

        :returns: a list of errors
        """
        errors = list()
        # Composed position -> erasure coding
        attr_prefix = 'meta' if '.' in obj_meta['pos'] else ''
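        # e.g. a replicated chunk at position '0' is checked against the
        # 'chunk_size' and 'chunk_hash' xattrs, whereas an EC sub-chunk at
        # position '0.2' is checked against 'metachunk_size' and
        # 'metachunk_hash'.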

        attr_key = attr_prefix + 'chunk_size'
        if str(obj_meta['size']) != xattr_meta.get(attr_key):
            errors.append(
                "'%s' xattr (%s) differs from size in meta2 (%s)" %
                (attr_key, xattr_meta.get(attr_key), obj_meta['size']))

        attr_key = attr_prefix + 'chunk_hash'
        if obj_meta['hash'] != xattr_meta.get(attr_key):
            errors.append(
                "'%s' xattr (%s) differs from hash in meta2 (%s)" %
                (attr_key, xattr_meta.get(attr_key), obj_meta['hash']))
        return errors

    def _check_chunk(self, target):
        """
        Execute various checks on a chunk:
        - does it appear in object's chunk list?
        - is it reachable?
        - are its extended attributes coherent?

        :returns: the list of errors encountered,
            and the chunk's owner object metadata.
        """
        chunk = target.chunk
        errors = list()
        obj_meta = None
        xattr_meta = None

        try:
            xattr_meta = self.api.blob_client.chunk_head(
                chunk, xattr=self.full, check_hash=self.integrity)
        except exc.NotFound as err:
            self.chunk_not_found += 1
            errors.append('Not found: %s' % (err, ))
        except exc.FaultyChunk as err:
            self.chunk_exceptions += 1
            errors.append('Faulty: %r' % (err, ))
        except Exception as err:
            self.chunk_exceptions += 1
            errors.append('Check failed: %s' % (err, ))

        if not target.obj and xattr_meta:
            self.complete_target_from_chunk_metadata(target, xattr_meta)

        if target.obj:
            obj_listing, obj_meta = self.check_obj(target.copy_object())
            if chunk not in obj_listing:
                errors.append('Missing from object listing')
                db_meta = dict()
            else:
                db_meta = obj_listing[chunk]

            if db_meta and xattr_meta and self.full:
                errors.extend(
                    self._check_chunk_xattr(target, db_meta, xattr_meta))

        self.send_result(target, errors)
        self.chunks_checked += 1
        return errors, obj_meta

    def check_chunk(self, target):
        errors, _obj_meta = self._check_chunk(target)
        return errors

    def _check_metachunk(self, target, stg_met, pos, chunks, recurse=0):
        """
        Check that a metachunk has the right number of chunks.

        :returns: the list of errors
        """
        required = stg_met.expected_chunks
        errors = list()

        if len(chunks) < required:
            missing_chunks = required - len(chunks)
            if stg_met.ec:
                subs = {x['num'] for x in chunks}
                for sub in range(required):
                    if sub not in subs:
                        errors.append("Missing chunk at position %d.%d" %
                                      (pos, sub))
            else:
                for _ in range(missing_chunks):
                    errors.append("Missing chunk at position %d" % pos)

        if recurse > 0:
            for chunk in chunks:
                tcopy = target.copy()
                tcopy.chunk = chunk['url']
                chunk_errors, _ = self._check_chunk(tcopy)
                if chunk_errors:
                    # The errors have already been reported by _check_chunk,
                    # but we must count this chunk among the unusable chunks
                    # of the current metachunk.
                    errors.append("Unusable chunk %s at position %s" %
                                  (chunk['url'], chunk['pos']))

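        # Worked example (hypothetical EC 6+3 policy): expected_chunks is 9
        # and min_chunks_to_read is 6; with 4 missing or unusable chunks
        # (len(errors) == 4), 9 - 4 == 5 < 6, so the metachunk is
        # declared irreparable.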
        irreparable = required - len(errors) < stg_met.min_chunks_to_read
        if irreparable:
            errors.append(
                "Unavailable metachunk at position %s (%d/%d chunks)" %
                (pos, required - len(errors), stg_met.expected_chunks))
        # Since the "metachunk" is not an official item type,
        # this method does not report errors itself. Errors will
        # be reported as object errors.
        return errors

    def _check_obj_policy(self, target, obj_meta, chunks, recurse=0):
        """
        Check that the list of chunks of an object matches
        the object's storage policy.

        :returns: the list of errors encountered
        """
        stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
        chunks_by_pos = _sort_chunks(chunks, stg_met.ec)
        tasks = list()
        for pos, chunks in chunks_by_pos.iteritems():
            tasks.append((pos,
                          self.pool.spawn(self._check_metachunk,
                                          target.copy(),
                                          stg_met,
                                          pos,
                                          chunks,
                                          recurse=recurse)))
        errors = list()
        for pos, task in tasks:
            try:
                errors.extend(task.wait())
            except Exception as err:
                errors.append("Check failed: pos %d: %s" % (pos, err))
        return errors

    def check_obj_versions(self, target, versions, recurse=0):
        """
        Run checks of all versions of the targeted object in parallel.
        """
        tasks = list()
        for ov in versions:
            tcopy = target.copy_object()
            tcopy.content_id = ov['id']
            tcopy.version = str(ov['version'])
            tasks.append((tcopy.version,
                          self.pool.spawn(self.check_obj,
                                          tcopy,
                                          recurse=recurse)))
        errors = list()
        for version, task in tasks:
            try:
                task.wait()
            except Exception as err:
                errors.append("Check failed: version %s: %s" % (version, err))
        if errors:
            # Send a result with the target without version to tell
            # we were not able to check all versions of the object.
            self.send_result(target, errors)

    def _load_obj_meta(self, target, errors):
        """
        Load object metadata and chunks.

        :param target: which object to check.
        :param errors: list of errors that will be appended
            in case any error occurs.
        :returns: a tuple with object metadata and a list of chunks.
        """
        try:
            return self.api.object_locate(target.account,
                                          target.container,
                                          target.obj,
                                          version=target.version,
                                          properties=False)
        except exc.NoSuchObject as err:
            self.object_not_found += 1
            errors.append('Not found: %s' % (err, ))
        except Exception as err:
            self.object_exceptions += 1
            errors.append('Check failed: %s' % (err, ))
        return None, []

    def check_obj(self, target, recurse=0):
        """
        Check one object version.
        If no version is specified, all versions of the object will be
        checked.

        :returns: the result of the check of the most recent version,
            or the one that is explicitly targeted.
        """
        account = target.account
        container = target.container
        obj = target.obj
        vers = target.version  # can be None

        if (account, container, obj, vers) in self.running:
            self.running[(account, container, obj, vers)].wait()
        if (account, container, obj, vers) in self.list_cache:
            return self.list_cache[(account, container, obj, vers)]
        self.running[(account, container, obj, vers)] = Event()
        self.logger.info('Checking object "%s"', target)
        container_listing, _ = self.check_container(target.copy_container())
        errors = list()
        if obj not in container_listing:
            errors.append('Missing from container listing')
            # checksum = None
        else:
            versions = container_listing[obj]
            if vers is None:
                if target.content_id is None:
                    # No version specified, check all versions
                    self.check_obj_versions(target.copy_object(),
                                            versions,
                                            recurse=recurse)
                    # Now return the cached result of the most recent version
                    target.content_id = versions[0]['id']
                    target.version = str(versions[0]['version'])
                    res = self.check_obj(target, recurse=0)
                    self.running[(account, container, obj, vers)].send(True)
                    del self.running[(account, container, obj, vers)]
                    return res
                else:
                    for ov in versions:
                        if ov['id'] == target.content_id:
                            vers = str(ov['version'])
                            target.version = vers
                            break
                    else:
                        errors.append('Missing from container listing')

            # TODO check checksum match
            # checksum = container_listing[obj]['hash']

        meta, chunks = self._load_obj_meta(target, errors)

        chunk_listing = {c['url']: c for c in chunks}
        if meta:
            self.list_cache[(account, container, obj, vers)] = \
                (chunk_listing, meta)
        self.objects_checked += 1
        self.running[(account, container, obj, vers)].send(True)
        del self.running[(account, container, obj, vers)]

        # Skip the check if we could not locate the object
        if meta:
            errors.extend(
                self._check_obj_policy(target, meta, chunks, recurse=recurse))

        self.send_result(target, errors)
        return chunk_listing, meta

    def check_container(self, target, recurse=0):
        account = target.account
        container = target.container

        if (account, container) in self.running:
            self.running[(account, container)].wait()
        if (account, container) in self.list_cache:
            return self.list_cache[(account, container)]
        self.running[(account, container)] = Event()
        self.logger.info('Checking container "%s"', target)
        account_listing = self.check_account(target.copy_account())
        errors = list()
        if container not in account_listing:
            errors.append('Missing from account listing')

        marker = None
        results = []
        ct_meta = dict()
        extra_args = dict()
        if self.limit_listings > 1 and target.obj:
            # When we are explicitly checking one object, start the listing
            # where this object is supposed to be. Do not use a limit,
            # but an end marker, in order to fetch all versions of the object.
            extra_args['prefix'] = target.obj
            extra_args['end_marker'] = target.obj + '\x00'  # HACK
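            # '\x00' is the smallest possible suffix, so the listing
            # stops right after the last version of target.obj itself.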
        while True:
            try:
                resp = self.api.object_list(account,
                                            container,
                                            marker=marker,
                                            versions=True,
                                            **extra_args)
            except exc.NoSuchContainer as err:
                self.container_not_found += 1
                errors.append('Not found: %s' % (err, ))
                break
            except Exception as err:
                self.container_exceptions += 1
                errors.append('Check failed: %s' % (err, ))
                break

            if resp.get('truncated', False):
                marker = resp['next_marker']

            if resp['objects']:
                # safeguard, probably useless
                if not marker:
                    marker = resp['objects'][-1]['name']
                results.extend(resp['objects'])
                if self.limit_listings > 1:
                    break
            else:
                ct_meta = resp
                ct_meta.pop('objects')
                break

        container_listing = dict()
        # Save all object versions, with the most recent first
        for obj in results:
            container_listing.setdefault(obj['name'], list()).append(obj)
        for versions in container_listing.values():
            versions.sort(key=lambda o: o['version'], reverse=True)

        if self.limit_listings <= 1:
            # We just listed the whole container, keep the result in a cache
            self.containers_checked += 1
            self.list_cache[(account, container)] = container_listing, ct_meta
        self.running[(account, container)].send(True)
        del self.running[(account, container)]

        if recurse > 0:
            for obj_vers in container_listing.values():
                for obj in obj_vers:
                    tcopy = target.copy_object()
                    tcopy.obj = obj['name']
                    tcopy.content_id = obj['id']
                    tcopy.version = str(obj['version'])
                    self.pool.spawn_n(self.check_obj, tcopy, recurse - 1)
        self.send_result(target, errors)
        return container_listing, ct_meta

    def check_account(self, target, recurse=0):
        account = target.account

        if account in self.running:
            self.running[account].wait()
        if account in self.list_cache:
            return self.list_cache[account]
        self.running[account] = Event()
        self.logger.info('Checking account "%s"', target)
        errors = list()
        marker = None
        results = []
        extra_args = dict()
        if self.limit_listings > 0 and target.container:
            # When we are explicitly checking one container, start the listing
            # where this container is supposed to be, and list only one
            # container.
            extra_args['prefix'] = target.container
            extra_args['limit'] = 1
        while True:
            try:
                resp = self.api.container_list(account,
                                               marker=marker,
                                               **extra_args)
            except Exception as err:
                self.account_exceptions += 1
                errors.append('Check failed: %s' % (err, ))
                break
            if resp:
                marker = resp[-1][0]
                results.extend(resp)
                if self.limit_listings > 0:
                    break
            else:
                break

        containers = dict()
        for container in results:
            # Name, number of objects, number of bytes
            containers[container[0]] = (container[1], container[2])

        if self.limit_listings <= 0:
            # We just listed the whole account, keep the result in a cache
            self.accounts_checked += 1
            self.list_cache[account] = containers
        self.running[account].send(True)
        del self.running[account]

        if recurse > 0:
            for container in containers:
                tcopy = target.copy_account()
                tcopy.container = container
                self.pool.spawn_n(self.check_container, tcopy, recurse - 1)

        self.send_result(target, errors)
        return containers

    def check(self, target, recurse=0):
        if target.type == 'chunk':
            self.pool.spawn_n(self.check_chunk, target)
        elif target.type == 'object':
            self.pool.spawn_n(self.check_obj, target, recurse)
        elif target.type == 'container':
            self.pool.spawn_n(self.check_container, target, recurse)
        else:
            self.pool.spawn_n(self.check_account, target, recurse)

    def fetch_results(self):
        while not self.result_queue.empty():
            res = self.result_queue.get(True)
            yield res

    def log_result(self, result):
        if result.errors:
            if result.target.type == 'chunk':
                # FIXME(FVE): check error criticity
                # and set the irreparable flag.
                self.write_chunk_error(result.target)
            else:
                self.write_error(result.target)
            self.logger.warn('%s:\n%s', result.target,
                             result.errors_to_str(err_format='  %s'))

    def run(self):
        """
        Fetch results and write logs until all jobs have finished.

        :returns: a generator yielding check results.
        """
        while self.pool.running() + self.pool.waiting():
            for result in self.fetch_results():
                self.log_result(result)
                yield result
            sleep(0.1)
        self.pool.waitall()
        for result in self.fetch_results():
            self.log_result(result)
            yield result

    def report(self):
        success = True

        def _report_stat(name, stat):
            print("{0:18}: {1}".format(name, stat))

        print()
        print('Report')
        _report_stat("Accounts checked", self.accounts_checked)
        if self.account_not_found:
            success = False
            _report_stat("Missing accounts", self.account_not_found)
        if self.account_exceptions:
            success = False
            _report_stat("Exceptions", self.account_exceptions)
        print()
        _report_stat("Containers checked", self.containers_checked)
        if self.container_not_found:
            success = False
            _report_stat("Missing containers", self.container_not_found)
        if self.container_exceptions:
            success = False
            _report_stat("Exceptions", self.container_exceptions)
        print()
        _report_stat("Objects checked", self.objects_checked)
        if self.object_not_found:
            success = False
            _report_stat("Missing objects", self.object_not_found)
        if self.object_exceptions:
            success = False
            _report_stat("Exceptions", self.object_exceptions)
        print()
        _report_stat("Chunks checked", self.chunks_checked)
        if self.chunk_not_found:
            success = False
            _report_stat("Missing chunks", self.chunk_not_found)
        if self.chunk_exceptions:
            success = False
            _report_stat("Exceptions", self.chunk_exceptions)
        return success
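
The limit_listings optimisation above is meant for single-target checks;
a hedged sketch (import path and namespace name are assumptions):

# Hypothetical: check a single object and its chunks without listing
# the whole account; limit_listings=2 limits both listing levels.
from oio.crawler.integrity import Checker, Target

checker = Checker('OPENIO', limit_listings=2, request_attempts=3)
target = Target('my_account')
target.container = 'my_container'
target.obj = 'my_object'
checker.check(target, recurse=1)  # recurse into the object's chunks
for result in checker.run():
    pass
checker.report()
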
Example #10
class TestContentVersioning(BaseTestCase):
    def setUp(self):
        super(TestContentVersioning, self).setUp()
        self.api = ObjectStorageApi(self.conf['namespace'])
        self.container = random_str(8)
        system = {'sys.m2.policy.version': '3'}
        self.wait_for_score(('meta2', ))
        self.api.container_create(self.account, self.container, system=system)

    def test_versioning_enabled(self):
        props = self.api.container_get_properties(self.account, self.container)
        self.assertEqual('3', props['system']['sys.m2.policy.version'])

    def test_list_versions(self):
        self.api.object_create(self.account,
                               self.container,
                               obj_name="versioned",
                               data="content0")
        self.api.object_create(self.account,
                               self.container,
                               obj_name="versioned",
                               data="content1")
        listing = self.api.object_list(self.account,
                                       self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(2, len(objects))
        self.assertNotEqual(objects[0]['version'], objects[1]['version'])

    def test_container_purge(self):
        # many contents
        for i in range(0, 4):
            self.api.object_create(self.account,
                                   self.container,
                                   obj_name="versioned",
                                   data="content")
        listing = self.api.object_list(self.account,
                                       self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(4, len(objects))
        oldest_version = min(objects, key=lambda x: x['version'])

        # use the maxvers of the container configuration
        self.api.container_purge(self.account, self.container)
        listing = self.api.object_list(self.account,
                                       self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(3, len(objects))
        self.assertNotIn(oldest_version, [x['version'] for x in objects])
        oldest_version = min(objects, key=lambda x: x['version'])

        # use the maxvers of the request
        self.api.container_purge(self.account, self.container, maxvers=1)
        listing = self.api.object_list(self.account,
                                       self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(1, len(objects))
        self.assertNotIn(oldest_version, [x['version'] for x in objects])

    def test_content_purge(self):
        # many contents
        for i in range(0, 4):
            self.api.object_create(self.account,
                                   self.container,
                                   obj_name="versioned",
                                   data="content")
        listing = self.api.object_list(self.account,
                                       self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(4, len(objects))
        oldest_version = min(objects, key=lambda x: x['version'])

        # use the maxvers of the container configuration
        self.api.container.content_purge(self.account, self.container,
                                         "versioned")
        listing = self.api.object_list(self.account,
                                       self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(3, len(objects))
        self.assertNotIn(oldest_version, [x['version'] for x in objects])
        oldest_version = min(objects, key=lambda x: x['version'])

        # use the maxvers of the request
        self.api.container.content_purge(self.account,
                                         self.container,
                                         "versioned",
                                         maxvers=1)
        listing = self.api.object_list(self.account,
                                       self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(1, len(objects))
        self.assertNotIn(oldest_version, [x['version'] for x in objects])

        # other contents
        for i in range(0, 4):
            self.api.object_create(self.account,
                                   self.container,
                                   obj_name="versioned2",
                                   data="content" + str(i))
        listing = self.api.object_list(self.account,
                                       self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(5, len(objects))

        # use the maxvers of the container configuration
        self.api.container.content_purge(self.account, self.container,
                                         "versioned")
        listing = self.api.object_list(self.account,
                                       self.container,
                                       versions=True)
        objects = listing['objects']
        self.assertEqual(5, len(objects))

    def test_delete_exceeding_version(self):
        def check_num_objects_and_get_oldest_version(expected_objects,
                                                     expected_deleted_aliases,
                                                     oldest_version):
            listing = self.api.object_list(self.account,
                                           self.container,
                                           versions=True)
            objects = listing['objects']
            nb_objects = 0
            nb_deleted = 0
            new_oldest_version = 0
            for obj in objects:
                if obj['deleted']:
                    nb_deleted += 1
                else:
                    nb_objects += 1
                    if new_oldest_version == 0 \
                            or new_oldest_version > obj['version']:
                        new_oldest_version = obj['version']
            self.assertEqual(expected_objects, nb_objects)
            self.assertEqual(expected_deleted_aliases, nb_deleted)
            if oldest_version is not None:
                self.assertLess(oldest_version, new_oldest_version)
            return new_oldest_version

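        # With delete_exceeding enabled, the meta2 service is expected to
        # purge the versions exceeding maxvers (apparently 3 in this test
        # environment, judging by the assertions below) as soon as a new
        # version is created.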
        system = {'sys.m2.policy.version.delete_exceeding': '1'}
        self.api.container_set_properties(self.account,
                                          self.container,
                                          system=system)
        self.api.object_create(self.account,
                               self.container,
                               obj_name="versioned",
                               data="content0")
        oldest_version = check_num_objects_and_get_oldest_version(1, 0, None)
        self.api.object_create(self.account,
                               self.container,
                               obj_name="versioned",
                               data="content1")
        self.assertEqual(oldest_version,
                         check_num_objects_and_get_oldest_version(2, 0, None))
        self.api.object_create(self.account,
                               self.container,
                               obj_name="versioned",
                               data="content2")
        self.assertEqual(oldest_version,
                         check_num_objects_and_get_oldest_version(3, 0, None))

        self.api.object_create(self.account,
                               self.container,
                               obj_name="versioned",
                               data="content3")
        oldest_version = check_num_objects_and_get_oldest_version(
            3, 0, oldest_version)

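        # A delete only adds a marker: the count of live versions stays
        # at 3, the marker being counted separately by the helper.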
        self.api.object_delete(self.account, self.container, "versioned")
        self.assertEqual(oldest_version,
                         check_num_objects_and_get_oldest_version(3, 1, None))
        self.api.object_create(self.account,
                               self.container,
                               obj_name="versioned",
                               data="content4")
        oldest_version = check_num_objects_and_get_oldest_version(
            3, 1, oldest_version)
        self.api.object_create(self.account,
                               self.container,
                               obj_name="versioned",
                               data="content5")
        oldest_version = check_num_objects_and_get_oldest_version(
            3, 1, oldest_version)
        self.api.object_create(self.account,
                               self.container,
                               obj_name="versioned",
                               data="content6")
        # FIXME(adu) The deleted alias should be deleted at the same time
        oldest_version = check_num_objects_and_get_oldest_version(
            3, 1, oldest_version)
        self.api.object_create(self.account,
                               self.container,
                               obj_name="versioned",
                               data="content7")
        oldest_version = check_num_objects_and_get_oldest_version(
            3, 1, oldest_version)

    def test_change_flag_delete_exceeding_versions(self):
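        """
        Check that the delete_exceeding flag can be toggled on a container
        already holding more versions than maxvers: enabling it trims the
        excess on the next create, disabling it lets versions pile up again.
        """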
        def check_num_objects(expected):
            listing = self.api.object_list(self.account,
                                           self.container,
                                           versions=True)
            objects = listing['objects']
            self.assertEqual(expected, len(objects))

        for i in range(5):
            self.api.object_create(self.account,
                                   self.container,
                                   obj_name="versioned",
                                   data="content" + str(i))
        check_num_objects(5)

        system = {'sys.m2.policy.version.delete_exceeding': '1'}
        self.api.container_set_properties(self.account,
                                          self.container,
                                          system=system)
        self.api.object_create(self.account,
                               self.container,
                               obj_name="versioned",
                               data="content5")
        check_num_objects(3)
        for i in range(6, 10):
            self.api.object_create(self.account,
                                   self.container,
                                   obj_name="versioned",
                                   data="content" + str(i))
        check_num_objects(3)

        system['sys.m2.policy.version.delete_exceeding'] = '0'
        self.api.container_set_properties(self.account,
                                          self.container,
                                          system=system)
        self.api.object_create(self.account,
                               self.container,
                               obj_name="versioned",
                               data="content11")
        check_num_objects(4)

    def test_purge_objects_with_delete_marker(self):
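        """
        Check that content_purge keeps the delete marker and up to maxvers
        old versions while keep_deleted_delay has not expired, then drops
        everything once it has.
        """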
        def check_num_objects(expected):
            listing = self.api.object_list(self.account,
                                           self.container,
                                           versions=True)
            objects = listing['objects']
            self.assertEqual(expected, len(objects))

        for i in range(5):
            self.api.object_create(self.account,
                                   self.container,
                                   obj_name="versioned",
                                   data="content" + str(i))
        check_num_objects(5)

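        # In a versioned container, object_delete only adds a delete marker:
        # the alias can no longer be located, but every version stays listed.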
        self.api.object_delete(self.account, self.container, "versioned")
        self.assertRaises(NoSuchObject, self.api.object_locate, self.account,
                          self.container, "versioned")
        check_num_objects(6)

        self.api.container.content_purge(self.account, self.container,
                                         "versioned")
        self.assertRaises(NoSuchObject, self.api.object_locate, self.account,
                          self.container, "versioned")
        check_num_objects(4)

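        # Shorten the delay during which deleted contents are kept, so the
        # next purge can drop the versions and the delete marker for good.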
        system = {'sys.m2.keep_deleted_delay': '1'}
        self.api.container_set_properties(self.account,
                                          self.container,
                                          system=system)
        time.sleep(2)

        self.api.container.content_purge(self.account, self.container,
                                         "versioned")
        check_num_objects(0)

    def test_list_objects(self):
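        """
        Exercise object_list with combinations of limit, marker and
        versions, checking the listed objects, the truncated flag and
        next_marker as versions and delete markers accumulate.
        """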
        resp = self.api.object_list(self.account, self.container)
        self.assertEqual(0, len(list(resp['objects'])))
        self.assertFalse(resp.get('truncated'))

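        # Compare a listing with the expected objects on name, version
        # and deleted flag.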
        def _check_objects(expected_objects, objects):
            self.assertEqual(len(expected_objects), len(objects))
            for i in range(len(expected_objects)):
                self.assertEqual(expected_objects[i]['name'],
                                 objects[i]['name'])
                self.assertEqual(int(expected_objects[i]['version']),
                                 int(objects[i]['version']))
                self.assertEqual(true_value(expected_objects[i]['deleted']),
                                 true_value(objects[i]['deleted']))

        all_versions = dict()

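        # The two helpers below keep all_versions in sync with each
        # create/delete performed against the container.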
        def _create_object(obj_name, all_versions):
            self.api.object_create(self.account,
                                   self.container,
                                   obj_name=obj_name,
                                   data="test")
            versions = all_versions.get(obj_name, list())
            versions.append(
                self.api.object_show(self.account, self.container, obj_name))
            all_versions[obj_name] = versions

        def _delete_object(obj_name, all_versions):
            self.api.object_delete(self.account, self.container, obj_name)
            versions = all_versions.get(obj_name, list())
            versions.append(
                self.api.object_show(self.account, self.container, obj_name))
            all_versions[obj_name] = versions

        def _get_current_objects(all_versions):
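            # Keep, for each name, the latest version unless it is a
            # delete marker.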
            current_objects = list()
            obj_names = sorted(all_versions.keys())
            for obj_name in obj_names:
                obj = all_versions[obj_name][-1]
                if not true_value(obj['deleted']):
                    current_objects.append(obj)
            return current_objects

        def _get_object_versions(all_versions):
            # Listings with versions=True return, for each name, the
            # versions from newest to oldest, so reverse each
            # chronological list without mutating it.
            object_versions = list()
            for obj_name in sorted(all_versions.keys()):
                object_versions += reversed(all_versions[obj_name])
            return object_versions

        # 0 objects
        expected_current_objects = _get_current_objects(all_versions)
        expected_object_versions = _get_object_versions(all_versions)

        resp = self.api.object_list(self.account, self.container, limit=3)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=2)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=1)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True)
        _check_objects(expected_object_versions, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True,
                                    limit=3)
        _check_objects(expected_object_versions, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        # 3 objects with 1 version
        for i in range(3):
            _create_object("versioned" + str(i), all_versions)
        expected_current_objects = _get_current_objects(all_versions)
        expected_object_versions = _get_object_versions(all_versions)

        resp = self.api.object_list(self.account, self.container)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=3)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=2)
        _check_objects(expected_current_objects[:2], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned1', resp['next_marker'])

        resp = self.api.object_list(self.account, self.container, limit=1)
        _check_objects(expected_current_objects[:1], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned0', resp['next_marker'])

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True)
        _check_objects(expected_object_versions, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True,
                                    limit=3)
        _check_objects(expected_object_versions[:3], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0')
        _check_objects(expected_current_objects[1:], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    limit=1)
        _check_objects(expected_current_objects[1:2], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned1', resp['next_marker'])

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    versions=True)
        _check_objects(expected_object_versions[1:], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    versions=True,
                                    limit=3)
        _check_objects(expected_object_versions[1:], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        # 3 objects with 2 versions
        for i in range(3):
            _create_object("versioned" + str(i), all_versions)
        expected_current_objects = _get_current_objects(all_versions)
        expected_object_versions = _get_object_versions(all_versions)

        resp = self.api.object_list(self.account, self.container)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=3)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=2)
        _check_objects(expected_current_objects[:2], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned1', resp['next_marker'])

        resp = self.api.object_list(self.account, self.container, limit=1)
        _check_objects(expected_current_objects[:1], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned0', resp['next_marker'])

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True)
        _check_objects(expected_object_versions, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True,
                                    limit=3)
        _check_objects(expected_object_versions[:3], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned1', resp['next_marker'])

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0')
        _check_objects(expected_current_objects[1:], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    limit=1)
        _check_objects(expected_current_objects[1:2], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned1', resp['next_marker'])

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    versions=True)
        _check_objects(expected_object_versions[2:], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    versions=True,
                                    limit=3)
        _check_objects(expected_object_versions[2:5], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned2', resp['next_marker'])

        # 3 objects with 2 versions and 1 object with delete marker
        _delete_object("versioned1", all_versions)
        expected_current_objects = _get_current_objects(all_versions)
        expected_object_versions = _get_object_versions(all_versions)

        resp = self.api.object_list(self.account, self.container)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=3)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=2)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=1)
        _check_objects(expected_current_objects[:1], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned0', resp['next_marker'])

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True)
        _check_objects(expected_object_versions, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True,
                                    limit=3)
        _check_objects(expected_object_versions[:3], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned1', resp['next_marker'])

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0')
        _check_objects(expected_current_objects[1:], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    limit=1)
        _check_objects(expected_current_objects[1:], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    versions=True)
        _check_objects(expected_object_versions[2:], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    versions=True,
                                    limit=3)
        _check_objects(expected_object_versions[2:5], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned1', resp['next_marker'])

        # 3 objects with 2 versions and 2 objects with delete marker
        _delete_object("versioned0", all_versions)
        expected_current_objects = _get_current_objects(all_versions)
        expected_object_versions = _get_object_versions(all_versions)

        resp = self.api.object_list(self.account, self.container)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=3)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=2)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=1)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True)
        _check_objects(expected_object_versions, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True,
                                    limit=3)
        _check_objects(expected_object_versions[:3], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned0', resp['next_marker'])

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0')
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    limit=1)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    versions=True)
        _check_objects(expected_object_versions[3:], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    versions=True,
                                    limit=3)
        _check_objects(expected_object_versions[3:6], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned1', resp['next_marker'])

        # 3 objects with 2 versions and 3 objects with delete marker
        _delete_object("versioned2", all_versions)
        expected_current_objects = _get_current_objects(all_versions)
        expected_object_versions = _get_object_versions(all_versions)

        resp = self.api.object_list(self.account, self.container)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=3)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=2)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=1)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True)
        _check_objects(expected_object_versions, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True,
                                    limit=3)
        _check_objects(expected_object_versions[:3], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned0', resp['next_marker'])

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0')
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    limit=1)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    versions=True)
        _check_objects(expected_object_versions[3:], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    versions=True,
                                    limit=3)
        _check_objects(expected_object_versions[3:6], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned1', resp['next_marker'])

        # 3 objects with 2 versions and 3 objects with delete marker
        # (1 current version and 2 non current versions)
        _create_object("versioned0", all_versions)
        expected_current_objects = _get_current_objects(all_versions)
        expected_object_versions = _get_object_versions(all_versions)

        resp = self.api.object_list(self.account, self.container)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=3)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=2)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account, self.container, limit=1)
        _check_objects(expected_current_objects, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True)
        _check_objects(expected_object_versions, list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    versions=True,
                                    limit=3)
        _check_objects(expected_object_versions[:3], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned0', resp['next_marker'])

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0')
        _check_objects(expected_current_objects[1:], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    limit=1)
        _check_objects(expected_current_objects[1:], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    versions=True)
        _check_objects(expected_object_versions[4:], list(resp['objects']))
        self.assertFalse(resp.get('truncated'))

        resp = self.api.object_list(self.account,
                                    self.container,
                                    marker='versioned0',
                                    versions=True,
                                    limit=3)
        _check_objects(expected_object_versions[4:7], list(resp['objects']))
        self.assertTrue(resp.get('truncated'))
        self.assertEqual('versioned1', resp['next_marker'])