示例#1
0
class ObjectStorageApi(object):
    """
    The Object Storage API.

    High level API that wraps `AccountClient`, `ContainerClient` and
    `DirectoryClient` classes.

    Every method that takes a `kwargs` argument accepts the at least
    the following keywords:

        - `headers`: `dict` of extra headers to pass to the proxy
        - `connection_timeout`: `float`
        - `read_timeout`: `float`
        - `write_timeout`: `float`
    """
    TIMEOUT_KEYS = ('connection_timeout', 'read_timeout', 'write_timeout')

    def __init__(self, namespace, logger=None, **kwargs):
        """
        Initialize the object storage API.

        :param namespace: name of the namespace to interract with
        :type namespace: `str`

        :keyword connection_timeout: connection timeout towards rawx services
        :type connection_timeout: `float` seconds
        :keyword read_timeout: timeout for rawx responses and data reads from
            the caller (when uploading)
        :type read_timeout: `float` seconds
        :keyword write_timeout: timeout for rawx write requests
        :type write_timeout: `float` seconds
        :keyword pool_manager: a pooled connection manager that will be used
            for all HTTP based APIs (except rawx)
        :type pool_manager: `urllib3.PoolManager`
        """
        self.namespace = namespace
        conf = {"namespace": self.namespace}
        self.logger = logger or get_logger(conf)
        self.timeouts = {tok: float_value(tov, None)
                         for tok, tov in kwargs.items()
                         if tok in self.__class__.TIMEOUT_KEYS}

        from oio.account.client import AccountClient
        from oio.container.client import ContainerClient
        from oio.directory.client import DirectoryClient
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        self.container = ContainerClient(conf, logger=self.logger, **kwargs)

        # In AccountClient, "endpoint" is the account service, not the proxy
        acct_kwargs = kwargs.copy()
        acct_kwargs["proxy_endpoint"] = acct_kwargs.pop("endpoint", None)
        self.account = AccountClient(conf, logger=self.logger, **acct_kwargs)

    def _patch_timeouts(self, kwargs):
        """
        Insert timeout settings from this class's constructor into `kwargs`,
        if they are not already there.
        """
        for tok, tov in self.timeouts.items():
            if tok not in kwargs:
                kwargs[tok] = tov

    def account_create(self, account, **kwargs):
        """
        Create an account.

        :param account: name of the account to create
        :type account: `str`
        :returns: `True` if the account has been created
        """
        return self.account.account_create(account, **kwargs)

    @handle_account_not_found
    def account_delete(self, account, **kwargs):
        """
        Delete an account.

        :param account: name of the account to delete
        :type account: `str`
        """
        self.account.account_delete(account, **kwargs)

    @handle_account_not_found
    def account_show(self, account, **kwargs):
        """
        Get information about an account.
        """
        return self.account.account_show(account, **kwargs)

    def account_list(self, **kwargs):
        """
        List known accounts.

        Notice that account creation is asynchronous, and an autocreated
        account may appear in the listing only after several seconds.
        """
        return self.account.account_list(**kwargs)

    @handle_account_not_found
    def account_update(self, account, metadata, to_delete=None, **kwargs):
        warnings.warn("You'd better use account_set_properties()",
                      DeprecationWarning, stacklevel=2)
        self.account.account_update(account, metadata, to_delete, **kwargs)

    @handle_account_not_found
    def account_set_properties(self, account, properties, **kwargs):
        self.account.account_update(account, properties, None, **kwargs)

    @handle_account_not_found
    def account_del_properties(self, account, properties, **kwargs):
        self.account.account_update(account, None, properties, **kwargs)

    def container_create(self, account, container, properties=None,
                         **kwargs):
        """
        Create a container.

        :param account: account in which to create the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: properties to set on the container
        :type properties: `dict`
        :returns: True if the container has been created,
                  False if it already exists
        """
        return self.container.container_create(account, container,
                                               properties=properties,
                                               **kwargs)

    @handle_container_not_found
    @ensure_headers
    @ensure_request_id
    def container_touch(self, account, container, **kwargs):
        """
        Trigger a notification about the container state.

        :param account: account from which to delete the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        """
        self.container.container_touch(account, container, **kwargs)

    def container_create_many(self, account, containers, properties=None,
                              **kwargs):
        """
        Create Many containers

        :param account: account in which to create the containers
        :type account: `str`
        :param containers: names of the containers
        :type containers: `list`
        :param properties: properties to set on the containers
        :type properties: `dict`
        """
        return self.container.container_create_many(account,
                                                    containers,
                                                    properties=properties,
                                                    **kwargs)

    @handle_container_not_found
    def container_delete(self, account, container, **kwargs):
        """
        Delete a container.

        :param account: account from which to delete the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        """
        self.container.container_delete(account, container, **kwargs)

    @handle_account_not_found
    def container_list(self, account, limit=None, marker=None,
                       end_marker=None, prefix=None, delimiter=None,
                       **kwargs):
        """
        Get the list of containers of an account.

        :param account: account from which to get the container list
        :type account: `str`
        :keyword limit: maximum number of results to return
        :type limit: `int`
        :keyword marker: name of the container from where to start the listing
        :type marker: `str`
        :keyword end_marker:
        :keyword prefix:
        :keyword delimiter:
        :return: the list of containers of an account
        :rtype: `list` of items (`list`) with 4 fields:
            name, number of objects, number of bytes, and 1 if the item
            is a prefix or 0 if the item is actually a container
        """
        resp = self.account.container_list(account, limit=limit,
                                           marker=marker,
                                           end_marker=end_marker,
                                           prefix=prefix,
                                           delimiter=delimiter,
                                           **kwargs)
        return resp["listing"]

    @handle_container_not_found
    def container_show(self, account, container, **kwargs):
        """
        Get information about a container (user properties).

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :returns: a `dict` with "properties" containing a `dict`
            of user properties.
        """
        return self.container.container_show(account, container, **kwargs)

    @handle_container_not_found
    def container_snapshot(self, account, container, dst_account,
                           dst_container, batch=100, **kwargs):
        """
        Create a copy of the container (only the content of the database)

        :param account: account in which the target is
        :type account: `str`
        :param container: name of the target
        :type container: `str`
        :param dst_account: account in which the snapshot will be.
        :type dst_account: `str`
        :param dst_container: name of the snapshot
        :type dst_container: `str`
        """
        try:
            self.container.container_freeze(account, container)
            self.container.container_snapshot(
                account, container, dst_account, dst_container)
            resp = self.object_list(dst_account, dst_container)
            obj_gen = resp['objects']
            target_beans = []
            copy_beans = []
            for obj in obj_gen:
                data = self.object_locate(
                    account, container, obj["name"])
                chunks = [chunk['url'] for chunk in data[1]]
                copies = self._generate_copy(chunks)
                fullpath = self._generate_fullpath(
                    dst_account, dst_container, obj['name'], obj['version'])
                self._send_copy(chunks, copies, fullpath[0])
                t_beans, c_beans = self._prepare_update_meta2(
                    data[1], copies, dst_account, dst_container,
                    obj['content'])
                target_beans.extend(t_beans)
                copy_beans.extend(c_beans)
                if len(target_beans) > batch:
                    self.container.container_raw_update(
                        target_beans, copy_beans,
                        dst_account, dst_container,
                        frozen=True)
                    target_beans = []
                    copy_beans = []
            if target_beans:
                self.container.container_raw_update(
                    target_beans, copy_beans,
                    dst_account, dst_container,
                    frozen=True)
        finally:
            self.container.container_enable(account, container)

    @handle_container_not_found
    def container_get_properties(self, account, container, properties=None,
                                 **kwargs):
        """
        Get information about a container (user and system properties).

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: *ignored*
        :returns: a `dict` with "properties" and "system" entries,
            containing respectively a `dict` of user properties and
            a `dict` of system properties.
        """
        return self.container.container_get_properties(account, container,
                                                       properties=properties,
                                                       **kwargs)

    @handle_container_not_found
    def container_set_properties(self, account, container, properties=None,
                                 clear=False, **kwargs):
        """
        Set properties on a container.

        :param account: name of the account
        :type account: `str`
        :param container: name of the container where to set properties
        :type container: `str`
        :param properties: a dictionary of properties
        :type properties: `dict`
        :param clear:
        :type clear: `bool`
        :keyword system: dictionary of system properties to set
        """
        return self.container.container_set_properties(
            account, container, properties,
            clear=clear, **kwargs)

    @handle_container_not_found
    def container_del_properties(self, account, container, properties,
                                 **kwargs):
        """
        Delete properties of a container.

        :param account: name of the account
        :type account: `str`
        :param container: name of the container to deal with
        :type container: `str`
        :param properties: a list of property keys
        :type properties: `list`
        """
        return self.container.container_del_properties(
            account, container, properties, **kwargs)

    def container_update(self, account, container, metadata, clear=False,
                         **kwargs):
        warnings.warn("You'd better use container_set_properties()",
                      DeprecationWarning)
        if not metadata:
            self.container_del_properties(
                account, container, [], **kwargs)
        else:
            self.container_set_properties(
                account, container, metadata, clear, **kwargs)

    @handle_container_not_found
    @ensure_headers
    @ensure_request_id
    def object_create(self, account, container, file_or_path=None, data=None,
                      etag=None, obj_name=None, mime_type=None,
                      metadata=None, policy=None, key_file=None,
                      append=False, properties=None, **kwargs):
        """
        Create an object or append data to object in *container* of *account*
        with data taken from either *data* (`str` or `generator`) or
        *file_or_path* (path to a file or file-like object).
        The object will be named after *obj_name* if specified, or after
        the base name of *file_or_path*.

        :param account: name of the account where to create the object
        :type account: `str`
        :param container: name of the container where to create the object
        :type container: `str`
        :param file_or_path: file-like object or path to a file from which
            to read object data
        :type file_or_path: `str` or file-like object
        :param data: object data (if `file_or_path` is not set)
        :type data: `str` or `generator`
        :keyword etag: entity tag of the object
        :type etag: `str`
        :keyword obj_name: name of the object to create. If not set, will use
            the base name of `file_or_path`.
        :keyword mime_type: MIME type of the object
        :type mime_type: `str`
        :keyword properties: a dictionary of properties
        :type properties: `dict`
        :keyword policy: name of the storage policy
        :type policy: `str`
        :keyword key_file:
        :param append: if set, data will be append to existing object (or
        object will be created if unset)
        :type append: `bool`

        :returns: `list` of chunks, size and hash of the what has been uploaded
        """
        if (data, file_or_path) == (None, None):
            raise exc.MissingData()
        src = data if data is not None else file_or_path
        if src is file_or_path:
            if isinstance(file_or_path, basestring):
                if not os.path.exists(file_or_path):
                    raise exc.FileNotFound("File '%s' not found." %
                                           file_or_path)
                file_name = os.path.basename(file_or_path)
            else:
                try:
                    file_name = os.path.basename(file_or_path.name)
                except AttributeError:
                    file_name = None
            obj_name = obj_name or file_name
        elif isgenerator(src):
            file_or_path = GeneratorIO(src)
            src = file_or_path
        if not obj_name:
            raise exc.MissingName(
                "No name for the object has been specified"
            )

        sysmeta = {'mime_type': mime_type,
                   'etag': etag}
        if metadata:
            warnings.warn(
                "You'd better use 'properties' instead of 'metadata'",
                DeprecationWarning, stacklevel=4)
            if not properties:
                properties = metadata
            else:
                properties.update(metadata)

        if src is data:
            return self._object_create(
                account, container, obj_name, BytesIO(data), sysmeta,
                properties=properties, policy=policy,
                key_file=key_file, append=append, **kwargs)
        elif hasattr(file_or_path, "read"):
            return self._object_create(
                account, container, obj_name, src, sysmeta,
                properties=properties, policy=policy, key_file=key_file,
                append=append, **kwargs)
        else:
            with open(file_or_path, "rb") as f:
                return self._object_create(
                    account, container, obj_name, f, sysmeta,
                    properties=properties, policy=policy,
                    key_file=key_file, append=append, **kwargs)

    @ensure_headers
    @ensure_request_id
    def object_touch(self, account, container, obj,
                     version=None, **kwargs):
        """
        Trigger a notification about an object
        (as if it just had been created).

        :param account: name of the account where to create the object
        :type account: `str`
        :param container: name of the container where to create the object
        :type container: `str`
        :param obj: name of the object to touch
        """
        self.container.content_touch(account, container, obj,
                                     version=version, **kwargs)

    def object_drain(self, account, container, obj,
                     version=None, **kwargs):
        """
        Remove all the chunks of a content, but keep all the metadata.

        :param account: name of the account where the object is present
        :type account: `str`
        :param container: name of the container where the object is present
        :type container: `str`
        :param obj: name of the object to drain
        """
        self.container.content_drain(account, container, obj,
                                     version=version, **kwargs)

    @handle_object_not_found
    @ensure_headers
    @ensure_request_id
    def object_delete(self, account, container, obj,
                      version=None, **kwargs):
        """
        Delete an object from a container. If versioning is enabled and no
        version is specified, the object will be marked as deleted but not
        actually deleted.

        :param account: name of the account the object belongs to
        :type account: `str`
        :param container: name of the container the object belongs to
        :type container: `str`
        :param obj: name of the object to delete
        :param version: version of the object to delete
        :returns: True on success
        """
        return self.container.content_delete(account, container, obj,
                                             version=version, **kwargs)

    @ensure_headers
    @ensure_request_id
    def object_delete_many(self, account, container, objs, **kwargs):
        return self.container.content_delete_many(
            account, container, objs, **kwargs)

    @handle_object_not_found
    @ensure_headers
    @ensure_request_id
    def object_truncate(self, account, container, obj,
                        version=None, size=None, **kwargs):
        """
        Truncate object at specified size. Only shrink is supported.
        A download may occur if size is not on chunk boundaries.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :param version: version of the object to query
        :param size: new size of object
        """

        # code copied from object_fetch (should be factorized !)
        meta, raw_chunks = self.object_locate(
            account, container, obj, version=version, **kwargs)
        chunk_method = meta['chunk_method']
        storage_method = STORAGE_METHODS.load(chunk_method)
        chunks = _sort_chunks(raw_chunks, storage_method.ec)

        for pos in sorted(chunks.keys()):
            chunk = chunks[pos][0]
            if (size >= chunk['offset']
                    and size <= chunk['offset'] + chunk['size']):
                break
        else:
            raise exc.OioException("No chunk found at position %d" % size)

        if chunk['offset'] != size:
            # retrieve partial chunk
            ret = self.object_fetch(account, container, obj,
                                    version=version,
                                    ranges=[(chunk['offset'], size-1)])
            # TODO implement a proper object_update
            pos = int(chunk['pos'].split('.')[0])
            self.object_create(account, container, obj_name=obj,
                               data=ret[1], meta_pos=pos,
                               content_id=meta['id'])

        return self.container.content_truncate(account, container, obj,
                                               version=version, size=size,
                                               **kwargs)

    @handle_container_not_found
    def object_list(self, account, container, limit=None, marker=None,
                    delimiter=None, prefix=None, end_marker=None,
                    properties=False, versions=False, deleted=False,
                    **kwargs):
        """
        Lists objects inside a container.

        :param properties: if True, list object properties along with objects
        :param versions: if True, list all versions of objects
        :param deleted: if True, list also the deleted objects

        :returns: a dict which contains
           * 'objects': the list of objects
           * 'prefixes': common prefixes (only if delimiter and prefix are set)
           * 'properties': a dict of container properties
           * 'system': a dict of system metadata
        """
        _, resp_body = self.container.content_list(
            account, container, limit=limit, marker=marker,
            end_marker=end_marker, prefix=prefix, delimiter=delimiter,
            properties=properties, versions=versions, deleted=deleted,
            **kwargs)

        for obj in resp_body['objects']:
            mtype = obj.get('mime-type')
            if mtype is not None:
                obj['mime_type'] = mtype
                del obj['mime-type']
            version = obj.get('ver')
            if version is not None:
                obj['version'] = version
                del obj['ver']

        return resp_body

    @handle_object_not_found
    def object_locate(self, account, container, obj,
                      version=None, **kwargs):
        """
        Get a description of the object along with the list of its chunks.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :param version: version of the object to query
        :returns: a tuple with object metadata `dict` as first element
            and chunk `list` as second element
        """
        obj_meta, chunks = self.container.content_locate(
            account, container, obj, version=version, **kwargs)
        return obj_meta, chunks

    def object_analyze(self, *args, **kwargs):
        """
        :deprecated: use `object_locate`
        """
        warnings.warn("You'd better use object_locate()",
                      DeprecationWarning)
        return self.object_locate(*args, **kwargs)

    @ensure_headers
    @ensure_request_id
    def object_fetch(self, account, container, obj, version=None, ranges=None,
                     key_file=None, **kwargs):
        meta, raw_chunks = self.object_locate(
            account, container, obj, version=version, **kwargs)
        chunk_method = meta['chunk_method']
        storage_method = STORAGE_METHODS.load(chunk_method)
        chunks = _sort_chunks(raw_chunks, storage_method.ec)
        meta['container_id'] = cid_from_name(account, container).upper()
        meta['ns'] = self.namespace
        self._patch_timeouts(kwargs)
        if storage_method.ec:
            stream = fetch_stream_ec(chunks, ranges, storage_method, **kwargs)
        elif storage_method.backblaze:
            stream = self._fetch_stream_backblaze(meta, chunks, ranges,
                                                  storage_method, key_file,
                                                  **kwargs)
        else:
            stream = fetch_stream(chunks, ranges, storage_method, **kwargs)
        return meta, stream

    @handle_object_not_found
    def object_get_properties(self, account, container, obj, **kwargs):
        return self.container.content_get_properties(account, container, obj,
                                                     **kwargs)

    @handle_object_not_found
    def object_show(self, account, container, obj, version=None, **kwargs):
        """
        Get a description of the content along with its user properties.


        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :returns: a `dict` describing the object

        .. python::

            {'hash': '6BF60C17CC15EEA108024903B481738F',
             'ctime': '1481031763',
             'deleted': 'False',
             'properties': {
                 u'projet': u'OpenIO-SDS'},
             'length': '43518',
             'hash_method': 'md5',
             'chunk_method': 'ec/algo=liberasurecode_rs_vand,k=6,m=3',
             'version': '1481031762951972',
             'policy': 'EC',
             'id': '20BF2194FD420500CD4729AE0B5CBC07',
             'mime_type': 'application/octet-stream',
             'name': 'Makefile'}
        """
        return self.container.content_show(account, container, obj,
                                           version=version,
                                           **kwargs)

    def object_update(self, account, container, obj, metadata,
                      version=None, clear=False, **kwargs):
        warnings.warn("You'd better use object_set_properties()",
                      DeprecationWarning, stacklevel=2)
        if clear:
            self.object_del_properties(
                account, container, obj, [], version=version, **kwargs)
        if metadata:
            self.object_set_properties(
                account, container, obj, metadata, version=version, **kwargs)

    @handle_object_not_found
    def object_set_properties(self, account, container, obj, properties,
                              version=None, **kwargs):
        return self.container.content_set_properties(
            account, container, obj, properties={'properties': properties},
            version=version, **kwargs)

    @handle_object_not_found
    def object_del_properties(self, account, container, obj, properties,
                              version=None, **kwargs):
        return self.container.content_del_properties(
            account, container, obj, properties=properties,
            version=version, **kwargs)

    def _content_preparer(self, account, container, obj_name,
                          policy=None, **kwargs):
        # TODO: optimize by asking more than one metachunk at a time
        obj_meta, first_body = self.container.content_prepare(
            account, container, obj_name, size=1, stgpol=policy,
            autocreate=True, **kwargs)
        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])

        def _fix_mc_pos(chunks, mc_pos):
            for chunk in chunks:
                raw_pos = chunk["pos"].split(".")
                if storage_method.ec:
                    chunk['num'] = int(raw_pos[1])
                    chunk["pos"] = "%d.%d" % (mc_pos, chunk['num'])
                else:
                    chunk["pos"] = str(mc_pos)

        def _metachunk_preparer():
            mc_pos = kwargs.get('meta_pos', 0)
            _fix_mc_pos(first_body, mc_pos)
            yield first_body
            while True:
                mc_pos += 1
                _, next_body = self.container.content_prepare(
                        account, container, obj_name, 1, stgpol=policy,
                        autocreate=True, **kwargs)
                _fix_mc_pos(next_body, mc_pos)
                yield next_body

        return obj_meta, _metachunk_preparer

    def _generate_fullpath(self, account, container_name, path, version):
        return ['{0}/{1}/{2}/{3}'.format(quote_plus(account),
                                         quote_plus(container_name),
                                         quote_plus(path),
                                         version)]

    def _object_create(self, account, container, obj_name, source,
                       sysmeta, properties=None, policy=None,
                       key_file=None, **kwargs):
        self._patch_timeouts(kwargs)
        obj_meta, chunk_prep = self._content_preparer(
            account, container, obj_name,
            policy=policy, **kwargs)
        obj_meta.update(sysmeta)
        obj_meta['content_path'] = obj_name
        obj_meta['container_id'] = cid_from_name(account, container).upper()
        obj_meta['ns'] = self.namespace
        obj_meta['full_path'] = self._generate_fullpath(account, container,
                                                        obj_name,
                                                        obj_meta['version'])
        obj_meta['oio_version'] = (obj_meta.get('oio_version')
                                   or OIO_VERSION)

        # XXX content_id is necessary to update an existing object
        kwargs['content_id'] = kwargs.get('content_id', obj_meta['id'])

        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])
        if storage_method.ec:
            handler = ECWriteHandler(
                source, obj_meta, chunk_prep, storage_method, **kwargs)
        elif storage_method.backblaze:
            backblaze_info = self._b2_credentials(storage_method, key_file)
            handler = BackblazeWriteHandler(
                source, obj_meta, chunk_prep, storage_method,
                backblaze_info, **kwargs)
        else:
            handler = ReplicatedWriteHandler(
                source, obj_meta, chunk_prep, storage_method, **kwargs)

        final_chunks, bytes_transferred, content_checksum = handler.stream()

        etag = obj_meta.get('etag')
        if etag and etag.lower() != content_checksum.lower():
            raise exc.EtagMismatch(
                "given etag %s != computed %s" % (etag, content_checksum))
        obj_meta['etag'] = content_checksum

        data = {'chunks': final_chunks, 'properties': properties or {}}
        # FIXME: we may just pass **obj_meta
        self.container.content_create(
            account, container, obj_name, size=bytes_transferred,
            checksum=content_checksum, data=data,
            stgpol=obj_meta['policy'],
            version=obj_meta['version'], mime_type=obj_meta['mime_type'],
            chunk_method=obj_meta['chunk_method'],
            **kwargs)
        return final_chunks, bytes_transferred, content_checksum

    def _b2_credentials(self, storage_method, key_file):
        key_file = key_file or '/etc/oio/sds/b2-appkey.conf'
        try:
            return BackblazeUtils.get_credentials(storage_method, key_file)
        except BackblazeUtilsException as err:
            raise exc.ConfigurationException(str(err))

    def _fetch_stream_backblaze(self, meta, chunks, ranges,
                                storage_method, key_file,
                                **kwargs):
        backblaze_info = self._b2_credentials(storage_method, key_file)
        total_bytes = 0
        current_offset = 0
        size = None
        offset = 0
        for pos in range(len(chunks)):
            if ranges:
                offset = ranges[pos][0]
                size = ranges[pos][1]

            if size is None:
                size = int(meta["length"])
            chunk_size = int(chunks[pos][0]["size"])
            if total_bytes >= size:
                break
            if current_offset + chunk_size > offset:
                if current_offset < offset:
                    _offset = offset - current_offset
                else:
                    _offset = 0
                if chunk_size + total_bytes > size:
                    _size = size - total_bytes
                else:
                    _size = chunk_size
            handler = BackblazeChunkDownloadHandler(
                meta, chunks[pos], _offset, _size,
                backblaze_info=backblaze_info)
            stream = handler.get_stream()
            if not stream:
                raise exc.OioException("Error while downloading")
            total_bytes += len(stream)
            yield stream
            current_offset += chunk_size

    @handle_container_not_found
    def container_refresh(self, account, container, attempts=3, **kwargs):
        for i in range(attempts):
            try:
                self.account.container_reset(account, container, time.time())
            except exc.Conflict:
                if i >= attempts - 1:
                    raise
        try:
            self.container.container_touch(account, container)
        except exc.ClientException as e:
            if e.status != 406 and e.status != 431:
                raise
            # CODE_USER_NOTFOUND or CODE_CONTAINER_NOTFOUND
            metadata = dict()
            metadata["dtime"] = time.time()
            self.account.container_update(account, container, metadata)

    @handle_account_not_found
    def account_refresh(self, account, **kwargs):
        self.account.account_refresh(account)

        containers = self.container_list(account)
        for container in containers:
            try:
                self.container_refresh(account, container[0])
            except exc.NoSuchContainer:
                # container remove in the meantime
                pass

        while containers:
            marker = containers[-1][0]
            containers = self.container_list(account, marker=marker)
            if containers:
                for container in containers:
                    try:
                        self.container_refresh(account, container[0])
                    except exc.NoSuchContainer:
                        # container remove in the meantime
                        pass

    def all_accounts_refresh(self, **kwargs):
        accounts = self.account_list()
        for account in accounts:
            try:
                self.account_refresh(account)
            except exc.NoSuchAccount:  # account remove in the meantime
                pass

    @handle_account_not_found
    def account_flush(self, account):
        self.account.account_flush(account)

    def _random_buffer(self, dictionary, n):
        return ''.join(random.choice(dictionary) for _ in range(n))

    def _generate_copy(self, chunks, random_hex=60):
        # random_hex is the number of hexadecimals characters to generate for
        # the copy path
        copies = []
        for c in chunks:
            tmp = ''.join([c[:-random_hex],
                           self._random_buffer('0123456789ABCDEF',
                                               random_hex)])
            copies.append(tmp)
        return copies

    def _send_copy(self, targets, copies, fullpath):
        headers = {"x-oio-chunk-meta-full-path": fullpath}
        if not hasattr(self, "blob_client"):
            from oio.blob.client import BlobClient
            self.blob_client = BlobClient()
        for t, c in zip(targets, copies):
            self.blob_client.chunk_link(t, c, headers=headers).status

    def _prepare_update_meta2(self, targets, copies, account, container,
                              content):
        targets_beans = []
        copies_beans = []
        for t, c in zip(targets, copies):
            targets_beans.append(self._meta2bean(t['url'], t, content))
            copies_beans.append(self._meta2bean(c, t, content))
        return targets_beans, copies_beans

    def _meta2bean(self, url, meta, content):
        return {"type": "chunk",
                "id": url,
                "hash": meta['hash'],
                "size": int(meta["size"]),
                "pos": meta["pos"],
                "content": content}
示例#2
0
class BlobConverter(object):
    def __init__(self, conf, logger=None, **kwargs):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise ConfigurationException('No volume specified for converter')
        self.volume = volume
        self.namespace, self.volume_id = check_volume(self.volume)
        # cache
        self.name_by_cid = CacheDict()
        self.content_id_by_name = CacheDict()
        # client
        self.container_client = ContainerClient(conf, **kwargs)
        self.content_factory = ContentFactory(conf,
                                              self.container_client,
                                              logger=self.logger)
        # stats/logs
        self.errors = 0
        self.passes = 0
        self.total_chunks_processed = 0
        self.start_time = 0
        self.last_reported = 0
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        # speed
        self.chunks_run_time = 0
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        # backup
        self.no_backup = true_value(conf.get('no_backup', False))
        self.backup_dir = conf.get('backup_dir') or tempfile.gettempdir()
        self.backup_name = 'backup_%s_%f' \
            % (self.volume_id, time.time())
        # dry run
        self.dry_run = true_value(conf.get('dry_run', False))

    def save_xattr(self, chunk_id, xattr):
        if self.no_backup:
            return
        dirname = self.backup_dir + '/' + self.backup_name + '/' + chunk_id[:3]
        try:
            os.makedirs(dirname)
        except OSError:
            if not os.path.isdir(dirname):
                raise
        with open(dirname + '/' + chunk_id, 'w') as backup_fd:
            # same format as getfattr
            backup_fd.write('# file: ' + self._get_path(chunk_id) + '\n')
            for k, v in xattr.iteritems():
                backup_fd.write('user.' + k + '="' + v + '"\n')

    def _save_container(self, cid, account, container):
        cid = cid.upper()
        self.name_by_cid[cid] = (account, container)
        return cid, account, container

    def _save_content(self, cid, path, version, content_id):
        cid = cid.upper()
        content_id = content_id.upper()
        self.content_id_by_name[(cid, path, version)] = content_id
        return cid, path, version, content_id

    def _get_path(self, chunk_id):
        return self.volume + '/' + chunk_id[:3] + '/' + chunk_id

    def cid_from_name(self, account, container):
        cid = cid_from_name(account, container)
        cid, account, container = self._save_container(cid, account, container)
        return cid

    def name_from_cid(self, cid):
        name = self.name_by_cid.get(cid)
        if name:
            return name

        properties = self.container_client.container_get_properties(cid=cid)
        account = properties['system']['sys.account']
        container = properties['system']['sys.user.name']
        cid, account, container = self._save_container(cid, account, container)
        return account, container

    def content_id_from_name(self, cid, path, version, search=False):
        content_id = self.content_id_by_name.get((cid, path, version))
        if content_id or not search:
            return content_id

        properties = self.container_client.content_get_properties(
            cid=cid, path=path, version=version)
        content_id = properties['id']
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return content_id

    def decode_fullpath(self, fullpath):
        account, container, path, version, content_id = decode_fullpath(
            fullpath)
        cid = self.cid_from_name(account, container)
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return account, container, cid, path, version, content_id

    def decode_old_fullpath(self, old_fullpath):
        account, container, path, version = decode_old_fullpath(old_fullpath)
        cid = self.cid_from_name(account, container)
        content_id = self.content_id_from_name(cid, path, version)
        return account, container, cid, path, version, content_id

    def encode_fullpath(self, chunk_inode, chunk_id, account, container, path,
                        version, content_id):
        # check if chunk exists and has the same inode
        if not is_hexa(chunk_id) or len(chunk_id) != STRLEN_CHUNKID:
            raise ValueError('chunk ID must be hexadecimal (%s)' %
                             STRLEN_CHUNKID)
        try:
            chunk_inode2 = os.stat(self._get_path(chunk_id)).st_ino
        except OSError:
            raise OrphanChunk('No such chunk: possible orphan chunk')
        if chunk_inode2 != chunk_inode:
            raise OrphanChunk('Not the same inode: possible orphan chunk')

        # check fullpath and chunk ID
        if isinstance(version, basestring):
            try:
                version = int(version)
            except ValueError:
                raise ValueError('version must be a number')
        if version <= 0:
            raise ValueError('version must be positive')
        if not is_hexa(content_id):
            raise ValueError('content ID must be hexadecimal')

        fullpath = encode_fullpath(account, container, path, version,
                                   content_id.upper())

        return chunk_id.upper(), fullpath

    def _get_chunk_id_and_fullpath(self,
                                   chunk_inode,
                                   chunk_pos,
                                   content,
                                   chunk_id=None):
        content.container_id, content.account, content.container_name = \
            self._save_container(content.container_id, content.account,
                                 content.container_name)
        content.container_id, content.path, content.version, \
            content.content_id = self._save_content(
                content.container_id, content.path, content.version,
                content.content_id)

        chunks = content.chunks.filter(host=self.volume_id)
        if chunk_id:
            chunks = chunks.filter(id=chunk_id)
        chunk = chunks.filter(pos=chunk_pos).one()
        if chunk is None:
            raise OrphanChunk('Chunk not found in content:'
                              'possible orphan chunk')

        chunk_id, new_fullpath = self.encode_fullpath(
            chunk_inode, chunk.id, content.account, content.container_name,
            content.path, content.version, content.content_id)
        return chunk_id, new_fullpath

    def get_chunk_id_and_fullpath(self,
                                  chunk_inode,
                                  chunk_pos,
                                  container_id,
                                  path,
                                  version,
                                  chunk_id=None,
                                  account=None,
                                  container=None,
                                  content_id=None):
        if account is None or container is None:
            account, container = self.name_from_cid(container_id)

        if content_id:
            try:
                content = self.content_factory.get(container_id,
                                                   content_id,
                                                   account=account,
                                                   container_name=container)
                return self._get_chunk_id_and_fullpath(chunk_inode,
                                                       chunk_pos,
                                                       content,
                                                       chunk_id=chunk_id)
            except Exception as exc:
                self.logger.warn(
                    'chunk_id=%s chunk_pos=%s object=%s/%s/%s/%s/%s/%s: %s',
                    chunk_id, chunk_pos, str(account), str(container),
                    container_id, path, str(version), str(content_id), exc)

        # version must be integer
        try:
            version = str(int(version))
        except Exception:
            version = None

        try:
            content = self.content_factory.get_by_path_and_version(
                container_id,
                path,
                version,
                account=account,
                container_name=container)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')
        return self._get_chunk_id_and_fullpath(chunk_inode,
                                               chunk_pos,
                                               content,
                                               chunk_id=chunk_id)

    def convert_chunk(self, fd, chunk_id):
        meta, raw_meta = read_chunk_metadata(fd,
                                             chunk_id,
                                             check_chunk_id=False)

        links = meta.get('links', dict())
        for chunk_id2, fullpath2 in links.iteritems():
            self.decode_fullpath(fullpath2)

        fullpath = meta.get('full_path')
        if fullpath is not None:
            self.decode_fullpath(fullpath)
            if meta.get('oio_version') == OIO_VERSION:
                return True, meta

        chunk_inode = os.fstat(fd.fileno()).st_ino
        raw_chunk_id = None
        chunk_id = chunk_id.upper()
        chunk_pos = meta['chunk_pos']
        container_id = meta['container_id'].upper()
        path = meta['content_path']
        version = meta['content_version']
        content_id = meta['content_id'].upper()

        new_fullpaths = dict()
        xattr_to_remove = list()
        success = True

        for k, v in raw_meta.iteritems():
            # fetch raw chunk ID
            if k == XATTR_CHUNK_ID:
                raw_chunk_id = v.upper()

            # search old fullpaths
            if not k.startswith(XATTR_OLD_FULLPATH) \
                    or not is_hexa(k[4:], size=64):
                continue

            try:
                account2, container2, container_id2, path2, version2, \
                    content_id2 = self.decode_old_fullpath(v)

                if container_id == container_id2 and path == path2 \
                        and version == version2:
                    if content_id2 is None:
                        content_id2 = self.content_id_from_name(container_id2,
                                                                path2,
                                                                version2,
                                                                search=True)

                    chunk_id, new_fullpath = self.encode_fullpath(
                        chunk_inode, chunk_id, account2, container2, path2,
                        version2, content_id2)
                    new_fullpaths[chunk_id] = new_fullpath
                else:
                    chunk_id2, new_fullpath = self.get_chunk_id_and_fullpath(
                        chunk_inode,
                        chunk_pos,
                        container_id2,
                        path2,
                        version2,
                        account=account2,
                        container=container2,
                        content_id=content_id2)
                    new_fullpaths[chunk_id2] = new_fullpath

                xattr_to_remove.append(k)
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s old_fullpath=%s: %s', chunk_id,
                                 k, exc)

        # old xattr
        if raw_chunk_id is not None:
            try:
                if raw_chunk_id != chunk_id and raw_chunk_id not in links:
                    if raw_chunk_id not in new_fullpaths:
                        meta2, _ = read_chunk_metadata(fd, raw_chunk_id)
                        container_id2 = meta2['container_id'].upper()
                        path2 = meta2['content_path']
                        version2 = meta2['content_version']
                        content_id2 = meta2['content_id'].upper()

                        raw_chunk_id, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                chunk_inode, chunk_pos, container_id2, path2,
                                version2, chunk_id=raw_chunk_id,
                                content_id=content_id2)
                        new_fullpaths[raw_chunk_id] = new_fullpath
                elif raw_chunk_id == chunk_id and fullpath is None:
                    if raw_chunk_id not in new_fullpaths:
                        raw_chunk_id, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                chunk_inode, chunk_pos, container_id, path,
                                version, chunk_id=raw_chunk_id,
                                content_id=content_id)
                        new_fullpaths[raw_chunk_id] = new_fullpath
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s (old xattr): %s', raw_chunk_id,
                                 exc)

        self.save_xattr(chunk_id, raw_meta)

        if self.dry_run:
            self.logger.info(
                "[dryrun] Converting chunk %s: success=%s new_fullpaths=%s "
                "xattr_to_remove=%s", chunk_id, str(success),
                str(new_fullpaths), str(xattr_to_remove))
        else:
            # for security, if there is an error, we don't delete old xattr
            modify_xattr(fd, new_fullpaths, success, xattr_to_remove)
        return success, None

    def safe_convert_chunk(self, path, fd=None, chunk_id=None):
        if chunk_id is None:
            chunk_id = path.rsplit('/', 1)[-1]
            if len(chunk_id) != STRLEN_CHUNKID:
                self.logger.warn('Not a chunk %s' % path)
                return
            for c in chunk_id:
                if c not in hexdigits:
                    self.logger.warn('Not a chunk %s' % path)
                    return

        success = False
        self.total_chunks_processed += 1
        try:
            if fd is None:
                with open(path) as fd:
                    success, _ = self.convert_chunk(fd, chunk_id)
            else:
                success, _ = self.convert_chunk(fd, chunk_id)
        except Exception:
            self.logger.exception('ERROR while conversion %s', path)

        if not success:
            self.errors += 1
        else:
            self.logger.debug('Converted %s', path)
        self.passes += 1

    def _fetch_chunks_from_file(self, input_file):
        with open(input_file, 'r') as ifile:
            for line in ifile:
                chunk_id = line.strip()
                if chunk_id and not chunk_id.startswith('#'):
                    yield self._get_path(chunk_id)

    def paths_gen(self, input_file=None):
        if input_file:
            return self._fetch_chunks_from_file(input_file)
        else:
            return paths_gen(self.volume)

    def converter_pass(self, input_file=None):
        def report(tag, now=None):
            if now is None:
                now = time.time()
            total_time = now - self.start_time
            self.logger.info(
                '%(tag)s  %(volume)s '
                'started=%(start_time)s '
                'passes=%(passes)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s '
                'total_time=%(total_time).2f '
                '(converter: %(success_rate).2f%%)' % {
                    'tag':
                    tag,
                    'volume':
                    self.volume_id,
                    'start_time':
                    datetime.fromtimestamp(int(self.start_time)).isoformat(),
                    'passes':
                    self.passes,
                    'errors':
                    self.errors,
                    'nb_chunks':
                    self.total_chunks_processed,
                    'c_rate':
                    self.total_chunks_processed / total_time,
                    'total_time':
                    total_time,
                    'success_rate':
                    100 * ((self.total_chunks_processed - self.errors) /
                           float(self.total_chunks_processed))
                })
            self.passes = 0
            self.last_reported = now

        self.start_time = time.time()
        self.errors = 0
        self.passes = 0

        self.backup_name = 'backup_%s_%f' % (self.volume_id, self.start_time)

        paths = self.paths_gen(input_file=input_file)
        for path in paths:
            self.safe_convert_chunk(path)

            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('RUN', now=now)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
        report('DONE')

        return self.errors == 0
示例#3
0
class BlobConverter(object):
    def __init__(self, conf, logger=None, **kwargs):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise ConfigurationException('No volume specified for converter')
        self.volume = volume
        self.namespace, self.volume_id = check_volume(self.volume)
        # cache
        self.name_by_cid = CacheDict()
        self.content_id_by_name = CacheDict()
        # client
        self.container_client = ContainerClient(conf, **kwargs)
        self.content_factory = ContentFactory(conf,
                                              self.container_client,
                                              logger=self.logger)
        self._rdir = None  # we may never need it
        # stats/logs
        self.errors = 0
        self.passes = 0
        self.total_chunks_processed = 0
        self.start_time = 0
        self.last_reported = 0
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        # speed
        self.chunks_run_time = 0
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        # backup
        self.no_backup = true_value(conf.get('no_backup', False))
        self.backup_dir = conf.get('backup_dir') or tempfile.gettempdir()
        self.backup_name = 'backup_%s_%f' \
            % (self.volume_id, time.time())
        # dry run
        self.dry_run = true_value(conf.get('dry_run', False))

    @property
    def rdir(self):
        """Get an instance of `RdirClient`."""
        if self._rdir is None:
            self._rdir = RdirClient(
                self.conf, pool_manager=self.container_client.pool_manager)
        return self._rdir

    def save_xattr(self, fd, chunk_id, xattr):
        if self.no_backup:
            return
        dirname = self.backup_dir + '/' + self.backup_name + '/' + chunk_id[:3]
        try:
            os.makedirs(dirname)
        except OSError:
            if not os.path.isdir(dirname):
                raise
        with open(dirname + '/' + chunk_id, 'w') as backup_fd:
            # same format as getfattr
            backup_fd.write('# file: ' + self._get_path(fd, chunk_id) + '\n')
            for k, v in xattr.items():
                backup_fd.write('user.' + k + '="' + v + '"\n')

    def _save_container(self, cid, account, container):
        cid = cid.upper()
        self.name_by_cid[cid] = (account, container)
        return cid, account, container

    def _save_content(self, cid, path, version, content_id):
        cid = cid.upper()
        content_id = content_id.upper()
        self.content_id_by_name[(cid, path, version)] = content_id
        return cid, path, version, content_id

    def _get_path(self, fd, chunk_id):
        chunk_path = self.volume
        chunk_path_split = fd.name[len(self.volume):].split('/')
        start = 0
        for chunk_part in chunk_path_split[:-1]:
            end = start + len(chunk_part)
            chunk_path += '/' + chunk_id[start:end]
            start = end
        chunk_path += '/' + chunk_path_split[-1]
        return chunk_path

    def cid_from_name(self, account, container):
        cid = cid_from_name(account, container)
        cid, account, container = self._save_container(cid, account, container)
        return cid

    def name_from_cid(self, cid):
        name = self.name_by_cid.get(cid)
        if name:
            return name

        properties = self.container_client.container_get_properties(cid=cid)
        account = properties['system']['sys.account']
        container = properties['system']['sys.user.name']
        cid, account, container = self._save_container(cid, account, container)
        return account, container

    def content_id_from_name(self, cid, path, version, search=False):
        content_id = self.content_id_by_name.get((cid, path, version))
        if content_id or not search:
            return content_id

        properties = self.container_client.content_get_properties(
            cid=cid, path=path, version=version)
        content_id = properties['id']
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return content_id

    def decode_fullpath(self, fullpath):
        # pylint: disable=unbalanced-tuple-unpacking
        account, container, path, version, content_id = decode_fullpath(
            fullpath)
        cid = self.cid_from_name(account, container)
        cid, path, version, content_id = self._save_content(
            cid, path, version, content_id)
        return account, container, cid, path, version, content_id

    def decode_old_fullpath(self, old_fullpath):
        # pylint: disable=unbalanced-tuple-unpacking
        try:
            account, container, path, version = decode_old_fullpath(
                old_fullpath)
            cid = self.cid_from_name(account, container)
            content_id = self.content_id_from_name(cid, path, version)
        except ValueError:
            # We never know, let's try to decode the fullpath as if it was new
            account, container, path, version, content_id = decode_fullpath(
                old_fullpath)
            cid = self.cid_from_name(account, container)
        return account, container, cid, path, version, content_id

    def encode_fullpath(self, fd, chunk_id, account, container, path, version,
                        content_id):
        # check if chunk exists and has the same inode
        if not is_hexa(chunk_id) or len(chunk_id) != STRLEN_CHUNKID:
            raise ValueError('chunk ID must be hexadecimal (%s)' %
                             STRLEN_CHUNKID)
        try:
            chunk_inode = os.fstat(fd.fileno()).st_ino
            chunk_inode2 = os.stat(self._get_path(fd, chunk_id)).st_ino
            if chunk_inode2 != chunk_inode:
                raise OrphanChunk('Not the same inode: possible orphan chunk')
        except OSError:
            raise OrphanChunk('No such chunk: possible orphan chunk')

        # check fullpath and chunk ID
        if isinstance(version, string_types):
            try:
                version = int(version)
            except ValueError:
                raise ValueError('version must be a number')
        if version <= 0:
            raise ValueError('version must be positive')
        if not is_hexa(content_id):
            raise ValueError('content ID must be hexadecimal')

        fullpath = encode_fullpath(account, container, path, version,
                                   content_id.upper())

        return chunk_id.upper(), fullpath

    def _get_chunk_id_and_fullpath(self,
                                   fd,
                                   chunk_pos,
                                   content,
                                   chunk_id=None):
        content.container_id, content.account, content.container_name = \
            self._save_container(content.container_id, content.account,
                                 content.container_name)
        content.container_id, content.path, content.version, \
            content.content_id = self._save_content(
                content.container_id, content.path, content.version,
                content.content_id)

        chunks = content.chunks.filter(host=self.volume_id)
        if chunk_id:
            chunks = chunks.filter(id=chunk_id)
        chunk = chunks.filter(pos=chunk_pos).one()
        if chunk is None:
            raise OrphanChunk('Chunk not found in content:'
                              'possible orphan chunk')

        chunk_id, new_fullpath = self.encode_fullpath(
            fd, chunk.id, content.account, content.container_name,
            content.path, content.version, content.content_id)
        return chunk_id, new_fullpath

    def get_chunk_id_and_fullpath(self,
                                  fd,
                                  chunk_pos,
                                  container_id,
                                  path,
                                  version,
                                  chunk_id=None,
                                  account=None,
                                  container=None,
                                  content_id=None):
        if account is None or container is None:
            account, container = self.name_from_cid(container_id)

        if content_id:
            try:
                content = self.content_factory.get(container_id,
                                                   content_id,
                                                   account=account,
                                                   container_name=container)
                return self._get_chunk_id_and_fullpath(fd,
                                                       chunk_pos,
                                                       content,
                                                       chunk_id=chunk_id)
            except Exception as exc:
                self.logger.warn(
                    'chunk_id=%s chunk_pos=%s object=%s/%s/%s/%s/%s/%s: %s',
                    chunk_id, chunk_pos, str(account), str(container),
                    container_id, path, str(version), str(content_id), exc)

        # version must be integer
        try:
            version = str(int(version))
        except Exception:
            version = None

        try:
            content = self.content_factory.get_by_path_and_version(
                container_id,
                path,
                version,
                account=account,
                container_name=container)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')
        return self._get_chunk_id_and_fullpath(fd,
                                               chunk_pos,
                                               content,
                                               chunk_id=chunk_id)

    def convert_chunk(self, fd, chunk_id):
        meta, raw_meta = read_chunk_metadata(fd, chunk_id, for_conversion=True)

        links = meta.get('links', dict())
        for chunk_id2, fullpath2 in links.items():
            self.decode_fullpath(fullpath2)

        fullpath = meta.get('full_path')
        if fullpath is not None:
            self.decode_fullpath(fullpath)
            if meta.get('oio_version') == OIO_VERSION:
                return True, meta

        raw_chunk_id = None
        chunk_id = chunk_id.upper()
        chunk_pos = meta['chunk_pos']
        container_id = meta['container_id'].upper()
        path = meta['content_path']
        version = meta['content_version']
        content_id = meta['content_id'].upper()

        new_fullpaths = dict()
        xattr_to_remove = list()
        success = True

        for k, v in raw_meta.items():
            # fetch raw chunk ID
            if k == XATTR_CHUNK_ID:
                raw_chunk_id = v.upper()

            # search old fullpaths
            if not k.startswith(XATTR_OLD_FULLPATH) \
                    or not is_hexa(k[4:], size=64):
                continue

            try:
                account2, container2, container_id2, path2, version2, \
                    content_id2 = self.decode_old_fullpath(v)

                if meta['chunk_id'] == chunk_id \
                        and container_id == container_id2 \
                        and path == path2 \
                        and version == version2:
                    if content_id2 is None:
                        content_id2 = self.content_id_from_name(container_id2,
                                                                path2,
                                                                version2,
                                                                search=True)

                    chunk_id2, new_fullpath = self.encode_fullpath(
                        fd, chunk_id, account2, container2, path2, version2,
                        content_id2)
                    new_fullpaths[chunk_id2] = new_fullpath
                else:
                    chunk_id2, new_fullpath = self.get_chunk_id_and_fullpath(
                        fd,
                        chunk_pos,
                        container_id2,
                        path2,
                        version2,
                        account=account2,
                        container=container2,
                        content_id=content_id2)
                    new_fullpaths[chunk_id2] = new_fullpath

                xattr_to_remove.append(k)
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s old_fullpath=%s: %s', chunk_id,
                                 k, exc)

        # old xattr
        if raw_chunk_id is not None:
            try:
                if raw_chunk_id != chunk_id and raw_chunk_id not in links:
                    if raw_chunk_id not in new_fullpaths:
                        meta2, _ = read_chunk_metadata(fd, raw_chunk_id)
                        container_id2 = meta2['container_id'].upper()
                        path2 = meta2['content_path']
                        version2 = meta2['content_version']
                        content_id2 = meta2['content_id'].upper()

                        raw_chunk_id2, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                fd, chunk_pos, container_id2, path2,
                                version2, chunk_id=raw_chunk_id,
                                content_id=content_id2)
                        new_fullpaths[raw_chunk_id2] = new_fullpath
                elif raw_chunk_id == chunk_id and fullpath is None:
                    if raw_chunk_id not in new_fullpaths:
                        raw_chunk_id2, new_fullpath = \
                            self.get_chunk_id_and_fullpath(
                                fd, chunk_pos, container_id, path,
                                version, chunk_id=raw_chunk_id,
                                content_id=content_id)
                        new_fullpaths[raw_chunk_id2] = new_fullpath
            except Exception as exc:
                success = False
                self.logger.warn('chunk_id=%s (old xattr): %s', raw_chunk_id,
                                 exc)

        self.save_xattr(fd, chunk_id, raw_meta)

        if self.dry_run:
            self.logger.info(
                "[dryrun] Converting chunk %s: success=%s new_fullpaths=%s "
                "xattr_to_remove=%s", chunk_id, str(success),
                str(new_fullpaths), str(xattr_to_remove))
        else:
            # for security, if there is an error, we don't delete old xattr
            set_fullpath_xattr(fd, new_fullpaths, success, xattr_to_remove)
        return success, None

    def is_fullpath_error(self, err):
        if (isinstance(err, MissingAttribute) and
            (err.attribute.startswith(CHUNK_XATTR_CONTENT_FULLPATH_PREFIX)
             or err.attribute == CHUNK_XATTR_KEYS['content_path']
             or err.attribute.startswith(XATTR_OLD_FULLPATH))):
            return True
        elif isinstance(err, FaultyChunk):
            return any(self.is_fullpath_error(x) for x in err.args)
        return False

    def safe_convert_chunk(self, path, chunk_id=None):
        if chunk_id is None:
            chunk_id = path.rsplit('/', 1)[-1]
            if len(chunk_id) != STRLEN_CHUNKID:
                self.logger.warn('Not a chunk %s' % path)
                return
            for char in chunk_id:
                if char not in hexdigits:
                    self.logger.warn('Not a chunk %s' % path)
                    return

        success = False
        self.total_chunks_processed += 1
        try:
            with open(path) as fd:
                success, _ = self.convert_chunk(fd, chunk_id)
        except (FaultyChunk, MissingAttribute) as err:
            if self.is_fullpath_error(err):
                self.logger.warn(
                    "Cannot convert %s: %s, will try to recover 'fullpath'",
                    path, err)
                try:
                    success = self.recover_chunk_fullpath(path, chunk_id)
                except Exception as err2:
                    self.logger.error('Could not recover fullpath: %s', err2)
            else:
                self.logger.exception('ERROR while converting %s', path)
        except Exception:
            self.logger.exception('ERROR while converting %s', path)

        if not success:
            self.errors += 1
        else:
            self.logger.debug('Converted %s', path)
        self.passes += 1

    def recover_chunk_fullpath(self, path, chunk_id=None):
        if not chunk_id:
            chunk_id = path.rsplit('/', 1)[-1]
        # 1. Fetch chunk list from rdir (could be cached).
        # Unfortunately we cannot seek for a chunk ID.
        entries = [
            x for x in self.rdir.chunk_fetch(self.volume_id, limit=-1)
            if x[2] == chunk_id
        ]
        if not entries:
            raise KeyError('Chunk %s not found in rdir' % chunk_id)
        elif len(entries) > 1:
            self.logger.info('Chunk %s appears in %d objects', chunk_id,
                             len(entries))
        # 2. Find content and container IDs
        cid, content_id = entries[0][0:2]
        # 3a. Call ContainerClient.content_locate()
        #    with the container ID and content ID
        try:
            meta, chunks = self.container_client.content_locate(
                cid=cid, content=content_id)
        except NotFound as err:
            raise OrphanChunk('Cannot check %s is valid: %s' % (path, err))
        # 3b. Resolve container ID into account and container names.
        # FIXME(FVE): get account and container names from meta1
        cmeta = self.container_client.container_get_properties(cid=cid)
        aname = cmeta['system']['sys.account']
        cname = cmeta['system']['sys.user.name']
        fullpath = encode_fullpath(aname, cname, meta['name'], meta['version'],
                                   content_id)
        # 4. Check if the chunk actually belongs to the object
        chunk_url = 'http://%s/%s' % (self.volume_id, chunk_id)
        if chunk_url not in [x['url'] for x in chunks]:
            raise OrphanChunk('Chunk %s not found in object %s' %
                              (chunk_url, fullpath))
        # 5. Regenerate the fullpath
        with open(path, 'w') as fd:
            set_fullpath_xattr(fd, {chunk_id: fullpath})
        return True

    def _fetch_chunks_from_file(self, input_file):
        with open(input_file, 'r') as ifile:
            for line in ifile:
                chunk_path = line.strip()
                if chunk_path and not chunk_path.startswith('#'):
                    yield self.volume + '/' + chunk_path

    def paths_gen(self, input_file=None):
        if input_file:
            return self._fetch_chunks_from_file(input_file)
        else:
            return paths_gen(self.volume)

    def converter_pass(self, input_file=None):
        def report(tag, now=None):
            if now is None:
                now = time.time()
            total_time = now - self.start_time
            self.logger.info(
                '%(tag)s  %(volume)s '
                'started=%(start_time)s '
                'passes=%(passes)d '
                'errors=%(errors)d '
                'chunks=%(nb_chunks)d %(c_rate).2f/s '
                'total_time=%(total_time).2f '
                '(converter: %(success_rate).2f%%)' % {
                    'tag':
                    tag,
                    'volume':
                    self.volume_id,
                    'start_time':
                    datetime.fromtimestamp(int(self.start_time)).isoformat(),
                    'passes':
                    self.passes,
                    'errors':
                    self.errors,
                    'nb_chunks':
                    self.total_chunks_processed,
                    'c_rate':
                    self.total_chunks_processed / total_time,
                    'total_time':
                    total_time,
                    'success_rate':
                    100 * ((self.total_chunks_processed - self.errors) /
                           (float(self.total_chunks_processed) or 1.0))
                })
            self.passes = 0
            self.last_reported = now

        self.start_time = time.time()
        self.errors = 0
        self.passes = 0

        self.backup_name = 'backup_%s_%f' % (self.volume_id, self.start_time)

        paths = self.paths_gen(input_file=input_file)
        for path in paths:
            self.safe_convert_chunk(path)

            now = time.time()
            if now - self.last_reported >= self.report_interval:
                report('RUN', now=now)

            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
        report('DONE')

        return self.errors == 0
示例#4
0
class ObjectStorageApi(object):
    """
    The Object Storage API.

    High level API that wraps `AccountClient`, `ContainerClient` and
    `DirectoryClient` classes.
    """
    def __init__(self, namespace, **kwargs):
        """
        Initialize the object storage API.

        :param namespace: name of the namespace to interract with
        :type namespace: `str`

        :keyword connection_timeout: connection timeout towards rawx services
        :type connection_timeout: `float` seconds
        :keyword read_timeout: timeout for rawx responses and data reads from
            the caller (when uploading)
        :type read_timeout: `float` seconds
        :keyword write_timeout: timeout for rawx write requests
        :type write_timeout: `float` seconds
        """
        self.namespace = namespace
        self.connection_timeout = utils.float_value(
            kwargs.get("connection_timeout"), None)
        self.read_timeout = utils.float_value(kwargs.get("read_timeout"), None)
        self.write_timeout = utils.float_value(kwargs.get("write_timeout"),
                                               None)

        # FIXME: share session between all the clients
        self.directory = DirectoryClient({"namespace": self.namespace},
                                         **kwargs)
        self.account = AccountClient({"namespace": self.namespace}, **kwargs)
        self.container = ContainerClient({"namespace": self.namespace},
                                         **kwargs)

    def account_create(self, account, headers=None):
        """
        Create an account.

        :param account: name of the account to create
        :type account: `str`
        :returns: `True` if the account has been created
        """
        return self.account.account_create(account, headers=headers)

    @handle_account_not_found
    def account_delete(self, account, headers=None):
        """
        Delete an account.

        :param account: name of the account to delete
        :type account: `str`
        """
        self.account.account_delete(account, headers=headers)

    @handle_account_not_found
    def account_show(self, account, headers=None):
        """
        Get information about an account.
        """
        return self.account.account_show(account, headers=headers)

    def account_list(self, headers=None):
        """
        List accounts
        """
        return self.account.account_list(headers=headers)

    # FIXME:
    @handle_account_not_found
    def account_update(self, account, metadata, to_delete=None, headers=None):
        self.account.account_update(account,
                                    metadata,
                                    to_delete,
                                    headers=headers)

    @handle_account_not_found
    def account_set_properties(self, account, properties, headers=None):
        self.account_update(account, properties, headers=headers)

    @handle_account_not_found
    def account_del_properties(self, account, properties, headers=None):
        self.account_update(account, None, properties, headers=headers)

    def container_create(self,
                         account,
                         container,
                         properties=None,
                         headers=None,
                         **kwargs):
        """
        Create a container.

        :param account: account in which to create the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: properties to set on the container
        :type properties: `dict`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: True if the container has been created,
                  False if it already exists
        """
        return self.container.container_create(account,
                                               container,
                                               properties=properties,
                                               headers=headers,
                                               autocreate=True,
                                               **kwargs)

    @handle_container_not_found
    def container_touch(self, account, container, headers=None, **kwargs):
        """
        Trigger a notification about the container state.

        :param account: account from which to delete the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        """
        if not headers:
            headers = dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        self.container.container_touch(account,
                                       container,
                                       headers=headers,
                                       **kwargs)

    def container_create_many(self,
                              account,
                              containers,
                              properties=None,
                              headers=None,
                              **kwargs):
        """
        Create Many containers

        :param account: account in which to create the containers
        :type account: `str`
        :param containers: names of the containers
        :type containers: `list`
        :param properties: properties to set on the containers
        :type properties: `dict`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        """
        return self.container.container_create_many(account,
                                                    containers,
                                                    properties=properties,
                                                    headers=headers,
                                                    autocreate=True,
                                                    **kwargs)

    @handle_container_not_found
    def container_delete(self, account, container, headers=None, **kwargs):
        """
        Delete a container.

        :param account: account from which to delete the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        """
        self.container.container_delete(account,
                                        container,
                                        headers=headers,
                                        **kwargs)

    @handle_account_not_found
    def container_list(self,
                       account,
                       limit=None,
                       marker=None,
                       end_marker=None,
                       prefix=None,
                       delimiter=None,
                       headers=None):
        """
        Get the list of containers of an account.

        :param account: account from which to get the container list
        :type account: `str`
        :keyword limit: maximum number of results to return
        :type limit: `int`
        :keyword marker: name of the container from where to start the listing
        :type marker: `str`
        :keyword end_marker:
        :keyword prefix:
        :keyword delimiter:
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        """
        resp = self.account.container_list(account,
                                           limit=limit,
                                           marker=marker,
                                           end_marker=end_marker,
                                           prefix=prefix,
                                           delimiter=delimiter,
                                           headers=headers)
        return resp["listing"]

    @handle_container_not_found
    def container_show(self, account, container, headers=None):
        """
        Get information about a container (user properties).

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: a `dict` with "properties" containing a `dict`
            of user properties.
        """
        return self.container.container_show(account,
                                             container,
                                             headers=headers)

    @handle_container_not_found
    def container_get_properties(self,
                                 account,
                                 container,
                                 properties=None,
                                 headers=None):
        """
        Get information about a container (user and system properties).

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: *ignored*
        :keyword headers: extra headers to send to the proxy
        :type headers: `dict`
        :returns: a `dict` with "properties" and "system" entries,
            containing respectively a `dict` of user properties and
            a `dict` of system properties.
        """
        return self.container.container_get_properties(account,
                                                       container,
                                                       properties=properties,
                                                       headers=headers)

    @handle_container_not_found
    def container_set_properties(self,
                                 account,
                                 container,
                                 properties=None,
                                 clear=False,
                                 headers=None,
                                 **kwargs):
        """
        Set properties on a container.

        :param account: name of the account
        :type account: `str`
        :param container: name of the container where to set properties
        :type container: `str`
        :param properties: a dictionary of properties
        :type properties: `dict`
        :param clear:
        :type clear: `bool`
        :param headers: extra headers to pass to the proxy
        :type headers: `dict`
        :keyword system: dictionary of system properties to set
        """
        return self.container.container_set_properties(account,
                                                       container,
                                                       properties,
                                                       clear=clear,
                                                       headers=headers,
                                                       **kwargs)

    @handle_container_not_found
    def container_del_properties(self,
                                 account,
                                 container,
                                 properties,
                                 headers=None,
                                 **kwargs):
        return self.container.container_del_properties(account,
                                                       container,
                                                       properties,
                                                       headers=headers,
                                                       **kwargs)

    def container_update(self,
                         account,
                         container,
                         metadata,
                         clear=False,
                         headers=None):
        if not metadata:
            self.container_del_properties(account,
                                          container, [],
                                          headers=headers)
        else:
            self.container_set_properties(account,
                                          container,
                                          metadata,
                                          clear,
                                          headers=headers)

    @handle_container_not_found
    def object_create(self,
                      account,
                      container,
                      file_or_path=None,
                      data=None,
                      etag=None,
                      obj_name=None,
                      mime_type=None,
                      metadata=None,
                      policy=None,
                      headers=None,
                      key_file=None,
                      **_kwargs):
        """
        Create an object in *container* of *account* with data taken from
        either *data* (`str` or `generator`) or *file_or_path* (path to a file
        or file-like object).
        The object will be named after *obj_name* if specified, or after
        the base name of *file_or_path*.

        :param account: name of the account where to create the object
        :type account: `str`
        :param container: name of the container where to create the object
        :type container: `str`
        :param file_or_path: file-like object or path to a file from which
            to read object data
        :type file_or_path: `str` or file-like object
        :param data: object data (if `file_or_path` is not set)
        :type data: `str` or `generator`
        :keyword etag: entity tag of the object
        :type etag: `str`
        :keyword obj_name: name of the object to create. If not set, will use
            the base name of `file_or_path`.
        :keyword mime_type: MIME type of the object
        :type mime_type: `str`
        :keyword properties: a dictionary of properties
        :type properties: `dict`
        :keyword policy: name of the storage policy
        :type policy: `str`
        :param headers: extra headers to pass to the proxy
        :type headers: `dict`
        :keyword key_file:
        """
        if (data, file_or_path) == (None, None):
            raise exc.MissingData()
        src = data if data is not None else file_or_path
        if src is file_or_path:
            if isinstance(file_or_path, basestring):
                if not os.path.exists(file_or_path):
                    raise exc.FileNotFound("File '%s' not found." %
                                           file_or_path)
                file_name = os.path.basename(file_or_path)
            else:
                try:
                    file_name = os.path.basename(file_or_path.name)
                except AttributeError:
                    file_name = None
            obj_name = obj_name or file_name
        elif isgenerator(src):
            file_or_path = utils.GeneratorIO(src)
            src = file_or_path
        if not obj_name:
            raise exc.MissingName("No name for the object has been specified")

        sysmeta = {'mime_type': mime_type, 'etag': etag}

        if not headers:
            headers = dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()

        if src is data:
            return self._object_create(account,
                                       container,
                                       obj_name,
                                       BytesIO(data),
                                       sysmeta,
                                       properties=metadata,
                                       policy=policy,
                                       headers=headers,
                                       key_file=key_file)
        elif hasattr(file_or_path, "read"):
            return self._object_create(account,
                                       container,
                                       obj_name,
                                       src,
                                       sysmeta,
                                       properties=metadata,
                                       policy=policy,
                                       headers=headers,
                                       key_file=key_file)
        else:
            with open(file_or_path, "rb") as f:
                return self._object_create(account,
                                           container,
                                           obj_name,
                                           f,
                                           sysmeta,
                                           properties=metadata,
                                           policy=policy,
                                           headers=headers,
                                           key_file=key_file)

    def object_touch(self, account, container, obj, headers=None, **kwargs):
        """
        Trigger a notification about an object
        (as if it just had been created).

        :param account: name of the account where to create the object
        :type account: `str`
        :param container: name of the container where to create the object
        :type container: `str`
        :param obj: name of the object to touch
        :param headers: extra headers to pass to the proxy

        """
        if not headers:
            headers = dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        self.container.content_touch(account,
                                     container,
                                     obj,
                                     headers=headers,
                                     **kwargs)

    @handle_object_not_found
    def object_delete(self, account, container, obj, headers=None, **kwargs):
        if not headers:
            headers = dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        return self.container.content_delete(account,
                                             container,
                                             obj,
                                             headers=headers,
                                             **kwargs)

    def object_delete_many(self,
                           account,
                           container,
                           objs,
                           headers=None,
                           **kwargs):
        if not headers:
            headers = dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        return self.container.content_delete_many(account,
                                                  container,
                                                  objs,
                                                  headers=headers,
                                                  **kwargs)

    @handle_container_not_found
    def object_list(self,
                    account,
                    container,
                    limit=None,
                    marker=None,
                    delimiter=None,
                    prefix=None,
                    end_marker=None,
                    headers=None,
                    properties=False,
                    **kwargs):
        """
        Lists objects inside a container.

        :returns: a dict which contains
           * 'objects': the list of objects
           * 'prefixes': common prefixes (only if delimiter and prefix are set)
           * 'properties': a dict of container properties
           * 'system': system metadata
        """
        _, resp_body = self.container.content_list(account,
                                                   container,
                                                   limit=limit,
                                                   marker=marker,
                                                   end_marker=end_marker,
                                                   prefix=prefix,
                                                   delimiter=delimiter,
                                                   properties=properties,
                                                   headers=headers,
                                                   **kwargs)

        for obj in resp_body['objects']:
            mtype = obj.get('mime-type')
            if mtype:
                obj['mime_type'] = mtype
                del obj['mime-type']

        return resp_body

    # FIXME:
    @handle_object_not_found
    def object_locate(self, account, container, obj, headers=None):
        obj_meta, body = self.container.content_locate(account, container, obj)
        return obj_meta, body

    def object_analyze(self, *args, **kwargs):
        """
        :deprecated: use `object_locate`
        """
        return self.object_locate(*args, **kwargs)

    def object_fetch(self,
                     account,
                     container,
                     obj,
                     ranges=None,
                     headers=None,
                     key_file=None):
        if not headers:
            headers = dict()
        if 'X-oio-req-id' not in headers:
            headers['X-oio-req-id'] = utils.request_id()
        meta, raw_chunks = self.object_locate(account,
                                              container,
                                              obj,
                                              headers=headers)
        chunk_method = meta['chunk_method']
        storage_method = STORAGE_METHODS.load(chunk_method)
        chunks = _sort_chunks(raw_chunks, storage_method.ec)
        meta['container_id'] = utils.name2cid(account, container).upper()
        meta['ns'] = self.namespace
        if storage_method.ec:
            stream = self._fetch_stream_ec(meta, chunks, ranges,
                                           storage_method, headers)
        elif storage_method.backblaze:
            stream = self._fetch_stream_backblaze(meta, chunks, ranges,
                                                  storage_method, key_file)
        else:
            stream = self._fetch_stream(meta, chunks, ranges, storage_method,
                                        headers)
        return meta, stream

    @handle_object_not_found
    def object_get_properties(self, account, container, obj, headers=None):
        return self.container.content_get_properties(account, container, obj)

    def object_show(self, account, container, obj, headers=None):
        """
        Get a description of the content along with its user properties.


        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :returns: a `dict` describing the object

        .. python::

            {'hash': '6BF60C17CC15EEA108024903B481738F',
             'ctime': '1481031763',
             'deleted': 'False',
             'properties': {
                 u'projet': u'OpenIO-SDS'},
             'length': '43518',
             'hash_method': 'md5',
             'chunk_method': 'ec/algo=liberasurecode_rs_vand,k=6,m=3',
             'version': '1481031762951972',
             'policy': 'EC',
             'id': '20BF2194FD420500CD4729AE0B5CBC07',
             'mime_type': 'application/octet-stream',
             'name': 'Makefile'}
        """
        return self.container.content_show(account,
                                           container,
                                           obj,
                                           headers=headers)

    def object_update(self,
                      account,
                      container,
                      obj,
                      metadata,
                      clear=False,
                      headers=None):
        if clear:
            self.object_del_properties(account,
                                       container,
                                       obj, [],
                                       headers=headers)
        if metadata:
            self.object_set_properties(account,
                                       container,
                                       obj,
                                       metadata,
                                       headers=headers)

    @handle_object_not_found
    def object_set_properties(self,
                              account,
                              container,
                              obj,
                              properties,
                              clear=False,
                              headers=None,
                              **kwargs):
        return self.container.content_set_properties(
            account,
            container,
            obj,
            properties={'properties': properties},
            headers=headers,
            **kwargs)

    @handle_object_not_found
    def object_del_properties(self,
                              account,
                              container,
                              obj,
                              properties,
                              headers=None,
                              **kwargs):
        return self.container.content_del_properties(account,
                                                     container,
                                                     obj,
                                                     properties=properties,
                                                     headers=headers,
                                                     **kwargs)

    # FIXME: remove and call self.container.content_prepare() directly
    def _content_prepare(self,
                         account,
                         container,
                         obj_name,
                         size,
                         policy=None,
                         headers=None):
        return self.container.content_prepare(account,
                                              container,
                                              obj_name,
                                              size,
                                              stgpol=policy,
                                              autocreate=True,
                                              headers=headers)

    def _content_preparer(self,
                          account,
                          container,
                          obj_name,
                          policy=None,
                          headers=None):
        # TODO: optimize by asking more than one metachunk at a time
        obj_meta, first_body = self.container.content_prepare(account,
                                                              container,
                                                              obj_name,
                                                              size=1,
                                                              stgpol=policy,
                                                              autocreate=True,
                                                              headers=headers)
        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])

        def _fix_mc_pos(chunks, mc_pos):
            for chunk in chunks:
                raw_pos = chunk["pos"].split(".")
                if storage_method.ec:
                    chunk['num'] = int(raw_pos[1])
                    chunk["pos"] = "%d.%d" % (mc_pos, chunk['num'])
                else:
                    chunk["pos"] = str(mc_pos)

        def _metachunk_preparer():
            mc_pos = 0
            _fix_mc_pos(first_body, mc_pos)
            yield first_body
            while True:
                mc_pos += 1
                _, next_body = self._content_prepare(account, container,
                                                     obj_name, 1, policy,
                                                     headers)
                _fix_mc_pos(next_body, mc_pos)
                yield next_body

        return obj_meta, _metachunk_preparer

    def _object_create(self,
                       account,
                       container,
                       obj_name,
                       source,
                       sysmeta,
                       properties=None,
                       policy=None,
                       headers=None,
                       key_file=None):
        obj_meta, chunk_prep = self._content_preparer(account,
                                                      container,
                                                      obj_name,
                                                      policy=policy,
                                                      headers=headers)
        obj_meta.update(sysmeta)
        obj_meta['content_path'] = obj_name
        obj_meta['container_id'] = utils.name2cid(account, container).upper()
        obj_meta['ns'] = self.namespace

        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])
        if storage_method.ec:
            handler = ECWriteHandler(
                source,
                obj_meta,
                chunk_prep,
                storage_method,
                headers=headers,
                write_timeout=self.write_timeout,
                read_timeout=self.read_timeout,
                connection_timeout=self.connection_timeout)
        elif storage_method.backblaze:
            backblaze_info = self._b2_credentials(storage_method, key_file)
            handler = BackblazeWriteHandler(source, obj_meta, chunk_prep,
                                            storage_method, headers,
                                            backblaze_info)
        else:
            handler = ReplicatedWriteHandler(
                source,
                obj_meta,
                chunk_prep,
                storage_method,
                headers=headers,
                write_timeout=self.write_timeout,
                read_timeout=self.read_timeout,
                connection_timeout=self.connection_timeout)

        final_chunks, bytes_transferred, content_checksum = handler.stream()

        etag = obj_meta.get('etag')
        if etag and etag.lower() != content_checksum.lower():
            raise exc.EtagMismatch("given etag %s != computed %s" %
                                   (etag, content_checksum))
        obj_meta['etag'] = content_checksum

        data = {'chunks': final_chunks, 'properties': properties or {}}
        # FIXME: we may just pass **obj_meta
        self.container.content_create(account,
                                      container,
                                      obj_name,
                                      size=bytes_transferred,
                                      checksum=content_checksum,
                                      data=data,
                                      content_id=obj_meta['id'],
                                      stgpol=obj_meta['policy'],
                                      version=obj_meta['version'],
                                      mime_type=obj_meta['mime_type'],
                                      chunk_method=obj_meta['chunk_method'],
                                      headers=headers)
        return final_chunks, bytes_transferred, content_checksum

    def _fetch_stream(self, meta, chunks, ranges, storage_method, headers):
        total_bytes = 0
        headers = headers or {}
        ranges = ranges or [(None, None)]

        meta_range_list = get_meta_ranges(ranges, chunks)

        for meta_range_dict in meta_range_list:
            for pos, meta_range in meta_range_dict.iteritems():
                meta_start, meta_end = meta_range
                if meta_start is not None and meta_end is not None:
                    headers['Range'] = http_header_from_ranges([meta_range])
                reader = io.ChunkReader(
                    iter(chunks[pos]),
                    io.READ_CHUNK_SIZE,
                    headers,
                    connection_timeout=self.connection_timeout,
                    response_timeout=self.read_timeout,
                    read_timeout=self.read_timeout)
                try:
                    it = reader.get_iter()
                except Exception as err:
                    raise exc.OioException(
                        "Error while downloading position %d: %s" % (pos, err))
                for part in it:
                    for d in part['iter']:
                        total_bytes += len(d)
                        yield d

    def _fetch_stream_ec(self, meta, chunks, ranges, storage_method, headers):
        ranges = ranges or [(None, None)]

        meta_range_list = get_meta_ranges(ranges, chunks)

        for meta_range_dict in meta_range_list:
            for pos, meta_range in meta_range_dict.iteritems():
                meta_start, meta_end = meta_range
                handler = ECChunkDownloadHandler(
                    storage_method,
                    chunks[pos],
                    meta_start,
                    meta_end,
                    headers,
                    connection_timeout=self.connection_timeout,
                    response_timeout=self.read_timeout,
                    read_timeout=self.read_timeout)
                stream = handler.get_stream()
                for part_info in stream:
                    for d in part_info['iter']:
                        yield d
                stream.close()

    def _b2_credentials(self, storage_method, key_file):
        key_file = key_file or '/etc/oio/sds/b2-appkey.conf'
        try:
            return BackblazeUtils.get_credentials(storage_method, key_file)
        except BackblazeUtilsException as err:
            raise exc.ConfigurationException(str(err))

    def _fetch_stream_backblaze(self, meta, chunks, ranges, storage_method,
                                key_file):
        backblaze_info = self._b2_credentials(storage_method, key_file)
        total_bytes = 0
        current_offset = 0
        size = None
        offset = 0
        for pos in range(len(chunks)):
            if ranges:
                offset = ranges[pos][0]
                size = ranges[pos][1]

            if size is None:
                size = int(meta["length"])
            chunk_size = int(chunks[pos][0]["size"])
            if total_bytes >= size:
                break
            if current_offset + chunk_size > offset:
                if current_offset < offset:
                    _offset = offset - current_offset
                else:
                    _offset = 0
                if chunk_size + total_bytes > size:
                    _size = size - total_bytes
                else:
                    _size = chunk_size
            handler = BackblazeChunkDownloadHandler(
                meta,
                chunks[pos],
                _offset,
                _size,
                backblaze_info=backblaze_info)
            stream = handler.get_stream()
            if not stream:
                raise exc.OioException("Error while downloading")
            total_bytes += len(stream)
            yield stream
            current_offset += chunk_size
示例#5
0
class TestContentRebuildFilter(BaseTestCase):
    def setUp(self):
        super(TestContentRebuildFilter, self).setUp()
        self.namespace = self.conf['namespace']
        self.gridconf = {"namespace": self.namespace}
        self.container = "TestContentRebuildFilter%f" % time.time()
        self.ref = self.container
        self.container_client = ContainerClient(self.conf)
        self.container_client.container_create(self.account, self.container)
        syst = self.container_client.container_get_properties(
            self.account, self.container)['system']
        self.container_id = syst['sys.name'].split('.', 1)[0]
        self.object_storage_api = ObjectStorageApi(namespace=self.namespace)
        queue_addr = choice(self.conf['services']['beanstalkd'])['addr']
        self.queue_url = queue_addr
        self.conf['queue_url'] = 'beanstalk://' + self.queue_url
        self.conf['tube'] = DEFAULT_REBUILDER_TUBE
        self.notify_filter = NotifyFilter(app=_App, conf=self.conf)
        bt = Beanstalk.from_url(self.conf['queue_url'])
        bt.drain_tube(DEFAULT_REBUILDER_TUBE)
        bt.close()

    def _create_event(self, content_name, present_chunks, missing_chunks,
                      content_id):
        event = {}
        event["when"] = time.time()
        event["event"] = "storage.content.broken"
        event["data"] = {
            "present_chunks": present_chunks,
            "missing_chunks": missing_chunks
        }
        event["url"] = {
            "ns": self.namespace,
            "account": self.account,
            "user": self.container,
            "path": content_name,
            "id": self.container_id,
            "content": content_id
        }
        return event

    def _is_chunks_created(self, previous, after, pos_created):
        remain = list(after)
        for p in previous:
            for r in remain:
                if p["url"] == r["url"]:
                    remain.remove(r)
                    break
        if len(remain) != len(pos_created):
            return False
        for r in remain:
            if r["pos"] in pos_created:
                remain.remove(r)
            else:
                return False
        return True

    def _rebuild(self, event, job_id=0):
        self.blob_rebuilder = subprocess.Popen([
            'oio-blob-rebuilder', self.namespace,
            '--beanstalkd=' + self.queue_url
        ])
        time.sleep(3)
        self.blob_rebuilder.kill()

    def _remove_chunks(self, chunks, content_id):
        if not chunks:
            return
        for chunk in chunks:
            chunk['id'] = chunk['url']
            chunk['content'] = content_id
            chunk['type'] = 'chunk'
        self.container_client.container_raw_delete(self.account,
                                                   self.container,
                                                   data=chunks)

    def _check_rebuild(self,
                       content_name,
                       chunks,
                       missing_pos,
                       meta,
                       chunks_to_remove,
                       chunk_created=True):
        self._remove_chunks(chunks_to_remove, meta['id'])
        event = self._create_event(content_name, chunks, missing_pos,
                                   meta['id'])
        self.notify_filter.process(env=event, cb=None)
        self._rebuild(event)
        _, after = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        self.assertIs(chunk_created,
                      self._is_chunks_created(chunks, after, missing_pos))

    def test_nothing_missing(self):
        content_name = "test_nothing_missing"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="THREECOPIES",
                                              obj_name=content_name)

        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = []
        self._check_rebuild(content_name,
                            chunks,
                            missing_pos,
                            meta,
                            chunks_to_remove,
                            chunk_created=True)

    def test_missing_1_chunk(self):
        content_name = "test_missing_1_chunk"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_last_chunk(self):
        content_name = "test_missing_last_chunk"
        data = random_str(1024 * 1024 * 4)
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data=data,
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["3"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_2_chunks(self):
        content_name = "test_missing_2_chunks"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        for i in range(0, 2):
            chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0", "0"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_all_chunks(self):
        content_name = "test_missing_all_chunks"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="SINGLE",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0"]
        self._check_rebuild(content_name,
                            chunks,
                            missing_pos,
                            meta,
                            chunks_to_remove,
                            chunk_created=False)

    def test_missing_all_chunks_of_a_pos(self):
        content_name = "test_missing_2_chunks"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        for i in range(0, 3):
            chunks_to_remove.append(chunks.pop(0))

        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0"]
        self._check_rebuild(content_name,
                            chunks,
                            missing_pos,
                            meta,
                            chunks_to_remove,
                            chunk_created=False)

    def test_missing_multiple_chunks(self):
        content_name = "test_missing_multiple_chunks"
        data = random_str(1024 * 1024 * 4)
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data=data,
                                              policy="THREECOPIES",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(9))
        chunks_to_remove.append(chunks.pop(6))
        chunks_to_remove.append(chunks.pop(4))
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0", "1", "2", "3"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_1_chunk_ec(self):
        if len(self.conf['services']['rawx']) < 9:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 9 rawx to run")
        content_name = "test_missing_1_chunk_ec"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="EC",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0.1"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_m_chunk_ec(self):
        if len(self.conf['services']['rawx']) < 9:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 9 rawx to run")
        content_name = "test_missing_m_chunk_ec"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="EC",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        for i in range(0, 3):
            chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0.1", "0.2", "0.3"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_m_chunk_ec_2(self):
        if len(self.conf['services']['rawx']) < 9:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 9 rawx to run")
        content_name = "test_missing_m_chunk_ec"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="EC",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(0))
        chunks_to_remove.append(chunks.pop(3))
        chunks_to_remove.append(chunks.pop(5))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0.1", "0.5", "0.8"]
        self._check_rebuild(content_name, chunks, missing_pos, meta,
                            chunks_to_remove)

    def test_missing_m1_chunk_ec(self):
        if len(self.conf['services']['rawx']) < 9:
            self.skipTest("Not enough rawx. "
                          "EC tests needs at least 9 rawx to run")
        content_name = "test_missing_m1_chunk_ec"
        self.object_storage_api.object_create(account=self.account,
                                              container=self.container,
                                              data="test",
                                              policy="EC",
                                              obj_name=content_name)
        meta, chunks = self.object_storage_api.object_locate(
            container=self.container, obj=content_name, account=self.account)
        chunks_to_remove = []
        chunks_to_remove.append(chunks.pop(0))
        chunks_to_remove.append(chunks.pop(0))
        chunks_to_remove.append(chunks.pop(0))
        chunks_to_remove.append(chunks.pop(0))
        for chunk in chunks:
            chunk.pop('score', None)

        missing_pos = ["0.1", "0.2", "0.3", "0.4"]
        self._check_rebuild(content_name,
                            chunks,
                            missing_pos,
                            meta,
                            chunks_to_remove,
                            chunk_created=False)
示例#6
0
class ContentFactory(object):
    DEFAULT_DATASEC = "plain", {"nb_copy": "1", "distance": "0"}

    def __init__(self, conf, **kwargs):
        self.conf = conf
        self.logger = get_logger(conf)
        self.container_client = ContainerClient(conf,
                                                logger=self.logger,
                                                **kwargs)

    def get(self, container_id, content_id, account=None, container_name=None):
        try:
            meta, chunks = self.container_client.content_locate(
                cid=container_id, content=content_id)
        except NotFound:
            raise ContentNotFound("Content %s/%s not found" %
                                  (container_id, content_id))

        chunk_method = meta['chunk_method']
        storage_method = STORAGE_METHODS.load(chunk_method)
        if not account or not container_name:
            container_info = self.container_client.container_get_properties(
                cid=container_id)['system']
            if not account:
                account = container_info['sys.account']
            if not container_name:
                container_name = container_info['sys.user.name']
        cls = ECContent if storage_method.ec else PlainContent
        return cls(self.conf,
                   container_id,
                   meta,
                   chunks,
                   storage_method,
                   account,
                   container_name,
                   container_client=self.container_client)

    def new(self,
            container_id,
            path,
            size,
            policy,
            account=None,
            container_name=None):
        meta, chunks = self.container_client.content_prepare(cid=container_id,
                                                             path=path,
                                                             size=size,
                                                             stgpol=policy)

        chunk_method = meta['chunk_method']
        storage_method = STORAGE_METHODS.load(chunk_method)
        if not account or not container_name:
            container_info = self.container_client.container_get_properties(
                cid=container_id)['system']
            if not account:
                account = container_info['sys.account']
            if not container_name:
                container_name = container_info['sys.user.name']
        cls = ECContent if storage_method.ec else PlainContent
        return cls(self.conf, container_id, meta, chunks, storage_method,
                   account, container_name)

    def copy(self, origin, policy=None):
        if not policy:
            policy = origin.policy
        metadata = origin.metadata.copy()
        new_metadata, chunks = self.container_client.content_prepare(
            cid=origin.container_id,
            path=metadata['name'],
            size=metadata['length'],
            stgpol=policy)

        metadata['chunk_method'] = new_metadata['chunk_method']
        metadata['chunk_size'] = new_metadata['chunk_size']
        # We must use a new content_id since we change the data
        metadata['id'] = new_metadata['id']
        # We may want to keep the same version, but it is denied by meta2
        metadata['version'] = int(metadata['version']) + 1
        metadata['policy'] = new_metadata['policy']
        # FIXME: meta2 does not allow us to set ctime
        # and thus the object will appear as new.
        storage_method = STORAGE_METHODS.load(metadata['chunk_method'])

        cls = ECContent if storage_method.ec else PlainContent
        return cls(self.conf, origin.container_id, metadata, chunks,
                   storage_method, origin.account, origin.container_name)

    def change_policy(self, container_id, content_id, new_policy):
        old_content = self.get(container_id, content_id)
        if old_content.policy == new_policy:
            return old_content

        new_content = self.copy(old_content, policy=new_policy)

        stream = old_content.fetch()
        new_content.create(GeneratorIO(stream))
        # the old content is automatically deleted because the new content has
        # the same name (but not the same id)
        return new_content