Example #1
class ObjectStorageApi(object):
    """
    The Object Storage API.

    High level API that wraps `AccountClient`, `ContainerClient` and
    `DirectoryClient` classes.

    Every method that takes a `kwargs` argument accepts at least
    the following keywords:

        - `headers`: `dict` of extra headers to pass to the proxy
        - `connection_timeout`: `float`
        - `read_timeout`: `float`
        - `write_timeout`: `float`
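
    A minimal usage sketch (the namespace, account and container names
    below are illustrative):

    .. python::

        api = ObjectStorageApi("OPENIO", connection_timeout=2.0,
                               read_timeout=10.0)
        api.container_create("my_account", "my_container")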
    """
    TIMEOUT_KEYS = ('connection_timeout', 'read_timeout', 'write_timeout')

    def __init__(self, namespace, logger=None, **kwargs):
        """
        Initialize the object storage API.

        :param namespace: name of the namespace to interact with
        :type namespace: `str`

        :keyword connection_timeout: connection timeout towards rawx services
        :type connection_timeout: `float` seconds
        :keyword read_timeout: timeout for rawx responses and data reads from
            the caller (when uploading)
        :type read_timeout: `float` seconds
        :keyword write_timeout: timeout for rawx write requests
        :type write_timeout: `float` seconds
        :keyword pool_manager: a pooled connection manager that will be used
            for all HTTP based APIs (except rawx)
        :type pool_manager: `urllib3.PoolManager`
        """
        self.namespace = namespace
        conf = {"namespace": self.namespace}
        self.logger = logger or get_logger(conf)
        self.timeouts = {tok: float_value(tov, None)
                         for tok, tov in kwargs.items()
                         if tok in self.__class__.TIMEOUT_KEYS}

        from oio.account.client import AccountClient
        from oio.container.client import ContainerClient
        from oio.directory.client import DirectoryClient
        self.directory = DirectoryClient(conf, logger=self.logger, **kwargs)
        self.container = ContainerClient(conf, logger=self.logger, **kwargs)

        # In AccountClient, "endpoint" is the account service, not the proxy
        acct_kwargs = kwargs.copy()
        acct_kwargs["proxy_endpoint"] = acct_kwargs.pop("endpoint", None)
        self.account = AccountClient(conf, logger=self.logger, **acct_kwargs)

    def _patch_timeouts(self, kwargs):
        """
        Insert timeout settings from this class's constructor into `kwargs`,
        if they are not already there.
        """
        for tok, tov in self.timeouts.items():
            if tok not in kwargs:
                kwargs[tok] = tov

    def account_create(self, account, **kwargs):
        """
        Create an account.

        :param account: name of the account to create
        :type account: `str`
        :returns: `True` if the account has been created
        """
        return self.account.account_create(account, **kwargs)

    @handle_account_not_found
    def account_delete(self, account, **kwargs):
        """
        Delete an account.

        :param account: name of the account to delete
        :type account: `str`
        """
        self.account.account_delete(account, **kwargs)

    @handle_account_not_found
    def account_show(self, account, **kwargs):
        """
        Get information about an account.
        """
        return self.account.account_show(account, **kwargs)

    def account_list(self, **kwargs):
        """
        List known accounts.

        Notice that account creation is asynchronous, and an autocreated
        account may appear in the listing only after several seconds.
        """
        return self.account.account_list(**kwargs)

    @handle_account_not_found
    def account_update(self, account, metadata, to_delete=None, **kwargs):
        warnings.warn("You'd better use account_set_properties()",
                      DeprecationWarning, stacklevel=2)
        self.account.account_update(account, metadata, to_delete, **kwargs)

    @handle_account_not_found
    def account_set_properties(self, account, properties, **kwargs):
        self.account.account_update(account, properties, None, **kwargs)

    @handle_account_not_found
    def account_del_properties(self, account, properties, **kwargs):
        self.account.account_update(account, None, properties, **kwargs)

    def container_create(self, account, container, properties=None,
                         **kwargs):
        """
        Create a container.

        :param account: account in which to create the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: properties to set on the container
        :type properties: `dict`
        :returns: True if the container has been created,
                  False if it already exists
        """
        return self.container.container_create(account, container,
                                               properties=properties,
                                               **kwargs)

    @handle_container_not_found
    @ensure_headers
    @ensure_request_id
    def container_touch(self, account, container, **kwargs):
        """
        Trigger a notification about the container state.

        :param account: account the container belongs to
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        """
        self.container.container_touch(account, container, **kwargs)

    def container_create_many(self, account, containers, properties=None,
                              **kwargs):
        """
        Create many containers.

        :param account: account in which to create the containers
        :type account: `str`
        :param containers: names of the containers
        :type containers: `list`
        :param properties: properties to set on the containers
        :type properties: `dict`
        """
        return self.container.container_create_many(account,
                                                    containers,
                                                    properties=properties,
                                                    **kwargs)

    @handle_container_not_found
    def container_delete(self, account, container, **kwargs):
        """
        Delete a container.

        :param account: account from which to delete the container
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        """
        self.container.container_delete(account, container, **kwargs)

    @handle_account_not_found
    def container_list(self, account, limit=None, marker=None,
                       end_marker=None, prefix=None, delimiter=None,
                       **kwargs):
        """
        Get the list of containers of an account.

        :param account: account from which to get the container list
        :type account: `str`
        :keyword limit: maximum number of results to return
        :type limit: `int`
        :keyword marker: name of the container from where to start the listing
        :type marker: `str`
        :keyword end_marker: name of the container before which to stop
            the listing
        :keyword prefix: list only the containers whose name starts
            with the prefix
        :keyword delimiter: character used to group containers sharing
            a common prefix
        :return: the list of containers of an account
        :rtype: `list` of items (`list`) with 4 fields:
            name, number of objects, number of bytes, and 1 if the item
            is a prefix or 0 if the item is actually a container
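
        A sketch of iterating over the listing (the account name is
        illustrative):

        .. python::

            for name, nb_objects, nb_bytes, is_prefix in \
                    api.container_list("my_account", limit=100):
                print(name)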
        """
        resp = self.account.container_list(account, limit=limit,
                                           marker=marker,
                                           end_marker=end_marker,
                                           prefix=prefix,
                                           delimiter=delimiter,
                                           **kwargs)
        return resp["listing"]

    @handle_container_not_found
    def container_show(self, account, container, **kwargs):
        """
        Get information about a container (user properties).

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :returns: a `dict` with "properties" containing a `dict`
            of user properties.
        """
        return self.container.container_show(account, container, **kwargs)

    @handle_container_not_found
    def container_snapshot(self, account, container, dst_account,
                           dst_container, batch=100, **kwargs):
        """
        Create a copy of the container (only the content of the database)

        :param account: account of the container to snapshot
        :type account: `str`
        :param container: name of the container to snapshot
        :type container: `str`
        :param dst_account: account in which the snapshot will be created
        :type dst_account: `str`
        :param dst_container: name of the snapshot
        :type dst_container: `str`
        """
        try:
            self.container.container_freeze(account, container)
            self.container.container_snapshot(
                account, container, dst_account, dst_container)
            resp = self.object_list(dst_account, dst_container)
            obj_gen = resp['objects']
            target_beans = []
            copy_beans = []
            for obj in obj_gen:
                data = self.object_locate(
                    account, container, obj["name"])
                chunks = [chunk['url'] for chunk in data[1]]
                copies = self._generate_copy(chunks)
                fullpath = self._generate_fullpath(
                    dst_account, dst_container, obj['name'], obj['version'])
                self._send_copy(chunks, copies, fullpath[0])
                t_beans, c_beans = self._prepare_update_meta2(
                    data[1], copies, dst_account, dst_container,
                    obj['content'])
                target_beans.extend(t_beans)
                copy_beans.extend(c_beans)
                if len(target_beans) > batch:
                    self.container.container_raw_update(
                        target_beans, copy_beans,
                        dst_account, dst_container,
                        frozen=True)
                    target_beans = []
                    copy_beans = []
            if target_beans:
                self.container.container_raw_update(
                    target_beans, copy_beans,
                    dst_account, dst_container,
                    frozen=True)
        finally:
            self.container.container_enable(account, container)

    @handle_container_not_found
    def container_get_properties(self, account, container, properties=None,
                                 **kwargs):
        """
        Get information about a container (user and system properties).

        :param account: account in which the container is
        :type account: `str`
        :param container: name of the container
        :type container: `str`
        :param properties: *ignored*
        :returns: a `dict` with "properties" and "system" entries,
            containing respectively a `dict` of user properties and
            a `dict` of system properties.
        """
        return self.container.container_get_properties(account, container,
                                                       properties=properties,
                                                       **kwargs)

    @handle_container_not_found
    def container_set_properties(self, account, container, properties=None,
                                 clear=False, **kwargs):
        """
        Set properties on a container.

        :param account: name of the account
        :type account: `str`
        :param container: name of the container where to set properties
        :type container: `str`
        :param properties: a dictionary of properties
        :type properties: `dict`
        :param clear: if True, remove the previous user properties
            before setting the new ones
        :type clear: `bool`
        :keyword system: dictionary of system properties to set
        """
        return self.container.container_set_properties(
            account, container, properties,
            clear=clear, **kwargs)

    @handle_container_not_found
    def container_del_properties(self, account, container, properties,
                                 **kwargs):
        """
        Delete properties of a container.

        :param account: name of the account
        :type account: `str`
        :param container: name of the container to deal with
        :type container: `str`
        :param properties: a list of property keys
        :type properties: `list`
        """
        return self.container.container_del_properties(
            account, container, properties, **kwargs)

    def container_update(self, account, container, metadata, clear=False,
                         **kwargs):
        warnings.warn("You'd better use container_set_properties()",
                      DeprecationWarning)
        if not metadata:
            self.container_del_properties(
                account, container, [], **kwargs)
        else:
            self.container_set_properties(
                account, container, metadata, clear, **kwargs)

    @handle_container_not_found
    @ensure_headers
    @ensure_request_id
    def object_create(self, account, container, file_or_path=None, data=None,
                      etag=None, obj_name=None, mime_type=None,
                      metadata=None, policy=None, key_file=None,
                      append=False, properties=None, **kwargs):
        """
        Create an object, or append data to an existing object, in
        *container* of *account*. The data is taken from either *data*
        (`str` or `generator`) or *file_or_path* (path to a file or
        file-like object).
        The object will be named after *obj_name* if specified, or after
        the base name of *file_or_path*.

        :param account: name of the account where to create the object
        :type account: `str`
        :param container: name of the container where to create the object
        :type container: `str`
        :param file_or_path: file-like object or path to a file from which
            to read object data
        :type file_or_path: `str` or file-like object
        :param data: object data (if `file_or_path` is not set)
        :type data: `str` or `generator`
        :keyword etag: entity tag of the object
        :type etag: `str`
        :keyword obj_name: name of the object to create. If not set, will use
            the base name of `file_or_path`.
        :keyword mime_type: MIME type of the object
        :type mime_type: `str`
        :keyword properties: a dictionary of properties
        :type properties: `dict`
        :keyword policy: name of the storage policy
        :type policy: `str`
        :keyword key_file: path to the file containing the Backblaze
            credentials (only for the Backblaze storage backend)
        :param append: if set, data will be appended to the existing
            object (the object will be created if it does not exist)
        :type append: `bool`

        :returns: `list` of chunks, size and hash of what has been uploaded
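
        A sketch of the two main upload paths (names are illustrative):

        .. python::

            # Upload from a string of bytes
            api.object_create("my_account", "my_container",
                              data="Hello", obj_name="hello.txt")
            # Upload from a local file, named after its base name
            api.object_create("my_account", "my_container",
                              file_or_path="/tmp/hello.txt")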
        """
        if (data, file_or_path) == (None, None):
            raise exc.MissingData()
        src = data if data is not None else file_or_path
        if src is file_or_path:
            if isinstance(file_or_path, basestring):
                if not os.path.exists(file_or_path):
                    raise exc.FileNotFound("File '%s' not found." %
                                           file_or_path)
                file_name = os.path.basename(file_or_path)
            else:
                try:
                    file_name = os.path.basename(file_or_path.name)
                except AttributeError:
                    file_name = None
            obj_name = obj_name or file_name
        elif isgenerator(src):
            file_or_path = GeneratorIO(src)
            src = file_or_path
        if not obj_name:
            raise exc.MissingName(
                "No name for the object has been specified"
            )

        sysmeta = {'mime_type': mime_type,
                   'etag': etag}
        if metadata:
            warnings.warn(
                "You'd better use 'properties' instead of 'metadata'",
                DeprecationWarning, stacklevel=4)
            if not properties:
                properties = metadata
            else:
                properties.update(metadata)

        if src is data:
            return self._object_create(
                account, container, obj_name, BytesIO(data), sysmeta,
                properties=properties, policy=policy,
                key_file=key_file, append=append, **kwargs)
        elif hasattr(file_or_path, "read"):
            return self._object_create(
                account, container, obj_name, src, sysmeta,
                properties=properties, policy=policy, key_file=key_file,
                append=append, **kwargs)
        else:
            with open(file_or_path, "rb") as f:
                return self._object_create(
                    account, container, obj_name, f, sysmeta,
                    properties=properties, policy=policy,
                    key_file=key_file, append=append, **kwargs)

    @ensure_headers
    @ensure_request_id
    def object_touch(self, account, container, obj,
                     version=None, **kwargs):
        """
        Trigger a notification about an object
        (as if it just had been created).

        :param account: name of the account the object belongs to
        :type account: `str`
        :param container: name of the container the object belongs to
        :type container: `str`
        :param obj: name of the object to touch
        """
        self.container.content_touch(account, container, obj,
                                     version=version, **kwargs)

    def object_drain(self, account, container, obj,
                     version=None, **kwargs):
        """
        Remove all the chunks of a content, but keep all the metadata.

        :param account: name of the account where the object is present
        :type account: `str`
        :param container: name of the container where the object is present
        :type container: `str`
        :param obj: name of the object to drain
        """
        self.container.content_drain(account, container, obj,
                                     version=version, **kwargs)

    @handle_object_not_found
    @ensure_headers
    @ensure_request_id
    def object_delete(self, account, container, obj,
                      version=None, **kwargs):
        """
        Delete an object from a container. If versioning is enabled and no
        version is specified, the object will be marked as deleted but not
        actually deleted.

        :param account: name of the account the object belongs to
        :type account: `str`
        :param container: name of the container the object belongs to
        :type container: `str`
        :param obj: name of the object to delete
        :param version: version of the object to delete
        :returns: True on success
        """
        return self.container.content_delete(account, container, obj,
                                             version=version, **kwargs)

    @ensure_headers
    @ensure_request_id
    def object_delete_many(self, account, container, objs, **kwargs):
        return self.container.content_delete_many(
            account, container, objs, **kwargs)

    @handle_object_not_found
    @ensure_headers
    @ensure_request_id
    def object_truncate(self, account, container, obj,
                        version=None, size=None, **kwargs):
        """
        Truncate an object at the specified size. Only shrinking is
        supported. A partial download and re-upload may occur if the
        size is not on a chunk boundary.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :param version: version of the object to query
        :param size: new size of object
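
        A sketch (names are illustrative):

        .. python::

            # Shrink the object to its first KiB
            api.object_truncate("my_account", "my_container", "my_object",
                                size=1024)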
        """

        # Code copied from object_fetch (should be factored out!)
        meta, raw_chunks = self.object_locate(
            account, container, obj, version=version, **kwargs)
        chunk_method = meta['chunk_method']
        storage_method = STORAGE_METHODS.load(chunk_method)
        chunks = _sort_chunks(raw_chunks, storage_method.ec)

        for pos in sorted(chunks.keys()):
            chunk = chunks[pos][0]
            if (size >= chunk['offset']
                    and size <= chunk['offset'] + chunk['size']):
                break
        else:
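            # for-else: the loop completed without break, so no chunk
            # position covers the requested size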
            raise exc.OioException("No chunk found at position %d" % size)

        if chunk['offset'] != size:
            # retrieve partial chunk
            ret = self.object_fetch(account, container, obj,
                                    version=version,
                                    ranges=[(chunk['offset'], size-1)])
            # TODO implement a proper object_update
            pos = int(chunk['pos'].split('.')[0])
            self.object_create(account, container, obj_name=obj,
                               data=ret[1], meta_pos=pos,
                               content_id=meta['id'])

        return self.container.content_truncate(account, container, obj,
                                               version=version, size=size,
                                               **kwargs)

    @handle_container_not_found
    def object_list(self, account, container, limit=None, marker=None,
                    delimiter=None, prefix=None, end_marker=None,
                    properties=False, versions=False, deleted=False,
                    **kwargs):
        """
        Lists objects inside a container.

        :param properties: if True, list object properties along with objects
        :param versions: if True, list all versions of objects
        :param deleted: if True, list also the deleted objects

        :returns: a dict which contains
           * 'objects': the list of objects
           * 'prefixes': common prefixes (only if delimiter and prefix are set)
           * 'properties': a dict of container properties
           * 'system': a dict of system metadata
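
        A sketch of iterating over the result (names are illustrative):

        .. python::

            listing = api.object_list("my_account", "my_container",
                                      limit=100)
            for obj in listing['objects']:
                print(obj['name'])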
        """
        _, resp_body = self.container.content_list(
            account, container, limit=limit, marker=marker,
            end_marker=end_marker, prefix=prefix, delimiter=delimiter,
            properties=properties, versions=versions, deleted=deleted,
            **kwargs)

        for obj in resp_body['objects']:
            mtype = obj.get('mime-type')
            if mtype is not None:
                obj['mime_type'] = mtype
                del obj['mime-type']
            version = obj.get('ver')
            if version is not None:
                obj['version'] = version
                del obj['ver']

        return resp_body

    @handle_object_not_found
    def object_locate(self, account, container, obj,
                      version=None, **kwargs):
        """
        Get a description of the object along with the list of its chunks.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :param version: version of the object to query
        :returns: a tuple with object metadata `dict` as first element
            and chunk `list` as second element
        """
        obj_meta, chunks = self.container.content_locate(
            account, container, obj, version=version, **kwargs)
        return obj_meta, chunks

    def object_analyze(self, *args, **kwargs):
        """
        :deprecated: use `object_locate`
        """
        warnings.warn("You'd better use object_locate()",
                      DeprecationWarning)
        return self.object_locate(*args, **kwargs)

    @ensure_headers
    @ensure_request_id
    def object_fetch(self, account, container, obj, version=None, ranges=None,
                     key_file=None, **kwargs):
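        """
        Fetch an object, and return its metadata along with a stream
        of its data.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to fetch
        :param version: version of the object to fetch
        :param ranges: a list of object-relative (start, end) byte ranges
        :param key_file: path to the file containing the Backblaze
            credentials (only for the Backblaze storage backend)
        :returns: a tuple with object metadata `dict` as first element
            and a data stream as second element
        """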
        meta, raw_chunks = self.object_locate(
            account, container, obj, version=version, **kwargs)
        chunk_method = meta['chunk_method']
        storage_method = STORAGE_METHODS.load(chunk_method)
        chunks = _sort_chunks(raw_chunks, storage_method.ec)
        meta['container_id'] = cid_from_name(account, container).upper()
        meta['ns'] = self.namespace
        self._patch_timeouts(kwargs)
        if storage_method.ec:
            stream = fetch_stream_ec(chunks, ranges, storage_method, **kwargs)
        elif storage_method.backblaze:
            stream = self._fetch_stream_backblaze(meta, chunks, ranges,
                                                  storage_method, key_file,
                                                  **kwargs)
        else:
            stream = fetch_stream(chunks, ranges, storage_method, **kwargs)
        return meta, stream

    @handle_object_not_found
    def object_get_properties(self, account, container, obj, **kwargs):
        return self.container.content_get_properties(account, container, obj,
                                                     **kwargs)

    @handle_object_not_found
    def object_show(self, account, container, obj, version=None, **kwargs):
        """
        Get a description of the content along with its user properties.

        :param account: name of the account in which the object is stored
        :param container: name of the container in which the object is stored
        :param obj: name of the object to query
        :returns: a `dict` describing the object

        .. python::

            {'hash': '6BF60C17CC15EEA108024903B481738F',
             'ctime': '1481031763',
             'deleted': 'False',
             'properties': {
                 u'projet': u'OpenIO-SDS'},
             'length': '43518',
             'hash_method': 'md5',
             'chunk_method': 'ec/algo=liberasurecode_rs_vand,k=6,m=3',
             'version': '1481031762951972',
             'policy': 'EC',
             'id': '20BF2194FD420500CD4729AE0B5CBC07',
             'mime_type': 'application/octet-stream',
             'name': 'Makefile'}
        """
        return self.container.content_show(account, container, obj,
                                           version=version,
                                           **kwargs)

    def object_update(self, account, container, obj, metadata,
                      version=None, clear=False, **kwargs):
        warnings.warn("You'd better use object_set_properties()",
                      DeprecationWarning, stacklevel=2)
        if clear:
            self.object_del_properties(
                account, container, obj, [], version=version, **kwargs)
        if metadata:
            self.object_set_properties(
                account, container, obj, metadata, version=version, **kwargs)

    @handle_object_not_found
    def object_set_properties(self, account, container, obj, properties,
                              version=None, **kwargs):
        return self.container.content_set_properties(
            account, container, obj, properties={'properties': properties},
            version=version, **kwargs)

    @handle_object_not_found
    def object_del_properties(self, account, container, obj, properties,
                              version=None, **kwargs):
        return self.container.content_del_properties(
            account, container, obj, properties=properties,
            version=version, **kwargs)

    def _content_preparer(self, account, container, obj_name,
                          policy=None, **kwargs):
        # TODO: optimize by asking more than one metachunk at a time
        obj_meta, first_body = self.container.content_prepare(
            account, container, obj_name, size=1, stgpol=policy,
            autocreate=True, **kwargs)
        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])

        def _fix_mc_pos(chunks, mc_pos):
            for chunk in chunks:
                raw_pos = chunk["pos"].split(".")
                if storage_method.ec:
                    chunk['num'] = int(raw_pos[1])
                    chunk["pos"] = "%d.%d" % (mc_pos, chunk['num'])
                else:
                    chunk["pos"] = str(mc_pos)

        def _metachunk_preparer():
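            # Lazily ask the proxy for one metachunk description per
            # iteration (hence the TODO above about batching).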
            mc_pos = kwargs.get('meta_pos', 0)
            _fix_mc_pos(first_body, mc_pos)
            yield first_body
            while True:
                mc_pos += 1
                _, next_body = self.container.content_prepare(
                        account, container, obj_name, 1, stgpol=policy,
                        autocreate=True, **kwargs)
                _fix_mc_pos(next_body, mc_pos)
                yield next_body

        return obj_meta, _metachunk_preparer

    def _generate_fullpath(self, account, container_name, path, version):
        return ['{0}/{1}/{2}/{3}'.format(quote_plus(account),
                                         quote_plus(container_name),
                                         quote_plus(path),
                                         version)]

    def _object_create(self, account, container, obj_name, source,
                       sysmeta, properties=None, policy=None,
                       key_file=None, **kwargs):
        self._patch_timeouts(kwargs)
        obj_meta, chunk_prep = self._content_preparer(
            account, container, obj_name,
            policy=policy, **kwargs)
        obj_meta.update(sysmeta)
        obj_meta['content_path'] = obj_name
        obj_meta['container_id'] = cid_from_name(account, container).upper()
        obj_meta['ns'] = self.namespace
        obj_meta['full_path'] = self._generate_fullpath(account, container,
                                                        obj_name,
                                                        obj_meta['version'])
        obj_meta['oio_version'] = (obj_meta.get('oio_version')
                                   or OIO_VERSION)

        # XXX content_id is necessary to update an existing object
        kwargs['content_id'] = kwargs.get('content_id', obj_meta['id'])

        storage_method = STORAGE_METHODS.load(obj_meta['chunk_method'])
        if storage_method.ec:
            handler = ECWriteHandler(
                source, obj_meta, chunk_prep, storage_method, **kwargs)
        elif storage_method.backblaze:
            backblaze_info = self._b2_credentials(storage_method, key_file)
            handler = BackblazeWriteHandler(
                source, obj_meta, chunk_prep, storage_method,
                backblaze_info, **kwargs)
        else:
            handler = ReplicatedWriteHandler(
                source, obj_meta, chunk_prep, storage_method, **kwargs)

        final_chunks, bytes_transferred, content_checksum = handler.stream()

        etag = obj_meta.get('etag')
        if etag and etag.lower() != content_checksum.lower():
            raise exc.EtagMismatch(
                "given etag %s != computed %s" % (etag, content_checksum))
        obj_meta['etag'] = content_checksum

        data = {'chunks': final_chunks, 'properties': properties or {}}
        # FIXME: we may just pass **obj_meta
        self.container.content_create(
            account, container, obj_name, size=bytes_transferred,
            checksum=content_checksum, data=data,
            stgpol=obj_meta['policy'],
            version=obj_meta['version'], mime_type=obj_meta['mime_type'],
            chunk_method=obj_meta['chunk_method'],
            **kwargs)
        return final_chunks, bytes_transferred, content_checksum

    def _b2_credentials(self, storage_method, key_file):
        key_file = key_file or '/etc/oio/sds/b2-appkey.conf'
        try:
            return BackblazeUtils.get_credentials(storage_method, key_file)
        except BackblazeUtilsException as err:
            raise exc.ConfigurationException(str(err))

    def _fetch_stream_backblaze(self, meta, chunks, ranges,
                                storage_method, key_file,
                                **kwargs):
        backblaze_info = self._b2_credentials(storage_method, key_file)
        total_bytes = 0
        current_offset = 0
        size = None
        offset = 0
        for pos in range(len(chunks)):
            if ranges:
                offset = ranges[pos][0]
                size = ranges[pos][1]

            if size is None:
                size = int(meta["length"])
            chunk_size = int(chunks[pos][0]["size"])
            if total_bytes >= size:
                break
            if current_offset + chunk_size > offset:
                if current_offset < offset:
                    _offset = offset - current_offset
                else:
                    _offset = 0
                if chunk_size + total_bytes > size:
                    _size = size - total_bytes
                else:
                    _size = chunk_size
                # Only chunks overlapping the requested range are
                # downloaded; chunks entirely before the offset are skipped.
                handler = BackblazeChunkDownloadHandler(
                    meta, chunks[pos], _offset, _size,
                    backblaze_info=backblaze_info)
                stream = handler.get_stream()
                if not stream:
                    raise exc.OioException("Error while downloading")
                total_bytes += len(stream)
                yield stream
            current_offset += chunk_size

    @handle_container_not_found
    def container_refresh(self, account, container, attempts=3, **kwargs):
        for i in range(attempts):
            try:
                self.account.container_reset(account, container, time.time())
                break
            except exc.Conflict:
                if i >= attempts - 1:
                    raise
        try:
            self.container.container_touch(account, container)
        except exc.ClientException as e:
            if e.status != 406 and e.status != 431:
                raise
            # CODE_USER_NOTFOUND or CODE_CONTAINER_NOTFOUND
            metadata = dict()
            metadata["dtime"] = time.time()
            self.account.container_update(account, container, metadata)

    @handle_account_not_found
    def account_refresh(self, account, **kwargs):
        self.account.account_refresh(account)

        containers = self.container_list(account)
        for container in containers:
            try:
                self.container_refresh(account, container[0])
            except exc.NoSuchContainer:
                # container removed in the meantime
                pass

        while containers:
            marker = containers[-1][0]
            containers = self.container_list(account, marker=marker)
            if containers:
                for container in containers:
                    try:
                        self.container_refresh(account, container[0])
                    except exc.NoSuchContainer:
                        # container removed in the meantime
                        pass

    def all_accounts_refresh(self, **kwargs):
        accounts = self.account_list()
        for account in accounts:
            try:
                self.account_refresh(account)
            except exc.NoSuchAccount:  # account removed in the meantime
                pass

    @handle_account_not_found
    def account_flush(self, account):
        self.account.account_flush(account)

    def _random_buffer(self, dictionary, n):
        return ''.join(random.choice(dictionary) for _ in range(n))

    def _generate_copy(self, chunks, random_hex=60):
        # random_hex is the number of hexadecimal characters to generate
        # for the copy path
        copies = []
        for c in chunks:
            tmp = ''.join([c[:-random_hex],
                           self._random_buffer('0123456789ABCDEF',
                                               random_hex)])
            copies.append(tmp)
        return copies

    def _send_copy(self, targets, copies, fullpath):
        headers = {"x-oio-chunk-meta-full-path": fullpath}
        if not hasattr(self, "blob_client"):
            from oio.blob.client import BlobClient
            self.blob_client = BlobClient()
        for t, c in zip(targets, copies):
            self.blob_client.chunk_link(t, c, headers=headers).status

    def _prepare_update_meta2(self, targets, copies, account, container,
                              content):
        targets_beans = []
        copies_beans = []
        for t, c in zip(targets, copies):
            targets_beans.append(self._meta2bean(t['url'], t, content))
            copies_beans.append(self._meta2bean(c, t, content))
        return targets_beans, copies_beans

    def _meta2bean(self, url, meta, content):
        return {"type": "chunk",
                "id": url,
                "hash": meta['hash'],
                "size": int(meta["size"]),
                "pos": meta["pos"],
                "content": content}
Example #2
class BlobMoverWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.last_usage_check = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.usage_target = int_value(conf.get('usage_target'), 0)
        self.usage_check_interval = int_value(conf.get('usage_check_interval'),
                                              3600)
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                               30)
        self.max_bytes_per_second = int_value(conf.get('bytes_per_second'),
                                              10000000)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)

    def mover_pass(self):
        self.namespace, self.address = check_volume(self.volume)

        start_time = report_time = time.time()

        total_errors = 0
        mover_time = 0

        paths = paths_gen(self.volume)

        for path in paths:
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                used, total = statfs(self.volume)
                usage = (float(used) / total) * 100
                if usage <= self.usage_target:
                    self.logger.info(
                        'current usage %.2f%%: target reached (%.2f%%)', usage,
                        self.usage_target)
                    self.last_usage_check = now
                    break

            self.safe_chunk_move(path)
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(mover_time).2f '
                    '%(mover_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'mover_time': mover_time,
                        'mover_rate': mover_time / (now - start_time)
                    })
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            mover_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(mover_time).2f '
            '%(mover_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'mover_time': mover_time,
                'mover_rate': mover_time / elapsed
            })

    def safe_chunk_move(self, path):
        try:
            self.chunk_move(path)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while moving chunk %s: %s', path, e)
        self.passes += 1

    def load_chunk_metadata(self, path):
        with open(path) as f:
            return read_chunk_metadata(f)

    def chunk_move(self, path):
        meta = self.load_chunk_metadata(path)
        content_cid = meta['content_cid']
        content_path = meta['content_path']

        chunk_url = 'http://%s/%s' % \
            (self.address, meta['chunk_id'])

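        # Locate the content owning this chunk, ask meta2 for a spare
        # location, copy the chunk there, swap the chunk beans in meta2,
        # then delete the original chunk.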
        try:
            _, data = self.container_client.content_show(cid=content_cid,
                                                         path=content_path)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')
        current_chunk = None
        notin = []
        for c in data:
            if c['pos'] == meta['chunk_pos']:
                notin.append(c)
        for c in notin:
            if c['url'] == chunk_url:
                current_chunk = c
                notin.remove(c)
        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')
        spare_data = {'notin': notin, 'broken': [current_chunk], 'size': 0}
        spare_resp = self.container_client.content_spare(cid=content_cid,
                                                         path=content_path,
                                                         data=spare_data)

        new_chunk = spare_resp['chunks'][0]
        self.blob_client.chunk_copy(current_chunk['url'], new_chunk['id'])

        old = [{
            'type': 'chunk',
            'id': current_chunk['url'],
            'hash': meta['chunk_hash'],
            'size': int(meta['chunk_size'])
        }]
        new = [{
            'type': 'chunk',
            'id': new_chunk['id'],
            'hash': meta['chunk_hash'],
            'size': int(meta['chunk_size'])
        }]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(cid=content_cid,
                                                   data=update_data)

        self.blob_client.chunk_delete(current_chunk['url'])

        self.logger.info('moved chunk %s to %s', current_chunk['url'],
                         new_chunk['id'])
Example #3
class Content(object):
    def __init__(self, conf, container_id, metadata, chunks, storage_method):
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.storage_method = storage_method
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(self.conf)
        self.content_id = self.metadata["id"]
        self.stgpol = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.checksum = self.metadata["hash"]
        self.mime_type = self.metadata["mime_type"]
        self.chunk_method = self.metadata["chunk_method"]

    def _get_spare_chunk(self, chunks_notin, chunks_broken):
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id,
                content=self.content_id,
                data=spare_data,
                stgpol=self.stgpol)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        url_list = []
        for c in spare_resp["chunks"]:
            url_list.append(c["id"])

        return url_list

    def _update_spare_chunk(self, current_chunk, new_url):
        old = [{
            'type': 'chunk',
            'id': current_chunk.url,
            'hash': current_chunk.checksum,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        new = [{
            'type': 'chunk',
            'id': new_url,
            'hash': current_chunk.checksum,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(cid=self.container_id,
                                                   data=update_data)

    def _create_object(self):
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol,
                                             size=self.length,
                                             checksum=self.checksum,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id, allow_same_rawx=False):
        raise NotImplementedError()

    def create(self, stream):
        raise NotImplementedError()

    def fetch(self):
        raise NotImplementedError()

    def delete(self):
        self.container_client.content_delete(cid=self.container_id,
                                             path=self.path)

    def move_chunk(self, chunk_id):
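        # Chunks of the same metachunk are passed as "notin" so that the
        # spare chunk is allocated on a different rawx service.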
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None:
            raise OrphanChunk("Chunk not found in content")

        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id).all()

        spare_urls = self._get_spare_chunk(other_chunks, [current_chunk])

        self.logger.debug("copy chunk from %s to %s", current_chunk.url,
                          spare_urls[0])
        self.blob_client.chunk_copy(current_chunk.url, spare_urls[0])

        self._update_spare_chunk(current_chunk, spare_urls[0])

        try:
            self.blob_client.chunk_delete(current_chunk.url)
        except Exception:
            self.logger.warn("Failed to delete chunk %s", current_chunk.url)

        current_chunk.url = spare_urls[0]

        return current_chunk.raw()
Example #4
class Content(object):
    def __init__(self, conf, container_id, metadata, chunks, stgpol_args):
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.stgpol_args = stgpol_args
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(self.conf)
        self.blob_client = BlobClient()
        self.session = requests.Session()
        self.content_id = self.metadata["id"]
        self.stgpol_name = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.hash = self.metadata["hash"]
        self.mime_type = self.metadata["mime-type"]
        self.chunk_method = self.metadata["chunk-method"]

    def _meta2_get_spare_chunk(self, chunks_notin, chunks_broken):
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id, content=self.content_id,
                data=spare_data, stgpol=self.stgpol_name)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        url_list = []
        for c in spare_resp["chunks"]:
            url_list.append(c["id"])

        return url_list

    def _meta2_update_spare_chunk(self, current_chunk, new_url):
        old = [{'type': 'chunk',
                'id': current_chunk.url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        new = [{'type': 'chunk',
                'id': new_url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=self.container_id, data=update_data)

    def _meta2_create_object(self):
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol_name,
                                             size=self.length,
                                             checksum=self.hash,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id):
        raise NotImplementedError()

    def upload(self, stream):
        try:
            self._upload(stream)
        except Exception as e:
            for chunk in self.chunks:
                try:
                    self.blob_client.chunk_delete(chunk.url)
                except:
                    pass
            raise e

    def _upload(self, stream):
        raise NotImplementedError()

    def download(self):
        raise NotImplementedError()
Example #5
class BlobMoverWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.last_usage_check = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.usage_target = int_value(
            conf.get('usage_target'), 0)
        self.usage_check_interval = int_value(
            conf.get('usage_check_interval'), 3600)
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)

    def mover_pass(self):
        self.namespace, self.address = check_volume(self.volume)

        start_time = report_time = time.time()

        total_errors = 0
        mover_time = 0

        paths = paths_gen(self.volume)

        for path in paths:
            loop_time = time.time()

            now = time.time()
            if now - self.last_usage_check >= self.usage_check_interval:
                used, total = statfs(self.volume)
                usage = (float(used) / total) * 100
                if usage <= self.usage_target:
                    self.logger.info(
                        'current usage %.2f%%: target reached (%.2f%%)', usage,
                        self.usage_target)
                    self.last_usage_check = now
                    break

            self.safe_chunk_move(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(mover_time).2f '
                    '%(mover_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'mover_time': mover_time,
                        'mover_rate': mover_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            mover_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(mover_time).2f '
            '%(mover_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'mover_time': mover_time,
                'mover_rate': mover_time / elapsed
            }
        )

    def safe_chunk_move(self, path):
        try:
            self.chunk_move(path)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while moving chunk %s: %s', path, e)
        self.passes += 1

    def load_chunk_metadata(self, path):
        with open(path) as f:
            return read_chunk_metadata(f)

    def chunk_move(self, path):
        meta = self.load_chunk_metadata(path)
        content_cid = meta['content_cid']
        content_path = meta['content_path']

        chunk_url = 'http://%s/%s' % \
            (self.address, meta['chunk_id'])

        try:
            data = self.container_client.content_show(
                cid=content_cid, path=content_path)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')
        current_chunk = None
        notin = []
        for c in data:
            if c['pos'] == meta['chunk_pos']:
                notin.append(c)
        for c in notin:
            if c['url'] == chunk_url:
                current_chunk = c
                notin.remove(c)
        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')
        spare_data = {'notin': notin, 'broken': [current_chunk], 'size': 0}
        spare_resp = self.container_client.content_spare(
            cid=content_cid, path=content_path, data=spare_data)

        new_chunk = spare_resp['chunks'][0]
        self.blob_client.chunk_copy(
            current_chunk['url'], new_chunk['id'])

        old = [{'type': 'chunk',
                'id': current_chunk['url'],
                'hash': meta['chunk_hash'],
                'size': int(meta['chunk_size'])}]
        new = [{'type': 'chunk',
                'id': new_chunk['id'],
                'hash': meta['chunk_hash'],
                'size': int(meta['chunk_size'])}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=content_cid, data=update_data)

        self.blob_client.chunk_delete(current_chunk['url'])

        self.logger.info(
            'moved chunk %s to %s', current_chunk['url'], new_chunk['id'])
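
A minimal driving loop for the mover code above, as a sketch only: the
worker's class name and constructor are outside this excerpt, so the
BlobMoverWorker name and the (conf, logger, volume) signature are
assumptions borrowed from the rebuilder example further down, and
paths_gen is the same directory walker the other listings use.

volume = '/var/lib/oio/sds/NS/rawx-1'  # placeholder volume path
worker = BlobMoverWorker({'namespace': 'OPENIO'}, None, volume)
# Feed every chunk file of the volume to safe_chunk_move(); errors are
# counted on the worker instead of aborting the whole pass.
for path in paths_gen(volume):
    worker.safe_chunk_move(path)
print('%d chunks processed, %d errors' % (worker.passes, worker.errors))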
Example #6
class Content(object):
    def __init__(self, conf, container_id, metadata, chunks, stgpol_args):
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.stgpol_args = stgpol_args
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(self.conf)
        self.blob_client = BlobClient()
        self.session = requests.Session()
        self.content_id = self.metadata["id"]
        self.stgpol_name = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.hash = self.metadata["hash"]
        self.mime_type = self.metadata["mime-type"]
        self.chunk_method = self.metadata["chunk-method"]

    def _meta2_get_spare_chunk(self, chunks_notin, chunks_broken):
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id, content=self.content_id,
                data=spare_data, stgpol=self.stgpol_name)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        url_list = []
        for c in spare_resp["chunks"]:
            url_list.append(c["id"])

        return url_list

    def _meta2_update_spare_chunk(self, current_chunk, new_url):
        old = [{'type': 'chunk',
                'id': current_chunk.url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        new = [{'type': 'chunk',
                'id': new_url,
                'hash': current_chunk.hash,
                'size': current_chunk.size,
                'pos': current_chunk.pos,
                'content': self.content_id}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=self.container_id, data=update_data)

    def _meta2_create_object(self):
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol_name,
                                             size=self.length,
                                             checksum=self.hash,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id):
        raise NotImplementedError()

    def upload(self, stream):
        try:
            self._upload(stream)
        except:
            # Keep the stack trace
            exc_info = sys.exc_info()
            for chunk in self.chunks:
                try:
                    self.blob_client.chunk_delete(chunk.url)
                except Exception:
                    self.logger.warn("Failed to delete %s", chunk.url)
            # Raise with the original stack trace
            raise exc_info[0], exc_info[1], exc_info[2]

    def _upload(self, stream):
        raise NotImplementedError()

    def download(self):
        raise NotImplementedError()

    def delete(self):
        self.container_client.content_delete(cid=self.container_id,
                                             path=self.path)

    def move_chunk(self, chunk_id):
        current_chunk = self.chunks.filter(id=chunk_id).one()
        if current_chunk is None:
            raise OrphanChunk("Chunk not found in content")

        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id).all()

        spare_urls = self._meta2_get_spare_chunk(other_chunks,
                                                 [current_chunk])

        self.logger.debug("copy chunk from %s to %s",
                          current_chunk.url, spare_urls[0])
        self.blob_client.chunk_copy(current_chunk.url, spare_urls[0])

        self._meta2_update_spare_chunk(current_chunk, spare_urls[0])

        try:
            self.blob_client.chunk_delete(current_chunk.url)
        except Exception:
            self.logger.warn("Failed to delete chunk %s", current_chunk.url)

        current_chunk.url = spare_urls[0]

        return current_chunk.raw()
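
A hedged usage sketch for the class above. The metadata and chunks
structures normally come from ContainerClient.content_show(); the
values below are placeholders shaped after the fields the constructor
reads, and the chunk id 'C1' is hypothetical.

conf = {'namespace': 'OPENIO'}
container_id = '0' * 64  # placeholder container id
metadata = {'id': '0' * 32, 'policy': 'SINGLE', 'name': 'my_object',
            'length': '124', 'version': '1', 'hash': '0' * 32,
            'mime-type': 'application/octet-stream',
            'chunk-method': 'plain/nb_copy=1'}
chunks = [{'url': 'http://127.0.0.1:6011/C1', 'pos': '0',
           'size': 124, 'hash': '0' * 32}]
content = Content(conf, container_id, metadata, chunks, stgpol_args={})
# Pick a spare rawx, copy the data there, update the meta2 entry, then
# delete the old copy; the updated chunk description is returned.
moved = content.move_chunk('C1')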
Example #7
class BlobRegistratorWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger
        self.volume = volume
        self.namespace = self.conf["namespace"]
        self.volume_ns, self.volume_id = check_volume(self.volume)
        c = dict()
        c['namespace'] = self.namespace
        self.client = ContainerClient(c, logger=self.logger)
        self.report_interval = conf.get(
            "report_period", default_report_interval)

        actions = {
            'update': BlobRegistratorWorker._update_chunk,
            'insert': BlobRegistratorWorker._insert_chunk,
            'check': BlobRegistratorWorker._check_chunk,
        }
        self.action = actions[conf.get("action", "check")]

    def pass_with_lock(self):
        with lock_volume(self.volume):
            return self.pass_without_lock()

    def pass_without_lock(self):
        last_report = now()
        count, success, fail = 0, 0, 0
        if self.namespace != self.volume_ns:
            self.logger.warn("Forcing the NS to [%s] (previously [%s])",
                             self.namespace, self.volume_ns)

        self.logger.info("START %s", self.volume)

        paths = paths_gen(self.volume)
        for path in paths:
            # Action
            try:
                with open(path) as f:
                    meta = read_chunk_metadata(f)
                    self.action(self, path, f, meta)
                    success = success + 1
            except NotFound as e:
                fail = fail + 1
                self.logger.info("ORPHAN %s/%s in %s/%s %s",
                                 meta['content_id'], meta['chunk_id'],
                                 meta['container_id'], meta['content_path'],
                                 str(e))
            except Conflict as e:
                fail = fail + 1
                self.logger.info("ALREADY %s/%s in %s/%s %s",
                                 meta['content_id'], meta['chunk_id'],
                                 meta['container_id'], meta['content_path'],
                                 str(e))
            except Exception as e:
                fail = fail + 1
                self.logger.warn("ERROR %s/%s in %s/%s %s",
                                 meta['content_id'], meta['chunk_id'],
                                 meta['container_id'], meta['content_path'],
                                 str(e))
            count = count + 1

            # TODO(jfs): do the throttling

            # periodical reporting
            t = now()
            if t - last_report > self.report_interval:
                self.logger.info("STEP %d ok %d ko %d",
                                 count, success, fail)
                last_report = t

        self.logger.info("FINAL %s %d ok %d ko %d",
                         self.volume, count, success, fail)

    def _check_chunk(self, path, f, meta):
        raise Exception("CHECK not yet implemented")

    def _insert_chunk(self, path, f, meta):
        cid = meta['container_id']
        chunkid = basename(path)
        bean = meta2bean(self.volume_id, meta)
        self.client.container_raw_insert(bean, cid=cid)
        self.logger.info("inserted %s/%s in %s/%s",
                         meta['content_id'], chunkid, cid,
                         meta['content_path'])

    def _update_chunk(self, path, f, meta):
        cid = meta['container_id']
        chunkid = basename(path)
        if str(meta['chunk_pos']).startswith('0'):
            if not self.conf['first']:
                self.logger.info("skip %s/%s from %s/%s",
                                 meta['content_id'], chunkid, cid,
                                 meta['content_path'])
                return
        pre = meta2bean(self.volume_id, meta)
        post = meta2bean(self.volume_id, meta)
        self.client.container_raw_update(pre, post, cid=cid)
        self.logger.info("updated %s/%s in %s/%s",
                         meta['content_id'], chunkid, cid,
                         meta['content_path'])
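
A short driving sketch for the worker above, under stated assumptions:
the conf keys mirror what the constructor and _update_chunk read, the
volume path is a placeholder, and get_logger is the helper already used
by the other listings.

conf = {'namespace': 'OPENIO', 'action': 'insert', 'first': True}
worker = BlobRegistratorWorker(conf, get_logger(conf),
                               '/var/lib/oio/sds/NS/rawx-1')
# Hold the volume lock so no other crawler works on the same rawx,
# then replay every chunk file into its container.
worker.pass_with_lock()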
Example #8
class Content(object):
    def __init__(self, conf, container_id, metadata, chunks, stgpol_args):
        self.conf = conf
        self.container_id = container_id
        self.metadata = metadata
        self.chunks = ChunksHelper(chunks)
        self.stgpol_args = stgpol_args
        self.logger = get_logger(self.conf)
        self.cs_client = ConscienceClient(conf)
        self.container_client = ContainerClient(self.conf)
        self.blob_client = BlobClient()
        self.session = requests.Session()
        self.content_id = self.metadata["id"]
        self.stgpol_name = self.metadata["policy"]
        self.path = self.metadata["name"]
        self.length = int(self.metadata["length"])
        self.version = self.metadata["version"]
        self.hash = self.metadata["hash"]
        self.mime_type = self.metadata["mime-type"]
        self.chunk_method = self.metadata["chunk-method"]

    def _meta2_get_spare_chunk(self, chunks_notin, chunks_broken):
        spare_data = {
            "notin": ChunksHelper(chunks_notin, False).raw(),
            "broken": ChunksHelper(chunks_broken, False).raw()
        }
        try:
            spare_resp = self.container_client.content_spare(
                cid=self.container_id,
                content=self.content_id,
                data=spare_data,
                stgpol=self.stgpol_name)
        except ClientException as e:
            raise exc.SpareChunkException("No spare chunk (%s)" % e.message)

        url_list = []
        for c in spare_resp["chunks"]:
            url_list.append(c["id"])

        return url_list

    def _meta2_update_spare_chunk(self, current_chunk, new_url):
        old = [{
            'type': 'chunk',
            'id': current_chunk.url,
            'hash': current_chunk.hash,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        new = [{
            'type': 'chunk',
            'id': new_url,
            'hash': current_chunk.hash,
            'size': current_chunk.size,
            'pos': current_chunk.pos,
            'content': self.content_id
        }]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(cid=self.container_id,
                                                   data=update_data)

    def _meta2_create_object(self):
        self.container_client.content_create(cid=self.container_id,
                                             path=self.path,
                                             content_id=self.content_id,
                                             stgpol=self.stgpol_name,
                                             size=self.length,
                                             checksum=self.hash,
                                             version=self.version,
                                             chunk_method=self.chunk_method,
                                             mime_type=self.mime_type,
                                             data=self.chunks.raw())

    def rebuild_chunk(self, chunk_id):
        raise NotImplementedError()

    def upload(self, stream):
        try:
            self._upload(stream)
        except Exception:
            # Keep the original exception and stack trace while the
            # partially uploaded chunks are cleaned up
            exc_info = sys.exc_info()
            for chunk in self.chunks:
                try:
                    self.blob_client.chunk_delete(chunk.url)
                except Exception:
                    self.logger.warn("Failed to delete %s", chunk.url)
            # Raise with the original stack trace
            raise exc_info[0], exc_info[1], exc_info[2]

    def _upload(self, stream):
        raise NotImplementedError()

    def download(self):
        raise NotImplementedError()
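
Content is effectively abstract in this listing: _upload(), download()
and rebuild_chunk() are left to subclasses (the replication and erasure
coding implementations). A hypothetical minimal subclass, for
illustration only, showing the contract a real implementation fulfils:

class NullContent(Content):
    # Hypothetical subclass: drains the caller's stream without storing
    # anything, then registers the object in the meta2 database.
    def _upload(self, stream):
        for _ in stream:
            pass
        self._meta2_create_object()

    def download(self):
        raise exc.UnrecoverableContent('null content holds no data')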
Example #9
class BlobRegistrator(object):
    DEFAULT_CHUNKS_PER_SECOND = 30
    DEFAULT_REPORT_INTERVAL = 3600
    BEAN_TYPES = ('alias', 'header', 'chunk')

    def __init__(self, conf, logger, volume, container_ids):
        self.conf = conf
        self.logger = logger
        self.volume = volume
        self.volume_ns, self.volume_id = check_volume(self.volume)
        self.container_ids = container_ids or list()
        self.container_ids = [
            container_id.upper() for container_id in self.container_ids
        ]

        self.namespace = self.conf['namespace']
        if self.namespace != self.volume_ns:
            raise ValueError(
                'Namespace (%s) mismatch with volume namespace (%s)' % (
                    self.namespace, self.volume_ns))

        # action
        self.action_name = self.conf['action'].lower()
        if self.action_name == 'insert':
            self.action = self._insert_bean
        elif self.action_name == 'update':
            self.action = self._update_bean
        elif self.action_name == 'check':
            self.action = self._check_bean
        else:
            raise ValueError('Unknown action (%s)' % self.action_name)

        # speed
        self.chunks_run_time = 0
        self.max_chunks_per_second = int_value(
            self.conf.get('chunks_per_second'), self.DEFAULT_CHUNKS_PER_SECOND)

        # counters
        self.chunks_processed = 0
        self.chunk_errors = 0
        self.beans_processed = dict()
        self.bean_successes = dict()
        self.bean_already_exists = dict()
        self.bean_orphans = dict()
        self.bean_errors = dict()
        for bean_type in self.BEAN_TYPES:
            self.beans_processed[bean_type] = 0
            self.bean_successes[bean_type] = 0
            self.bean_already_exists[bean_type] = 0
            self.bean_orphans[bean_type] = 0
            self.bean_errors[bean_type] = 0

        # report
        self.start_time = 0
        self.last_report = 0
        self.report_interval = int_value(conf.get('report_interval'),
                                         self.DEFAULT_REPORT_INTERVAL)

        self.client = ContainerClient({'namespace': self.namespace},
                                      logger=self.logger)
        self.ctime = int(time.time())

    def _beans_from_meta(self, meta):
        return [
            {
                'type': 'alias',
                'name': meta['content_path'],
                'version': int(meta['content_version']),
                'ctime': self.ctime,
                'mtime': self.ctime,
                'deleted': False,
                'header': meta['content_id']
            }, {
                'type': 'header',
                'id': meta['content_id'],
                'size': 0,
                'ctime': self.ctime,
                'mtime': self.ctime,
                'policy': meta['content_policy'],
                'chunk-method': meta['content_chunkmethod'],
                'mime-type': 'application/octet-stream'
            }, {
                'type': 'chunk',
                'id': 'http://' + self.volume_id + '/' + meta['chunk_id'],
                'hash': meta.get('metachunk_hash') or meta['chunk_hash'],
                'size': int(meta['chunk_size']),
                'ctime': self.ctime,
                'pos': meta['chunk_pos'],
                'content': meta['content_id']
            }]

    def _check_bean(self, meta, bean):
        raise Exception("CHECK not yet implemented")

    def _insert_bean(self, meta, bean):
        self.client.container_raw_insert(bean, cid=meta['container_id'])

    def _update_bean(self, meta, bean):
        self.client.container_raw_update([bean], [bean],
                                         cid=meta['container_id'])

    def _get_report(self, status, end_time):
        time_since_last_report = (end_time - self.last_report) or 0.00001
        total_time = (end_time - self.start_time) or 0.00001
        report = (
            '%(status)s volume=%(volume)s '
            'start_time=%(start_time)s %(total_time).2fs '
            'last_report=%(last_report)s %(time_since_last_report).2fs '
            'chunks_processed=%(chunks_processed)d '
            '%(chunks_processed_rate).2f/s '
            'chunk_errors=%(chunk_errors)d '
            '%(chunk_errors_rate).2f%% ' % {
                'status': status,
                'volume': self.volume_id,
                'start_time':
                    datetime.fromtimestamp(int(self.start_time)).isoformat(),
                'total_time': total_time,
                'last_report':
                    datetime.fromtimestamp(int(self.last_report)).isoformat(),
                'time_since_last_report': time_since_last_report,
                'chunks_processed': self.chunks_processed,
                'chunks_processed_rate': self.chunks_processed / total_time,
                'chunk_errors': self.chunk_errors,
                'chunk_errors_rate': 100 * self.chunk_errors /
                    float(self.chunks_processed or 1),
            })
        for bean_type in self.BEAN_TYPES:
            report = (
                '%(report)s '
                'bean_%(bean_type)s_processed=%(beans_processed)d '
                '%(beans_processed_rate).2f/s '
                'bean_%(bean_type)s_successes=%(bean_successes)d '
                '%(bean_successes_rate).2f%% '
                'bean_%(bean_type)s_already_exists=%(bean_already_exists)d '
                '%(bean_already_exists_rate).2f%% '
                'bean_%(bean_type)s_orphans=%(bean_orphans)d '
                '%(bean_orphans_rate).2f%% '
                'bean_%(bean_type)s_errors=%(bean_errors)d '
                '%(bean_errors_rate).2f%%' % {
                    'report': report,
                    'bean_type': bean_type,
                    'beans_processed': self.beans_processed[bean_type],
                    'beans_processed_rate':
                        self.beans_processed[bean_type] / total_time,
                    'bean_successes': self.bean_successes[bean_type],
                    'bean_successes_rate':
                        100 * self.bean_successes[bean_type] /
                        float(self.beans_processed[bean_type] or 1),
                    'bean_already_exists':
                        self.bean_already_exists[bean_type],
                    'bean_already_exists_rate':
                        100 * self.bean_already_exists[bean_type] /
                        float(self.beans_processed[bean_type] or 1),
                    'bean_orphans': self.bean_orphans[bean_type],
                    'bean_orphans_rate':
                        100 * self.bean_orphans[bean_type] /
                        float(self.beans_processed[bean_type] or 1),
                    'bean_errors': self.bean_errors[bean_type],
                    'bean_errors_rate':
                        100 * self.bean_errors[bean_type] /
                        float(self.beans_processed[bean_type] or 1)
                })
        return report

    def log_report(self, status, force=False):
        end_time = time.time()
        if force or (end_time - self.last_report >= self.report_interval):
            self.logger.info(self._get_report(status, end_time))
            self.last_report = end_time

    def pass_volume(self):
        self.start_time = self.last_report = time.time()
        self.log_report('START', force=True)

        paths = paths_gen(self.volume)
        for path in paths:
            try:
                self.pass_chunk_file(path)
                self.chunks_processed += 1
            except Exception as exc:
                self.logger.error(
                    'Failed to pass chunk file (chunk_file=%s): %s', path, exc)
                self.chunk_errors += 1

            self.log_report('RUN')
            self.chunks_run_time = ratelimit(self.chunks_run_time,
                                             self.max_chunks_per_second)

        self.log_report('DONE', force=True)
        return self.chunk_errors == 0 \
            and all(errors == 0 for errors in self.bean_errors.values())

    def pass_chunk_file(self, path):
        chunk_id = path.rsplit('/', 1)[-1]
        if len(chunk_id) != STRLEN_CHUNKID:
            if chunk_id.endswith(CHUNK_SUFFIX_PENDING):
                self.logger.info('Skipping pending chunk %s', path)
            else:
                self.logger.warn('WARN Not a chunk %s', path)
            return
        for char in chunk_id:
            if char not in hexdigits:
                self.logger.warn('WARN Not a chunk %s', path)
                return

        with open(path) as f:
            meta, _ = read_chunk_metadata(f, chunk_id)
            if self.container_ids \
                    and meta['container_id'] in self.container_ids:
                self.logger.debug(
                    'Skipping chunk file (container_id=%s content_path=%s '
                    'content_version=%s content_id=%s chunk_id=%s '
                    'chunk_pos=%s)', meta['container_id'],
                    meta['content_path'], meta['content_version'],
                    meta['content_id'], meta['chunk_id'], meta['chunk_pos'])
                return

            beans = self._beans_from_meta(meta)
            for bean in beans:
                try:
                    self.pass_bean(meta, bean)
                except Exception as exc:
                    self.logger.error(
                        'Failed to pass chunk file (container_id=%s '
                        'content_path=%s content_version=%s content_id=%s '
                        'chunk_id=%s chunk_pos=%s): %s', meta['container_id'],
                        meta['content_path'], meta['content_version'],
                        meta['content_id'], meta['chunk_id'],
                        meta['chunk_pos'], exc)
                    self.bean_errors[bean['type']] = \
                        self.bean_errors[bean['type']] + 1

    def pass_bean(self, meta, bean):
        try:
            self.beans_processed[bean['type']] = \
                self.beans_processed[bean['type']] + 1
            self.action(meta, bean)
            self.logger.debug(
                'Passed %s (container_id=%s content_path=%s '
                'content_version=%s content_id=%s chunk_id=%s chunk_pos=%s)',
                bean['type'], meta['container_id'], meta['content_path'],
                meta['content_version'], meta['content_id'], meta['chunk_id'],
                meta['chunk_pos'])
            self.bean_successes[bean['type']] = \
                self.bean_successes[bean['type']] + 1
        except Conflict as exc:
            self.logger.info(
                'Already exists %s (container_id=%s content_path=%s '
                'content_version=%s content_id=%s chunk_id=%s chunk_pos=%s): '
                '%s', bean['type'], meta['container_id'], meta['content_path'],
                meta['content_version'], meta['content_id'], meta['chunk_id'],
                meta['chunk_pos'], exc)
            self.bean_already_exists[bean['type']] = \
                self.bean_already_exists[bean['type']] + 1
        except NotFound as exc:
            self.logger.info(
                'Orphan %s (container_id=%s content_path=%s '
                'content_version=%s content_id=%s chunk_id=%s chunk_pos=%s): '
                '%s', bean['type'], meta['container_id'], meta['content_path'],
                meta['content_version'], meta['content_id'], meta['chunk_id'],
                meta['chunk_pos'], exc)
            self.bean_orphans[bean['type']] = \
                self.bean_orphans[bean['type']] + 1
        except Exception as exc:
            self.logger.error(
                'Failed to pass %s (container_id=%s content_path=%s '
                'content_version=%s content_id=%s chunk_id=%s chunk_pos=%s): '
                '%s', bean['type'], meta['container_id'], meta['content_path'],
                meta['content_version'], meta['content_id'], meta['chunk_id'],
                meta['chunk_pos'], exc)
            self.bean_errors[bean['type']] = \
                self.bean_errors[bean['type']] + 1
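
A hedged usage sketch for BlobRegistrator: the volume path and conf
values are placeholders, and pass_volume() behaves exactly as shown
above (per-chunk rate limiting, periodic reports, boolean result).

conf = {'namespace': 'OPENIO', 'action': 'update',
        'chunks_per_second': 10, 'report_interval': 60}
registrator = BlobRegistrator(conf, get_logger(conf),
                              '/var/lib/oio/sds/NS/rawx-1',
                              container_ids=None)
# True only if no chunk error and no bean error occurred during the pass
all_good = registrator.pass_volume()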
Example #10
class BlobRebuilderWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(
            conf.get('dry_run', False))
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.rdir_fetch_limit = int_value(
            conf.get('rdir_fetch_limit'), 100)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)
        self.rdir_client = RdirClient(conf)

    def rebuilder_pass_with_lock(self):
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        start_time = report_time = time.time()

        total_errors = 0
        rebuilder_time = 0

        chunks = self.rdir_client.chunk_fetch(self.volume,
                                              limit=self.rdir_fetch_limit,
                                              rebuild=True)
        for container_id, content_id, chunk_id, data in chunks:
            loop_time = time.time()

            if self.dry_run:
                self.dryrun_chunk_rebuild(container_id, content_id, chunk_id)
            else:
                self.safe_chunk_rebuild(container_id, content_id, chunk_id)

            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(rebuilder_time).2f '
                    '%(rebuilder_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'rebuilder_rate': rebuilder_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(rebuilder_time).2f '
            '%(rebuilder_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'rebuilder_rate': rebuilder_time / elapsed
            }
        )

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info("[dryrun] Rebuilding "
                         "container %s, content %s, chunk %s"
                         % (container_id, content_id, chunk_id))
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id)
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id, e)

        self.passes += 1

    def _meta2_get_chunks_at_pos(self, container_id, content_id, chunk_id):
        current_chunk_url = 'http://%s/%s' % (self.volume, chunk_id)

        try:
            data = self.container_client.content_show(
                cid=container_id, content=content_id)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')

        current_chunk = None
        for c in data:
            if c['url'] == current_chunk_url:
                current_chunk = c
                break
        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')

        duplicate_chunks = []
        for c in data:
            if c['pos'] == current_chunk['pos'] \
                    and c['url'] != current_chunk['url']:
                duplicate_chunks.append(c)
        if len(duplicate_chunks) == 0:
            raise exc.UnrecoverableContent('No copy of missing chunk')

        return current_chunk, duplicate_chunks

    def _meta2_get_spare_chunk(self, container_id, content_id, notin, broken):
        spare_data = {'notin': notin,
                      'broken': [broken],
                      'size': 0}
        try:
            spare_resp = self.container_client.content_spare(
                cid=container_id, content=content_id, data=spare_data)
        except ClientException as e:
            raise exc.SpareChunkException('No spare chunk (%s)' % e.message)

        return spare_resp['chunks'][0]

    def _meta2_replace_chunk(self, container_id, content_id,
                             current_chunk, new_chunk):
        old = [{'type': 'chunk',
                'id': current_chunk['url'],
                'hash': current_chunk['hash'],
                'size': current_chunk['size'],
                'pos': current_chunk['pos'],
                'content': content_id}]
        new = [{'type': 'chunk',
                'id': new_chunk['id'],
                'hash': current_chunk['hash'],
                'size': current_chunk['size'],
                'pos': current_chunk['pos'],
                'content': content_id}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=container_id, data=update_data)

    # TODO rain support
    def chunk_rebuild(self, container_id, content_id, chunk_id):

        current_chunk, duplicate_chunks = self._meta2_get_chunks_at_pos(
            container_id, content_id, chunk_id)

        spare_chunk = self._meta2_get_spare_chunk(
            container_id, content_id, duplicate_chunks, current_chunk)

        uploaded = False
        for src in duplicate_chunks:
            try:
                self.blob_client.chunk_copy(src['url'], spare_chunk['id'])
                self.logger.debug('copy chunk from %s to %s',
                                  src['url'], spare_chunk['id'])
                uploaded = True
                break
            except Exception as e:
                self.logger.debug('Failed to copy chunk from %s to %s: %s',
                                  src['url'], spare_chunk['id'], type(e))
        if not uploaded:
            raise exc.UnrecoverableContent('No copy available '
                                           'of missing chunk')

        self._meta2_replace_chunk(container_id, content_id,
                                  current_chunk, spare_chunk)

        self.rdir_client.chunk_push(self.volume, container_id, content_id,
                                    chunk_id, rtime=int(time.time()))

        self.bytes_processed += current_chunk['size']
        self.total_bytes_processed += current_chunk['size']
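
A closing usage sketch for the rebuilder, with assumptions flagged: the
volume identifier is whatever the rdir service indexes for the rawx to
repair (a placeholder address below), and dry_run=True only logs what
would be rebuilt.

conf = {'namespace': 'OPENIO', 'dry_run': True, 'report_interval': 60}
worker = BlobRebuilderWorker(conf, None, '127.0.0.1:6011')
# The locked variant flags the volume as busy in the rdir service for
# the whole pass, so two rebuilders never repair the same rawx at once.
worker.rebuilder_pass_with_lock()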