Example no. 1
    def chunk_audit(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk(
                    'Missing extended attribute %s' % e)
            size = int(meta['chunk_size'])
            md5_checksum = meta['chunk_hash'].lower()
            reader = ChunkReader(f, size, md5_checksum)
            with closing(reader):
                for buf in reader:
                    buf_len = len(buf)
                    self.bytes_running_time = ratelimit(
                        self.bytes_running_time,
                        self.max_bytes_per_second,
                        increment=buf_len)
                    self.bytes_processed += buf_len
                    self.total_bytes_processed += buf_len

            try:
                content_cid = meta['content_cid']
                content_path = meta['content_path']
                content_attr, data = self.container_client.content_show(
                    cid=content_cid, path=content_path)

                # Check chunk data
                chunks_nb = 0
                chunk_data = None
                for c in data:
                    if c['url'].endswith(meta['chunk_id']):
                        chunks_nb += 1  # FIXME: won't work with DUP
                        chunk_data = c
                if not chunk_data:
                    raise exc.OrphanChunk('Not found in content')

                if chunk_data['size'] != int(meta['chunk_size']):
                    raise exc.FaultyChunk('Invalid chunk size found')

                if chunk_data['hash'] != meta['chunk_hash']:
                    raise exc.FaultyChunk('Invalid chunk hash found')

                if chunk_data['pos'] != meta['chunk_pos']:
                    raise exc.FaultyChunk('Invalid chunk position found')

                # Check content data
                if content_attr['length'] != meta['content_size']:
                    raise exc.FaultyChunk('Invalid content size found')

                if chunks_nb != int(meta['content_chunksnb']):
                    self.logger.warn('Invalid number of chunks found')
                    # TODO: really count chunks and enable the exception
                    # raise exc.FaultyChunk('Invalid number of chunks found')

            except exc.NotFound:
                raise exc.OrphanChunk('Chunk not found in container')
Example no. 2
    def chunk_file_audit(self, chunk_file, chunk_id):
        try:
            meta, _ = read_chunk_metadata(chunk_file, chunk_id)
        except exc.MissingAttribute as err:
            raise exc.FaultyChunk(err)
        size = int(meta['chunk_size'])
        md5_checksum = meta['chunk_hash'].lower()
        reader = ChunkReader(chunk_file,
                             size,
                             md5_checksum,
                             compression=meta.get("compression", ""))
        with closing(reader):
            for buf in reader:
                buf_len = len(buf)
                self.bytes_running_time = ratelimit(self.bytes_running_time,
                                                    self.max_bytes_per_second,
                                                    increment=buf_len)
                self.bytes_processed += buf_len
                self.total_bytes_processed += buf_len

        try:
            container_id = meta['container_id']
            content_id = meta['content_id']
            _obj_meta, data = self.container_client.content_locate(
                cid=container_id, content=content_id, properties=False)

            # Check chunk data
            chunk_data = None
            metachunks = set()
            for c in data:
                if c['url'].endswith(meta['chunk_id']):
                    metachunks.add(c['pos'].split('.', 2)[0])
                    chunk_data = c
            if not chunk_data:
                raise exc.OrphanChunk('Not found in content')

            metachunk_size = meta.get('metachunk_size')
            if metachunk_size is not None \
                    and chunk_data['size'] != int(metachunk_size):
                raise exc.FaultyChunk('Invalid metachunk size found')

            metachunk_hash = meta.get('metachunk_hash')
            if metachunk_hash is not None \
                    and chunk_data['hash'] != meta['metachunk_hash']:
                raise exc.FaultyChunk('Invalid metachunk hash found')

            if chunk_data['pos'] != meta['chunk_pos']:
                raise exc.FaultyChunk('Invalid chunk position found')

        except exc.NotFound:
            raise exc.OrphanChunk('Chunk not found in container')
Example no. 3
def extract_headers_meta(headers, check=True):
    """
    Extract chunk metadata from a dictionary of rawx response headers.

    :param headers: a dictionary of headers, as returned by a HEAD or GET
        request to a rawx service.
    :keyword check: if True (the default), raise FaultyChunk if one or
        several mandatory response headers are missing.
    :returns: a dictionary of chunk metadata.
    """
    meta = {}
    missing = list()
    for mkey, hkey in CHUNK_HEADERS.items():
        try:
            if mkey == 'full_path':
                meta[mkey] = headers[hkey]
            else:
                meta[mkey] = unquote(headers[hkey])
        except KeyError:
            if check and mkey not in CHUNK_XATTR_KEYS_OPTIONAL:
                missing.append(exc.MissingAttribute(mkey))
    if check and missing:
        raise exc.FaultyChunk(*missing)
    mtime = meta.get('chunk_mtime')
    if mtime:
        meta['chunk_mtime'] = mktime(parsedate(mtime))
    return meta
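A minimal usage sketch for Example no. 3. Assumptions (none of them come from the example itself): the `requests` library for the HTTP call, a placeholder rawx chunk URL, and `extract_headers_meta` plus the project's `exc` module already being in scope.

import requests

# Hypothetical chunk URL served by a rawx service (placeholder address and id)
chunk_url = 'http://127.0.0.1:6200/0123456789ABCDEF'
resp = requests.head(chunk_url)
try:
    # Decode the chunk metadata carried by the response headers
    meta = extract_headers_meta(resp.headers)
    print(meta.get('chunk_id'), meta.get('chunk_size'))
except exc.FaultyChunk as err:
    # FaultyChunk wraps one MissingAttribute per missing mandatory header
    print('Faulty chunk:', err)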
Example no. 4
    def chunk_audit(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk('Missing extended attribute %s' % e)
            size = int(meta['chunk_size'])
            md5_checksum = meta['chunk_hash'].lower()
            reader = ChunkReader(f, size, md5_checksum)
            with closing(reader):
                for buf in reader:
                    buf_len = len(buf)
                    self.bytes_running_time = ratelimit(
                        self.bytes_running_time,
                        self.max_bytes_per_second,
                        increment=buf_len)
                    self.bytes_processed += buf_len
                    self.total_bytes_processed += buf_len

            try:
                container_id = meta['container_id']
                content_path = meta['content_path']
                content_attr, data = self.container_client.content_show(
                    cid=container_id, path=content_path)

                # Check chunk data
                chunk_data = None
                metachunks = set()
                for c in data:
                    if c['url'].endswith(meta['chunk_id']):
                        metachunks.add(c['pos'].split('.', 2)[0])
                        chunk_data = c
                if not chunk_data:
                    raise exc.OrphanChunk('Not found in content')

                if chunk_data['size'] != int(meta['chunk_size']):
                    raise exc.FaultyChunk('Invalid chunk size found')

                if chunk_data['hash'] != meta['chunk_hash']:
                    raise exc.FaultyChunk('Invalid chunk hash found')

                if chunk_data['pos'] != meta['chunk_pos']:
                    raise exc.FaultyChunk('Invalid chunk position found')

            except exc.NotFound:
                raise exc.OrphanChunk('Chunk not found in container')
Example no. 5
    def close(self):
        if self.fp:
            self.md5_read = self.iter_md5.hexdigest()
            if self.bytes_read != self.size:
                raise exc.FaultyChunk('Invalid size for chunk')

            if self.md5_read != self.md5_checksum:
                raise exc.CorruptedChunk('checksum does not match %s != %s' %
                                         (self.md5_read, self.md5_checksum))
Example no. 6
    def update_index(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk('Missing extended attribute %s' % e)
            data = {'mtime': int(time.time())}
            self.index_client.chunk_push(self.volume_id, meta['container_id'],
                                         meta['content_id'], meta['chunk_id'],
                                         **data)
Example no. 7
    def close(self):
        """
        Perform checks on what has been read before closing,
        if no error has occurred yet.
        """
        if self.fp and not self.error:
            md5_read = self.iter_md5.hexdigest()
            if self.bytes_read != self.size:
                raise exc.FaultyChunk('Invalid size: expected %d, got %d' %
                                      (self.size, self.bytes_read))

            if md5_read != self.md5_checksum:
                raise exc.CorruptedChunk('checksum does not match %s != %s' %
                                         (md5_read, self.md5_checksum))
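Example no. 7's close() verifies two things: the number of bytes read and the MD5 digest accumulated while reading. Below is a minimal standalone sketch of the same idea, using a hypothetical helper that is not part of ChunkReader and that raises plain ValueError instead of the project's exception classes.

import hashlib

def verify_stream(fp, expected_size, expected_md5):
    """Read fp to the end and check its length and MD5 (illustrative only)."""
    md5 = hashlib.md5()
    bytes_read = 0
    # Hash the data while reading, 64 KiB at a time
    for buf in iter(lambda: fp.read(65536), b''):
        md5.update(buf)
        bytes_read += len(buf)
    if bytes_read != expected_size:
        raise ValueError('Invalid size: expected %d, got %d'
                         % (expected_size, bytes_read))
    if md5.hexdigest() != expected_md5.lower():
        raise ValueError('checksum does not match %s != %s'
                         % (md5.hexdigest(), expected_md5.lower()))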
Example no. 8
    def __init__(self, fp, size, md5_checksum, compression=None):
        self.fp = fp
        self.decompressor = None
        self.error = None
        if compression not in (None, 'off'):
            if compression == 'zlib':
                self.decompressor = zlib.decompressobj(0)
            else:
                msg = "Compression method not managed: %s" % compression
                self.error = exc.FaultyChunk(msg)
                raise self.error
        self.size = size
        self.md5_checksum = md5_checksum
        self.bytes_read = 0
        self.iter_md5 = None
Example no. 9
def read_chunk_metadata(fd, chunk_id, check_chunk_id=True):
    chunk_id = chunk_id.upper()
    raw_meta = read_user_xattr(fd)
    raw_meta_copy = None
    meta = {}
    meta['links'] = dict()
    attr_vers = 0.0
    raw_chunk_id = container_id = path = version = content_id = None
    missing = list()
    for k, v in raw_meta.items():
        # New chunks have a version
        if k == chunk_xattr_keys['oio_version']:
            attr_vers = float(v)
        # Chunks with version >= 4.2 have a "full_path"
        elif k.startswith(CHUNK_XATTR_CONTENT_FULLPATH_PREFIX):
            parsed_chunk_id = k[len(CHUNK_XATTR_CONTENT_FULLPATH_PREFIX):]
            if parsed_chunk_id == chunk_id:
                raw_chunk_id = parsed_chunk_id
                meta['full_path'] = v
                account, container, path, version, content_id = \
                    decode_fullpath(v)
                container_id = cid_from_name(account, container)
            else:
                meta['links'][parsed_chunk_id] = v
    if raw_chunk_id:
        raw_meta_copy = raw_meta.copy()
        raw_meta[chunk_xattr_keys['chunk_id']] = raw_chunk_id
        raw_meta[chunk_xattr_keys['container_id']] = container_id
        raw_meta[chunk_xattr_keys['content_path']] = path
        raw_meta[chunk_xattr_keys['content_version']] = version
        raw_meta[chunk_xattr_keys['content_id']] = content_id
    if attr_vers >= 4.2 and 'full_path' not in meta:
        # TODO(FVE): in that case, do not warn about other attributes
        # that could be deduced from this one.
        missing.append(
            exc.MissingAttribute(CHUNK_XATTR_CONTENT_FULLPATH_PREFIX +
                                 chunk_id))
    for k, v in chunk_xattr_keys.items():
        if v not in raw_meta:
            if k not in chunk_xattr_keys_optional:
                missing.append(exc.MissingAttribute(v))
        else:
            meta[k] = raw_meta[v]
    if missing:
        raise exc.FaultyChunk(*missing)
    if check_chunk_id and meta['chunk_id'] != chunk_id:
        raise exc.MissingAttribute(chunk_xattr_keys['chunk_id'])
    return meta, raw_meta_copy if raw_meta_copy else raw_meta
Example no. 10
    def update_index(self, path, chunk_id):
        with open(path) as file_:
            try:
                meta, _ = read_chunk_metadata(file_, chunk_id)
            except exc.MissingAttribute as err:
                raise exc.FaultyChunk(err)

            data = {'mtime': int(time.time())}
            headers = {REQID_HEADER: request_id('blob-indexer-')}
            self.index_client.chunk_push(self.volume_id,
                                         meta['container_id'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         headers=headers,
                                         **data)
Example no. 11
    def update_index(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk('Missing extended attribute %s' % e)
            data = {
                'content_version': meta['content_version'],
                'content_nbchunks': meta['content_chunksnb'],
                'content_path': meta['content_path'],
                'content_size': meta['content_size'],
                'chunk_hash': meta['chunk_hash'],
                'chunk_position': meta['chunk_pos'],
                'chunk_size': meta['chunk_size'],
                'mtime': int(time.time())
            }
            self.index_client.chunk_push(self.volume_id, meta['content_cid'],
                                         meta['content_id'], meta['chunk_id'],
                                         **data)
Example no. 12
    def update_index(self, path, chunk_id):
        with open(path) as f:
            try:
                meta = None
                if self.convert_chunks and self.converter:
                    _, meta = self.converter.convert_chunk(f, chunk_id)
                if meta is None:
                    meta, _ = read_chunk_metadata(f, chunk_id)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk('Missing extended attribute %s' % e)

            data = {'mtime': int(time.time())}
            headers = {'X-oio-req-id': 'blob-indexer-' + request_id()[:-13]}
            self.index_client.chunk_push(self.volume_id,
                                         meta['container_id'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         headers=headers,
                                         **data)