Example #1
    def test_invalidate_hash(self):

        def assertFileData(file_path, data):
            with open(file_path, 'r') as fp:
                fdata = fp.read()
                self.assertEquals(pickle.loads(fdata), pickle.loads(data))

        df = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', FakeLogger())
        mkdirs(df.datadir)
        ohash = hash_path('a', 'c', 'o')
        data_dir = ohash[-3:]
        whole_path_from = os.path.join(self.objects, '0', data_dir)
        hashes_file = os.path.join(self.objects, '0',
                                   object_replicator.HASH_FILE)
        # test that the exception for a non-existent file is caught
        self.assertEquals(object_replicator.invalidate_hash(whole_path_from),
                          None)
        # test that hashes get cleared
        check_pickle_data = pickle.dumps({data_dir: None},
                                         object_replicator.PICKLE_PROTOCOL)
        for data_hash in [{data_dir: None}, {data_dir: 'abcdefg'}]:
            with open(hashes_file, 'wb') as fp:
                pickle.dump(data_hash, fp, object_replicator.PICKLE_PROTOCOL)
            object_replicator.invalidate_hash(whole_path_from)
            assertFileData(hashes_file, check_pickle_data)
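For context, here is a minimal sketch of the function this test exercises, inferred from the assertions above rather than copied from swift.obj.replicator (the real implementation also takes a directory lock and writes the pickle atomically): invalidate_hash() resets the hashes entry for the affected suffix directory to None, and returns quietly when the hashes file does not exist yet.

import os
import pickle

HASH_FILE = 'hashes.pkl'  # assumption: matches object_replicator.HASH_FILE
PICKLE_PROTOCOL = 2       # assumption: matches object_replicator.PICKLE_PROTOCOL

def invalidate_hash(suffix_dir):
    suffix = os.path.basename(suffix_dir)
    partition_dir = os.path.dirname(suffix_dir)
    hashes_file = os.path.join(partition_dir, HASH_FILE)
    try:
        with open(hashes_file, 'rb') as fp:
            hashes = pickle.load(fp)
    except (IOError, OSError):
        return  # no hashes file yet: nothing to invalidate
    hashes[suffix] = None  # force this suffix to be re-hashed later
    with open(hashes_file, 'wb') as fp:
        pickle.dump(hashes, fp, PICKLE_PROTOCOL)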
Example #2
    def test_invalidate_hash(self):

        def assertFileData(file_path, data):
            with open(file_path, 'r') as fp:
                fdata = fp.read()
                self.assertEquals(pickle.loads(fdata), pickle.loads(data))

        df = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', FakeLogger())
        mkdirs(df.datadir)
        ohash = hash_path('a', 'c', 'o')
        data_dir = ohash[-3:]
        whole_path_from = os.path.join(self.objects, '0', data_dir)
        hashes_file = os.path.join(self.objects, '0',
                                   object_replicator.HASH_FILE)
        # test that the exception for a non-existent file is caught
        self.assertEquals(object_replicator.invalidate_hash(whole_path_from),
                          None)
        # test that hashes get cleared
        check_pickle_data = pickle.dumps({data_dir: None},
                                         object_replicator.PICKLE_PROTOCOL)
        for data_hash in [{data_dir: None}, {data_dir: 'abcdefg'}]:
            with open(hashes_file, 'wb') as fp:
                pickle.dump(data_hash, fp, object_replicator.PICKLE_PROTOCOL)
            object_replicator.invalidate_hash(whole_path_from)
            assertFileData(hashes_file, check_pickle_data)
Example #3
    def put(self, fd, fsize, metadata, extension='.data'):
        """
        Finalize writing the file on disk, and rename it from the temp file to
        its real location.  This should be called after the data has been
        written to the temp file.

        :param fd: file descriptor of the temp file
        :param fsize: final on-disk size of the created file
        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        """
        assert self.tmppath is not None
        metadata['name'] = self.name
        timestamp = normalize_timestamp(metadata['X-Timestamp'])
        # Write the metadata before calling fsync() so that both data and
        # metadata are flushed to disk.
        write_metadata(fd, metadata)
        # We call fsync() before calling drop_cache() to lower the amount of
        # redundant work the drop cache code will perform on the pages (after
        # fsync, the pages will all be clean).
        tpool.execute(fsync, fd)
        # From the Department of the Redundancy Department, make sure we
        # call drop_cache() after fsync() to avoid redundant work (pages
        # all clean).
        self.drop_cache(fd, 0, fsize)
        invalidate_hash(os.path.dirname(self.datadir))
        # After the rename completes, this object will be available for other
        # requests to reference.
        renamer(self.tmppath,
                os.path.join(self.datadir, timestamp + extension))
        self.metadata = metadata
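A hedged usage sketch for this put(): DiskFile of the same era exposed a mkstemp() context manager that yields the temp file descriptor, but treat the following as illustrative, not the object server's actual PUT handler.

import os
import time
from hashlib import md5

# 'disk_file' and normalize_timestamp() come from the surrounding module;
# the metadata keys are the ones put() and the auditor read.
body = 'some object data'
with disk_file.mkstemp() as fd:  # assumes mkstemp() yields the temp fd
    os.write(fd, body)
    metadata = {
        'X-Timestamp': normalize_timestamp(time.time()),
        'Content-Length': str(len(body)),
        'ETag': md5(body).hexdigest(),
    }
    disk_file.put(fd, len(body), metadata)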
Example #4
 def test_object_audit_no_meta(self):
     timestamp = str(normalize_timestamp(time.time()))
     path = os.path.join(self.disk_file.datadir, timestamp + ".data")
     mkdirs(self.disk_file.datadir)
     fp = open(path, "w")
     fp.write("0" * 1024)
     fp.close()
     invalidate_hash(os.path.dirname(self.disk_file.datadir))
     self.auditor = auditor.AuditorWorker(self.conf)
     pre_quarantines = self.auditor.quarantines
     self.auditor.object_audit(os.path.join(self.disk_file.datadir, timestamp + ".data"), "sda", "0")
     self.assertEquals(self.auditor.quarantines, pre_quarantines + 1)
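The quarantine here is expected because the .data file was written with a plain open()/write() and so carries no pickled metadata in its extended attributes; the auditor's read_metadata() call fails and the object is treated as corrupt. A rough sketch of that read path, assuming the 'user.swift.metadata' xattr key used by Swift of this era (the real code also splits large values across numbered keys):

import pickle
import xattr  # third-party 'xattr' package

METADATA_KEY = 'user.swift.metadata'  # assumption: era-appropriate key name

def read_metadata(path_or_fd):
    # Raises if the file was created without Swift metadata, which is
    # exactly what makes the audited object above get quarantined.
    return pickle.loads(xattr.getxattr(path_or_fd, METADATA_KEY))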
Example #5
 def test_object_audit_no_meta(self):
     timestamp = str(normalize_timestamp(time.time()))
     path = os.path.join(self.disk_file.datadir, timestamp + '.data')
     mkdirs(self.disk_file.datadir)
     fp = open(path, 'w')
     fp.write('0' * 1024)
     fp.close()
     invalidate_hash(os.path.dirname(self.disk_file.datadir))
     self.auditor = auditor.AuditorWorker(self.conf)
     pre_quarantines = self.auditor.quarantines
     self.auditor.object_audit(
         os.path.join(self.disk_file.datadir, timestamp + '.data'), 'sda',
         '0')
     self.assertEquals(self.auditor.quarantines, pre_quarantines + 1)
Example #6
 def test_object_audit_no_meta(self):
     timestamp = str(normalize_timestamp(time.time()))
     path = os.path.join(self.disk_file.datadir, timestamp + '.data')
     mkdirs(self.disk_file.datadir)
     fp = open(path, 'w')
     fp.write('0' * 1024)
     fp.close()
     invalidate_hash(os.path.dirname(self.disk_file.datadir))
     self.auditor = auditor.AuditorWorker(self.conf, self.logger)
     pre_quarantines = self.auditor.quarantines
     self.auditor.object_audit(
         os.path.join(self.disk_file.datadir, timestamp + '.data'),
         'sda', '0')
     self.assertEquals(self.auditor.quarantines, pre_quarantines + 1)
Example #7
 def test_object_audit_no_meta(self):
     cur_part = '0'
     disk_file = DiskFile(self.devices, 'sda', cur_part, 'a', 'c', 'o')
     timestamp = str(normalize_timestamp(time.time()))
     path = os.path.join(disk_file.datadir, timestamp + '.data')
     mkdirs(disk_file.datadir)
     fp = open(path, 'w')
     fp.write('0' * 1024)
     fp.close()
     invalidate_hash(os.path.dirname(disk_file.datadir))
     self.auditor = auditor.ObjectAuditor(self.conf)
     pre_quarantines = self.auditor.quarantines
     self.auditor.object_audit(
         os.path.join(disk_file.datadir, timestamp + '.data'),
         'sda', cur_part)
     self.assertEquals(self.auditor.quarantines, pre_quarantines + 1)
Example #8
 def test_object_audit_no_meta(self):
     cur_part = '0'
     disk_file = DiskFile(self.devices, 'sda', cur_part, 'a', 'c', 'o')
     timestamp = str(normalize_timestamp(time.time()))
     path = os.path.join(disk_file.datadir, timestamp + '.data')
     mkdirs(disk_file.datadir)
     fp = open(path, 'w')
     fp.write('0' * 1024)
     fp.close()
     invalidate_hash(os.path.dirname(disk_file.datadir))
     self.auditor = auditor.AuditorWorker(self.conf)
     pre_quarantines = self.auditor.quarantines
     self.auditor.object_audit(
         os.path.join(disk_file.datadir, timestamp + '.data'),
         'sda', cur_part)
     self.assertEquals(self.auditor.quarantines, pre_quarantines + 1)
Example #9
    def put(self, fd, tmppath, metadata, extension='.data'):
        """
        Finalize writing the file on disk, and rename it from the temp file to
        its real location.  This should be called after the data has been
        written to the temp file.

        :param fd: file descriptor of the temp file
        :param tmppath: path to the temporary file being used
        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        """
        metadata['name'] = self.name
        timestamp = normalize_timestamp(metadata['X-Timestamp'])
        write_metadata(fd, metadata)
        if 'Content-Length' in metadata:
            self.drop_cache(fd, 0, int(metadata['Content-Length']))
        tpool.execute(fsync, fd)
        invalidate_hash(os.path.dirname(self.datadir))
        renamer(tmppath, os.path.join(self.datadir, timestamp + extension))
        self.metadata = metadata
Example #10
    def put(self, fd, tmppath, metadata, extension='.data'):
        """
        Finalize writing the file on disk, and rename it from the temp file to
        its real location.  This should be called after the data has been
        written to the temp file.

        :param fd: file descriptor of the temp file
        :param tmppath: path to the temporary file being used
        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        """
        metadata['name'] = self.name
        timestamp = normalize_timestamp(metadata['X-Timestamp'])
        write_metadata(fd, metadata)
        if 'Content-Length' in metadata:
            self.drop_cache(fd, 0, int(metadata['Content-Length']))
        tpool.execute(os.fsync, fd)
        invalidate_hash(os.path.dirname(self.datadir))
        renamer(tmppath, os.path.join(self.datadir, timestamp + extension))
        self.metadata = metadata
Example #11
    def put(self, fd, metadata, extension=".data"):
        """
        Finalize writing the file on disk, and rename it from the temp file to
        its real location.  This should be called after the data has been
        written to the temp file.

        :param fd: file descriptor of the temp file
        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        """
        assert self.tmppath is not None
        metadata["name"] = self.name
        timestamp = normalize_timestamp(metadata["X-Timestamp"])
        write_metadata(fd, metadata)
        if "Content-Length" in metadata:
            self.drop_cache(fd, 0, int(metadata["Content-Length"]))
        tpool.execute(fsync, fd)
        invalidate_hash(os.path.dirname(self.datadir))
        renamer(self.tmppath, os.path.join(self.datadir, timestamp + extension))
        self.metadata = metadata
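write_metadata(), called by every put() variant above, is roughly the inverse of that read path: it pickles the metadata dict into file xattrs. A sketch under the same assumptions (the key name and the small per-key chunk size are era-specific guesses; verify against the source):

import pickle
import xattr

METADATA_KEY = 'user.swift.metadata'  # assumption, as in the read sketch
PICKLE_PROTOCOL = 2                   # assumption: the module constant

def write_metadata(fd, metadata):
    metastr = pickle.dumps(metadata, PICKLE_PROTOCOL)
    key = 0
    while metastr:
        # First chunk lands in METADATA_KEY, later ones in METADATA_KEY1, ...
        xattr.setxattr(fd, '%s%s' % (METADATA_KEY, key or ''), metastr[:254])
        metastr = metastr[254:]
        key += 1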
Example #12
    def test_invalidate_hash(self):
        def assertFileData(file_path, data):
            with open(file_path, "r") as fp:
                fdata = fp.read()
                self.assertEquals(fdata, data)

        df = DiskFile(self.devices, "sda", "0", "a", "c", "o")
        mkdirs(df.datadir)
        ohash = hash_path("a", "c", "o")
        data_dir = ohash[-3:]
        whole_path_from = os.path.join(self.objects, "0", data_dir)
        hashes_file = os.path.join(self.objects, "0", object_replicator.HASH_FILE)
        # test that the exception for a non-existent file is caught
        self.assertEquals(object_replicator.invalidate_hash(whole_path_from), None)
        # test that hashes get cleared
        check_pickle_data = pickle.dumps({data_dir: None}, object_replicator.PICKLE_PROTOCOL)
        for data_hash in [{data_dir: None}, {data_dir: "abcdefg"}]:
            with open(hashes_file, "wb") as fp:
                pickle.dump(data_hash, fp, object_replicator.PICKLE_PROTOCOL)
            object_replicator.invalidate_hash(whole_path_from)
            assertFileData(hashes_file, check_pickle_data)
Example #13
def quarantine_renamer(device_path, corrupted_file_path):
    """
    In the case that a file is corrupted, move it to a quarantined
    area to allow replication to fix it.

    :param device_path: The path to the device the corrupted file is on.
    :param corrupted_file_path: The path to the file you want quarantined.

    :returns: path (str) of directory the file was moved to
    :raises OSError: re-raises non errno.EEXIST / errno.ENOTEMPTY
                     exceptions from rename
    """
    from_dir = os.path.dirname(corrupted_file_path)
    to_dir = os.path.join(device_path, 'quarantined',
                          'objects', os.path.basename(from_dir))
    invalidate_hash(os.path.dirname(from_dir))
    try:
        renamer(from_dir, to_dir)
    except OSError, e:
        if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
            raise
        to_dir = "%s-%s" % (to_dir, uuid.uuid4().hex)
        renamer(from_dir, to_dir)
    return to_dir
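A short usage sketch (all paths are made up for the example): the whole object hash directory containing the bad file is moved under quarantined/objects, and the parent suffix directory is invalidated so replication notices and re-syncs it.

import os

device_path = '/srv/node/sda'
bad_file = os.path.join(device_path, 'objects', '0', 'a83',
                        'd41d8cd98f00b204e9800998ecf8427e',
                        '1300000000.00000.data')
to_dir = quarantine_renamer(device_path, bad_file)
# to_dir -> /srv/node/sda/quarantined/objects/d41d8cd98f00b204e9800998ecf8427e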
Example #14
class ObjectAuditor(Daemon):
    """Audit objects."""
    def __init__(self, conf):
        self.conf = conf
        self.logger = get_logger(conf, 'object-auditor')
        self.devices = conf.get('devices', '/srv/node')
        self.mount_check = conf.get('mount_check', 'true').lower() in \
                              ('true', 't', '1', 'on', 'yes', 'y')
        self.max_files_per_second = float(conf.get('files_per_second', 20))
        self.max_bytes_per_second = float(
            conf.get('bytes_per_second', 10000000))
        self.log_time = int(conf.get('log_time', 3600))
        self.files_running_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        self.passes = 0
        self.quarantines = 0
        self.errors = 0

    def run_forever(self):
        """Run the object audit until stopped."""
        while True:
            self.run_once('forever')
            self.total_bytes_processed = 0
            self.total_files_processed = 0
            time.sleep(30)

    def run_once(self, mode='once'):
        """Run the object audit once."""
        self.logger.info(_('Begin object audit "%s" mode') % mode)
        begin = reported = time.time()
        all_locs = audit_location_generator(self.devices,
                                            object_server.DATADIR,
                                            mount_check=self.mount_check,
                                            logger=self.logger)
        for path, device, partition in all_locs:
            self.object_audit(path, device, partition)
            self.files_running_time = ratelimit_sleep(
                self.files_running_time, self.max_files_per_second)
            self.total_files_processed += 1
            if time.time() - reported >= self.log_time:
                self.logger.info(
                    _('Since %(start_time)s: Locally: %(passes)d passed audit, '
                      '%(quars)d quarantined, %(errors)d errors '
                      'files/sec: %(frate).2f , bytes/sec: %(brate).2f') % {
                          'start_time': time.ctime(reported),
                          'passes': self.passes,
                          'quars': self.quarantines,
                          'errors': self.errors,
                          'frate': self.passes / (time.time() - reported),
                          'brate': self.bytes_processed /
                          (time.time() - reported)
                      })
                reported = time.time()
                self.passes = 0
                self.quarantines = 0
                self.errors = 0
                self.bytes_processed = 0
        elapsed = time.time() - begin
        self.logger.info(
            _('Object audit "%(mode)s" mode completed: %(elapsed).02fs. '
              'Total files/sec: %(frate).2f , '
              'Total bytes/sec: %(brate).2f ') % {
                  'mode': mode,
                  'elapsed': elapsed,
                  'frate': self.total_files_processed / elapsed,
                  'brate': self.total_bytes_processed / elapsed
              })

    def object_audit(self, path, device, partition):
        """
        Audits the given object path.

        :param path: a path to an object
        :param device: the device the path is on
        :param partition: the partition the path is on
        """
        try:
            if not path.endswith('.data'):
                return
            try:
                name = object_server.read_metadata(path)['name']
            except Exception, exc:
                raise AuditException('Error when reading metadata: %s' % exc)
            _junk, account, container, obj = name.split('/', 3)
            df = object_server.DiskFile(self.devices,
                                        device,
                                        partition,
                                        account,
                                        container,
                                        obj,
                                        keep_data_fp=True)
            if df.data_file is None:
                # file is deleted, we found the tombstone
                return
            if os.path.getsize(df.data_file) != \
                    int(df.metadata['Content-Length']):
                raise AuditException('Content-Length of %s does not match '
                                     'file size of %s' %
                                     (int(df.metadata['Content-Length']),
                                      os.path.getsize(df.data_file)))
            etag = md5()
            for chunk in df:
                self.bytes_running_time = ratelimit_sleep(
                    self.bytes_running_time,
                    self.max_bytes_per_second,
                    incr_by=len(chunk))
                etag.update(chunk)
                self.bytes_processed += len(chunk)
                self.total_bytes_processed += len(chunk)
            etag = etag.hexdigest()
            if etag != df.metadata['ETag']:
                raise AuditException("ETag of %s does not match file's md5 of "
                                     "%s" % (df.metadata['ETag'], etag))
        except AuditException, err:
            self.quarantines += 1
            self.logger.error(
                _('ERROR Object %(obj)s failed audit and will '
                  'be quarantined: %(err)s'), {
                      'obj': path,
                      'err': err
                  })
            invalidate_hash(os.path.dirname(path))
            renamer_path = os.path.dirname(path)
            renamer(
                renamer_path,
                os.path.join(self.devices, device, 'quarantined', 'objects',
                             os.path.basename(renamer_path)))
            return
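A minimal way to drive this daemon for a single pass; the conf keys are the ones __init__ reads above, and the values are illustrative defaults only.

conf = {'devices': '/srv/node', 'mount_check': 'false',
        'files_per_second': '20', 'bytes_per_second': '10000000',
        'log_time': '3600'}
auditor = ObjectAuditor(conf)
auditor.run_once()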
Example #15
class AuditorWorker(object):
    """Walk through file system to audit object"""
    def __init__(self, conf, zero_byte_only_at_fps=0):
        self.conf = conf
        self.logger = get_logger(conf, log_route='object-auditor')
        self.devices = conf.get('devices', '/srv/node')
        self.mount_check = conf.get('mount_check', 'true').lower() in \
            TRUE_VALUES
        self.max_files_per_second = float(conf.get('files_per_second', 20))
        self.max_bytes_per_second = float(
            conf.get('bytes_per_second', 10000000))
        self.auditor_type = 'ALL'
        self.zero_byte_only_at_fps = zero_byte_only_at_fps
        if self.zero_byte_only_at_fps:
            self.max_files_per_second = float(self.zero_byte_only_at_fps)
            self.auditor_type = 'ZBF'
        self.log_time = int(conf.get('log_time', 3600))
        self.files_running_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        self.passes = 0
        self.quarantines = 0
        self.errors = 0

    def audit_all_objects(self, mode='once'):
        self.logger.info(
            _('Begin object audit "%s" mode (%s)') % (mode, self.auditor_type))
        begin = reported = time.time()
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        files_running_time = 0
        all_locs = audit_location_generator(self.devices,
                                            object_server.DATADIR,
                                            mount_check=self.mount_check,
                                            logger=self.logger)
        for path, device, partition in all_locs:
            self.object_audit(path, device, partition)
            self.files_running_time = ratelimit_sleep(
                self.files_running_time, self.max_files_per_second)
            self.total_files_processed += 1
            if time.time() - reported >= self.log_time:
                self.logger.info(
                    _('Object audit (%(type)s). '
                      'Since %(start_time)s: Locally: %(passes)d passed, '
                      '%(quars)d quarantined, %(errors)d errors '
                      'files/sec: %(frate).2f , bytes/sec: %(brate).2f') % {
                          'type': self.auditor_type,
                          'start_time': time.ctime(reported),
                          'passes': self.passes,
                          'quars': self.quarantines,
                          'errors': self.errors,
                          'frate': self.passes / (time.time() - reported),
                          'brate': self.bytes_processed /
                          (time.time() - reported)
                      })
                reported = time.time()
                self.passes = 0
                self.quarantines = 0
                self.errors = 0
                self.bytes_processed = 0
        elapsed = time.time() - begin
        self.logger.info(
            _('Object audit (%(type)s) "%(mode)s" mode '
              'completed: %(elapsed).02fs. '
              'Total files/sec: %(frate).2f , '
              'Total bytes/sec: %(brate).2f ') % {
                  'type': self.auditor_type,
                  'mode': mode,
                  'elapsed': elapsed,
                  'frate': self.total_files_processed / elapsed,
                  'brate': self.total_bytes_processed / elapsed
              })

    def object_audit(self, path, device, partition):
        """
        Audits the given object path.

        :param path: a path to an object
        :param device: the device the path is on
        :param partition: the partition the path is on
        """
        try:
            if not path.endswith('.data'):
                return
            try:
                name = object_server.read_metadata(path)['name']
            except Exception, exc:
                raise AuditException('Error when reading metadata: %s' % exc)
            _junk, account, container, obj = name.split('/', 3)
            df = object_server.DiskFile(self.devices,
                                        device,
                                        partition,
                                        account,
                                        container,
                                        obj,
                                        keep_data_fp=True)
            if df.data_file is None:
                # file is deleted, we found the tombstone
                return
            obj_size = os.path.getsize(df.data_file)
            if obj_size != int(df.metadata['Content-Length']):
                raise AuditException('Content-Length of %s does not match '
                                     'file size of %s' %
                                     (int(df.metadata['Content-Length']),
                                      os.path.getsize(df.data_file)))
            if self.zero_byte_only_at_fps and obj_size:
                return
            etag = md5()
            for chunk in df:
                self.bytes_running_time = ratelimit_sleep(
                    self.bytes_running_time,
                    self.max_bytes_per_second,
                    incr_by=len(chunk))
                etag.update(chunk)
                self.bytes_processed += len(chunk)
                self.total_bytes_processed += len(chunk)
            etag = etag.hexdigest()
            if etag != df.metadata['ETag']:
                raise AuditException("ETag of %s does not match file's md5 of "
                                     "%s" % (df.metadata['ETag'], etag))
        except AuditException, err:
            self.quarantines += 1
            self.logger.error(
                _('ERROR Object %(obj)s failed audit and will '
                  'be quarantined: %(err)s'), {
                      'obj': path,
                      'err': err
                  })
            object_dir = os.path.dirname(path)
            invalidate_hash(os.path.dirname(object_dir))
            renamer_path = os.path.dirname(path)
            to_path = os.path.join(self.devices, device, 'quarantined',
                                   'objects', os.path.basename(renamer_path))
            try:
                renamer(renamer_path, to_path)
            except OSError, e:
                if e.errno == errno.EEXIST:
                    to_path = "%s-%s" % (to_path, uuid.uuid4().hex)
                    renamer(renamer_path, to_path)
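The worker variant is driven the same way; passing a non-zero zero_byte_only_at_fps takes the ZBF branch in __init__ above, re-pointing the file-rate limit at the given fps and making object_audit() skip any object with a non-zero size. An illustrative one-shot run:

conf = {'devices': '/srv/node', 'mount_check': 'false'}
AuditorWorker(conf).audit_all_objects(mode='once')  # full audit pass
AuditorWorker(conf, zero_byte_only_at_fps=50).audit_all_objects(
    mode='once')  # zero-byte-files-only pass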