Example #1
 def __init__(self, config, in_memory=False):
     if not in_memory:
         self._engine = sqlalchemy.create_engine(
             config.get('metadataBackend.engine', types=str))
     else:
         logger.info('Running in metadata-backend-less mode.')
         self._engine = sqlalchemy.create_engine('sqlite://')
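For context, the 'sqlite://' URL in the in-memory branch is standard SQLAlchemy: an empty path selects a private in-memory SQLite database, so the metadata-backend-less mode needs no further configuration. A minimal, self-contained check:

    import sqlalchemy

    # 'sqlite://' with an empty path creates a throwaway in-memory SQLite database.
    engine = sqlalchemy.create_engine('sqlite://')
    with engine.connect() as connection:
        print(connection.execute(sqlalchemy.text('SELECT 1')).scalar())  # 1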
Example #2
 def nbd(self, bind_address: str, bind_port: int, read_only: bool) -> None:
     with Benji(self.config) as benji_obj:
         store = BenjiStore(benji_obj)
         addr = (bind_address, bind_port)
         server = NbdServer(addr, store, read_only)
         logger.info("Starting to serve NBD on %s:%s" % (addr[0], addr[1]))
         server.serve_forever()
Example #3
    def nbd_client(self, version_uid):
        # List the exported versions first (-l), then attach the requested one.
        self.subprocess_run(
            args=['sudo', 'nbd-client', '127.0.0.1', '-p', str(self.SERVER_PORT), '-l'],
            success_regexp='^Negotiation: ..\n{}\n$'.format(version_uid[0].v_string))

        version_uid, size = version_uid
        self.subprocess_run(
            args=[
                'sudo', 'nbd-client', '-N', version_uid.v_string, '127.0.0.1',
                '-p', str(self.SERVER_PORT), self.NBD_DEVICE
            ],
            success_regexp=(r'^Negotiation: ..size = \d+MB\nbs=1024, sz=\d+ bytes\n$'
                            r'|^Negotiation: ..size = \d+MB|Connected /dev/nbd\d+$'))

        count = 0
        nbd_data = bytearray()
        with open(self.NBD_DEVICE, 'rb') as f:
            while True:
                data = f.read(64 * 1024 + random.randint(0, 8192))
                if not data:
                    break
                count += len(data)
                nbd_data += data
        self.assertEqual(size, count)

        image_data = self.read_file(self.testpath.path + '/image')
        logger.info('image_data size {}, nbd_data size {}'.format(
            len(image_data), len(nbd_data)))
        self.assertEqual(image_data, bytes(nbd_data))

        f = os.open(self.NBD_DEVICE, os.O_RDWR)
        for offset in range(0, size, 4096):
            os.lseek(f, offset, os.SEEK_SET)
            data = self.random_bytes(4096)
            written = os.write(f, data)
            os.fsync(f)
            self.assertEqual(len(data), written)
            # Discard cache so that the read request below really goes to the NBD server
            os.posix_fadvise(f, offset, len(data), os.POSIX_FADV_DONTNEED)

            os.lseek(f, offset, os.SEEK_SET)
            read_data = os.read(f, 4096)
            self.assertEqual(data, read_data)
        os.close(f)

        self.subprocess_run(args=['sudo', 'nbd-client', '-d', self.NBD_DEVICE],
                            success_regexp='^disconnect, sock, done\n$')

        # Signal NBD server to stop
        self.nbd_server.stop()
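The subprocess_run helper used throughout this test is assumed to run a command and assert on its output; a hedged sketch of such a helper (the real one in the benji test suite may differ):

    import re
    import subprocess

    # Hedged sketch of a subprocess_run-style test helper: run the command and
    # require its output to match success_regexp. The real helper may differ.
    def subprocess_run(args, success_regexp=None):
        completed = subprocess.run(args, capture_output=True, text=True, check=True)
        output = completed.stdout + completed.stderr
        if success_regexp is not None and re.match(success_regexp, output) is None:
            raise AssertionError('Unexpected output: {!r}'.format(output))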
Example #4
 def rest_api(self, bind_address: str, bind_port: int,
              threads: int) -> None:
     from benji.restapi import RestAPI
     api = RestAPI(self.config)
     logger.info(
         f'Starting REST API via gunicorn on {bind_address}:{bind_port}.')
     debug = logger.isEnabledFor(logging.DEBUG)
     api.run(bind_address=bind_address,
             bind_port=bind_port,
             threads=threads,
             debug=debug)
Example #5
 def nbd(self, bind_address, bind_port, read_only):
     benji_obj = None
     try:
         benji_obj = Benji(self.config)
         store = BenjiStore(benji_obj)
         addr = (bind_address, bind_port)
         server = NbdServer(addr, store, read_only)
         logger.info("Starting to serve nbd on %s:%s" % (addr[0], addr[1]))
         server.serve_forever()
     finally:
         if benji_obj:
             benji_obj.close()
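Examples #2 and #5 are two revisions of the same handler: the with statement in Example #2 can replace this try/finally only because Benji implements the context-manager protocol. A sketch of what that requires (assumed; the real class may differ):

    # Assumed sketch of the context-manager protocol behind Example #2's
    # 'with Benji(...)'; the real implementation may differ.
    class Benji:

        def close(self):
            pass  # release database connections and other resources

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            self.close()
            return False  # never swallow exceptions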
Example #6
    def backup(self, version_name: str, snapshot_name: str, source: str,
               rbd_hints: str, base_version_uid: str, block_size: int,
               labels: List[str], storage) -> None:
        # Validate version_name and snapshot_name
        if not InputValidation.is_backup_name(version_name):
            raise benji.exception.UsageError(
                'Version name {} is invalid.'.format(version_name))
        if not InputValidation.is_snapshot_name(snapshot_name):
            raise benji.exception.UsageError(
                'Snapshot name {} is invalid.'.format(snapshot_name))
        base_version_uid_obj = VersionUid(
            base_version_uid) if base_version_uid else None
        if labels:
            label_add, label_remove = self._parse_labels(labels)
            if label_remove:
                raise benji.exception.UsageError(
                    'Wanting to delete labels on a new version is senseless.')
        benji_obj = None
        try:
            benji_obj = Benji(self.config, block_size=block_size)
            hints = None
            if rbd_hints:
                # Read the whole hints file; .readline() here would only have
                # iterated over the characters of the first line.
                data = ''.join(fileinput.input(rbd_hints))
                hints = hints_from_rbd_diff(data)
            backup_version = benji_obj.backup(version_name, snapshot_name,
                                              source, hints,
                                              base_version_uid_obj, storage)

            if labels:
                for key, value in label_add:
                    benji_obj.add_label(backup_version.uid, key, value)
                for key in label_remove:
                    benji_obj.rm_label(backup_version.uid, key)
                if label_add:
                    logger.info('Added label(s) to version {}: {}.'.format(
                        backup_version.uid.v_string, ', '.join([
                            '{}={}'.format(name, value)
                            for name, value in label_add
                        ])))
                if label_remove:
                    logger.info('Removed label(s) from version {}: {}.'.format(
                        backup_version.uid.v_string, ', '.join(label_remove)))

            if self.machine_output:
                benji_obj.export_any({'versions': [backup_version]},
                                     sys.stdout,
                                     ignore_relationships=[((Version, ),
                                                            ('blocks', ))])
        finally:
            if benji_obj:
                benji_obj.close()
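hints_from_rbd_diff parses the JSON that 'rbd diff --format=json' prints: a list of changed extents. A hedged sketch of such a parser (the real benji helper may differ):

    import json

    # Hedged sketch: 'rbd diff --format=json' emits a list of
    # {"offset": ..., "length": ..., "exists": "true"/"false"} entries, one per
    # changed extent, which map naturally to (offset, length, exists) hints.
    def hints_from_rbd_diff(rbd_diff: str):
        return [(entry['offset'], entry['length'], entry['exists'] == 'true')
                for entry in json.loads(rbd_diff)]

    print(hints_from_rbd_diff('[{"offset": 0, "length": 4096, "exists": "true"}]'))
    # [(0, 4096, True)]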
Example #7
 def version_info(self) -> None:
     if not self.machine_output:
         logger.info('Benji version: {}.'.format(__version__))
         logger.info('Configuration version: {}, supported {}.'.format(VERSIONS.configuration.current,
                                                                       VERSIONS.configuration.supported))
         logger.info('Metadata version: {}, supported {}.'.format(VERSIONS.database_metadata.current,
                                                                  VERSIONS.database_metadata.supported))
         logger.info('Object metadata version: {}, supported {}.'.format(VERSIONS.object_metadata.current,
                                                                         VERSIONS.object_metadata.supported))
     else:
         result = {
             'version': __version__,
             'configuration_version': {
                 'current': str(VERSIONS.configuration.current),
                 'supported': str(VERSIONS.configuration.supported)
             },
             'database_metadata_version': {
                 'current': str(VERSIONS.database_metadata.current),
                 'supported': str(VERSIONS.database_metadata.supported)
             },
             'object_metadata_version': {
                 'current': str(VERSIONS.object_metadata.current),
                 'supported': str(VERSIONS.object_metadata.supported)
             },
         }
         print(json.dumps(result, indent=4))
Example #8
    def __init__(self, *, config: Config, name: str, module_configuration: ConfigDict) -> None:
        self._name = name
        self._active_transforms: List[TransformBase] = []

        active_transforms = Config.get_from_dict(module_configuration, 'activeTransforms', None, types=list)
        if active_transforms is not None:
            for transform in active_transforms:
                self._active_transforms.append(TransformFactory.get_by_name(transform))
            logger.info('Active transforms for storage {}: {}.'.format(
                name,
                ', '.join('{} ({})'.format(transform.name, transform.module) for transform in self._active_transforms)))

        simultaneous_writes = Config.get_from_dict(module_configuration, 'simultaneousWrites', types=int)
        simultaneous_reads = Config.get_from_dict(module_configuration, 'simultaneousReads', types=int)
        simultaneous_removals = Config.get_from_dict(module_configuration, 'simultaneousRemovals', types=int)
        bandwidth_read = Config.get_from_dict(module_configuration, 'bandwidthRead', types=int)
        bandwidth_write = Config.get_from_dict(module_configuration, 'bandwidthWrite', types=int)

        self._consistency_check_writes = Config.get_from_dict(module_configuration,
                                                              'consistencyCheckWrites',
                                                              False,
                                                              types=bool)

        hmac_key_encoded = Config.get_from_dict(module_configuration, 'hmac.key', None, types=str)
        hmac_key: Optional[bytes] = None
        if hmac_key_encoded is None:
            hmac_password = Config.get_from_dict(module_configuration, 'hmac.password', None, types=str)
            if hmac_password is not None:
                hmac_kdf_salt = base64.b64decode(Config.get_from_dict(module_configuration, 'hmac.kdfSalt', types=str))
                hmac_kdf_iterations = Config.get_from_dict(module_configuration, 'hmac.kdfIterations', types=int)
                hmac_key = derive_key(salt=hmac_kdf_salt,
                                      iterations=hmac_kdf_iterations,
                                      key_length=32,
                                      password=hmac_password)
        else:
            hmac_key = base64.b64decode(hmac_key_encoded)
        self._dict_hmac: Optional[DictHMAC] = None
        if hmac_key is not None:
            logger.info('Enabling HMAC object metadata integrity protection for storage {}.'.format(name))
            self._dict_hmac = DictHMAC(hmac_key=self._HMAC_KEY, secret_key=hmac_key)

        self.read_throttling = TokenBucket()
        self.read_throttling.set_rate(bandwidth_read)  # 0 disables throttling
        self.write_throttling = TokenBucket()
        self.write_throttling.set_rate(bandwidth_write)  # 0 disables throttling

        self._read_executor = JobExecutor(name='Storage-Read', workers=simultaneous_reads, blocking_submit=False)
        self._write_executor = JobExecutor(name='Storage-Write', workers=simultaneous_writes, blocking_submit=True)
        self._remove_executor = JobExecutor(name='Storage-Remove', workers=simultaneous_removals, blocking_submit=True)
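The hmac.password path derives the HMAC key from a password, salt, and iteration count, which is the shape of a PBKDF2-style KDF. A hedged sketch of derive_key under that assumption (benji's actual hash choice and signature may differ):

    import base64
    import hashlib

    # Assumed PBKDF2-HMAC construction; the real derive_key may differ.
    def derive_key(*, salt: bytes, iterations: int, key_length: int, password: str) -> bytes:
        return hashlib.pbkdf2_hmac('sha512', password.encode('utf-8'), salt, iterations, dklen=key_length)

    hmac_key = derive_key(salt=base64.b64decode('cGVwcGVy'), iterations=100000, key_length=32, password='secret')
    print(len(hmac_key))  # 32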
Example #9
 def label(self, version_uid: str, labels: List[str]) -> None:
     version_uid_obj = VersionUid(version_uid)
     label_add, label_remove = InputValidation.parse_and_validate_labels(
         labels)
     with Benji(self.config) as benji_obj:
         for name, value in label_add:
             benji_obj.add_label(version_uid_obj, name, value)
         for name in label_remove:
             benji_obj.rm_label(version_uid_obj, name)
         if label_add:
             logger.info('Added label(s) to version {}: {}.'.format(
                 version_uid_obj, ', '.join('{}={}'.format(name, value)
                                            for name, value in label_add)))
         if label_remove:
             logger.info('Removed label(s) from version {}: {}.'.format(
                 version_uid_obj, ', '.join(label_remove)))
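parse_and_validate_labels splits the label arguments into additions and removals. A heavily hedged sketch of one plausible convention (the real benji syntax and validation rules may differ): 'key=value' adds a label, a bare 'key' removes one.

    from typing import List, Tuple

    # Hypothetical parse_and_validate_labels-style helper; benji's actual
    # removal syntax and validation may differ.
    def parse_labels(labels: List[str]) -> Tuple[List[Tuple[str, str]], List[str]]:
        label_add, label_remove = [], []
        for label in labels:
            if '=' in label:
                name, value = label.split('=', 1)
                label_add.append((name, value))
            else:
                label_remove.append(label)
        return label_add, label_remove

    print(parse_labels(['env=prod', 'obsolete']))  # ([('env', 'prod')], ['obsolete'])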
Example #10
    def backup(self, version_uid: str, volume: str, snapshot: str, source: str,
               rbd_hints: str, base_version_uid: str, block_size: int,
               labels: List[str], storage: str) -> None:
        if version_uid is None:
            version_uid = '{}-{}'.format(volume[:248], random_string(6))
        version_uid_obj = VersionUid(version_uid)
        base_version_uid_obj = VersionUid(
            base_version_uid) if base_version_uid else None

        if labels:
            label_add, label_remove = InputValidation.parse_and_validate_labels(
                labels)
        with Benji(self.config) as benji_obj:
            hints = None
            if rbd_hints:
                logger.debug(f'Loading RBD hints from file {rbd_hints}.')
                with open(rbd_hints, 'r') as f:
                    hints = hints_from_rbd_diff(f.read())
            backup_version = benji_obj.backup(
                version_uid=version_uid_obj,
                volume=volume,
                snapshot=snapshot,
                source=source,
                hints=hints,
                base_version_uid=base_version_uid_obj,
                storage_name=storage,
                block_size=block_size)

            if labels:
                for key, value in label_add:
                    benji_obj.add_label(backup_version.uid, key, value)
                for key in label_remove:
                    benji_obj.rm_label(backup_version.uid, key)
                if label_add:
                    logger.info('Added label(s) to version {}: {}.'.format(
                        backup_version.uid,
                        ', '.join('{}={}'.format(name, value)
                                  for name, value in label_add)))
                if label_remove:
                    logger.info('Removed label(s) from version {}: {}.'.format(
                        backup_version.uid, ', '.join(label_remove)))

            if self.machine_output:
                benji_obj.export_any({'versions': [backup_version]},
                                     sys.stdout,
                                     ignore_relationships=(((Version, ),
                                                            ('blocks', )), ))
Example #11
    def task_with_blocks(self,
                         task: str,
                         *,
                         version_uid: str,
                         blocks_done: int,
                         blocks_count: int,
                         per_thousand: int = 1000) -> None:

        log_every_blocks = max(
            1, blocks_count // max(1, int(1000 / per_thousand)))
        if per_thousand == 1000 or blocks_done % log_every_blocks == 0 or blocks_done == 1 or blocks_done == blocks_count:
            message = '{} {}/{} blocks ({:.1f}%)'.format(
                task, blocks_done, blocks_count,
                blocks_done / blocks_count * 100)

            logger.info(message)
            self._setproctitle('{} - {}'.format(message, version_uid))
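A worked example of the throttling arithmetic (hypothetical numbers): with per_thousand=10, a message is emitted roughly every 1% of progress.

    # blocks_count and per_thousand are hypothetical here.
    blocks_count = 5000
    per_thousand = 10
    log_every_blocks = max(1, blocks_count // max(1, int(1000 / per_thousand)))
    print(log_every_blocks)  # 50 -> a progress message about every 50th block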
Example #12
    def test(self):
        benji_obj = self.benjiOpen()
        store = BenjiStore(benji_obj)
        addr = ('127.0.0.1', self.SERVER_PORT)
        read_only = False
        self.nbd_server = NbdServer(addr, store, read_only)
        logger.info("Starting to serve NBD on %s:%s" % (addr[0], addr[1]))

        self.subprocess_run(args=['sudo', 'modprobe', 'nbd'])

        self.nbd_client_thread = threading.Thread(target=self.nbd_client, daemon=True, args=(self.version_uid,))
        self.nbd_client_thread.start()
        self.nbd_server.serve_forever()
        self.nbd_client_thread.join()

        self.assertEqual({self.version_uid[0], VersionUid(2)}, set([version.uid for version in benji_obj.ls()]))

        benji_obj.close()
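The control flow of this test in miniature: serve_forever() blocks the main thread until another thread stops the server (here the client thread calls self.nbd_server.stop(), see Example #3). The same pattern with the standard library:

    import socketserver
    import threading

    # serve_forever() returns only after another thread requests shutdown().
    server = socketserver.TCPServer(('127.0.0.1', 0), socketserver.BaseRequestHandler)
    stopper = threading.Thread(target=server.shutdown, daemon=True)
    stopper.start()
    server.serve_forever()  # returns once shutdown() has been processed
    stopper.join()
    server.server_close()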
Example #13
 def set_version(self, version_uid, *, valid=None, protected=None):
     try:
         version = self.get_version(version_uid)
         if valid is not None:
             version.valid = valid
         if protected is not None:
             version.protected = protected
         self._session.commit()
         if valid is not None:
             logger_func = logger.info if valid else logger.error
             logger_func('Marked version {} as {}.'.format(
                 version_uid.readable, 'valid' if valid else 'invalid'))
         if protected is not None:
             logger.info('Marked version {} as {}.'.format(
                 version_uid.readable,
                 'protected' if protected else 'unprotected'))
     except:
         self._session.rollback()
         raise
Example #14
 def label(self, version_uid: str, labels: List[str]) -> None:
     version_uid_obj = VersionUid(version_uid)
     label_add, label_remove = InputValidation.parse_and_validate_labels(labels)
     benji_obj = None
     try:
         benji_obj = Benji(self.config)
         for name, value in label_add:
             benji_obj.add_label(version_uid_obj, name, value)
         for name in label_remove:
             benji_obj.rm_label(version_uid_obj, name)
         if label_add:
             logger.info('Added label(s) to version {}: {}.'.format(
                 version_uid_obj.v_string, ', '.join(['{}={}'.format(name, value) for name, value in label_add])))
         if label_remove:
             logger.info('Removed label(s) from version {}: {}.'.format(version_uid_obj.v_string,
                                                                        ', '.join(label_remove)))
     finally:
         if benji_obj:
             benji_obj.close()
Example #15
    def _log_compression_statistics(self):
        if self.active_compression is None or self._compression_statistics['objects_considered'] == 0:
            return

        overall_ratio, ratio = 0.0, 0.0
        if self._compression_statistics['data_out'] > 0:
            overall_ratio = self._compression_statistics['data_in'] / self._compression_statistics['data_out']

        if self._compression_statistics['data_out_compression'] > 0:
            ratio = self._compression_statistics['data_in_compression'] \
                    / self._compression_statistics['data_out_compression']

        tbl = PrettyTable()
        tbl.field_names = [
            'Objects considered', 'Objects compressed', 'Data in', 'Data out',
            'Overall compression ratio', 'Data input to compression',
            'Data output from compression', 'Compression ratio'
        ]
        for field_name in tbl.field_names:
            tbl.align[field_name] = 'r'
        tbl.add_row([
            self._compression_statistics['objects_considered'],
            self._compression_statistics['objects_compressed'],
            self._compression_statistics['data_in'],
            self._compression_statistics['data_out'],
            '{:.2f}'.format(overall_ratio),
            self._compression_statistics['data_in_compression'],
            self._compression_statistics['data_out_compression'],
            '{:.2f}'.format(ratio)
        ])
        logger.info('Compression statistics:  \n' +
                    textwrap.indent(str(tbl), '          '))
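The two ratios answer different questions: the overall ratio includes objects that were stored uncompressed, while the compression ratio covers only data that actually went through the compressor. A worked example with hypothetical byte counts:

    # Hypothetical byte counts; 200 bytes passed through uncompressed.
    stats = {
        'data_in': 1000,               # all object data received
        'data_out': 400,               # all object data stored
        'data_in_compression': 800,    # only data fed to the compressor
        'data_out_compression': 200,   # only the compressor's output
    }
    print('{:.2f}'.format(stats['data_in'] / stats['data_out']))  # 2.50
    print('{:.2f}'.format(stats['data_in_compression'] / stats['data_out_compression']))  # 4.00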
Example #16
 def task_with_version(self, task: str, *, version_uid: str) -> None:
     logger.info(task)
     self._setproctitle('{} - {}'.format(task, version_uid))
Example #17
 def task(self, task: str):
     logger.info(task)
     self._setproctitle(task)
Example #18
 def _bulk_scrub(self, method, names, tags, version_percentage,
                 block_percentage):
     if version_percentage:
         version_percentage = int(version_percentage)
     if block_percentage:
         block_percentage = int(block_percentage)
     history = BlockUidHistory()
     benji_obj = None
     try:
         benji_obj = Benji(self.config)
         versions = []
         if names:
             for name in names:
                 versions.extend(
                     benji_obj.ls(version_name=name, version_tags=tags))
         else:
             versions.extend(benji_obj.ls(version_tags=tags))
         errors = []
         if version_percentage and versions:
             # Will always scrub at least one matching version
             versions = random.sample(
                 versions,
                 max(1, int(len(versions) * version_percentage / 100)))
         if not versions:
             logger.info('No matching versions found.')
         for version in versions:
             try:
                 logger.info('Scrubbing version {} with name {}.'.format(
                     version.uid.readable, version.name))
                 getattr(benji_obj, method)(version.uid,
                                            block_percentage=block_percentage,
                                            history=history)
             except benji.exception.ScrubbingError as exception:
                 logger.error(exception)
                 errors.append(version)
         if errors:
             if self.machine_output:
                 benji_obj.export_any(
                     {
                         'versions': [
                             benji_obj.ls(version_uid=version.uid)[0]
                             for version in versions
                         ],
                         'errors': [
                             benji_obj.ls(version_uid=version.uid)[0]
                             for version in errors
                         ]
                     },
                     sys.stdout,
                     ignore_relationships=[((Version, ), ('blocks', ))])
             raise benji.exception.ScrubbingError(
                 'One or more versions had scrubbing errors: {}.'.format(
                     ', '.join([version.uid.readable
                                for version in errors])))
         else:
             if self.machine_output:
                 benji_obj.export_any(
                     {
                         'versions': [
                             benji_obj.ls(version_uid=version.uid)[0]
                             for version in versions
                         ],
                         'errors': []
                     },
                     sys.stdout,
                     ignore_relationships=[((Version, ), ('blocks', ))])
     finally:
         if benji_obj:
             benji_obj.close()
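Note how the sampling step never rounds down to zero versions, which is what the 'Will always scrub at least one matching version' comment refers to. A worked example with hypothetical numbers:

    import random

    versions = list(range(7))
    version_percentage = 10
    sample = random.sample(versions, max(1, int(len(versions) * version_percentage / 100)))
    print(len(sample))  # 1, because int(7 * 10 / 100) == 0 and max(1, 0) == 1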
Example #19
    def filter(self, versions):
        # Category labels without latest
        categories = [
            category for category in self.rules.keys() if category != 'latest'
        ]

        for category in categories:
            setattr(self, '_{}_dict'.format(category), defaultdict(list))

        # Make our own copy
        versions = list(versions)
        # Sort from youngest to oldest
        versions.sort(key=lambda version: version.date.timestamp(),
                      reverse=True)

        # Remove latest versions from consideration if configured
        if 'latest' in self.rules:
            logger.debug('Keeping {} latest versions.'.format(
                self.rules['latest']))
            del versions[:self.rules['latest']]

        dismissed_versions = []
        for version in versions:
            if version.protected:
                logger.info(
                    'Not considering version {}, it is protected.'.format(
                        version.uid.readable))
                continue

            try:
                td = _Timedelta(version.date.timestamp(), self.reference_time)
            except _TimedeltaError as exception:
                # Err on the safe side and ignore this version (i.e. it won't be dismissed).
                logger.warning('Version {}: {}'.format(version.uid.readable,
                                                       exception))
                continue

            logger.debug(
                'Time and time delta for version {} are {} and {}.'.format(
                    version.uid.readable, version.date, td))

            for category in categories:
                timecount = getattr(td, category)
                if timecount <= self.rules[category]:
                    logger.debug(
                        'Found matching category {}, timecount {}.'.format(
                            category, timecount))
                    getattr(
                        self,
                        '_{}_dict'.format(category))[timecount].append(version)
                    break
            else:
                # For loop did not break: The item doesn't fit into any category,
                # it's too old
                dismissed_versions.append(version)
                logger.debug(
                    'Dismissing version, it doesn\'t fit into any category.')

        for category in categories:
            category_dict = getattr(self, '_{}_dict'.format(category))
            for timecount in category_dict:
                # Keep the oldest of each category, reject the rest
                dismissed_versions.extend(category_dict[timecount][:-1])

        return dismissed_versions
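Within each timecount bucket the versions are appended youngest-to-oldest (the input list was sorted that way), so the [:-1] slice dismisses everything except the oldest version of each bucket. The step in isolation, with hypothetical version names:

    from collections import defaultdict

    # Versions arrive youngest-to-oldest; keep only the oldest per bucket.
    buckets = defaultdict(list)
    for name, timecount in [('v4', 0), ('v3', 0), ('v2', 1), ('v1', 2)]:
        buckets[timecount].append(name)
    dismissed = []
    for timecount in buckets:
        dismissed.extend(buckets[timecount][:-1])
    print(dismissed)  # ['v4'] -- 'v3' is kept as the oldest of bucket 0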
Example #20
    def __init__(self, config):
        self.encryption = {}
        self.compression = {}
        self.active_encryption = None
        self.active_compression = None

        encryption_modules = config.get('dataBackend.encryption',
                                        None,
                                        types=list)
        if encryption_modules is not None:
            for encryption_module_dict in encryption_modules:
                type = config.get_from_dict(encryption_module_dict,
                                            'type',
                                            types=str)
                identifier = config.get_from_dict(encryption_module_dict,
                                                  'identifier',
                                                  types=str)
                materials = config.get_from_dict(encryption_module_dict,
                                                 'materials',
                                                 types=dict)
                try:
                    encryption_module = importlib.import_module('{}.{}'.format(
                        self._ENCRYPTION_PACKAGE_PREFIX, type))
                except ImportError:
                    raise ConfigurationError(
                        'Module file {}.{} not found or related import error.'.
                        format(self._ENCRYPTION_PACKAGE_PREFIX, type))
                else:
                    if type != encryption_module.Encryption.NAME:
                        raise InternalError(
                            'Encryption module type and name don\'t agree ({} != {}).'
                            .format(type, encryption_module.Encryption.NAME))

                    self.encryption[identifier] = encryption_module.Encryption(
                        identifier=identifier, materials=materials)

        active_encryption = config.get(
            'dataBackend.{}.activeEncryption'.format(self.NAME),
            None,
            types=str)
        if active_encryption is not None:
            if self.encryption and active_encryption in self.encryption:
                logger.info(
                    'Encryption is enabled for the {} data backend.'.format(
                        self.NAME))
                self.active_encryption = self.encryption[active_encryption]
            else:
                raise ConfigurationError(
                    'Encryption identifier {} is unknown.'.format(
                        active_encryption))

        compression_modules = config.get('dataBackend.compression',
                                         None,
                                         types=list)
        if compression_modules is not None:
            for compression_module_dict in compression_modules:
                type = config.get_from_dict(compression_module_dict,
                                            'type',
                                            types=str)
                materials = config.get_from_dict(compression_module_dict,
                                                 'materials',
                                                 None,
                                                 types=dict)
                try:
                    compression_module = importlib.import_module(
                        '{}.{}'.format(self._COMPRESSION_PACKAGE_PREFIX, type))
                except ImportError:
                    raise ConfigurationError(
                        'Module file {}.{} not found or related import error.'.
                        format(self._COMPRESSION_PACKAGE_PREFIX, type))
                else:
                    if type != compression_module.Compression.NAME:
                        raise InternalError(
                            'Compression module type and name don\'t agree ({} != {}).'
                            .format(type, compression_module.Compression.NAME))

                    self.compression[type] = compression_module.Compression(
                        materials=materials)

        active_compression = config.get(
            'dataBackend.{}.activeCompression'.format(self.NAME),
            None,
            types=str)
        if active_compression is not None:
            if self.compression and active_compression in self.compression:
                logger.info(
                    'Compression is enabled for the {} data backend.'.format(
                        self.NAME))
                self.active_compression = self.compression[active_compression]
            else:
                raise ConfigurationError(
                    'Compression type {} is unknown.'.format(
                        active_compression))

        simultaneous_writes = config.get('dataBackend.simultaneousWrites',
                                         types=int)
        simultaneous_reads = config.get('dataBackend.simultaneousReads',
                                        types=int)
        bandwidth_read = config.get('dataBackend.bandwidthRead', types=int)
        bandwidth_write = config.get('dataBackend.bandwidthWrite', types=int)

        self._consistency_check_writes = config.get(
            'dataBackend.consistencyCheckWrites', False, types=bool)

        self._compression_statistics = {
            'objects_considered': 0,
            'objects_compressed': 0,
            'data_in': 0,
            'data_out': 0,
            'data_in_compression': 0,
            'data_out_compression': 0
        }

        self.read_throttling = TokenBucket()
        self.read_throttling.set_rate(bandwidth_read)  # 0 disables throttling
        self.write_throttling = TokenBucket()
        self.write_throttling.set_rate(
            bandwidth_write)  # 0 disables throttling

        self._read_executor = ThreadPoolExecutor(
            max_workers=simultaneous_reads,
            thread_name_prefix='DataBackend-Reader')
        self._read_futures = []
        self._read_semaphore = BoundedSemaphore(simultaneous_reads +
                                                self.READ_QUEUE_LENGTH)

        self._write_executor = ThreadPoolExecutor(
            max_workers=simultaneous_writes,
            thread_name_prefix='DataBackend-Writer')
        self._write_futures = []
        self._write_semaphore = BoundedSemaphore(simultaneous_writes +
                                                 self.WRITE_QUEUE_LENGTH)
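A hedged sketch of a TokenBucket with the semantics the comments above imply (set_rate(0) disables throttling); benji's real class may differ. The idea: tokens accumulate at the configured rate, a consumer takes tokens per unit of work and sleeps off any deficit.

    import time

    # Minimal token bucket; assumed semantics, not benji's actual implementation.
    class TokenBucket:

        def __init__(self):
            self._rate = 0
            self._tokens = 0.0
            self._last = time.monotonic()

        def set_rate(self, rate: int) -> None:
            self._rate = rate
            self._tokens = float(rate)

        def consume(self, amount: int) -> float:
            """Take amount tokens; return the seconds the caller should sleep."""
            if self._rate == 0:  # 0 disables throttling
                return 0.0
            now = time.monotonic()
            self._tokens = min(float(self._rate), self._tokens + (now - self._last) * self._rate)
            self._last = now
            self._tokens -= amount
            return max(0.0, -self._tokens / self._rate)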
Example #21
    def get_delete_candidates(self, dt=3600):
        rounds = 0
        false_positives_count = 0
        hit_list_count = 0
        # Only blocks that were queued for deletion at least dt seconds ago qualify.
        cutoff = datetime.datetime.utcnow() - datetime.timedelta(seconds=dt)
        while True:
            # http://stackoverflow.com/questions/7389759/memory-efficient-built-in-sqlalchemy-iterator-generator
            delete_candidates = self._session.query(DeletedBlock)\
                .filter(DeletedBlock.date < cutoff)\
                .limit(250)\
                .all()
            if not delete_candidates:
                break

            false_positives = set()
            hit_list = set()
            for candidate in delete_candidates:
                rounds += 1
                if rounds % 1000 == 0:
                    logger.info(
                        "Cleanup-fast: {} false positives, {} data deletions.".
                        format(
                            false_positives_count,
                            hit_list_count,
                        ))

                block = self._session.query(Block)\
                    .filter(Block.uid == candidate.uid)\
                    .limit(1)\
                    .scalar()
                if block:
                    false_positives.add(candidate.uid)
                    false_positives_count += 1
                else:
                    hit_list.add(candidate.uid)
                    hit_list_count += 1

            if false_positives:
                logger.debug(
                    "Cleanup-fast: Removing {} false positive from delete candidates."
                    .format(len(false_positives)))
                self._session.query(DeletedBlock)\
                    .filter(DeletedBlock.uid.in_(false_positives))\
                    .delete(synchronize_session=False)

            if hit_list:
                logger.debug(
                    "Cleanup-fast: {} delete candidates will be really deleted."
                    .format(len(hit_list)))
                self._session.query(DeletedBlock).filter(
                    DeletedBlock.uid.in_(hit_list)).delete(
                        synchronize_session=False)
                yield hit_list

        self._session.commit()
        logger.info(
            "Cleanup-fast: Cleanup finished. {} false positives, {} data deletions."
            .format(
                false_positives_count,
                hit_list_count,
            ))