Example #1
 def __init__(self, conf, logger, volume, input_file=None, **kwargs):
     # pylint: disable=no-member
     self.conf = conf
     self.logger = logger or get_logger(conf)
     self.namespace = conf['namespace']
     self.volume = volume
     self.input_file = input_file
     self.concurrency = int_value(conf.get('concurrency'),
                                  self.DEFAULT_CONCURRENCY)
     self.success = True
     # exit gracefully
     self.running = True
     signal.signal(signal.SIGINT, self.exit_gracefully)
     signal.signal(signal.SIGTERM, self.exit_gracefully)
     # counters
     self.lock_counters = threading.Lock()
     self.items_processed = 0
     self.errors = 0
     self.total_items_processed = 0
     self.total_errors = 0
     # report
     self.lock_report = threading.Lock()
     self.start_time = 0
     self.last_report = 0
     self.report_interval = int_value(conf.get('report_interval'),
                                      self.DEFAULT_REPORT_INTERVAL)
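All of these snippets rely on the same parse-or-default helpers (int_value, float_value, true_value) to read loosely-typed configuration. Below is a minimal sketch of the int_value contract as observed here; the real oio.common.easy_value helper may handle more edge cases.

# Hypothetical re-implementation of the int_value contract seen above;
# the actual oio.common.easy_value.int_value may differ in error handling.
def int_value(raw, default):
    """Return `raw` coerced to int, or `default` when `raw` is None/empty."""
    if raw is None or raw == '':
        return default
    return int(raw)

conf = {}  # 'concurrency' unset, so the default wins
assert int_value(conf.get('concurrency'), 10) == 10
assert int_value('25', 10) == 25  # config values often arrive as strings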
Example #2
    def list_containers(self,
                        account_id,
                        limit=1000,
                        marker=None,
                        end_marker=None,
                        prefix=None,
                        delimiter=None,
                        s3_buckets_only=False):
        raw_list, _next_marker = self._raw_listing(
            self.clistkey(account_id),
            limit=limit,
            marker=marker,
            end_marker=end_marker,
            prefix=prefix,
            delimiter=delimiter,
            s3_buckets_only=s3_buckets_only)
        pipeline = self.conn_slave.pipeline(True)
        # skip prefix
        for container in [entry for entry in raw_list if not entry[3]]:
            pipeline.hmget(AccountBackend.ckey(account_id, container[0]),
                           'objects', 'bytes', 'mtime')
        res = pipeline.execute()

        i = 0
        for container in raw_list:
            if not container[3]:
                # FIXME(adu) Convert to dict
                container[1] = int_value(res[i][0], 0)
                container[2] = int_value(res[i][1], 0)
                container[4] = float_value(res[i][2], 0.0)
                i += 1

        return raw_list
Example #3
    def sanitize_config(cls, job_config):
        """
            Validate and sanitize the job configuration
            Ex: cast a string as integer, set a default
            Also return the lock id if there is one
        """
        sanitized_job_config = dict()

        tasks_per_second = int_value(job_config.get('tasks_per_second'),
                                     cls.DEFAULT_TASKS_PER_SECOND)
        sanitized_job_config['tasks_per_second'] = tasks_per_second

        tasks_batch_size = int_value(job_config.get('tasks_batch_size'), None)
        if tasks_batch_size is None:
            if tasks_per_second > 0:
                tasks_batch_size = min(tasks_per_second,
                                       cls.MAX_TASKS_BATCH_SIZE)
            else:
                tasks_batch_size = cls.MAX_TASKS_BATCH_SIZE
        elif tasks_batch_size < 1:
            raise ValueError('Tasks batch size should be positive')
        elif tasks_batch_size > cls.MAX_TASKS_BATCH_SIZE:
            raise ValueError('Tasks batch size should be less than %d' %
                             cls.MAX_TASKS_BATCH_SIZE)
        sanitized_job_config['tasks_batch_size'] = tasks_batch_size

        sanitized_job_params, lock = cls.sanitize_params(
            job_config.get('params') or dict())
        sanitized_job_config['params'] = sanitized_job_params

        return sanitized_job_config, lock
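The batch-size logic above is a small clamp-or-derive rule: derive a batch size from the rate when none is given, otherwise enforce the bounds. Here is a self-contained sketch of just that rule; MAX_TASKS_BATCH_SIZE is an assumed stand-in for the class constant.

# Hypothetical stand-alone version of the tasks_batch_size rules above;
# MAX_TASKS_BATCH_SIZE is an invented value, not the real class attribute.
MAX_TASKS_BATCH_SIZE = 32

def sanitize_batch_size(tasks_batch_size, tasks_per_second):
    if tasks_batch_size is None:
        # Derive from the rate, but never exceed the hard maximum.
        if tasks_per_second > 0:
            return min(tasks_per_second, MAX_TASKS_BATCH_SIZE)
        return MAX_TASKS_BATCH_SIZE
    if tasks_batch_size < 1:
        raise ValueError('Tasks batch size should be positive')
    if tasks_batch_size > MAX_TASKS_BATCH_SIZE:
        raise ValueError('Tasks batch size should be less than %d'
                         % MAX_TASKS_BATCH_SIZE)
    return tasks_batch_size

assert sanitize_batch_size(None, 10) == 10  # derived from the rate
assert sanitize_batch_size(None, 0) == 32   # unlimited rate -> maximum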
Example #4
    def init(self):
        self.concurrency = int_value(self.conf.get('concurrency'), 10)
        self.tube = self.conf.get("tube", DEFAULT_TUBE)
        acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 3600)
        self.app_env['account_client'] = AccountClient(
            self.conf,
            logger=self.logger,
            refresh_delay=acct_refresh_interval,
            pool_connections=3,  # 1 account, 1 proxy, 1 extra
        )
        self.app_env['rdir_client'] = RdirClient(
            self.conf,
            logger=self.logger,
            pool_maxsize=self.concurrency,  # 1 cnx per greenthread per host
        )

        if 'handlers_conf' not in self.conf:
            raise ValueError("'handlers_conf' path not defined in conf")
        self.handlers = loadhandlers(self.conf.get('handlers_conf'),
                                     global_conf=self.conf,
                                     app=self)

        for opt in ('acct_update', 'rdir_update', 'retries_per_second',
                    'batch_size'):
            if opt in self.conf:
                self.logger.warn('Deprecated option: %s', opt)

        super(EventWorker, self).init()
Example #5
    def take_action(self, parsed_args):
        from oio.common.easy_value import convert_size
        self.log.debug('take_action(%s)', parsed_args)

        account = self.app.client_manager.account
        self.take_action_container(parsed_args)
        # The command is named 'show' but we must call
        # container_get_properties() because container_show() does
        # not return system properties (and we need them).
        data = self.app.client_manager.storage.container_get_properties(
            account,
            parsed_args.container,
            cid=parsed_args.cid,
            admin_mode=True)
        sys = data['system']
        ctime = float(sys[M2_PROP_CTIME]) / 1000000.
        bytes_usage = sys.get(M2_PROP_USAGE, 0)
        objects = sys.get(M2_PROP_OBJECTS, 0)
        damaged_objects = sys.get(M2_PROP_DAMAGED_OBJECTS, 0)
        missing_chunks = sys.get(M2_PROP_MISSING_CHUNKS, 0)
        if parsed_args.formatter == 'table':
            ctime = int(ctime)
            bytes_usage = convert_size(int(bytes_usage), unit="B")
            objects = convert_size(int(objects))
        info = {
            'account': sys['sys.account'],
            'base_name': sys['sys.name'],
            'container': sys['sys.user.name'],
            'ctime': ctime,
            'bytes_usage': bytes_usage,
            'quota': sys.get(M2_PROP_QUOTA, "Namespace default"),
            'objects': objects,
            'damaged_objects': damaged_objects,
            'missing_chunks': missing_chunks,
            'storage_policy': sys.get(M2_PROP_STORAGE_POLICY,
                                      "Namespace default"),
            'max_versions': sys.get(M2_PROP_VERSIONING_POLICY,
                                    "Namespace default"),
            'status': OIO_DB_STATUS_NAME.get(sys.get('sys.status'), "Unknown"),
        }

        for k in ('stats.page_count', 'stats.freelist_count',
                  'stats.page_size'):
            info[k] = sys.get(k)
        wasted = (float_value(info['stats.freelist_count'], 0) /
                  float_value(info['stats.page_count'], 1))
        wasted_bytes = (int_value(info['stats.freelist_count'], 0) *
                        int_value(info['stats.page_size'], 0))
        info['stats.space_wasted'] = "%5.2f%% (est. %s)" % \
            (wasted * 100, convert_size(wasted_bytes))

        bucket = sys.get(M2_PROP_BUCKET_NAME, None)
        if bucket is not None:
            info['bucket'] = bucket
        delete_exceeding = sys.get(M2_PROP_DEL_EXC_VERSIONS, None)
        if delete_exceeding is not None:
            info['delete_exceeding_versions'] = delete_exceeding != '0'
        for k, v in iteritems(data['properties']):
            info['meta.' + k] = v
        return list(zip(*sorted(info.items())))
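The "space wasted" estimate above is plain arithmetic on SQLite-style stats: the freelist-to-page ratio gives a percentage, and freelist pages times page size gives bytes. A stand-alone sketch with invented numbers:

# Stand-alone sketch of the stats.space_wasted computation above;
# the sample figures are made up for illustration.
def space_wasted(freelist_count, page_count, page_size):
    wasted = float(freelist_count) / float(page_count or 1)
    wasted_bytes = freelist_count * page_size
    return "%5.2f%% (est. %d bytes)" % (wasted * 100, wasted_bytes)

print(space_wasted(12, 480, 4096))  # -> " 2.50% (est. 49152 bytes)"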
Example #6
 def __init__(self,
              conf,
              logger,
              volume,
              input_file=None,
              try_chunk_delete=False,
              beanstalkd_addr=None):
     self.conf = conf
     self.logger = logger or get_logger(conf)
     self.volume = volume
     self.run_time = 0
     self.passes = 0
     self.errors = 0
     self.last_reported = 0
     self.chunks_run_time = 0
     self.bytes_running_time = 0
     self.bytes_processed = 0
     self.total_bytes_processed = 0
     self.total_chunks_processed = 0
     self.dry_run = true_value(conf.get('dry_run', False))
     self.report_interval = int_value(conf.get('report_interval'), 3600)
     self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                            30)
     self.max_bytes_per_second = int_value(conf.get('bytes_per_second'),
                                           10000000)
     self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
     self.allow_same_rawx = true_value(conf.get('allow_same_rawx'))
     self.input_file = input_file
     self.rdir_client = RdirClient(conf, logger=self.logger)
     self.content_factory = ContentFactory(conf)
     self.try_chunk_delete = try_chunk_delete
     self.beanstalkd_addr = beanstalkd_addr
     self.beanstalkd_tube = conf.get('beanstalkd_tube', 'rebuild')
     self.beanstalk = None
Example #7
    def list_containers(self,
                        account_id,
                        limit=1000,
                        marker=None,
                        end_marker=None,
                        prefix=None,
                        delimiter=None):
        raw_list = self._raw_listing(account_id,
                                     limit=limit,
                                     marker=marker,
                                     end_marker=end_marker,
                                     prefix=prefix,
                                     delimiter=delimiter)
        pipeline = self.conn.pipeline(True)
        # skip prefix
        for container in [entry for entry in raw_list if not entry[3]]:
            pipeline.hmget(AccountBackend.ckey(account_id, container[0]),
                           'objects', 'bytes', 'mtime')
        res = pipeline.execute()

        i = 0
        for container in raw_list:
            if not container[3]:
                container[1] = int_value(res[i][0], 0)
                container[2] = int_value(res[i][1], 0)
                container[4] = float_value(res[i][2], 0.0)
                i += 1

        return raw_list
Example #8
 def __init__(self, conf, logger, volume):
     self.conf = conf
     self.logger = logger or get_logger(conf)
     self.volume = volume
     self.run_time = 0
     self.passes = 0
     self.errors = 0
     self.last_reported = 0
     self.last_usage_check = 0
     self.chunks_run_time = 0
     self.bytes_running_time = 0
     self.bytes_processed = 0
     self.total_bytes_processed = 0
     self.total_chunks_processed = 0
     self.usage_target = int_value(conf.get('usage_target'), 0)
     self.usage_check_interval = int_value(conf.get('usage_check_interval'),
                                           3600)
     self.report_interval = int_value(conf.get('report_interval'), 3600)
     self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                            30)
     self.max_bytes_per_second = int_value(conf.get('bytes_per_second'),
                                           10000000)
     self.blob_client = BlobClient()
     self.container_client = ContainerClient(conf, logger=self.logger)
     self.content_factory = ContentFactory(conf)
Example #9
 def __init__(self, conf, logger=None, **kwargs):
     self.conf = conf
     self.logger = logger or get_logger(conf)
     volume = conf.get('volume')
     if not volume:
         raise ConfigurationException('No volume specified for converter')
     self.volume = volume
     self.namespace, self.volume_id = check_volume(self.volume)
     # cache
     self.name_by_cid = CacheDict()
     self.content_id_by_name = CacheDict()
     # client
     self.container_client = ContainerClient(conf, **kwargs)
     self.content_factory = ContentFactory(conf,
                                           self.container_client,
                                           logger=self.logger)
     # stats/logs
     self.errors = 0
     self.passes = 0
     self.total_chunks_processed = 0
     self.start_time = 0
     self.last_reported = 0
     self.report_interval = int_value(conf.get('report_interval'), 3600)
     # speed
     self.chunks_run_time = 0
     self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                            30)
     # backup
     self.no_backup = true_value(conf.get('no_backup', False))
     self.backup_dir = conf.get('backup_dir') or tempfile.gettempdir()
     self.backup_name = 'backup_%s_%f' \
         % (self.volume_id, time.time())
     # dry run
     self.dry_run = true_value(conf.get('dry_run', False))
Example #10
 def __init__(self, conf, **kwargs):
     super(BlobIndexer, self).__init__(conf)
     self.logger = get_logger(conf)
     volume = conf.get('volume')
     if not volume:
         raise exc.ConfigurationException('No volume specified for indexer')
     self.volume = volume
     self.passes = 0
     self.errors = 0
     self.successes = 0
     self.last_reported = 0
     self.total_since_last_reported = 0
     self.chunks_run_time = 0
     self.interval = int_value(
         conf.get('interval'), 300)
     self.report_interval = int_value(
         conf.get('report_interval'), 3600)
     self.max_chunks_per_second = int_value(
         conf.get('chunks_per_second'), 30)
     pm = get_pool_manager(pool_connections=10)
     self.index_client = RdirClient(conf, logger=self.logger,
                                    pool_manager=pm)
     self.namespace, self.volume_id = check_volume(self.volume)
     self.convert_chunks = true_value(conf.get('convert_chunks'))
     if self.convert_chunks:
         converter_conf = self.conf.copy()
         converter_conf['no_backup'] = True
         self.converter = BlobConverter(converter_conf, logger=self.logger,
                                        pool_manager=pm)
     else:
         self.converter = None
Example #11
    def sanitize_params(cls, job_params):
        sanitized_job_params, _ = super(RawxDecommissionJob,
                                        cls).sanitize_params(job_params)

        # specific configuration
        service_id = job_params.get('service_id')
        if not service_id:
            raise ValueError('Missing service ID')
        sanitized_job_params['service_id'] = service_id

        sanitized_job_params['rawx_timeout'] = float_value(
            job_params.get('rawx_timeout'), cls.DEFAULT_RAWX_TIMEOUT)

        sanitized_job_params['min_chunk_size'] = int_value(
            job_params.get('min_chunk_size'), cls.DEFAULT_MIN_CHUNK_SIZE)

        sanitized_job_params['max_chunk_size'] = int_value(
            job_params.get('max_chunk_size'), cls.DEFAULT_MAX_CHUNK_SIZE)

        excluded_rawx = job_params.get('excluded_rawx')
        if excluded_rawx:
            excluded_rawx = excluded_rawx.split(',')
        else:
            excluded_rawx = list()
        sanitized_job_params['excluded_rawx'] = excluded_rawx

        sanitized_job_params['usage_target'] = int_value(
            job_params.get('usage_target'), cls.DEFAULT_USAGE_TARGET)

        sanitized_job_params['usage_check_interval'] = float_value(
            job_params.get('usage_check_interval'),
            cls.DEFAULT_USAGE_CHECK_INTERVAL)

        return sanitized_job_params, 'rawx/%s' % service_id
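Two details of this sanitizer are easy to miss: excluded_rawx is a comma-separated string turned into a list, and the returned lock id namespaces the job under the service. A toy illustration, not the real job class:

# Toy illustration of the CSV split and lock id construction above;
# the parameter values are invented.
params = {'service_id': 'rawx-3', 'excluded_rawx': 'rawx-1,rawx-2'}
excluded = params.get('excluded_rawx')
excluded = excluded.split(',') if excluded else list()
lock = 'rawx/%s' % params['service_id']
assert excluded == ['rawx-1', 'rawx-2']
assert lock == 'rawx/rawx-3'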
Example #12
    def __init__(self, conf, logger, volume, container_ids):
        self.conf = conf
        self.logger = logger
        self.volume = volume
        self.volume_ns, self.volume_id = check_volume(self.volume)
        self.container_ids = container_ids or list()
        self.container_ids = [
            container_id.upper() for container_id in self.container_ids
        ]

        self.namespace = self.conf['namespace']
        if self.namespace != self.volume_ns:
            raise ValueError(
                'Namespace (%s) mismatch with volume namespace (%s)' %
                (self.namespace, self.volume_ns))

        # action
        self.action_name = self.conf['action'].lower()
        if self.action_name == 'insert':
            self.action = self._insert_bean
        elif self.action_name == 'update':
            self.action = self._update_bean
        elif self.action_name == 'check':
            self.action = self._check_bean
        else:
            raise ValueError('Unknown action (%s)' % self.action_name)

        # speed
        self.chunks_run_time = 0
        self.max_chunks_per_second = int_value(
            self.conf.get('chunks_per_second'), self.DEFAULT_CHUNKS_PER_SECOND)

        # counters
        self.chunks_processed = 0
        self.chunk_errors = 0
        self.beans_processed = dict()
        self.bean_successes = dict()
        self.bean_already_exists = dict()
        self.bean_orphans = dict()
        self.bean_errors = dict()
        for bean_type in self.BEAN_TYPES:
            self.beans_processed[bean_type] = 0
            self.bean_successes[bean_type] = 0
            self.bean_already_exists[bean_type] = 0
            self.bean_orphans[bean_type] = 0
            self.bean_errors[bean_type] = 0

        # report
        self.start_time = 0
        self.last_report = 0
        self.report_interval = int_value(conf.get('report_interval'),
                                         self.DEFAULT_REPORT_INTERVAL)

        self.client = ContainerClient({'namespace': self.namespace},
                                      logger=self.logger)
        self.ctime = int(time.time())
Example #13
    def __init__(self, conf, service, **kwargs):
        self.conf = conf
        self.running = False

        for k in ['host', 'port', 'type']:
            if k not in service:
                raise Exception('Missing field "%s" in service configuration' %
                                k)
        self.name = '%s|%s|%s' % \
            (service['type'], service['host'], service['port'])

        self.service = service

        self.rise = int_value(self._load_item_config('rise'), 1)
        self.fall = int_value(self._load_item_config('fall'), 1)
        self.check_interval = float_value(
            self._load_item_config('check_interval'), 1)
        self.deregister_on_exit = true_value(
            self._load_item_config('deregister_on_exit', False))

        self.logger = get_logger(self.conf)
        self.pool_manager = get_pool_manager()
        self.cs = ConscienceClient(self.conf,
                                   pool_manager=self.pool_manager,
                                   logger=self.logger)
        # FIXME: explain that
        self.client = ProxyClient(self.conf,
                                  pool_manager=self.pool_manager,
                                  no_ns_in_url=True,
                                  logger=self.logger)
        self.last_status = False
        self.status = False
        self.failed = False
        self.service_definition = {
            'ns': self.conf['namespace'],
            'type': self.service['type'],
            'addr': '%s:%s' % (self.service['host'], self.service['port']),
            'score': 0,
            'tags': {}
        }
        if self.service.get('slots', None):
            self.service_definition['tags']['tag.slots'] = \
                    ','.join(self.service['slots'])
        for name, tag in (('location', 'tag.loc'),
                          ('service_id', 'tag.service_id'),
                          ('tls', 'tag.tls')):
            if self.service.get(name):
                self.service_definition['tags'][tag] = \
                    self.service[name]

        self.service_checks = list()
        self.service_stats = list()
        self.init_checkers(service)
        self.init_stats(service)
Example #14
 def __init__(self, conf, logger, **kwargs):
     self.conf = conf
     self.logger = logger or get_logger(conf)
     self.passes = 0
     self.errors = 0
     self.last_reported = 0
     self.items_run_time = 0
     self.total_items_processed = 0
     self.waiting_time = 0
     self.rebuilder_time = 0
     self.report_interval = int_value(conf.get('report_interval'), 3600)
     self.max_items_per_second = int_value(conf.get('items_per_second'), 30)
Example #15
    def __init__(self, volume_path, conf, pool_manager=None):
        """
        Initializes an Indexing worker for indexing meta2 databases.

        Possible values of conf relating to this worker are:
        - interval: (int) in sec time between two full scans. Default: half an
                    hour.
        - report_interval: (int) in sec, time between two reports: Default: 300
        - scanned_per_second: (int) maximum number of indexed databases /s.
        - try_removing_faulty_indexes : In the event where we encounter a
            database that's not supposed to be handled by this volume, attempt
            to remove it from this volume rdir index if it exists
            WARNING: The decision is based off of a proxy response, that could
            be affected by cache inconsistencies for example, use at your own
            risk. Default: False

        :param volume_path: The volume path to be indexed
        :param conf: The configuration to be passed to the needed services
        :param pool_manager: A connection pool manager. If none is given, a
                new one with a default size of 10 will be created.
        """
        self.logger = get_logger(conf)
        self._stop = False
        self.volume = volume_path
        self.success_nb = 0
        self.failed_nb = 0
        self.full_scan_nb = 0
        self.last_report_time = 0
        self.last_scan_time = 0
        self.last_index_time = 0
        self.start_time = 0
        self.indexed_since_last_report = 0
        self.scans_interval = int_value(
            conf.get('interval'), 1800)
        self.report_interval = int_value(
            conf.get('report_interval'), 300)
        self.max_indexed_per_second = int_value(
            conf.get('scanned_per_second'), 3000)
        self.namespace, self.volume_id = check_volume_for_service_type(
            self.volume, "meta2")
        self.attempt_bad_index_removal = boolean_value(
            conf.get('try_removing_faulty_indexes', False)
        )

        if not pool_manager:
            pool_manager = get_pool_manager(pool_connections=10)
        self.index_client = RdirClient(conf, logger=self.logger,
                                       pool_manager=pool_manager)
        self.dir_client = DirectoryClient(conf, logger=self.logger,
                                          pool_manager=pool_manager)
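Per the docstring, this worker is tuned entirely through the conf mapping. A minimal sketch of such a configuration, using only the keys documented above (the values shown are examples, not recommendations):

# Hypothetical conf for the meta2 indexing worker, limited to the keys
# the docstring above documents, plus an assumed namespace for the clients.
conf = {
    'namespace': 'OPENIO',                 # assumed required by the clients
    'interval': 1800,                      # full scan every half hour
    'report_interval': 300,                # report every 5 minutes
    'scanned_per_second': 3000,            # indexing rate cap
    'try_removing_faulty_indexes': False,  # keep the risky cleanup off
}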
Example #16
    def sanitize_params(cls, job_params):
        sanitized_job_params, _ = super(TesterJob,
                                        cls).sanitize_params(job_params)

        sanitized_job_params['start'] = int_value(job_params.get('start'),
                                                  cls.DEFAULT_START)

        sanitized_job_params['end'] = int_value(job_params.get('end'),
                                                cls.DEFAULT_END)

        sanitized_job_params['error_percentage'] = int_value(
            job_params.get('error_percentage'), cls.DEFAULT_ERROR_PERCENTAGE)

        return sanitized_job_params, job_params.get('lock', cls.DEFAULT_LOCK)
Example #17
 def __init__(self,
              conf,
              logger,
              volume,
              try_chunk_delete=False,
              beanstalkd_addr=None,
              **kwargs):
     super(BlobRebuilder, self).__init__(conf, logger, volume, **kwargs)
     # rdir
     self.rdir_client = RdirClient(conf, logger=self.logger)
     self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
     # rawx
     self.try_chunk_delete = try_chunk_delete
     # beanstalk
     if beanstalkd_addr:
         self.beanstalkd_listener = BeanstalkdListener(
             beanstalkd_addr,
             conf.get('beanstalkd_tube', DEFAULT_REBUILDER_TUBE),
             self.logger, **kwargs)
     else:
         self.beanstalkd_listener = None
     # counters
     self.bytes_processed = 0
     self.total_bytes_processed = 0
     self.total_expected_chunks = None
     # distributed
     self.distributed = False
Example #18
    def on_job_list(self, req):
        limit = int_value(req.args.get('limit'), None)
        marker = req.args.get('marker')

        job_infos = self.backend.list_jobs(limit=limit, marker=marker)
        return Response(
            json.dumps(job_infos), mimetype='application/json')
Example #19
    def __init__(self, conf, beanstalkd_addr=None, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(self.conf)
        self.namespace = conf['namespace']
        self.success = True

        # counters
        self.items_processed = 0
        self.total_items_processed = 0
        self.errors = 0
        self.total_errors = 0
        self.total_expected_items = None

        # report
        self.start_time = 0
        self.last_report = 0
        self.report_interval = int_value(self.conf.get('report_interval'),
                                         self.DEFAULT_REPORT_INTERVAL)

        # dispatcher
        self.dispatcher = None

        # input
        self.beanstalkd = None
        if beanstalkd_addr:
            self.beanstalkd = BeanstalkdListener(
                beanstalkd_addr,
                self.conf.get('beanstalkd_worker_tube')
                or self.DEFAULT_BEANSTALKD_WORKER_TUBE, self.logger)
Example #20
    def __init__(self, conf, tool):
        super(_LocalDispatcher, self).__init__(conf, tool)

        nb_workers = int_value(self.conf.get('workers'),
                               self.tool.DEFAULT_WORKERS)
        self.max_items_per_second = int_value(
            self.conf.get('items_per_second'),
            self.tool.DEFAULT_ITEM_PER_SECOND)
        self.queue_workers = eventlet.Queue(nb_workers * 2)
        self.queue_reply = eventlet.Queue()

        self.workers = list()
        for _ in range(nb_workers):
            worker = self.tool.create_worker(self.queue_workers,
                                             self.queue_reply)
            self.workers.append(worker)
Example #21
    def run(self):
        coros = []
        queue_url = self.conf.get('queue_url', 'beanstalk://127.0.0.1:11300')
        concurrency = int_value(self.conf.get('concurrency'), 10)

        server_gt = greenthread.getcurrent()

        for url in queue_url.split(';'):
            for i in range(concurrency):
                beanstalk = Beanstalk.from_url(url)
                gt = eventlet.spawn(self.handle, beanstalk)
                gt.link(_eventlet_stop, server_gt, beanstalk)
                coros.append(gt)
                beanstalk, gt = None, None

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                [c.kill(StopServe()) for c in coros]
                [c.wait() for c in coros]
        except Timeout as te:
            if te != t:
                raise
            [c.kill() for c in coros]
Example #22
    def on_account_containers(self, req):
        account_id = self._get_account_id(req)

        info = self.backend.info_account(account_id)
        if not info:
            return NotFound('Account not found')

        marker = req.args.get('marker', '')
        end_marker = req.args.get('end_marker', '')
        prefix = req.args.get('prefix', '')
        limit = max(0, min(ACCOUNT_LISTING_MAX_LIMIT, int_value(
            req.args.get('limit'), 0)))
        if limit <= 0:
            limit = ACCOUNT_LISTING_DEFAULT_LIMIT
        delimiter = req.args.get('delimiter', '')
        s3_buckets_only = true_value(req.args.get('s3_buckets_only', False))

        user_list = self.backend.list_containers(
            account_id, limit=limit, marker=marker, end_marker=end_marker,
            prefix=prefix, delimiter=delimiter,
            s3_buckets_only=s3_buckets_only)

        info['listing'] = user_list
        # TODO(FVE): add "truncated" entry telling if the listing is truncated
        result = json.dumps(info)
        return Response(result, mimetype='text/json')
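The limit handling above clamps the query parameter into [0, ACCOUNT_LISTING_MAX_LIMIT] and falls back to a default when the option is absent or zero. A stand-alone sketch of that clamp, using plain int() plus a guard in place of int_value; the two constants are assumptions mirroring the names used in the handler:

# Stand-alone sketch of the listing-limit clamp above; both constants
# are invented values, not the real module-level ones.
ACCOUNT_LISTING_DEFAULT_LIMIT = 1000
ACCOUNT_LISTING_MAX_LIMIT = 10000

def clamp_limit(raw):
    try:
        limit = max(0, min(ACCOUNT_LISTING_MAX_LIMIT, int(raw)))
    except (TypeError, ValueError):
        limit = 0  # missing or malformed -> fall back to the default
    return limit if limit > 0 else ACCOUNT_LISTING_DEFAULT_LIMIT

assert clamp_limit(None) == 1000      # missing -> default
assert clamp_limit('50000') == 10000  # too big -> max
assert clamp_limit('200') == 200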
Example #23
    def rebuild(self):
        pile = GreenPile(len(self.meta_chunk))

        nb_data = self.storage_method.ec_nb_data

        headers = {}
        for chunk in self.meta_chunk:
            pile.spawn(self._get_response, chunk, headers)

        # Sort all responses according to the chunk size
        total_resps = 0
        resps_by_size = dict()
        resps_without_chunk_size = list()
        for resp in pile:
            if not resp:
                continue
            chunk_size = int_value(
                resp.getheader(CHUNK_HEADERS['chunk_size'], None), None)
            if chunk_size is None:
                self.logger.warning('Missing chunk size')
                resps_without_chunk_size.append(resp)
                continue
            total_resps += 1
            resps_by_size.setdefault(chunk_size, list()).append(resp)
        # Select the chunk with the majority chunk size
        max_resps = 0
        assumed_chunk_size = None
        for chunk_size, size_resps in resps_by_size.items():
            nb_resp = len(size_resps)
            if nb_resp > max_resps:
                max_resps = nb_resp
                assumed_chunk_size = chunk_size
        if assumed_chunk_size is None:
            self.logger.warning(
                'No chunk available with chunk size information')
            resps = list()
        else:
            resps = resps_by_size[assumed_chunk_size]
            if max_resps != total_resps:
                self.logger.warning(
                    '%d/%d chunks are not the same size as others (%d), '
                    'they should be removed',
                    total_resps - max_resps, total_resps, assumed_chunk_size)
        # Check the number of chunks available
        if max_resps < nb_data:
            # Add the chunks without size information
            # assuming they are the correct size
            resps = resps + resps_without_chunk_size
            if len(resps) < nb_data:
                self.logger.error(
                    'Unable to read enough valid sources to rebuild')
                raise exceptions.UnrecoverableContent(
                    'Not enough valid sources to rebuild')
            self.logger.warning(
                'Use chunk(s) without size information to rebuild a chunk')

        rebuild_iter = self._make_rebuild_iter(resps[:nb_data])
        return assumed_chunk_size, rebuild_iter
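The core of this rebuild routine is a majority vote on the chunk-size header. The same selection can be written compactly with collections.Counter; a sketch under the assumption that responses have already been grouped by size as above (the string entries stand in for HTTP responses):

# Sketch of the majority-size vote above using collections.Counter;
# the size groups and response placeholders are invented.
from collections import Counter

resps_by_size = {1048576: ['r1', 'r2', 'r3'], 524288: ['r4']}
votes = Counter({size: len(resps) for size, resps in resps_by_size.items()})
assumed_chunk_size, max_resps = votes.most_common(1)[0]
assert assumed_chunk_size == 1048576 and max_resps == 3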
Example #24
    def __init__(self, conf, beanstalkd_addr=None, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(self.conf)
        self.namespace = conf['namespace']
        self.success = True

        # exit gracefully
        self.running = True
        signal.signal(signal.SIGINT, self.exit_gracefully)
        signal.signal(signal.SIGTERM, self.exit_gracefully)

        # counters
        self.items_processed = 0
        self.total_items_processed = 0
        self.errors = 0
        self.total_errors = 0
        self.total_expected_items = None

        # report
        self.start_time = 0
        self.last_report = 0
        self.report_interval = int_value(self.conf.get(
            'report_interval'), self.DEFAULT_REPORT_INTERVAL)

        # dispatcher
        self.dispatcher = None

        # input
        self.beanstalkd = None
        if beanstalkd_addr:
            self.beanstalkd = BeanstalkdListener(
                beanstalkd_addr,
                self.conf.get('beanstalkd_worker_tube')
                or self.DEFAULT_BEANSTALKD_WORKER_TUBE,
                self.logger)

        # retry
        self.retryer = None
        self.retry_queue = None
        if self.beanstalkd:
            self.retryer = BeanstalkdSender(
                self.beanstalkd.addr, self.beanstalkd.tube, self.logger)
            self.retry_queue = eventlet.Queue()
        self.retry_delay = int_value(self.conf.get('retry_delay'),
                                     self.DEFAULT_RETRY_DELAY)
Example #25
 def __init__(self, conf, logger, volume, input_file=None, **kwargs):
     self.conf = conf
     self.logger = logger or get_logger(conf)
     self.namespace = conf['namespace']
     self.volume = volume
     self.input_file = input_file
     self.nworkers = int_value(conf.get('workers'), 1)
     # counters
     self.lock_counters = threading.Lock()
     self.items_processed = 0
     self.errors = 0
     self.total_items_processed = 0
     self.total_errors = 0
     # report
     self.lock_report = threading.Lock()
     self.start_time = 0
     self.last_report = 0
     self.report_interval = int_value(conf.get('report_interval'), 3600)
Example #26
 def __init__(self, conf, **kwargs):
     super(BlobIndexer, self).__init__(conf)
     self.logger = get_logger(conf)
     volume = conf.get('volume')
     if not volume:
         raise exc.ConfigurationException('No volume specified for indexer')
     self.volume = volume
     self.passes = 0
     self.errors = 0
     self.successes = 0
     self.last_reported = 0
     self.chunks_run_time = 0
     self.interval = int_value(conf.get('interval'), 300)
     self.report_interval = int_value(conf.get('report_interval'), 3600)
     self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                            30)
     self.index_client = RdirClient(conf, logger=self.logger)
     self.namespace, self.volume_id = check_volume(self.volume)
Example #27
 def __init__(self, rebuilder, **kwargs):
     self.rebuilder = rebuilder
     self.conf = rebuilder.conf
     self.logger = rebuilder.logger
     self.namespace = rebuilder.namespace
     self.volume = rebuilder.volume
     self.items_run_time = 0
     self.max_items_per_second = int_value(
         rebuilder.conf.get('items_per_second'), 30)
Example #28
    def list_buckets(self,
                     account_id,
                     limit=1000,
                     marker=None,
                     end_marker=None,
                     prefix=None,
                     **kwargs):
        """
        Get the list of buckets of the specified account.

        :returns: the list of buckets (with metadata), and the next
            marker (in case the list is truncated).
        """
        raw_list, next_marker = self._raw_listing(self.blistkey(account_id),
                                                  limit=limit,
                                                  marker=marker,
                                                  end_marker=end_marker,
                                                  prefix=prefix,
                                                  **kwargs)
        conn = self.get_slave_conn(**kwargs)
        pipeline = conn.pipeline(True)
        for entry in raw_list:
            # For real buckets (not prefixes), fetch metadata.
            if not entry[3]:
                pipeline.hmget(self.bkey(entry[0]), 'objects', 'bytes',
                               'mtime')
        res = pipeline.execute()

        output = list()
        i = 0
        for bucket in raw_list:
            if not bucket[3]:
                bdict = {
                    'name': bucket[0],
                    'objects': int_value(res[i][0], 0),
                    'bytes': int_value(res[i][1], 0),
                    'mtime': float_value(res[i][2], 0.0),
                }
                i += 1
            else:
                bdict = {'prefix': bucket}
            output.append(bdict)

        return output, next_marker
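Since list_buckets returns the next marker along with the page, callers can iterate until the marker comes back empty. A hedged usage sketch: `backend` stands for any object exposing the method above, and 'AUTH_demo' is a made-up account id.

# Hypothetical pagination over list_buckets; not taken from the codebase.
def iter_all_buckets(backend, account_id='AUTH_demo'):
    marker = None
    while True:
        page, next_marker = backend.list_buckets(account_id, limit=1000,
                                                 marker=marker)
        for bucket in page:
            yield bucket
        if not next_marker:  # empty marker means the listing is complete
            break
        marker = next_marker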
Example #29
 def __init__(self, conf, logger, beanstalkd_addr, **kwargs):
     super(BlobImprover, self).__init__(conf, logger, volume=None, **kwargs)
     self.content_factory = ContentFactory(self.conf, logger=self.logger)
     beanstalkd_tube = self.conf.get('beanstalkd_tube',
                                     DEFAULT_IMPROVER_TUBE)
     self.listener = BeanstalkdListener(beanstalkd_addr, beanstalkd_tube,
                                        self.logger, **kwargs)
     self.sender = BeanstalkdSender(beanstalkd_addr, beanstalkd_tube,
                                    self.logger, **kwargs)
     self.retry_delay = int_value(self.conf.get('retry_delay'), 30)
     self.reqid_prefix = 'blob-impr-'
Example #30
    def __init__(self, conf, input_file=None, service_id=None, **kwargs):
        super(Meta2Rebuilder, self).__init__(conf, **kwargs)

        # input
        self.input_file = input_file
        self.meta2_id = service_id

        # rawx/rdir
        self.rdir_client = RdirClient(self.conf, logger=self.logger)
        self.rdir_fetch_limit = int_value(self.conf.get('rdir_fetch_limit'),
                                          self.DEFAULT_RDIR_FETCH_LIMIT)