def load(self, json_mapping=None, swap_bytes=True, **kwargs):
    """
    Load the mapping from the cluster, from a JSON string
    or from a dictionary.
    """
    if isinstance(json_mapping, string_types):
        raw_mapping = json.loads(json_mapping)
    elif isinstance(json_mapping, dict):
        raw_mapping = json_mapping
    else:
        raw_mapping = self.m0.list(**kwargs)  # pylint: disable=no-member

    for pfx, services_addrs in iteritems(raw_mapping):
        services = list()
        # FIXME: this is REALLY annoying
        # self.prefix_to_base() takes the beginning of the prefix,
        # but here we have to take the end, because meta0 does
        # some byte swapping.
        if swap_bytes:
            base = pfx[4 - self.digits:]
        else:
            base = pfx[:self.digits]
        for svc_addr in services_addrs:
            svc = self.services.get(svc_addr, {"addr": svc_addr})
            services.append(svc)
        self.assign_services(base, services)
        # Deep copy the list
        self.raw_svc_by_base[base] = [str(x) for x in services_addrs]
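# Illustration (not part of the original code): how the two slicing branches
# above differ for a 4-hex-digit prefix when self.digits == 2. The variable
# names below are hypothetical, chosen only for this sketch.
pfx = '0AB4'
digits = 2
base_swapped = pfx[4 - digits:]  # 'B4' -- end of the prefix (byte-swapped meta0 dump)
base_plain = pfx[:digits]        # '0A' -- beginning, as self.prefix_to_base() would take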
def _test_service_decache_all(self, type_):
    all_svc = self.conscience.all_services(type_)
    output = self.openio_admin('%s decache' % type_
                               + self.get_format_opts('json'))
    decached = {s['Id'] for s in json.loads(output)
                if s['Status'] == 'OK'}
    expected = {s['id'] for s in all_svc}
    self.assertEqual(expected, decached)
def process_reply(self, beanstalkd_job_id, encoded_reply):
    reply = json.loads(encoded_reply)

    job_id = reply['job_id']
    task_ids = reply['task_ids']
    task_results = reply['task_results']
    task_errors = reply['task_errors']

    self.logger.debug('Tasks processed (job_id=%s): %s', job_id, task_ids)

    try:
        finished, exc = self.handle_backend_errors(
            self.backend.update_tasks_processed,
            job_id, task_ids, task_errors, task_results)
        if exc is None:
            if finished:
                self.logger.info('Job %s is finished', job_id)
        else:
            self.logger.warn(
                '[job_id=%s] Job has not been updated '
                'with the processed tasks: %s', job_id, exc)
    except Exception:
        self.logger.exception('Error processing reply')

    yield None
def handle_container_listing(self, req, start_response):
    resp = req.get_response(self.app)
    if not resp.is_success or resp.content_type != 'application/json':
        return resp(req.environ, start_response)
    if resp.content_length is None:
        return resp(req.environ, start_response)
    if resp.content_length > MAX_CONTAINER_LISTING_CONTENT_LENGTH:
        self.logger.warn(
            'The content length (%d) of the listing is too long (max=%d)',
            resp.content_length, MAX_CONTAINER_LISTING_CONTENT_LENGTH)
        return resp(req.environ, start_response)
    try:
        listing = json.loads(resp.body)
    except ValueError:
        return resp(req.environ, start_response)

    for item in listing:
        if 'subdir' in item \
                or item.get('content_type') == DELETE_MARKER_CONTENT_TYPE:
            continue
        etag, params = parse_header(item['hash'])
        if 'slo_etag' in params:
            item['slo_etag'] = '"%s"' % params.pop('slo_etag')
            item['hash'] = etag + ''.join(
                '; %s=%s' % kv for kv in params.items())

    resp.body = json.dumps(listing)
    return resp(req.environ, start_response)
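# Illustration (not part of the original module): what the rewrite above does
# to one listing item. parse_header() here is the standard-library cgi version;
# the surrounding code may import it from elsewhere.
from cgi import parse_header

item = {'hash': 'd41d8cd98f00b204e9800998ecf8427e; slo_etag=abc'}
etag, params = parse_header(item['hash'])
if 'slo_etag' in params:
    item['slo_etag'] = '"%s"' % params.pop('slo_etag')
    item['hash'] = etag + ''.join('; %s=%s' % kv for kv in params.items())
# item is now:
# {'hash': 'd41d8cd98f00b204e9800998ecf8427e', 'slo_etag': '"abc"'}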
def put_user_policy(self, account, user, policy, policy_name=''):
    """
    Save an IAM policy for the specified user.

    :param policy: JSON-formatted string
    :type policy: str
    :param policy_name: name of the policy (empty string if not set)
    """
    if not isinstance(policy, str):
        raise TypeError("policy parameter must be a string")
    if not policy_name and not self.allow_empty_policy_name:
        raise ValueError('policy name cannot be empty')
    if policy_name and not self.name_regex.fullmatch(policy_name):
        raise ValueError('policy name does not match %s' % (
            self.name_regex.pattern))
    # XXX: we should also match user name, but unfortunately, when using
    # tempauth, user names have the ':' character between the project name
    # and the actual user name.
    try:
        policy_obj = json.loads(policy)
        policy_obj['UpdateDate'] = time.strftime('%Y-%m-%dT%H:%M:%SZ',
                                                 time.gmtime())
        # Strip spaces and new lines
        policy = json.dumps(policy_obj, separators=(',', ':'))
    except ValueError as err:
        raise ValueError('policy is not JSON-formatted: %s' % err)
    acct_key = self.key_for_account(account)
    policy_key = self.subkey_for_policy(user, policy_name)
    self.redis.conn.hset(acct_key, policy_key, policy.encode('utf-8'))
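# Illustration (not part of the class above): the normalization applied to the
# policy string before it is stored. Whitespace is stripped and an UpdateDate
# field is stamped in; the policy content is made up for the example.
import json
import time

policy = '{\n  "Statement": [{"Effect": "Allow"}]\n}'
policy_obj = json.loads(policy)
policy_obj['UpdateDate'] = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
compact = json.dumps(policy_obj, separators=(',', ':'))
# e.g. '{"Statement":[{"Effect":"Allow"}],"UpdateDate":"2024-01-01T00:00:00Z"}'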
def take_action(self, parsed_args):
    self.log.debug('take_action(%s)', parsed_args)
    super(SetObject, self).take_action(parsed_args)
    container = parsed_args.container
    cid = parsed_args.cid
    obj = parsed_args.object

    if parsed_args.auto:
        container = self.flatns_manager(obj)

    properties = parsed_args.property
    if parsed_args.tagging:
        try:
            tags = jsonlib.loads(parsed_args.tagging)
            if not isinstance(tags, dict):
                raise ValueError()
        except ValueError:
            from oio.common.exceptions import CommandError
            raise CommandError('--tags: Not a JSON object')
        tags_xml = '<Tagging><TagSet>'
        for k, v in tags.items():
            tags_xml += '<Tag><Key>%s</Key><Value>%s</Value></Tag>' \
                % (k, v)
        tags_xml += '</TagSet></Tagging>'
        properties = properties or dict()
        from oio.container.lifecycle import TAGGING_KEY
        properties[TAGGING_KEY] = tags_xml
    self.app.client_manager.storage.object_set_properties(
        self.app.client_manager.account,
        container, obj,
        properties,
        version=parsed_args.object_version,
        clear=parsed_args.clear,
        cid=cid)
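# Illustration (not part of the command above): the XML produced from a parsed
# --tagging JSON object. The tag names and values are made up for the example.
tags = {'env': 'prod', 'team': 'storage'}
tags_xml = '<Tagging><TagSet>'
for k, v in tags.items():
    tags_xml += '<Tag><Key>%s</Key><Value>%s</Value></Tag>' % (k, v)
tags_xml += '</TagSet></Tagging>'
# '<Tagging><TagSet><Tag><Key>env</Key><Value>prod</Value></Tag>'
# '<Tag><Key>team</Key><Value>storage</Value></Tag></TagSet></Tagging>'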
def prepare(self, account, container):
    assert self.req
    if self.req.headers.get('range') is None:
        return
    rnge = ContainerBackup._extract_range(self.req, blocks=None)
    self._range = [rnge[2], rnge[3]]
    self.mode = self.MODE_RANGE

    data = self.redis.get("restore:%s:%s" % (account, container))
    if self._range[0] == 0:
        if data:
            raise UnprocessableEntity(
                "A restoration has already been started")
        self.cur_state = {
            'start': -1,
            'end': -1,
            'manifest': None,
            'entry': None,  # current entry in process
            # block offset when appending on existing object
            'offset_block': 0,
            # block offset in data (w/o headers) when appending
            'offset': 0}
        return

    if not data:
        raise UnprocessableEntity("First segment is not available")

    self.cur_state = json.loads(data, object_pairs_hook=OrderedDict)
    if self._range[0] != self.cur_state['end']:
        raise UnprocessableEntity(
            "Segment was already written "
            "or an error has occurred previously")
    for entry in self.cur_state['manifest']:
        if self._range[0] > entry['end_block']:
            continue
        if self._range[0] == entry['start_block']:
            self.append = False
            self.cur_state['offset_block'] = 0
            self.cur_state['offset'] = 0
            break
        if self._range[0] >= entry['start_block'] \
                + entry['hdr_blocks']:
            self.append = True
            self.cur_state['entry'] = entry
            self.inf = TarInfo()
            self.inf.name = entry['name']
            offset = (self._range[0] - entry['start_block']
                      - entry['hdr_blocks'])
            self.cur_state['offset'] = offset * BLOCKSIZE
            self.inf.size = entry['size'] - offset * BLOCKSIZE
            self.inf.size = min(self.inf.size, self.req_size)
            self.cur_state['offset_block'] = (self._range[0]
                                              - entry['start_block'])
            break
        raise UnprocessableEntity('Header is broken')
def _test_service_decache_one(self, type_):
    all_svc = self.conscience.all_services(type_)
    one = all_svc[0]
    output = self.openio_admin('%s decache %s' % (type_, one['id'])
                               + self.get_format_opts('json'))
    decached = {s['Id'] for s in json.loads(output)
                if s['Status'] == 'OK'}
    expected = set([one['id']])
    self.assertEqual(expected, decached)
def _parse_stats_json(body):
    """Prefix each entry with 'stat.'"""
    body = json.loads(body)
    uuid = body.pop('uuid', None)
    res = {'stat.' + k: body[k] for k in body.keys()}
    if uuid:
        res['tag.uuid'] = uuid
    return res
def safe_decode_job(self, job_id, data):
    try:
        env = json.loads(data)
        env['job_id'] = job_id
        return env
    except Exception as exc:
        self.logger.warn('Failed to decode job %s: %s', job_id, exc)
        return None
def _load_manifest(self, hdrs, account, container):
    assert not hdrs, "invalid sequence in TAR"
    manifest = json.loads(self.read(self.inf.size),
                          object_pairs_hook=OrderedDict)
    self.cur_state['manifest'] = manifest
    if self.mode == self.MODE_RANGE:
        self.cur_state['last_block'] = max(
            [x['end_block'] for x in manifest]) + 1
def _parse_stats_json(body):
    """Prefix each entry with 'stat.'"""
    if isinstance(body, binary_type):
        body = body.decode('utf-8')
    body = json.loads(body)
    uuid = body.pop('uuid', None)
    res = {'stat.' + k: body[k] for k in body.keys()}
    if uuid:
        res['tag.uuid'] = uuid
    return res
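# Illustration (not part of the original module): the transformation performed
# by _parse_stats_json() on a sample statistics payload. The stat names are
# made up for the example.
sample = '{"uuid": "0123-4567", "cnx.client": 2, "req.hits": 10}'
# _parse_stats_json(sample) would return:
# {'stat.cnx.client': 2, 'stat.req.hits': 10, 'tag.uuid': '0123-4567'}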
def on_account_update(self, req):
    account_id = self._get_account_id(req)
    decoded = json.loads(req.get_data())
    metadata = decoded.get('metadata')
    to_delete = decoded.get('to_delete')
    success = self.backend.update_account_metadata(
        account_id, metadata, to_delete)
    if success:
        return Response(status=204)
    return NotFound('Account not found')
def _chunks_from_event(self, job_id, data, **kwargs):
    decoded = json.loads(data)
    container_id = decoded['url']['id']
    content_id = decoded['url']['content']
    more = None
    reply = decoded.get('reply', None)
    if reply:
        more = {'reply': reply}
    for chunk_id_or_pos in decoded['data']['missing_chunks']:
        yield [container_id, content_id, str(chunk_id_or_pos), more]
def _take_action(self, parsed_args):
    import subprocess
    from oio.directory.meta0 import Meta0Client
    from oio.common.json import json

    self.logger.debug("Checking the directory bootstrap.")

    # Get an official dump from the proxy, check its size
    m0 = Meta0Client({"namespace": self.app.options.ns})
    prefixes = m0.list()
    if len(prefixes) != CID_PREFIX_COUNT:
        raise ValueError('Found %d entries in meta0, expected %d' % (
            len(prefixes), CID_PREFIX_COUNT))
    self.logger.info("The proxy serves a full meta0 dump.")

    # contact each M0 to perform a check: any "get" command will
    # fail if the meta0 is not complete. Unfortunately we just have
    # oio-meta0-client to target a specific service.
    for _, host, port, _ in self.filter_services(self.catalog, 'meta0'):
        url = '%s:%d' % (host, port)
        res = subprocess.check_output(
            ['oio-meta0-client', url, 'get', '0000'])
        self.logger.info(res)
    self.logger.info("All meta0 services are complete.")

    # contact each meta0 to check that all the dumps are identical
    dump0 = None
    first = None
    for _, host, port, _ in self.filter_services(self.catalog, 'meta0'):
        url = '%s:%d' % (host, port)
        dump = subprocess.check_output(['oio-meta0-client', url, 'list'])
        if dump0 is None:
            dump0 = dump
            first = url
        elif dump0 != dump:
            raise ValueError('The dump returned by meta0 %s differs from '
                             'the dump returned by %s' % (url, first))
    self.logger.info("All meta0 services serve the same base.")

    # Check all the meta1 are concerned
    reverse_dump = set()
    for _, v in iteritems(json.loads(dump0)):
        for url in v:
            reverse_dump.add(url)
    m1 = {
        ':'.join((descr[1], str(descr[2])))
        for descr in self.filter_services(self.catalog, 'meta1')
    }
    if m1 != reverse_dump:
        raise ValueError('Meta1 used but not visible: %s, '
                         'meta1 visible but not used: %s' % (
                             reverse_dump - m1, m1 - reverse_dump))
    self.logger.info("All meta1 services have been assigned.")
    yield ('OK', None)
def safe_decode_job(self, job_id, data):
    try:
        env = json.loads(data)
        env['job_id'] = job_id
        return env
    except json.JSONDecodeError as exc:
        self.logger.warn('Failed to decode job %s: %s', job_id, exc)
        return None
    except Exception:
        self.logger.exception('Failed to decode job %s', job_id)
        return None
def _rebuilt_chunk_from_event(self, job_id, data, **kwargs):
    decoded = json.loads(data)
    rebuilder_id = decoded.get('rebuilder_id')
    if rebuilder_id != self.rebuilder_id:
        raise ExplicitBury('Wrong rebuilder ID: %s (expected=%s)'
                           % (rebuilder_id, self.rebuilder_id))
    beanstalkd_addr = decoded['beanstalkd']
    chunk = (decoded['cid'], decoded['content_id'],
             decoded['chunk_id_or_pos'], None)
    bytes_processed = decoded.get('bytes_processed', None)
    error = decoded.get('error', None)
    yield beanstalkd_addr, chunk, bytes_processed, error
def on_bucket_update(self, req):
    """
    Update (or delete) bucket metadata.
    """
    bname = self._get_item_id(req, what='bucket')
    decoded = json.loads(req.get_data())
    metadata = decoded.get('metadata')
    to_delete = decoded.get('to_delete')
    info = self.backend.update_bucket_metadata(bname, metadata, to_delete)
    if info is not None:
        return Response(json.dumps(info), mimetype='text/json')
    return NotFound('Bucket not found')
def _event_from_job(self, job_id, data, **kwargs):
    """Decode a JSON string into an event dictionary."""
    # pylint: disable=no-member
    event = json.loads(data)
    type_ = event.get('event')
    # Bury events that should not be there
    if type_ not in self.__class__.supported_events:
        msg = 'Discarding event %s (type=%s)' % (event.get('job_id'), type_)
        self.logger.info(msg)
        raise exceptions.ExplicitBury(msg)
    yield event
def _wait_for_event(self, timeout=REASONABLE_EVENT_DELAY):
    """
    Wait for an event in the oio-improve tube.
    """
    self.beanstalkd.watch(DEFAULT_IMPROVER_TUBE)
    try:
        job_id, data = self.beanstalkd.reserve(timeout=timeout)
    except ResponseError as exc:
        logging.warn('No event read from tube %s: %s',
                     DEFAULT_IMPROVER_TUBE, exc)
        self.fail()
    self.beanstalkd.delete(job_id)
    return Event(json.loads(data))
def on_account_container_update(self, req):
    account_id = self._get_account_id(req)
    d = json.loads(req.get_data())
    name = d.get('name')
    mtime = d.get('mtime')
    dtime = d.get('dtime')
    object_count = d.get('objects')
    bytes_used = d.get('bytes')
    # Exceptions are caught by dispatch_request
    info = self.backend.update_container(account_id, name, mtime, dtime,
                                         object_count, bytes_used)
    result = json.dumps(info)
    return Response(result)
def on_account_container_reset(self, req):
    account_id = self._get_account_id(req)
    data = json.loads(req.get_data())
    name = data.get('name')
    mtime = data.get('mtime')
    dtime = None
    object_count = 0
    bytes_used = 0
    # Exceptions are caught by dispatch_request
    self.backend.update_container(
        account_id, name, mtime, dtime, object_count, bytes_used,
        autocreate_container=False)
    return Response(status=204)
def _do_put_head(self, req, account, container):
    results = self.redis.get("restore:%s:%s" % (account, container))
    if not results:
        return UnprocessableEntity("No restoration in progress")
    in_progress = self.redis.get('restore:%s:%s:lock' % (account,
                                                         container)) or '0'
    results = json.loads(results)
    blocks = sum(i['blocks'] for i in results['manifest'])
    return Response(headers={
        'X-Tar-Size': blocks * BLOCKSIZE,
        'X-Consumed-Size': results['end'] * BLOCKSIZE,
        'X-Upload-In-Progress': in_progress
    }, status=200)
def _unmarshal_job_info(marshalled_job_info):
    job_info = dict(
        job=dict(),
        orchestrator=dict(),
        tasks=dict(),
        errors=dict(),
        results=dict(),
        config=dict())

    for key, value in marshalled_job_info.items():
        split_key = key.decode('utf-8').split('.', 1)
        value = value.decode('utf-8')
        if len(split_key) == 1:
            job_info[split_key[0]] = value
        else:
            job_info[split_key[0]][split_key[1]] = value

    job_main_info = job_info['job']
    job_main_info['ctime'] = float(job_main_info['ctime'])
    job_main_info['mtime'] = float(job_main_info['mtime'])
    job_main_info['request_pause'] = true_value(
        job_main_info['request_pause'])

    job_tasks = job_info['tasks']
    job_tasks['sent'] = int(job_tasks['sent'])
    job_tasks.setdefault('last_sent')
    job_tasks['all_sent'] = true_value(job_tasks['all_sent'])
    job_tasks['processed'] = int(job_tasks['processed'])
    # To have a coherent total if the estimate was not correct
    if job_tasks['all_sent']:
        job_tasks['total'] = job_tasks['sent']
    else:
        job_tasks['total'] = max(job_tasks['sent'],
                                 int(job_tasks['total']))
    job_tasks['is_total_temp'] = true_value(
        job_tasks['is_total_temp'])
    job_tasks.setdefault('total_marker')

    job_errors = job_info['errors']
    for key, value in job_errors.items():
        job_errors[key] = int(value)
    job_results = job_info.get('results', dict())
    for key, value in job_results.items():
        job_results[key] = int(value)

    job_info['config'] = json.loads(job_info['config'])

    return job_info
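# Illustration (not part of the original module): a minimal marshalled hash,
# as returned by a Redis HGETALL, that _unmarshal_job_info() can decode. The
# field names are inferred from the accesses made in the function above; the
# values and the config content are made up for the example.
marshalled = {
    b'job.ctime': b'1577836800.0',
    b'job.mtime': b'1577836900.0',
    b'job.request_pause': b'False',
    b'tasks.sent': b'10',
    b'tasks.all_sent': b'True',
    b'tasks.processed': b'10',
    b'tasks.total': b'10',
    b'tasks.is_total_temp': b'False',
    b'config': b'{"tasks_per_second": 32}',
}
# _unmarshal_job_info(marshalled)['tasks']['total'] == 10
# _unmarshal_job_info(marshalled)['config'] == {'tasks_per_second': 32}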
def _get(self, success=True, timeout=2, event_id=None):
    path = '%s/%s/%s' % (self.acct, self.cnt_name, self.obj_name)
    start = time.time()
    while time.time() - start < timeout:
        res = self.pool.request('GET', 'http://127.0.0.1:9081/' + path)
        if success and res.status == 200:
            obj = json.loads(res.data)
            if not event_id or event_id < obj['eventId']:
                return res, obj
        if not success and res.status == 404:
            return res, None
        time.sleep(0.1)
    self.fail('Timeout waiting for webhook event')
def on_job_create(self, req):
    job_type = req.args.get('type')
    if not job_type:
        raise HTTPBadRequest('Missing job type')

    job_class = JOB_TYPES.get(job_type)
    if job_class is None:
        raise HTTPBadRequest('Unknown job type')

    job_config, lock = job_class.sanitize_config(
        json.loads(req.data or '{}'))

    job_id = self.backend.create(job_type, job_config, lock)
    job_info = self.backend.get_job_info(job_id)
    return Response(
        json.dumps(job_info), mimetype='application/json', status=202)
def load_json(self, json_mapping, **kwargs):
    """
    Load the mapping from a JSON string, from a dictionary,
    or from the cluster.
    """
    if isinstance(json_mapping, string_types):
        raw_mapping = json.loads(json_mapping)
    elif isinstance(json_mapping, dict):
        raw_mapping = json_mapping
    else:
        raw_mapping = self.m0.list(**kwargs)  # pylint: disable=no-member

    for pfx, services_addrs in iteritems(raw_mapping):
        base = pfx[:self.digits]
        self._learn(base, services_addrs)
def extract_chunk_qualities(properties, raw=False):
    """
    Extract chunk quality information from a dictionary
    (or a list) of properties.

    :param properties: properties object.
    :param raw: False if `properties` is a dictionary,
        True if `properties` is a list of "raw" properties.
    """
    if raw:
        properties = {x['key']: x['value'] for x in properties}
    qualities = {
        k[len(CHUNK_SYSMETA_PREFIX):]: json.loads(v)
        for k, v in properties.items()
        if k.startswith(CHUNK_SYSMETA_PREFIX)
    }
    return qualities
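# Illustration (not part of the original module): how a "raw" property list is
# turned into a quality mapping. CHUNK_SYSMETA_PREFIX is the module constant
# used above; the key suffix and the quality fields are illustrative only.
raw_props = [
    {'key': CHUNK_SYSMETA_PREFIX + '0',
     'value': '{"expected_dist": 2, "final_dist": 1}'},
    {'key': 'user.some-other-prop', 'value': 'ignored'},
]
# extract_chunk_qualities(raw_props, raw=True) would return:
# {'0': {'expected_dist': 2, 'final_dist': 1}}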
def on_account_container_update(self, req):
    account_id = self._get_account_id(req)
    data = json.loads(req.get_data())
    name = data.get('name')
    mtime = data.get('mtime')
    dtime = data.get('dtime')
    object_count = data.get('objects')
    bytes_used = data.get('bytes')
    damaged_objects = data.get('damaged_objects')
    missing_chunks = data.get('missing_chunks')
    bucket_name = data.get('bucket')  # can be None
    # Exceptions are caught by dispatch_request
    info = self.backend.update_container(
        account_id, name, mtime, dtime, object_count, bytes_used,
        damaged_objects, missing_chunks, bucket_name=bucket_name)
    result = json.dumps(info)
    return Response(result)
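# Illustration (not part of the original module): a request body the handler
# above can decode. Field names are taken from the .get() calls; the values
# are made up for the example, and 'bucket' may be absent.
sample_body = json.dumps({
    'name': 'mycontainer',
    'mtime': 1577836800.0,
    'dtime': 0,
    'objects': 12,
    'bytes': 42000,
    'damaged_objects': 0,
    'missing_chunks': 0,
    'bucket': 'mybucket',
})
# json.loads(sample_body).get('objects') == 12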
def _take_action(self, parsed_args):
    import subprocess
    from oio.directory.meta0 import Meta0Client
    from oio.common.json import json

    self.logger.debug("Checking the directory bootstrap.")

    # Get an official dump from the proxy, check its size
    m0 = Meta0Client({"namespace": self.app.options.ns})
    prefixes = m0.list()
    assert len(prefixes) == 65536
    self.logger.info("The proxy serves a full meta0 dump.")

    # contact each M0 to perform a check: any "get" command will
    # fail if the meta0 is not complete. Unfortunately we just have
    # oio-meta0-client to target a specific service.
    for t, i, p, s in self.filter_services(self.catalog, 'meta0'):
        url = '%s:%d' % (i, p)
        res = subprocess.check_output(
            ['oio-meta0-client', url, 'get', '0000'])
        self.logger.info(res)
    self.logger.info("All meta0 services are complete.")

    # contact each meta0 to check that all the dumps are identical
    dump0 = None
    for t, i, p, s in self.filter_services(self.catalog, 'meta0'):
        url = '%s:%d' % (i, p)
        dump = subprocess.check_output(['oio-meta0-client', url, 'list'])
        if dump0 is None:
            dump0 = dump
        else:
            assert dump0 == dump
    self.logger.info("All meta0 services serve the same base.")

    # Check all the meta1 are concerned
    reverse_dump = set()
    for _, v in iteritems(json.loads(dump0)):
        for url in v:
            reverse_dump.add(url)
    m1 = list(self.filter_services(self.catalog, 'meta1'))
    # FIXME(FVE): this check does not guarantee items are the same
    assert len(m1) == len(reverse_dump)
    self.logger.info("All meta1 services have been assigned.")
    yield ('OK', None)