def send_update(self):
    """Flush queued directory-map updates and removals to the mirror object.

    Batches at most MAX_UPDATE operations per write op; leftover work
    stays queued in self.update_mapping / self.removals for the next call.
    """
    log.debug(f'pending updates: {len(self.update_mapping)}+{len(self.removals)}')
    try:
        with rados.WriteOpCtx() as write_op:
            keys = []
            vals = []
            # take at most MAX_UPDATE mapping updates this round
            batch = list(self.update_mapping.keys())[:MAX_UPDATE]
            for dir_path in batch:
                mapping = self.update_mapping.pop(dir_path)
                keys.append(UpdateDirMapRequest.omap_key(dir_path))
                vals.append(pickle.dumps(mapping))
            self.ioctx.set_omap(write_op, tuple(keys), tuple(vals))
            # fill the remaining batch capacity with removals
            room = MAX_UPDATE - len(batch)
            removals = [UpdateDirMapRequest.omap_key(dir_path)
                        for dir_path in self.removals[:room]]
            self.removals = self.removals[room:]
            self.ioctx.remove_omap_keys(write_op, tuple(removals))
            log.debug(f'applying {len(keys)} updates, {len(removals)} deletes')
            # asynchronous apply; handle_update is invoked on completion
            self.ioctx.operate_aio_write_op(write_op, MIRROR_OBJECT_NAME,
                                            oncomplete=self.handle_update)
    except rados.Error as e:
        log.error(f'UpdateDirMapRequest.send_update exception: {e}')
        self.finish(-e.args[0])
def update_instance_map(self):
    """Apply queued instance additions/removals to the mirror object omap.

    Mirrors send_update(): at most MAX_UPDATE operations per write op,
    additions first, then removals in whatever capacity remains.
    """
    log.debug(f'pending updates: {len(self.instances_added)}+{len(self.instances_removed)}')
    try:
        with rados.WriteOpCtx() as write_op:
            keys = []
            vals = []
            # batch at most MAX_UPDATE additions per write op
            batch = list(self.instances_added.keys())[:MAX_UPDATE]
            for instance_id in batch:
                data = self.instances_added.pop(instance_id)
                keys.append(UpdateInstanceRequest.omap_key(instance_id))
                vals.append(pickle.dumps(data))
            self.ioctx.set_omap(write_op, tuple(keys), tuple(vals))
            # use the remaining batch capacity for removals
            room = MAX_UPDATE - len(batch)
            removals = [UpdateInstanceRequest.omap_key(instance_id)
                        for instance_id in self.instances_removed[:room]]
            self.instances_removed = self.instances_removed[room:]
            self.ioctx.remove_omap_keys(write_op, tuple(removals))
            log.debug(f'applying {len(keys)} updates, {len(removals)} deletes')
            self.ioctx.operate_aio_write_op(write_op, MIRROR_OBJECT_NAME,
                                            oncomplete=self.handle_update)
    except rados.Error as e:
        log.error(f'UpdateInstanceRequest.update_instance_map exception: {e}')
        self.finish(-e.args[0])
def put_device_metrics(self, ioctx, devid, data):
    """Store one metrics sample for *devid* and prune samples older than
    the retention period.

    Sample keys are timestamps (TIME_FORMAT), so omap key order is
    chronological; pruning stops at the first key >= the cutoff.
    """
    cutoff = datetime.utcnow() - timedelta(seconds=int(self.retention_period))
    prune = cutoff.strftime(TIME_FORMAT)
    self.log.debug('put_device_metrics device %s prune %s' % (devid, prune))
    stale = []
    try:
        with rados.ReadOpCtx() as op:
            # fixme: only examines the first 500 keys per call
            omap_iter, ret = ioctx.get_omap_keys(op, "", 500)
            assert ret == 0
            ioctx.operate_read_op(op, devid)
            for omap_key, _ in list(omap_iter):
                if omap_key >= prune:
                    break
                stale.append(omap_key)
    except rados.ObjectNotFound:
        # The object doesn't already exist, no problem.
        pass
    except rados.Error as e:
        # Do not proceed with writes if something unexpected
        # went wrong with the reads.
        self.log.exception("Error reading OMAP: {0}".format(e))
        return
    key = datetime.utcnow().strftime(TIME_FORMAT)
    self.log.debug('put_device_metrics device %s key %s = %s, erase %s'
                   % (devid, key, data, stale))
    with rados.WriteOpCtx() as op:
        ioctx.set_omap(op, (key, ), (str(json.dumps(data)), ))
        if len(stale):
            ioctx.remove_omap_keys(op, tuple(stale))
        ioctx.operate_write_op(op, devid)
def await_starting_gun():
    """Block until every participating thread has checked in as ready.

    Uses the omap of threads_ready_obj as a rendezvous: each thread adds
    its id, then polls until threads_total ids are present.
    """
    # skip this unless there are multiple processes running this test
    if len(thread_id) == 0:
        return
    # if multiple threads write to the object, this is harmless --
    # we just need both objects to exist before touching their omaps
    thrd_id_bytes = bytes('%8s\n' % thread_id, 'utf-8')
    ioctx.write_full(threads_ready_obj, thrd_id_bytes)
    ioctx.write_full(threads_done_obj, thrd_id_bytes)
    # announce that this thread has arrived at the starting gate
    with rados.WriteOpCtx() as op:
        ioctx.set_omap(op, (thread_id,), (b'',))
        ioctx.operate_write_op(op, threads_ready_obj)
    # poll interval scales with how long it takes to start up threads
    sleep_delay = max(threads_total / 10.0, 2)
    poll_count = 0
    while poll_count < poll_timeout:
        poll_count += 1
        threads_ready = count_threads_in_omap(threads_ready_obj)
        if debug:
            print('threads_ready now %d' % threads_ready)
        if threads_ready >= threads_total:
            break
        if debug:
            print('waiting %f sec until next thread count check' % sleep_delay)
        time.sleep(sleep_delay)
    if poll_count >= poll_timeout:
        raise Exception('threads did not become ready within %d polls with interval %f' %
                        (poll_timeout, sleep_delay))
    if debug:
        print('thread %s saw starting gun fired' % thread_id)
    # give threads time to find out that starting gun has fired
    time.sleep(2)
def remove_task(self, ioctx, task, remove_in_memory=True):
    """Delete *task* from the on-disk task omap and, optionally, from the
    in-memory queues.

    Completed (non-failed, non-canceled) tasks are remembered in
    completed_tasks, capped at MAX_COMPLETED_TASKS entries.
    """
    self.log.info("remove_task: task={}".format(str(task)))
    try:
        with rados.WriteOpCtx() as write_op:
            ioctx.remove_omap_keys(write_op, (task.sequence_key, ))
            ioctx.operate_write_op(write_op, RBD_TASK_OID)
    except rados.ObjectNotFound:
        # task object never created -- nothing on disk to remove
        pass
    if not remove_in_memory:
        return
    try:
        del self.tasks_by_id[task.task_id]
        del self.tasks_by_sequence[task.sequence]
    except KeyError:
        return
    # keep a record of the last N tasks to help avoid command replay races
    if not task.failed and not task.canceled:
        self.log.debug("remove_task: moving to completed tasks")
        self.completed_tasks.append(task)
        self.completed_tasks = self.completed_tasks[-MAX_COMPLETED_TASKS:]
def add_task(self, ioctx, message, refs):
    """Create a new task, persist it to the rbd_task omap, and queue it.

    Returns the task's JSON encoding.
    """
    self.log.debug("add_task: message={}, refs={}".format(message, refs))
    # keep drawing uuids until we get one unique across all pools
    task_id = str(uuid.uuid4())
    while task_id in self.tasks_by_id:
        task_id = str(uuid.uuid4())
    self.sequence += 1
    task = Task(self.sequence, task_id, message, refs)
    # persist the task in the rbd_task object's omap
    task_json = task.to_json()
    omap_keys = (task.sequence_key, )
    omap_vals = (str.encode(task_json), )
    self.log.info("adding task: {} {}".format(omap_keys[0], omap_vals[0]))
    with rados.WriteOpCtx() as write_op:
        ioctx.set_omap(write_op, omap_keys, omap_vals)
        ioctx.operate_write_op(write_op, RBD_TASK_OID)
    self.append_task(task)
    self.condition.notify()
    return task_json
def put_device_metrics(self, ioctx, devid, data):
    """Append a metrics sample for *devid* and prune expired samples.

    Samples are stored in the device object's omap keyed by timestamp
    (TIME_FORMAT), so keys sort chronologically; everything older than
    the retention period is removed in the same write op.

    Fixes: the read phase used a bare ``except: pass``, which swallowed
    *every* exception (including KeyboardInterrupt/SystemExit) and hid
    real rados errors, and it shadowed the builtin ``iter``. We now
    catch the specific rados exceptions; an unexpected rados read error
    is logged but the write still proceeds, preserving the original
    best-effort behaviour.
    """
    old_key = datetime.now() - timedelta(
        seconds=int(self.retention_period))
    prune = old_key.strftime(TIME_FORMAT)
    self.log.debug('put_device_metrics device %s prune %s' %
                   (devid, prune))
    erase = []
    try:
        with rados.ReadOpCtx() as op:
            omap_iter, ret = ioctx.get_omap_keys(op, "", 500)  # fixme
            assert ret == 0
            ioctx.operate_read_op(op, devid)
            for key, _ in list(omap_iter):
                if key >= prune:
                    break
                erase.append(key)
    except rados.ObjectNotFound:
        # object doesn't exist yet -- nothing to prune
        pass
    except rados.Error as e:
        # log instead of silently swallowing; still proceed with the write
        self.log.exception("Error reading OMAP: {0}".format(e))
    key = datetime.now().strftime(TIME_FORMAT)
    self.log.debug('put_device_metrics device %s key %s = %s, erase %s' %
                   (devid, key, data, erase))
    with rados.WriteOpCtx() as op:
        ioctx.set_omap(op, (key, ), (str(json.dumps(data)), ))
        if len(erase):
            ioctx.remove_omap_keys(op, tuple(erase))
        ioctx.operate_write_op(op, devid)
def load_from_pool(self, ioctx: rados.Ioctx,
                   namespace_validator: Optional[Callable],
                   image_validator: Optional[Callable]) -> None:
    """Load all schedules from this pool's schedule object.

    Pages through the omap 128 entries at a time; keys that no longer
    map to a valid LevelSpec are collected and removed afterwards.
    """
    pool_id = ioctx.get_pool_id()
    pool_name = ioctx.get_pool_name()
    stale_keys = []
    start_after = ''
    try:
        while True:
            with rados.ReadOpCtx() as read_op:
                self.handler.log.info(
                    "load_schedules: {}, start_after={}".format(
                        pool_name, start_after))
                it, ret = ioctx.get_omap_vals(read_op, start_after, "", 128)
                ioctx.operate_read_op(read_op, self.handler.SCHEDULE_OID)
                entries = list(it)
                for k, v in entries:
                    start_after = k
                    v = v.decode()
                    self.handler.log.info("load_schedule: {} {}".format(k, v))
                    try:
                        try:
                            level_spec = LevelSpec.from_id(
                                self.handler, k, namespace_validator,
                                image_validator)
                        except ValueError:
                            # key no longer resolves -- mark for removal
                            self.handler.log.debug(
                                "Stale schedule key %s in pool %s",
                                k, pool_name)
                            stale_keys.append(k)
                            continue
                        self.level_specs[level_spec.id] = level_spec
                        self.schedules[level_spec.id] = Schedule.from_json(
                            level_spec.name, v)
                    except ValueError:
                        self.handler.log.error(
                            "Failed to decode schedule: pool={}, {} {}".
                            format(pool_name, k, v))
                # empty page means we've consumed the whole omap
                if not entries:
                    break
    except StopIteration:
        pass
    except rados.ObjectNotFound:
        pass
    if stale_keys:
        with rados.WriteOpCtx() as write_op:
            ioctx.remove_omap_keys(write_op, stale_keys)
            ioctx.operate_write_op(write_op, self.handler.SCHEDULE_OID)
def addOmap(ioctx):
    """Write the collected name/id entries into rbd_directory's omap.

    Relies on module-level `keys`/`values`/`needed_count` populated by
    an earlier scan; closes *ioctx* when done.
    """
    if needed_count['name'] > 0 or needed_count['id'] > 0:
        with rados.WriteOpCtx() as write_op:
            ioctx.set_omap(write_op, tuple(keys), tuple(values))
            # skip rw locks: repair tool writes directly to the directory
            write_op.set_flags(rados.LIBRADOS_OPERATION_SKIPRWLOCKS)
            ioctx.operate_write_op(write_op, "rbd_directory")
        print("Added {} name entries, and {} id entries.".format(
            needed_count['name'], needed_count['id']))
    else:
        print("No missing entries.")
    ioctx.close()
def create_mirror_object(rados_inst, pool_id):
    """Exclusively create the mirror object in the given pool.

    Returns -EEXIST when the object is already there (treated as
    success by callers); raises on any other rados failure.
    """
    log.info(f'creating mirror object: {MIRROR_OBJECT_NAME}')
    try:
        with rados_inst.open_ioctx2(pool_id) as ioctx:
            with rados.WriteOpCtx() as write_op:
                # exclusive create fails with EEXIST if already present
                write_op.new(rados.LIBRADOS_CREATE_EXCLUSIVE)
                ioctx.operate_write_op(write_op, MIRROR_OBJECT_NAME)
    except rados.Error as e:
        if e.errno == errno.EEXIST:
            # be graceful
            return -e.errno
        log.error(f'failed to create mirror object: {e}')
        raise Exception(-e.args[0])
def put_device_metrics(self, ioctx: rados.Ioctx, devid: str, data: Any) -> None:
    """Store a SMART sample for *devid*, prune samples past the retention
    period, and refresh the device's cached wear level.

    Samples live in the device object's omap keyed by timestamp
    (TIME_FORMAT), so keys sort chronologically.

    Fixes: the wear-level cache check read ``dev_data.get(wear_level)``
    -- looking the wear-level *value* up as a dict key -- instead of
    ``dev_data.get("wear_level")``, so the comparison was always against
    None and set_device_wear_level() fired on every sample.
    """
    assert devid
    old_key = datetime.utcnow() - timedelta(
        seconds=self.retention_period)
    prune = old_key.strftime(TIME_FORMAT)
    self.log.debug('put_device_metrics device %s prune %s' %
                   (devid, prune))
    erase = []
    try:
        with rados.ReadOpCtx() as op:
            # FIXME
            omap_iter, ret = ioctx.get_omap_keys(op, "", MAX_SAMPLES)
            assert ret == 0
            ioctx.operate_read_op(op, devid)
            for key, _ in list(omap_iter):
                if key >= prune:
                    break
                erase.append(key)
    except rados.ObjectNotFound:
        # The object doesn't already exist, no problem.
        pass
    except rados.Error as e:
        # Do not proceed with writes if something unexpected
        # went wrong with the reads.
        self.log.exception("Error reading OMAP: {0}".format(e))
        return
    key = datetime.utcnow().strftime(TIME_FORMAT)
    self.log.debug('put_device_metrics device %s key %s = %s, erase %s' %
                   (devid, key, data, erase))
    with rados.WriteOpCtx() as op:
        ioctx.set_omap(op, (key,), (str(json.dumps(data)),))
        if len(erase):
            ioctx.remove_omap_keys(op, tuple(erase))
        ioctx.operate_write_op(op, devid)

    # extract wear level (try ATA first, then NVMe)
    wear_level = get_ata_wear_level(data)
    if wear_level is None:
        wear_level = get_nvme_wear_level(data)
    dev_data = self.get(f"device {devid}") or {}
    if wear_level is not None:
        # only rewrite when the cached value actually changed
        if dev_data.get("wear_level") != str(wear_level):
            dev_data["wear_level"] = str(wear_level)
            self.log.debug(f"updating {devid} wear level to {wear_level}")
            self.set_device_wear_level(devid, wear_level)
    else:
        if "wear_level" in dev_data:
            del dev_data["wear_level"]
            self.log.debug(f"removing {devid} wear level")
            self.set_device_wear_level(devid, -1.0)
async def rm(self, key: str) -> None:
    """Remove key from store"""
    logger.debug(f"Removing {key}")
    if self._ioctx:
        try:
            # removing a nonexistent key appears to succeed just fine
            with rados.WriteOpCtx() as op:
                self._ioctx.remove_omap_keys(op, (key,))
                self._ioctx.operate_write_op(op, "kvstore")
        except Exception as e:
            logger.exception(str(e))
    # always drop the local cache copy, whatever the pool write did
    self._db.pop(key, None)
def load_pool_schedules(self, ioctx, schedules):
    """Load this pool's snapshot schedules from the schedule object's omap
    into *schedules*, removing stale keys afterwards.

    Pages through the omap 128 entries at a time.

    Fixes: the stale-key debug message read
    ``"Stail schedule key {} in pool".format(k, pool_name)`` -- a typo
    plus only one placeholder for two format args, so the pool name was
    silently dropped from the log line.
    """
    pool_id = ioctx.get_pool_id()
    pool_name = ioctx.get_pool_name()
    stale_keys = ()
    start_after = ''
    try:
        while True:
            with rados.ReadOpCtx() as read_op:
                self.log.info("load_schedules: {}, start_after={}".format(
                    pool_name, start_after))
                it, ret = ioctx.get_omap_vals(read_op, start_after, "", 128)
                ioctx.operate_read_op(read_op, SCHEDULE_OID)
                it = list(it)
                for k, v in it:
                    start_after = k
                    v = v.decode()
                    self.log.info("load_schedule: {} {}".format(k, v))
                    try:
                        try:
                            level_spec = LevelSpec.from_id(self, k)
                        except ValueError:
                            # key no longer resolves -- mark for removal
                            self.log.debug(
                                "Stale schedule key {} in pool {}".format(
                                    k, pool_name))
                            stale_keys += (k, )
                            continue
                        schedule = Schedule.from_json(level_spec.name, v)
                        schedules[k] = schedule
                    except ValueError:
                        self.log.error(
                            "Failed to decode schedule: pool={}, {} {}".
                            format(pool_name, k, v))
                # an empty page means the omap is exhausted
                if not it:
                    break
    except StopIteration:
        pass
    except rados.ObjectNotFound:
        # rbd_mirror_snapshot_schedule DNE
        pass
    if stale_keys:
        with rados.WriteOpCtx() as write_op:
            ioctx.remove_omap_keys(write_op, stale_keys)
            ioctx.operate_write_op(write_op, SCHEDULE_OID)
def save(self, level_spec, schedule):
    """Persist (or clear, when *schedule* is falsy) the schedule for
    *level_spec*.

    Global schedules are stored as a module option; per-pool schedules
    go into the pool's schedule object omap under level_spec.id.
    """
    if level_spec.is_global():
        schedule_cfg = schedule.to_json() if schedule else ''
        self.handler.module.set_localized_module_option(
            self.handler.MODULE_OPTION_NAME, schedule_cfg)
        return
    pool_id = level_spec.get_pool_id()
    with self.handler.module.rados.open_ioctx2(int(pool_id)) as ioctx:
        with rados.WriteOpCtx() as write_op:
            if schedule:
                ioctx.set_omap(write_op, (level_spec.id, ),
                               (schedule.to_json(), ))
            else:
                ioctx.remove_omap_keys(write_op, (level_spec.id, ))
            ioctx.operate_write_op(write_op, self.handler.SCHEDULE_OID)
def init_config(self):
    """Ensure the config object exists, creating it exclusively if needed.

    Returns False when the pool can't be opened, True otherwise
    (whether the object was created or already existed).

    Fixes: the ioctx was leaked if an unexpected exception escaped the
    write op -- close it in a ``finally`` so every path releases it.
    """
    try:
        ioctx = self._open_ioctx()
    except rados.ObjectNotFound:
        return False
    try:
        try:
            with rados.WriteOpCtx(ioctx) as op:
                # try to exclusively create the config object
                op.new(rados.LIBRADOS_CREATE_EXCLUSIVE)
                ioctx.operate_write_op(op, self.config_name)
            self.logger.debug("(init_config) created empty config object")
        except rados.ObjectExists:
            self.logger.debug("(init_config) using pre existing config object")
    finally:
        ioctx.close()
    return True
def set_quota_value_by_key(key, value):
    """Set a single omap key/value on the quota object in the metadata pool.

    Connects with _CLUSTER_CONFFILE, picks the first pool whose name
    contains META_POOL_KEY, and writes to QUOTA_OBJ_NAME's omap.

    Fixes: the ObjectNotFound and TimedOut handlers executed ``raise e``
    without binding ``e``, so any such error surfaced as a NameError
    instead of the original exception -- use a bare ``raise`` to
    re-raise the active exception. Also drops the unused ``ret`` local.
    """
    try:
        with rados.Rados(conffile=_CLUSTER_CONFFILE) as cluster:
            pool_list = cluster.list_pools()
            meta_pool = [p for p in pool_list if META_POOL_KEY in p][0]
            with cluster.open_ioctx(meta_pool) as ioctx:
                with rados.WriteOpCtx(ioctx) as write_op:
                    ioctx.set_omap(write_op, (key,), (value,))
                    ioctx.operate_write_op(write_op, QUOTA_OBJ_NAME)
    except rados.ObjectNotFound:
        print('there is no quota object: {}'.format(QUOTA_OBJ_NAME))
        raise
    except rados.TimedOut:
        print('read omap timed out')
        raise
    except Exception as e:
        print('caught exception with message: {}'.format(e))
def post_done():
    """Record this thread's id in the 'done' object's omap."""
    # nothing to coordinate when running single-threaded
    if len(thread_id) == 0:
        return
    with rados.WriteOpCtx() as op:
        ioctx.set_omap(op, (thread_id,), (b'',))
        ioctx.operate_write_op(op, threads_done_obj)
async def put(self, key: str, value: str) -> None:
    """Put key/value pair.

    Writes through to the kvstore object in our pool (when an ioctx is
    available) and always writes to the local cache, whether or not the
    pool write succeeds.
    """
    logger.debug(f"Put {key}: {value}")
    bvalue = value.encode("utf-8")
    if self._ioctx:
        # NOTE: checking self._ioctx first short-circuits the
        # "RADOS rados state (... state configuring.)" failure mode.
        # A write op can still block indefinitely on a badly degraded
        # cluster -- operate_aio_write_op blocks too in that case --
        # so a real async/timeout story (with last-known-good tracking
        # for the local cache) remains a TODO.
        try:
            with rados.WriteOpCtx() as op:
                self._ioctx.set_omap(op, (key,), (bvalue,))
                logger.debug("Doing write op")
                self._ioctx.operate_write_op(op, "kvstore")
            # This next notifies all watchers *INCLUDING* me!
            self._ioctx.notify("kvstore", key)
        except Exception as e:
            # e.g. RADOS state (You cannot perform that operation on a
            # Rados object in state configuring.)
            logger.exception(str(e))
    logger.debug(f"Writing {key}: {value} to local cache")
    self._db[key] = bvalue
def _cluster_connect(self) -> None:
    """Background loop: acquire and maintain the cluster connection.

    Retries every 10 seconds until self._run is cleared: gets a Rados
    handle, connects, ensures the 'aquarium' pool/kvstore object exist,
    seeds a brand-new kvstore from the local cache, and installs the
    config watch. On shutdown, tears everything down in order.
    """
    logger.debug("Starting cluster connection thread")
    logged_missing_config_file: bool = False
    while self._run:
        try:
            if not self._cluster:
                # uses /etc/ceph/ceph.client.admin.keyring -- really
                # should do separate keys per node so they can be
                # evicted if necessary if nodes are decommisioned
                try:
                    self._cluster = rados.Rados(
                        conffile="/etc/ceph/ceph.conf"
                    )
                    logger.info("Got cluster handle")
                except rados.ObjectNotFound as e:
                    # only log the missing conffile once, not every retry
                    if not logged_missing_config_file:
                        logger.info(
                            f"Can't get cluster handle: '{e}' - will keep retrying"
                        )
                        logged_missing_config_file = True
            if self._cluster and self._cluster.state != "connected":
                # this can throw (auth failed, etc.)
                logger.info("Connecting to cluster")
                self._cluster.connect()
                logger.info("Cluster connected")
                has_aquarium_pool = "aquarium" in self._cluster.list_pools()
                if not has_aquarium_pool:
                    logger.info("Creating aquarium pool")
                    # TODO: consider setting pg_num 1 as with device_health_metrics pool
                    self._cluster.create_pool("aquarium")
                self._ioctx = self._cluster.open_ioctx("aquarium")
                self._ioctx.application_enable("aquarium")
                # Safe even when the object exists: doesn't trash
                # existing omap data, which is neat.
                self._ioctx.write_full(
                    "kvstore",
                    "# aquarium kv store is in this object's omap\n".encode(
                        "utf-8"
                    ),
                )
                # For a brand-new pool, push everything from the local
                # cache into the kvstore omap so pre-bootstrap settings
                # survive.
                keys = self._db.keys()
                values = list(self._db[k] for k in keys)
                if keys and not has_aquarium_pool:
                    try:
                        with rados.WriteOpCtx() as op:
                            # assert_version(1) only holds for a freshly
                            # created object; anything else raises with
                            # errno ERANGE at operate_write_op time, which
                            # we treat as "someone beat us to it".
                            op.assert_version(1)
                            self._ioctx.set_omap(op, keys, values)  # type: ignore
                            self._ioctx.operate_write_op(op, "kvstore")
                            logger.info(
                                f"Pushed {keys} to kvstore in newly created aquarium pool"
                            )
                    except rados.OSError as e:
                        if e.errno == errno.ERANGE:
                            logger.warning(
                                f"kvstore object already exists in aquarium pool, not pushing local cache"
                            )
                        else:
                            raise
                # The config watch is only strictly needed when key
                # watches are requested, but keeping it always makes the
                # implementation simpler.
                # TODO: need timeouts, error handlers etc on watch
                self._config_watch = self._ioctx.watch(
                    "kvstore", self._config_notify
                )
                logger.debug(
                    f"config watch id is {self._config_watch.get_id()}"
                )
                # will raise:
                # rados.ObjectNotFound: [errno 2] RADOS object not found (watch error)
        except Exception as e:
            # e.g. RADOS state (You cannot perform that operation on a
            # Rados object in state configuring.)
            logger.exception(str(e))
        # TODO: is 10 seconds the right retry interval? It's arbitrary.
        logger.debug("Cluster connection thread sleeping for 10 seconds")
        self._event.wait(10)
        self._event.clear()
        # Open questions: how do we detect the cluster going away and
        # trigger a reconnect? What happens if we try a write op on a
        # down cluster? Might want watch checks.
    logger.debug("Shutting down cluster connection")
    if self._config_watch:
        self._config_watch.close()
        # Must drop the reference so the watch is deallocated before the
        # cluster is shut down, otherwise Watch.__dealloc__ raises
        # RadosStateError ("... state shutdown."). See
        # https://github.com/ceph/ceph/pull/43107 -- once that lands
        # everywhere this workaround can go.
        self._config_watch = None
    if self._ioctx:
        self._ioctx.close()
    if self._cluster:
        self._cluster.shutdown()
    logger.debug("Cluster connection is shut down")
# omap write benchmark: fill obj_name's omap with total_keys keys,
# keys_per_call per write op, optionally padding each value to value_size.
ioctx = conn.open_ioctx(pool_name)
if direction == 'write' or direction == 'writeread':
    # ensure object isn't there, then recreate it
    try:
        ioctx.remove_object(obj_name)
    except rados.ObjectNotFound:
        pass
    ioctx.write_full(obj_name, 'hi there')
time.sleep(5)  # give multiple threads time to set up
start_time = time.time()
next_power_of_4 = 4
base_key = 0
value = b''
while base_key < total_keys:
    with rados.WriteOpCtx() as op:
        for k in range(keys_per_call):
            omap_key_name = '%s-%09d' % (key_prefix,
                                         (keys_per_call - k) + base_key)
            if debug:
                print('omap key: %s' % omap_key_name)
            if value_size > 0:
                # double-up the key text until it covers value_size,
                # then trim -- cheap way to build a padded value
                v = omap_key_name
                while len(v) < value_size:
                    v = v + '.' + v
                value = v[:value_size]
            ioctx.set_omap(op, (omap_key_name, ), (value, ))
        ioctx.operate_write_op(op, obj_name)
    if think_time > 0.0:
        time.sleep(think_time)
    base_key += keys_per_call