def fetch(self, sync_type):
    """Request the current copy of ``sync_type`` from the cluster,
    remembering when the request went out so that stalled fetches can
    later be detected and abandoned.
    """
    log.debug("SyncObjects.fetch: %s" % sync_type)
    started = now()
    self._fetching_at[sync_type] = started
    # TODO(Rohan) clean up unused 'since' argument
    return ceph.get_cluster_object(self._cluster_name, sync_type.str, None)
def on_fetch_complete(self, sync_type, version, data):
    """Record the outcome of a completed fetch of ``sync_type``.

    :return A SyncObject if this version was new to us, else None
    """
    log.debug(
        "SyncObjects.on_fetch_complete %s/%s" % (sync_type.str, version))
    self._fetching_at[sync_type] = None

    # A fetch might give us a newer version than we knew we had asked for
    if sync_type.cmp(version, self._known_versions[sync_type]) > 0:
        self._known_versions[sync_type] = version

    # Don't store this if we already got something newer
    if sync_type.cmp(version, self.get_version(sync_type)) <= 0:
        log.warn("Ignoring outdated update %s/%s" % (sync_type.str, version))
        return None

    log.info("Got new version %s/%s" % (sync_type.str, version))
    # This might not be the latest: if it's not, send out another fetch
    # right away
    # if sync_type.cmp(self._known_versions[sync_type], version) > 0:
    #     self.fetch(sync_type)
    return self.set_map(sync_type, version, data)
def on_sync_object(self, fsid, sync_type, new, old):
    """Notification that a newer version of a SyncObject is available, or
    the first version of a SyncObject is available at startup (wherein
    old will be a null SyncObject)

    :param fsid: The FSID of the cluster to which the object belongs
    :param sync_type: A SyncObject subclass
    :param new: A SyncObject
    :param old: A SyncObject (same type as new)
    """
    log.debug("Eventer.on_sync_object: %s" % sync_type.str)

    # First sighting of this object type: nothing to diff against yet
    if old.data is None:
        return

    # Route the old/new pair to the handler for this map type, if any
    handlers = {
        OsdMap: self._on_osd_map,
        Health: self._on_health,
        MonStatus: self._on_mon_status,
    }
    handler = handlers.get(sync_type)
    if handler is not None:
        handler(fsid, new, old)

    self._flush()
def create(self, attributes):
    """Build a PoolCreatingRequest that creates a pool named
    ``attributes['name']`` and then applies any remaining attributes
    with follow-up commands.

    :param attributes: dict with at least 'name' and 'pg_num'; may also
        carry 'size', 'min_size', 'pgp_num' and other pool attributes.
    """
    commands = [
        ('osd pool create',
         {'pool': attributes['name'], 'pg_num': attributes['pg_num']})
    ]

    # Calculate appropriate min_size, including default if none given
    attributes['min_size'] = self._pool_min_size(
        attributes.get('size', 0), attributes.get('min_size', 0))

    # Which attributes must we set after the initial create?
    # (name/pg_num are consumed by the create command itself; pgp_num is
    # excluded as well.)
    post_create_attrs = dict(
        (k, v) for k, v in attributes.items()
        if k not in ('name', 'pg_num', 'pgp_num')
    )

    commands.extend(self._pool_attribute_commands(
        attributes['name'],
        post_create_attrs
    ))

    log.debug("Post-create attributes: %s" % post_create_attrs)
    log.debug("Commands: %s" % commands)

    return PoolCreatingRequest(
        "Creating pool '{name}'".format(name=attributes['name']),
        self._cluster_monitor.fsid,
        self._cluster_monitor.name,
        attributes['name'],
        commands)
def on_tick(self):
    """Drive time-based (rather than event-driven) processing.

    Called periodically by the run loop for anything that depends on
    walltime checks instead of incoming events.
    """
    log.debug("Eventer.on_tick")
def _acceptor(self):
    """Poll loop: watch the shared job queue at /rawops/jobs, claim the
    oldest unlocked job, run it, and mark it complete.
    """
    while True:
        raw_jobs = self.client.read("/rawops/jobs")
        # Oldest first (by 'updated' timestamp) so work is serviced
        # roughly in submission order
        jobs = sorted(json.loads(raw_jobs.value),
                      key=lambda k: int(k['updated']))
        # Pick up the oldest job that is not locked by any other bridge
        try:
            for job in jobs:
                if not job['locked_by']:
                    # First lock the job
                    # NOTE(review): this read-modify-write of the whole
                    # queue is not atomic — two bridges polling at once
                    # could claim the same job. Confirm whether the store
                    # offers a compare-and-swap write to use here.
                    log.info("%s found new job_%s" %
                             (self.__class__.__name__, job['job_id']))
                    log.debug(job['msg'])
                    job['locked_by'] = self.bridge_id
                    job['status'] = "in-progress"
                    job['updated'] = int(time.time())
                    raw_jobs.value = json.dumps(jobs)
                    self.client.write("/rawops/jobs", raw_jobs.value)
                    log.info("%s Running new job_%s" %
                             (self.__class__.__name__, job['job_id']))
                    # Execute the requested function with its kwargs
                    self.__call__(job['msg']['func'],
                                  kwargs=job['msg']['kwargs'])
                    # Publish completion back to the queue
                    job['updated'] = int(time.time())
                    job['status'] = "complete"
                    raw_jobs.value = json.dumps(jobs)
                    self.client.write("/rawops/jobs", raw_jobs.value)
                    log.info("%s Completed job_%s" %
                             (self.__class__.__name__, job['job_id']))
                    # At most one job per poll cycle
                    break
        except Exception as ex:
            # NOTE(review): if the job function raises after the job was
            # locked, the job is left "in-progress" with no unlock or
            # failure path — it will never be retried. Verify intended.
            log.error(ex)
        gevent.sleep(1)
def _run(self):
    """Main loop: flush once at startup, then tick periodically until
    stop() sets the completion event.
    """
    self._flush()
    # Initial grace period before the first tick
    self._complete.wait(GRACE_PERIOD)
    while True:
        if self._complete.is_set():
            break
        self.on_tick()
        self._complete.wait(TICK_SECONDS)
    log.debug("Eventer complete")
def wrap(*args, **kwargs):
    """Invoke the wrapped RPC attribute, logging entry (>>), normal
    exit (<<) and failures (!!) around the call.
    """
    log.debug("RpcInterface >> %s(%s, %s)" % (item, args, kwargs))
    try:
        result = attr(*args, **kwargs)
        log.debug("RpcInterface << %s" % item)
        return result
    except Exception:
        log.exception("RpcInterface !! %s" % item)
        raise
def on_version(self, sync_type, new_version):
    """Notify me that a particular version of a particular map exists.

    I may choose to initiate RPC to retrieve the map
    """
    log.debug(
        "SyncObjects.on_version %s/%s" % (sync_type.str, new_version)
    )
    old_version = self.get_version(sync_type)
    # Only act when the advertised version is ahead of the data we hold
    if sync_type.cmp(new_version, old_version) > 0:
        known_version = self._known_versions[sync_type]
        if sync_type.cmp(new_version, known_version) > 0:
            # We are out of date: request an up to date copy
            log.info("Advanced known version %s/%s %s->%s" % (
                self._cluster_name, sync_type.str, known_version,
                new_version)
            )
            self._known_versions[sync_type] = new_version
        else:
            # We already knew about this version but have not stored it
            log.info(
                "on_version: %s is newer than %s" % (
                    new_version, old_version
                )
            )
        # If we already have a request out for this type of map,
        # then consider cancelling it if we've already waited for
        # a while.
        if self._fetching_at[sync_type] is not None:
            if now() - self._fetching_at[sync_type] < self.FETCH_TIMEOUT:
                # Recent fetch still in flight: let it finish
                log.info("Fetch already underway for %s" % sync_type.str)
                return
            else:
                # Fetch has exceeded FETCH_TIMEOUT: give up on it and
                # issue a fresh one below
                log.warn("Abandoning fetch for %s started at %s" % (
                    sync_type.str, self._fetching_at[sync_type]))
        log.info(
            "on_version: fetching %s/%s , "
            "currently got %s, know %s" % (
                sync_type, new_version, old_version, known_version
            )
        )
        return self.fetch(sync_type)
def _run(self):
    """Event loop: consume cluster heartbeats until stop is requested.

    Each iteration polls ceph.heartbeat for this cluster's FSID and
    routes any beacon tagged for this cluster to on_heartbeat.
    """
    self._ready.set()
    log.debug("ClusterMonitor._run: ready")

    # Tag prefix identifying beacons for our cluster; invariant, so
    # compute it once outside the loop
    cluster_prefix = "ceph/cluster/{0}".format(self.fsid)

    while not self._complete.is_set():
        data = ceph.heartbeat(heartbeat_type="cluster", fsid=self.fsid)
        if data is not None:
            for tag, c_data in data.iteritems():
                log.debug(
                    "ClusterMonitor._run.ev:"
                    " %s/tag=%s" % (c_data.get('id'), tag)
                )
                try:
                    # A ceph.heartbeat beacon for our cluster; anything
                    # else does not concern us and is ignored
                    if tag.startswith(cluster_prefix):
                        self.on_heartbeat(c_data['id'], c_data)
                except Exception:
                    # Because this is our main event handling loop, swallow
                    # exceptions instead of letting them end the world.
                    log.exception(
                        "Exception handling message with tag %s" % tag)
                    log.debug("Message content: %s" % data)
        gevent.sleep(4)

    log.info("%s complete" % self.__class__.__name__)
    self.done.set()
def on_heartbeat(self, fqdn, cluster_data):
    """Handle a ceph.heartbeat from a minion.

    Heartbeats come from all servers, but we're mostly interested in
    those which come from a mon (and therefore have the 'clusters'
    attribute populated) as these tells us whether there are any new
    versions of cluster maps for us to fetch.
    """
    self.update_time = datetime.datetime.utcnow().replace(tzinfo=utc)

    log.debug('Checking for version increments in heartbeat')
    versions = cluster_data['versions']
    for sync_type in SYNC_OBJECT_TYPES:
        fetched = self._sync_objects.on_version(
            sync_type, versions[sync_type.str])
        if fetched:
            self.on_sync_object(fetched)
def stop(self):
    """Ask the run loop to exit; returns immediately without waiting."""
    log.debug("Eventer stopping")
    self._complete.set()