def on_create_request(self, rsrc_id, rsrc_data):
    """Publish the presence nodes of a container.

    Nodes are registered in a fixed order: the running node, then one node
    per entry of ``rsrc_data['endpoints']``, then the identity node when
    ``rsrc_data['identity_group']`` is set. Each node that gets created is
    recorded in ``self.presence[app_name]`` with ``rsrc_id`` as its owner.

    :param rsrc_id: Resource id; the app name is derived from it.
    :param rsrc_data: Mapping read for the ``endpoints``,
        ``identity_group`` and ``identity`` keys.
    :returns: ``{}`` when every node was registered, ``None`` as soon as
        ``self._safe_create`` reports a failure for one of them.
    """

    def _claim(node_path, node_data):
        # Create the node and remember its owner. On failure only log:
        # the caller aborts the whole request.
        if self._safe_create(rsrc_id, node_path, node_data):
            self.presence[app_name][node_path] = rsrc_id
            return True
        _LOGGER.info('Waiting to expire: %s', node_path)
        return False

    with lc.LogContext(_LOGGER, rsrc_id,
                       adapter_cls=lc.ContainerAdapter) as log:
        log.info('Creating presence: %s', rsrc_data)
        app_name = appcfg.app_name(rsrc_id)

        # Running node: its payload is this host's name.
        running_path = z.path.running(app_name)
        _LOGGER.info('Register running: %s, %s', running_path, self.hostname)
        if not _claim(running_path, self.hostname):
            return None

        # Endpoint nodes: payload is "<hostname>:<real_port>".
        for ep_spec in rsrc_data.get('endpoints', []):
            internal_port = ep_spec['port']
            # The endpoint name falls back to the internal port number.
            name = ep_spec.get('name', str(internal_port))
            real_port = ep_spec['real_port']
            proto = ep_spec.get('proto', 'tcp')

            hostport = self.hostname + ':' + str(real_port)
            endpoint_path = z.path.endpoint(app_name, proto, name)
            _LOGGER.info('Register endpoint: %s, %s', endpoint_path, hostport)
            if not _claim(endpoint_path, hostport):
                return None

        # Identity node: only when an identity group is configured.
        group = rsrc_data.get('identity_group')
        if group:
            identity = rsrc_data.get('identity', _INVALID_IDENTITY)
            identity_data = {'host': self.hostname, 'app': app_name}
            identity_path = z.path.identity_group(group, str(identity))
            _LOGGER.info('Register identity: %s, %s',
                         identity_path, identity_data)
            if not _claim(identity_path, identity_data):
                return None

    return {}
def load_app_safe(container, container_dir, app_json=STATE_JSON):
    """Load the app manifest, falling back to a minimal object.

    The manifest is loaded with ``load_app``. If that raises
    ``ValueError`` the error is logged and an object carrying only the
    ``name``, ``app``, ``task`` and ``uniqueid`` attributes, all derived
    from ``container``, is returned instead.

    :param container: Container name used to derive the fallback fields.
    :param container_dir: Directory passed to ``load_app``.
    :param app_json: Manifest file name passed to ``load_app``.
    :returns: Whatever ``load_app`` returns, or the fallback object.
    """
    try:
        return load_app(container_dir, app_json=app_json)
    except ValueError as exc:
        _LOGGER.error('Manifest file is corrupted or invalid: %s', exc)

        # Rebuild the key attributes from the container name alone.
        full_name = appcfg.app_name(container)
        fallback = {
            'name': full_name,
            'app': appcfg.appname_basename(full_name),
            'task': appcfg.appname_task_id(full_name),
            'uniqueid': appcfg.app_unique_id(container),
        }
        return utils.to_obj(fallback)
def on_delete_request(self, rsrc_id):
    """Remove every presence node owned by ``rsrc_id``.

    Each path recorded for the app whose owner equals ``rsrc_id`` is
    passed to ``self._safe_delete`` and dropped from ``self.presence``.
    The per-app entry is removed once it holds no more paths.

    :param rsrc_id: Resource id; the app name is derived from it.
    :returns: ``True``.
    """
    with lc.LogContext(_LOGGER, rsrc_id,
                       adapter_cls=lc.ContainerAdapter) as log:
        log.info('Deleting presence')

        app_name = appcfg.app_name(rsrc_id)
        app_nodes = self.presence[app_name]

        # Snapshot the matching paths first: the mapping is mutated below.
        owned_paths = [
            node_path
            for node_path, owner in app_nodes.items()
            if owner == rsrc_id
        ]
        for node_path in owned_paths:
            self._safe_delete(node_path)
            del app_nodes[node_path]

        # Drop the per-app entry once nothing is registered under it.
        if not app_nodes:
            del self.presence[app_name]

    return True
def finish(tm_env, container_dir):
    """Free the container's allocated resources and mark them as available.

    After cleanup, the finish information collected from the container's
    ``data`` directory decides which event (if any) is posted: aborted,
    out-of-memory, or service exit. A terminated container is only logged.

    :param tm_env: Environment object handed to the cleanup and event
        helpers.
    :param container_dir: Container directory; its basename is the
        container name.
    """
    container_name = os.path.basename(container_dir)
    with lc.LogContext(_LOGGER, container_name, lc.ContainerAdapter):
        _LOGGER.info('finishing %r', container_dir)

        data_dir = os.path.join(container_dir, 'data')
        appname = appcfg.app_name(container_name)

        manifest = runtime.load_app_safe(container_name, data_dir)
        if manifest:
            _cleanup(tm_env, data_dir, manifest)
            apphook.cleanup(tm_env, manifest, container_dir)

        # All resources are cleaned up. If the app terminated inside the
        # container, remove the node from Zookeeper, which will notify the
        # scheduler that it is safe to reuse the host for other load.
        exitinfo, aborted, oom, terminated = _collect_finish_info(data_dir)

        if aborted is not None:
            _post_aborted_event(tm_env, appname, aborted)
        elif oom:
            _post_oom_event(tm_env, appname)
        elif terminated:
            # Terminated (or evicted).
            # Don't post event, this is handled by the scheduler.
            _LOGGER.info('Terminated: %s', appname)
        elif exitinfo is not None:
            # Container finished because service exited.
            # It is important that this is checked last.
            _post_exit_event(tm_env, appname, exitinfo)
def _synchronize(self): """Synchronize apps to running/cleanup. We need to re-validate three things on startup: - All configured apps should have an associated cache entry. Otherwise, create a link to cleanup. - All configured apps with a cache entry and with a cleanup file should be linked to cleanup. Otherwise, link to running. - Additional cache entries should be configured to run. On restart we need to validate another three things: - All configured apps that have a running link should be checked if in the cache. If not then terminate the app. - All configured apps that have a cleanup link should be left alone as this is handled. - Additional cache entries should be configured to run. On startup run.sh will clear running and cleanup which simplifies the logic for us as we can check running/cleanup first. Then check startup conditions and finally non-configured apps that are in cache. NOTE: a link cannot exist in running and cleanup at the same time. """ # Disable R0912(too-many-branches) # pylint: disable=R0912 configured = { os.path.basename(filename) for filename in glob.glob(os.path.join(self.tm_env.apps_dir, '*')) } cached = { os.path.basename(filename): appcfg.eventfile_unique_name(filename) for filename in glob.glob(os.path.join(self.tm_env.cache_dir, '*')) } for container in configured: appname = appcfg.app_name(container) if os.path.exists(os.path.join(self.tm_env.running_dir, appname)): # App already running.. check if in cache. 
# No need to check if needs cleanup as that is handled if appname not in cached or cached[appname] != container: self._terminate(appname) else: _LOGGER.info('Ignoring %s as it is running', appname) cached.pop(appname, None) elif os.path.exists(os.path.join(self.tm_env.cleanup_dir, appname)): # Already in the process of being cleaned up _LOGGER.info('Ignoring %s as it is in cleanup', appname) cached.pop(appname, None) else: needs_cleanup = True if appname in cached and cached[appname] == container: data_dir = os.path.join(self.tm_env.apps_dir, container, 'data') for cleanup_file in ['exitinfo', 'aborted', 'oom']: path = os.path.join(data_dir, cleanup_file) if os.path.exists(path): _LOGGER.debug('Found cleanup file %r', path) break else: if self._configure(appname): needs_cleanup = False _LOGGER.debug('Added existing app %r', appname) cached.pop(appname, None) if needs_cleanup: fs.symlink_safe( os.path.join(self.tm_env.cleanup_dir, appname), os.path.join(self.tm_env.apps_dir, container)) _LOGGER.debug('Removed %r', appname) for appname in six.iterkeys(cached): if self._configure(appname): _LOGGER.debug('Added new app %r', appname) self._refresh_supervisor()