Пример #1
0
    def on_create_request(self, rsrc_id, rsrc_data):
        """Register the presence nodes described by ``rsrc_data``.

        Nodes are created one at a time, in this order: the "running" node,
        one node per entry in ``rsrc_data['endpoints']``, and finally the
        identity node when ``rsrc_data['identity_group']`` is set.  Every
        node that is created is recorded in ``self.presence`` under the
        application name, mapped to ``rsrc_id``.

        :param rsrc_id: resource id; the application name is derived from it
            with ``appcfg.app_name``.
        :param rsrc_data: dict-like request payload (read with ``.get``).
        :returns: ``{}`` when every node was created, ``None`` as soon as
            ``self._safe_create`` reports a failure.  Nodes created before
            the failure stay recorded in ``self.presence``.
        """

        def _pending_nodes(app_name):
            """Lazily yield ``(log format, node path, node data)`` triples.

            This is a generator on purpose: each triple is only computed
            once the previous node has been registered, so lookups on
            ``rsrc_data`` happen in the same order as the registrations.
            """
            # Running node.
            yield (
                'Register running: %s, %s',
                z.path.running(app_name),
                self.hostname,
            )

            # Endpoint nodes.
            for ep_spec in rsrc_data.get('endpoints', []):
                container_port = ep_spec['port']
                # The endpoint name defaults to the internal port number.
                name = ep_spec.get('name', str(container_port))
                host_port = ep_spec['real_port']
                proto = ep_spec.get('proto', 'tcp')

                address = ':'.join((self.hostname, str(host_port)))
                yield (
                    'Register endpoint: %s, %s',
                    z.path.endpoint(app_name, proto, name),
                    address,
                )

            # Identity node (only when an identity group is configured).
            group = rsrc_data.get('identity_group')
            if group:
                member = rsrc_data.get('identity', _INVALID_IDENTITY)
                payload = {'host': self.hostname, 'app': app_name}
                yield (
                    'Register identity: %s, %s',
                    z.path.identity_group(group, str(member)),
                    payload,
                )

        with lc.LogContext(_LOGGER, rsrc_id,
                           adapter_cls=lc.ContainerAdapter) as log:
            log.info('Creating presence: %s', rsrc_data)
            app_name = appcfg.app_name(rsrc_id)

            for log_fmt, node_path, node_data in _pending_nodes(app_name):
                _LOGGER.info(log_fmt, node_path, node_data)
                created = self._safe_create(rsrc_id, node_path, node_data)
                if not created:
                    # Stop at the first node that cannot be created.
                    _LOGGER.info('Waiting to expire: %s', node_path)
                    return None

                self.presence[app_name][node_path] = rsrc_id

        return {}
Пример #2
0
def load_app_safe(container, container_dir, app_json=STATE_JSON):
    """Load the app manifest, degrading to a minimal object on bad data.

    The manifest is read through ``load_app``.  If that raises
    ``ValueError`` the error is logged and a stand-in object is returned
    instead, carrying only the attributes that can be derived from the
    container name: ``name``, ``app``, ``task`` and ``uniqueid``.

    :param container: container name used to derive the fallback attributes.
    :param container_dir: directory passed to ``load_app``.
    :param app_json: manifest file name passed to ``load_app``.
    :returns: whatever ``load_app`` returns, or the fallback object built
        with ``utils.to_obj``.
    """
    try:
        return load_app(container_dir, app_json=app_json)
    except ValueError as err:
        _LOGGER.error('Manifest file is corrupted or invalid: %s', err)

        # Rebuild the key attributes from the container name alone.
        full_name = appcfg.app_name(container)
        key_attrs = {
            'name': full_name,
            'app': appcfg.appname_basename(full_name),
            'task': appcfg.appname_task_id(full_name),
            'uniqueid': appcfg.app_unique_id(container),
        }
        return utils.to_obj(key_attrs)
Пример #3
0
    def on_delete_request(self, rsrc_id):
        """Remove every presence node recorded for ``rsrc_id``.

        Looks up the nodes stored in ``self.presence`` for the application
        that ``rsrc_id`` belongs to, deletes those mapped to ``rsrc_id``
        through ``self._safe_delete`` and forgets them.  When no node is
        left for the application, its entry is dropped from
        ``self.presence`` as well.

        :param rsrc_id: resource id; the application name is derived from it
            with ``appcfg.app_name``.
        :returns: always ``True``.
        """
        with lc.LogContext(_LOGGER, rsrc_id,
                           adapter_cls=lc.ContainerAdapter) as log:
            log.info('Deleting presence')
            app_name = appcfg.app_name(rsrc_id)
            app_nodes = self.presence[app_name]

            # Snapshot the matching paths first so the mapping is not
            # modified while it is being iterated.
            owned = [
                node_path for node_path, owner in app_nodes.items()
                if owner == rsrc_id
            ]

            for node_path in owned:
                self._safe_delete(node_path)
                del app_nodes[node_path]

            if not app_nodes:
                del self.presence[app_name]

        return True
Пример #4
0
def finish(tm_env, container_dir):
    """Free a finished container's resources and report why it ended.

    The app manifest is loaded from ``<container_dir>/data``; when one is
    returned, ``_cleanup`` and ``apphook.cleanup`` are run for it.  The
    finish information collected from the data directory then decides which
    single event, if any, is posted:

      - aborted    -> ``_post_aborted_event``
      - oom        -> ``_post_oom_event``
      - terminated -> nothing posted, only logged
      - otherwise  -> ``_post_exit_event`` when exit info is available

    :param tm_env: environment object handed on to the cleanup and event
        helpers.
    :param container_dir: path of the container directory; its basename is
        used as the container name.
    """
    container_name = os.path.basename(container_dir)
    with lc.LogContext(_LOGGER, container_name, lc.ContainerAdapter):
        _LOGGER.info('finishing %r', container_dir)

        state_dir = os.path.join(container_dir, 'data')
        app_name = appcfg.app_name(container_name)

        manifest = runtime.load_app_safe(container_name, state_dir)
        if manifest:
            _cleanup(tm_env, state_dir, manifest)
            apphook.cleanup(tm_env, manifest, container_dir)

        # Cleanup is done at this point; work out why the container
        # finished and post the matching event.
        finish_info = _collect_finish_info(state_dir)
        exitinfo, aborted, oom, terminated = finish_info

        if aborted is not None:
            _post_aborted_event(tm_env, app_name, aborted)
        elif oom:
            _post_oom_event(tm_env, app_name)
        elif terminated:
            # Terminated (or evicted): no event is posted from here, that
            # case is handled by the scheduler.
            _LOGGER.info('Terminated: %s', app_name)
        elif exitinfo is not None:
            # The container finished because the service exited.  This
            # check must stay last so the causes above take precedence.
            _post_exit_event(tm_env, app_name, exitinfo)
Пример #5
0
    def _synchronize(self):
        """Synchronize apps to running/cleanup.

        Compares the containers found in ``apps_dir`` against the entries
        found in ``cache_dir`` and, for each one, decides whether it is left
        alone, terminated, configured to run or linked into cleanup. The
        supervisor is refreshed once at the end.

        We need to re-validate three things on startup:

          - All configured apps should have an associated cache entry.
            Otherwise, create a link to cleanup.

          - All configured apps with a cache entry and with a cleanup file
            should be linked to cleanup. Otherwise, link to running.

          - Additional cache entries should be configured to run.

        On restart we need to validate another three things:

          - All configured apps that have a running link should be checked
            if in the cache. If not then terminate the app.

          - All configured apps that have a cleanup link should be left
            alone as this is handled.

          - Additional cache entries should be configured to run.

        On startup run.sh will clear running and cleanup which simplifies
        the logic for us as we can check running/cleanup first. Then check
        startup conditions and finally non-configured apps that are in cache.

        NOTE: a link cannot exist in running and cleanup at the same time.

        """
        # Disable R0912(too-many-branches)
        # pylint: disable=R0912

        # Names of all entries currently present in the apps directory.
        configured = {
            os.path.basename(filename)
            for filename in glob.glob(os.path.join(self.tm_env.apps_dir, '*'))
        }
        # Cache entry name -> value of appcfg.eventfile_unique_name() for that
        # file. Below, the key is matched against the app name and the value
        # against the container name.
        cached = {
            os.path.basename(filename): appcfg.eventfile_unique_name(filename)
            for filename in glob.glob(os.path.join(self.tm_env.cache_dir, '*'))
        }

        for container in configured:
            appname = appcfg.app_name(container)
            if os.path.exists(os.path.join(self.tm_env.running_dir, appname)):
                # App already running.. check if in cache.
                # No need to check if needs cleanup as that is handled
                # Terminate when there is no cache entry for the app, or the
                # cache entry refers to a different container.
                if appname not in cached or cached[appname] != container:
                    self._terminate(appname)
                else:
                    _LOGGER.info('Ignoring %s as it is running', appname)

                # Drop the entry so the final loop does not configure it.
                cached.pop(appname, None)

            elif os.path.exists(os.path.join(self.tm_env.cleanup_dir,
                                             appname)):
                # Already in the process of being cleaned up
                _LOGGER.info('Ignoring %s as it is in cleanup', appname)
                # Drop the entry so the final loop does not configure it.
                cached.pop(appname, None)

            else:
                # Neither running nor in cleanup. Default to cleanup unless
                # the app can be configured successfully below.
                needs_cleanup = True
                if appname in cached and cached[appname] == container:
                    data_dir = os.path.join(self.tm_env.apps_dir, container,
                                            'data')
                    # Any of these files in the data directory means the
                    # container must go to cleanup rather than be started.
                    for cleanup_file in ['exitinfo', 'aborted', 'oom']:
                        path = os.path.join(data_dir, cleanup_file)
                        if os.path.exists(path):
                            _LOGGER.debug('Found cleanup file %r', path)
                            break
                    else:
                        # for/else: reached only when the loop did not break,
                        # i.e. none of the cleanup files exist.
                        if self._configure(appname):
                            needs_cleanup = False
                            _LOGGER.debug('Added existing app %r', appname)

                    # Only popped when the cache entry matches this
                    # container; an entry for a different container stays in
                    # `cached` and is picked up by the final loop.
                    cached.pop(appname, None)

                if needs_cleanup:
                    # NOTE(review): presumably creates the link
                    # cleanup_dir/<appname> pointing at the container
                    # directory -- confirm fs.symlink_safe argument order.
                    fs.symlink_safe(
                        os.path.join(self.tm_env.cleanup_dir, appname),
                        os.path.join(self.tm_env.apps_dir, container))
                    _LOGGER.debug('Removed %r', appname)

        # Whatever is left in `cached` was not consumed by a configured
        # container above; try to configure each remaining entry.
        for appname in six.iterkeys(cached):
            if self._configure(appname):
                _LOGGER.debug('Added new app %r', appname)

        self._refresh_supervisor()