def finish(tm_env, container_dir):
    """Free the resources held by a container and report how it ended.

    The application manifest is loaded from the container's ``data``
    directory.  When the default manifest is present the container is
    cleaned up; otherwise the ``appcfg.APP_JSON`` manifest is tried so that
    the outcome can still be reported.

    :param tm_env:
        Environment object passed through to the cleanup, event and hook
        helpers.
    :param ``str`` container_dir:
        Path of the container directory being finished.
    """
    log_context = lc.LogContext(
        _LOGGER,
        os.path.basename(container_dir),
        lc.ContainerAdapter,
    )
    with log_context:
        _LOGGER.info('finishing %r', container_dir)

        data_path = os.path.join(container_dir, 'data')

        app = runtime.load_app(data_path)
        if app is None:
            # No default manifest: fall back to the APP_JSON manifest.
            # Nothing is cleaned up in this case.
            app = runtime.load_app(data_path, appcfg.APP_JSON)
        else:
            _cleanup(tm_env, data_path, app)

        if app is not None:
            # Collect the exit information before anything else, since the
            # container directory will be moved later on.
            exitinfo, aborted, oom, terminated = _collect_exit_info(
                data_path
            )

            # Report exactly one outcome, in priority order.
            if aborted is not None:
                app_abort.report_aborted(
                    tm_env,
                    app.name,
                    why=aborted.get('why'),
                    payload=aborted.get('payload'),
                )
            elif oom:
                _post_oom_event(tm_env, app)
            elif terminated:
                # Terminated (or evicted): no event is posted here, this is
                # handled by the master.
                _LOGGER.info('Terminated: %s', app.name)
            elif exitinfo is not None:
                # The container finished because the service exited.
                # It is important that this is checked last.
                _post_exit_event(tm_env, app, exitinfo)

        # Let the application hooks clean up using the container information.
        if app:
            apphook.cleanup(tm_env, app, container_dir)
def _finish(self):
    """Remove the docker container of the app and report how it ended.

    Does nothing when no application state can be loaded from the service
    data directory.  Otherwise the container (if it still exists) is
    force-removed, an aborted / killed / finished outcome is reported, and
    the local logs are archived.
    """
    data_dir = self._service.data_dir
    app = runtime.load_app(data_dir, runtime.STATE_JSON)
    if not app:
        return

    docker_client = self._get_client()
    unique_name = appcfg.app_unique_name(app)

    container = None
    container_state = None
    try:
        container = docker_client.containers.get(unique_name)
        container_state = container.attrs.get('State')
    except docker.errors.NotFound:
        # The container is already gone; there is nothing to remove and no
        # state to report from.
        pass

    if container is not None:
        try:
            container.remove(force=True)
        except docker.errors.APIError:
            _LOGGER.error('Failed to remove %s', container.id)

    aborted = _check_aborted(self._service.data_dir)
    if aborted is not None:
        app_abort.report_aborted(
            self._tm_env,
            app.name,
            why=aborted.get('why'),
            payload=aborted.get('payload'),
        )
    elif container_state is not None:
        if container_state.get('OOMKilled', False):
            event = events.KilledTraceEvent(
                instanceid=app.name,
                is_oom=True,
            )
        else:
            # 256 is the exit code reported when docker gives none.
            event = events.FinishedTraceEvent(
                instanceid=app.name,
                rc=container_state.get('ExitCode', 256),
                signal=0,
                payload=container_state,
            )
        trace.post(self._tm_env.app_events_dir, event)

    if os.name == 'nt':
        credential_spec.cleanup(unique_name, docker_client)

    # Archiving is best effort: a failure is logged, never raised.
    try:
        runtime.archive_logs(
            self._tm_env, unique_name, self._service.data_dir
        )
    except Exception:  # pylint: disable=W0703
        _LOGGER.exception('Unexpected exception storing local logs.')
def _configure(self, instance_name):
    """Configure an instance from its cached event file.

    Calls ``app_cfg.configure`` on ``<cache_dir>/<instance_name>`` and, on
    success, symlinks ``<running_dir>/<instance_name>`` to the resulting
    container directory.  On any failure the cached event file is removed;
    when the failure is an exception it is also reported as an abort.

    :param ``str`` instance_name:
        Name of the instance to configure
    :returns ``bool``:
        True for successfully configured container.
    """
    tm_env = self.tm_env
    event_file = os.path.join(tm_env.cache_dir, instance_name)

    def _report_and_discard(reason):
        # Must run inside an ``except`` block: the payload is the traceback
        # of the exception currently being handled.
        app_abort.report_aborted(
            self.tm_env,
            instance_name,
            why=reason,
            payload=traceback.format_exc(),
        )
        fs.rm_safe(event_file)
        return False

    with lc.LogContext(_LOGGER, instance_name):
        try:
            _LOGGER.info('Configuring')
            container_dir = app_cfg.configure(
                self.tm_env, event_file, self._runtime
            )
            if container_dir is not None:
                # symlink_safe(link, target)
                running_link = os.path.join(
                    self.tm_env.running_dir, instance_name
                )
                fs.symlink_safe(running_link, container_dir)
                return True

            # configure step failed, skip.
            fs.rm_safe(event_file)
            return False

        except exc.ContainerSetupError as err:
            _LOGGER.exception('Error configuring (%r)', instance_name)
            return _report_and_discard(err.reason)

        except Exception:  # pylint: disable=W0703
            _LOGGER.exception('Error configuring (%r)', instance_name)
            return _report_and_discard(app_abort.AbortedReason.UNKNOWN)
def test_report_aborted(self):
    """Tests report abort sequence.

    Reports an abort for ``proid.myapp#001`` with reason ``TICKETS`` and
    checks that a matching ``AbortedTraceEvent`` is posted through
    ``treadmill.appevents.post``.
    """
    context.GLOBAL.zk.url = 'zookeeper://xxx@hhh:123/treadmill/mycell'
    treadmill.zkutils.connect.return_value = kazoo.client.KazooClient()
    kazoo.client.KazooClient.get_children.return_value = []
    kazoo.client.KazooClient.exists.return_value = True

    # Clear any calls recorded on the patched methods.  ``reset()`` is not
    # part of the mock API (on a plain Mock it just creates and calls a
    # child mock), so ``reset_mock()`` is what actually clears the state.
    # NOTE(review): assumes ``create`` / ``delete`` are patched with mocks
    # by decorators outside this block - confirm.
    kazoo.client.KazooClient.create.reset_mock()
    kazoo.client.KazooClient.delete.reset_mock()

    app_abort.report_aborted(
        self.tm_env,
        'proid.myapp#001',
        why=app_abort.AbortedReason.TICKETS,
        payload='test',
    )

    treadmill.appevents.post.assert_called_with(
        mock.ANY,
        events.AbortedTraceEvent(
            instanceid='proid.myapp#001',
            why='tickets',
            payload='test',
        ),
    )
def _post_aborted_event(tm_env, appname, aborted):
    """Post aborted event.

    Forwards the ``why`` and ``payload`` entries of ``aborted`` (looked up
    with ``.get``, so a missing entry is passed as ``None``) to
    ``app_abort.report_aborted``.
    """
    reason = aborted.get('why')
    details = aborted.get('payload')
    app_abort.report_aborted(tm_env, appname, why=reason, payload=details)