def main(): """ The entrypoint for the zoe-master script. :return: int """ config.load_configuration() args = config.get_conf() if args.debug: logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT) else: logging.basicConfig(level=logging.INFO, format=LOG_FORMAT) logging.getLogger('kazoo').setLevel(logging.WARNING) logging.getLogger('requests').setLevel(logging.WARNING) logging.getLogger('urllib3').setLevel(logging.WARNING) logging.getLogger('docker').setLevel(logging.INFO) logging.getLogger("tornado").setLevel(logging.DEBUG) log.info("Initializing DB manager") config.singletons['sql_manager'] = SQLManager(args) log.info("Initializing workspace managers") fswk = ZoeFSWorkspace() config.singletons['workspace_managers'] = [fswk] if config.get_conf().influxdb_enable: metrics_th = InfluxDBMetricSender(config.get_conf()) metrics_th.start() config.singletons['metric'] = metrics_th else: metrics_th = BaseMetricSender('metrics-logger', config.get_conf()) config.singletons['metric'] = metrics_th # stats_th = StatsManager() # stats_th.start() # TODO Broken Docker API # config.singletons['stats_manager'] = stats_th log.info("Initializing scheduler") config.scheduler = ZoeScheduler() restart_resubmit_scheduler() log.info("Starting ZMQ API server...") config.singletons['api_server'] = APIManager() try: config.singletons['api_server'].loop() except KeyboardInterrupt: pass except Exception: log.exception('fatal error') finally: config.scheduler.quit() config.singletons['api_server'].quit()
def get(self):
    """Return version and deployment information for this Zoe instance."""
    return {
        'version': ZOE_VERSION,
        'api_version': ZOE_API_VERSION,
        'application_format_version': ZOE_APPLICATION_FORMAT_VERSION,
        'deployment_name': get_conf().deployment_name,
    }
def loop(self):
    """Blocking ZMQ request/reply loop of the master API server.

    Receives one JSON message per iteration and dispatches on
    ``message['command']``. Every command path must produce exactly one
    reply; ``self.debug_has_replied`` guards against handlers that forget.

    :raises ZoeException: if a command handler fails to send a reply (bug guard).
    """
    assert isinstance(config.singletons['sql_manager'], zoe_lib.sql_manager.SQLManager)
    while True:
        message = self.zmq_s.recv_json()
        self.debug_has_replied = False
        start_time = time.time()
        if message['command'] == 'execution_start':
            exec_id = message['exec_id']
            execution = config.singletons['sql_manager'].execution_list(
                id=exec_id, only_one=True)
            if execution is None:
                self._reply_error('Execution ID {} not found'.format(
                    message['exec_id']))
            else:
                execution.set_scheduled()
                self._reply_ok()
                zoe_master.execution_manager.execution_submit(execution)
        elif message['command'] == 'execution_terminate':
            exec_id = message['exec_id']
            execution = config.singletons['sql_manager'].execution_list(
                id=exec_id, only_one=True)
            if execution is None:
                self._reply_error('Execution ID {} not found'.format(
                    message['exec_id']))
            else:
                execution.set_cleaning_up()
                self._reply_ok()
                zoe_master.execution_manager.execution_terminate(execution)
        elif message['command'] == 'execution_delete':
            exec_id = message['exec_id']
            execution = config.singletons['sql_manager'].execution_list(
                id=exec_id, only_one=True)
            if execution is not None:
                zoe_master.execution_manager.execution_delete(execution)
            self._reply_ok()
        elif message['command'] == 'service_inspect':
            service_id = message['service_id']
            service = config.singletons['sql_manager'].service_list(
                id=service_id, only_one=True)
            if service is None:
                self._reply_error('no such service')
            else:
                swarm = SwarmClient(config.get_conf())
                info = swarm.inspect_container(service.docker_id)
                self._reply_ok(info)
        else:
            log.error('Unknown command: {}'.format(message['command']))
            self._reply_error('unknown command')
        if not self.debug_has_replied:
            self._reply_error('bug')
            # BUG FIX: the original raised with a literal '{}' placeholder
            # because .format() was never called on the message string.
            raise ZoeException('BUG: command {} does not fill a reply'.format(
                message['command']))
        config.singletons['metric'].metric_api_call(
            start_time, message['command'])
def terminate_execution(execution: Execution) -> None:
    """Terminate all running containers of *execution* and mark it terminated.

    Services without a Docker container (docker_id is None) are skipped.
    """
    execution.set_cleaning_up()
    swarm = SwarmClient(get_conf())
    for service in execution.services:
        assert isinstance(service, Service)
        if service.docker_id is None:
            continue  # never started, nothing to tear down
        service.set_terminating()
        swarm.terminate_container(service.docker_id, delete=True)
        service.set_inactive()
        log.debug('Service {} terminated'.format(service.name))
    execution.set_terminated()
def loop(self):
    """Blocking ZMQ request/reply loop of the master API server.

    Receives one JSON message per iteration and dispatches on
    ``message['command']``. Every command path must produce exactly one
    reply; ``self.debug_has_replied`` guards against handlers that forget.

    :raises ZoeException: if a command handler fails to send a reply (bug guard).
    """
    assert isinstance(config.singletons['sql_manager'], zoe_lib.sql_manager.SQLManager)
    while True:
        message = self.zmq_s.recv_json()
        self.debug_has_replied = False
        start_time = time.time()
        if message['command'] == 'execution_start':
            exec_id = message['exec_id']
            execution = config.singletons['sql_manager'].execution_list(id=exec_id, only_one=True)
            if execution is None:
                self._reply_error('Execution ID {} not found'.format(message['exec_id']))
            else:
                execution.set_scheduled()
                self._reply_ok()
                zoe_master.execution_manager.execution_submit(execution)
        elif message['command'] == 'execution_terminate':
            exec_id = message['exec_id']
            execution = config.singletons['sql_manager'].execution_list(id=exec_id, only_one=True)
            if execution is None:
                self._reply_error('Execution ID {} not found'.format(message['exec_id']))
            else:
                execution.set_cleaning_up()
                self._reply_ok()
                zoe_master.execution_manager.execution_terminate(execution)
        elif message['command'] == 'execution_delete':
            exec_id = message['exec_id']
            execution = config.singletons['sql_manager'].execution_list(id=exec_id, only_one=True)
            if execution is not None:
                zoe_master.execution_manager.execution_delete(execution)
            self._reply_ok()
        elif message['command'] == 'service_inspect':
            service_id = message['service_id']
            service = config.singletons['sql_manager'].service_list(id=service_id, only_one=True)
            if service is None:
                self._reply_error('no such service')
            else:
                swarm = SwarmClient(config.get_conf())
                info = swarm.inspect_container(service.docker_id)
                self._reply_ok(info)
        else:
            log.error('Unknown command: {}'.format(message['command']))
            self._reply_error('unknown command')
        if not self.debug_has_replied:
            self._reply_error('bug')
            # BUG FIX: the original raised with a literal '{}' placeholder
            # because .format() was never called on the message string.
            raise ZoeException('BUG: command {} does not fill a reply'.format(message['command']))
        config.singletons['metric'].metric_api_call(start_time, message['command'])
def execution_to_containers(execution: Execution):
    """Spawn one container per service of *execution*, in startup order.

    Builds the template-substitution dictionary (execution/user/deployment
    names plus every service's DNS name) before starting any service.
    """
    services_in_order = sorted(execution.services,
                               key=lambda svc: svc.description['startup_order'])

    env_subst_dict = {
        "execution_name": execution.name,
        'user_name': execution.user_id,
        'deployment_name': get_conf().deployment_name,
    }
    # Expose every service's DNS name to all services' templates
    for svc in services_in_order:
        env_subst_dict['dns_name#' + svc.name] = svc.dns_name

    for svc in services_in_order:
        env_subst_dict['dns_name#self'] = svc.dns_name
        svc.set_starting()
        _spawn_service(execution, svc, env_subst_dict)
def main():
    """Standalone entrypoint for testing a ZApp description.

    Validates the JSON ZApp description, starts its containers against a
    fake in-memory state manager, waits for CTRL-C, then terminates the
    execution.
    """
    conf = load_configuration()
    config.load_configuration(conf)
    args = config.get_conf()
    if args.debug:
        logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT)
    else:
        logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
    # Silence or tune noisy third-party loggers
    logging.getLogger('kazoo').setLevel(logging.WARNING)
    logging.getLogger('requests').setLevel(logging.WARNING)
    logging.getLogger('urllib3').setLevel(logging.WARNING)
    logging.getLogger('docker').setLevel(logging.INFO)
    logging.getLogger("tornado").setLevel(logging.DEBUG)

    state = FakeSQLManager()
    config.singletons['sql_manager'] = state

    zapp_description = json.load(args.jsonfile)
    print('Validating zapp description...')
    zoe_lib.applications.app_validate(zapp_description)

    exec_id = state.execution_new('test', 'fake_user', zapp_description)
    e = state.execution_list(only_one=True, id=exec_id)
    _digest_application_description(e)

    print('Zapp digested, starting containers...')
    execution_to_containers(e)
    for service in e.services:
        print("Service {}, docker ID: {}".format(service.name, service.docker_id))

    # BUG FIX: typo in user-facing message ("as been" -> "has been")
    print("Execution has been started, press CTRL-C to terminate it")
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass

    print('Terminating...')
    terminate_execution(e)
def main():
    """Standalone entrypoint for testing a ZApp description.

    Validates the JSON ZApp description, starts its containers against a
    fake in-memory state manager, waits for CTRL-C, then terminates the
    execution.
    """
    conf = load_configuration()
    config.load_configuration(conf)
    args = config.get_conf()
    if args.debug:
        logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT)
    else:
        logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
    # Silence or tune noisy third-party loggers
    logging.getLogger("kazoo").setLevel(logging.WARNING)
    logging.getLogger("requests").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.getLogger("docker").setLevel(logging.INFO)
    logging.getLogger("tornado").setLevel(logging.DEBUG)

    state = FakeSQLManager()
    config.singletons["sql_manager"] = state

    zapp_description = json.load(args.jsonfile)
    print("Validating zapp description...")
    zoe_lib.applications.app_validate(zapp_description)

    exec_id = state.execution_new("test", "fake_user", zapp_description)
    e = state.execution_list(only_one=True, id=exec_id)
    _digest_application_description(e)

    print("Zapp digested, starting containers...")
    execution_to_containers(e)
    for service in e.services:
        print("Service {}, docker ID: {}".format(service.name, service.docker_id))

    # BUG FIX: typo in user-facing message ("as been" -> "has been")
    print("Execution has been started, press CTRL-C to terminate it")
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass

    print("Terminating...")
    terminate_execution(e)
def __init__(self):
    """Root all filesystem workspaces under <workspace_base_path>/<deployment_name>."""
    conf = config.get_conf()
    self.base_path = os.path.join(conf.workspace_base_path, conf.deployment_name)
def dns_name(self):
    """Return the unique DNS name of this service instance.

    Composed as <service name>-<execution id>-<deployment name>.
    """
    parts = (self.name, self.execution_id, get_conf().deployment_name)
    return "{}-{}-{}".format(*parts)
def configuration(request):
    """Pytest fixture: install the test configuration and return the conf object."""
    test_conf = TestConf()
    load_configuration(test_conf=test_conf)
    return get_conf()
def __init__(self):
    """Create the ZMQ REP socket and bind it to the configured API listen URI."""
    self.listen_uri = config.get_conf().api_listen_uri
    self.context = zmq.Context()
    self.zmq_s = self.context.socket(zmq.REP)
    self.zmq_s.bind(self.listen_uri)
    # Debug guard: set True by reply helpers, checked by loop()
    self.debug_has_replied = False
def _spawn_service(execution: Execution, service: Service, env_subst_dict: dict):
    """Create and start one Docker container for *service* of *execution*.

    Builds the container options (GELF logging, memory limit, overlay
    network, Zoe labels, templated environment variables, exposed ports,
    volumes, workspace binds, templated command), asks Swarm to spawn the
    container, records the container ID on the service, and finally attaches
    any extra networks.

    :param execution: the execution this service belongs to
    :param service: the service to spawn
    :param env_subst_dict: template-substitution values for environment
        variables and the command string
    :raises ZoeStartExecutionFatalException: on unrecoverable errors (unknown
        template variable, Swarm client creation failure, network attach
        failure)
    :raises ZoeStartExecutionRetryException: when the container spawn itself
        fails and may be retried later
    """
    copts = DockerContainerOptions()
    copts.gelf_log_address = get_conf().gelf_address
    copts.name = service.dns_name
    copts.set_memory_limit(service.description['required_resources']['memory'])
    copts.network_name = get_conf().overlay_network_name
    # Labels let other Zoe components find and classify this container
    copts.labels = {
        'zoe.execution.name': execution.name,
        'zoe.execution.id': str(execution.id),
        'zoe.service.name': service.name,
        'zoe.service.id': str(service.id),
        'zoe.owner': execution.user_id,
        'zoe.deployment_name': get_conf().deployment_name,
        'zoe.type': 'app_service'
    }
    if service.description['monitor']:
        copts.labels['zoe.monitor'] = 'true'
    else:
        copts.labels['zoe.monitor'] = 'false'
    copts.restart = not service.description['monitor']  # Monitor containers should not restart

    # Generate a dictionary containing the current cluster status (before the new container is spawned)
    # This information is used to substitute template strings in the environment variables
    for env_name, env_value in service.description['environment']:
        try:
            env_value = env_value.format(**env_subst_dict)
        except KeyError:
            raise ZoeStartExecutionFatalException("unknown variable in expression {}".format(env_value))
        copts.add_env_variable(env_name, env_value)

    for p in service.description['ports']:
        if p['expose']:
            copts.ports.append(p['port_number'])  # FIXME UDP ports?

    if 'volumes' in service.description:
        for path, mount_point, readonly in service.description['volumes']:
            copts.add_volume_bind(path, mount_point, readonly)

    # Bind-mount every attachable workspace for the execution's owner
    for wks in singletons['workspace_managers']:
        assert isinstance(wks, zoe_master.workspace.base.ZoeWorkspaceBase)
        if wks.can_be_attached():
            copts.add_volume_bind(wks.get_path(execution.user_id), wks.get_mountpoint(), False)

    # The same dictionary is used for templates in the command
    if 'command' in service.description:
        copts.set_command(service.description['command'].format(**env_subst_dict))

    try:
        swarm = SwarmClient(get_conf())
    except Exception as e:
        raise ZoeStartExecutionFatalException(str(e))

    try:
        cont_info = swarm.spawn_container(service.description['docker_image'], copts)
    except ZoeException as e:
        # Spawn failures are considered transient and may be retried
        raise ZoeStartExecutionRetryException(str(e))
    service.set_active(cont_info["docker_id"])

    if 'networks' in service.description:
        for net in service.description['networks']:
            try:
                swarm.connect_to_network(service.docker_id, net)
            except ZoeException as e:
                raise ZoeStartExecutionFatalException(str(e))
    return
def __init__(self):
    """Daemon thread named 'stats' that collects statistics via a Swarm client."""
    super().__init__(name='stats', daemon=True)
    self._swarm_stats = None
    self.swarm = SwarmClient(get_conf())