def main():
    """Dump this ship's container parameters from the kv-store to a file.

    The target file is ``args.saved_containers_path``. Nothing is written
    when recovery has not completed (unless ``args.force`` is set) or when
    the kv-store holds no entries under ``services/<ship name>``; both cases
    are only logged. Any exception raised while saving is logged and the
    process exits with status 1.
    """
    setup_sentry()
    options = _parse_args()
    target_path = options.saved_containers_path

    # Skip the dump unless it was forced or recovery has already finished.
    if not (options.force or _is_recovery_completed()):
        get_logger().info(
            'Recovery is not completed. Aborting saving running containers.')
        return

    try:
        wait_for_consul_ready()
        ship_services_key = 'services/{}'.format(get_ship_name())
        parameters = kv.kv_get_recurse(ship_services_key, strip_keys=False)
        if parameters:
            _save_containers_parameters_list_in_file(parameters, target_path)
            get_logger().info(
                'Containers have been saved to {}.'.format(target_path))
        else:
            get_logger().info(
                'Aborted saving container because list is empty.')
    except Exception as error:
        get_logger().exception(error)
        sys.exit(1)
def _deregister_not_running_services():
    """Flag local services without a running container as crashed and deregister them.

    Two sources are walked: the services returned by ``_get_local_services()``
    and the keys returned by ``get_local_services()``. In both cases an entry
    whose container id is not among the running container ids gets its status
    set to 'crashed' and its services deregistered. For the first source the
    status is only updated for main services, not for subservices.
    """
    try:
        ship = get_ship_name()
    except Exception:
        # Fall back to the IP when the name lookup fails. Only ``Exception``
        # is caught so that KeyboardInterrupt/SystemExit still propagate.
        ship = get_ship_ip()

    services = _get_local_services()
    running_containers_ids = _get_running_container_ids()

    for service_id, service in services.items():
        container_id, is_subservice = _get_container_id_with_subservice(
            service_id)
        if container_id in running_containers_ids:
            continue
        if not is_subservice:
            update_container_status('crashed', ship=ship,
                                    service_name=service['Service'],
                                    container_id=container_id)
        deregister_services(container_id)

    for service_key in get_local_services():
        # The container id is the last path segment of the key.
        container_id = service_key.split('/')[-1]
        if container_id not in running_containers_ids:
            update_container_status('crashed', key=service_key)
            deregister_services(container_id)
def _load_containers_to_kv_store(saved_containers_path):
    """Load saved container parameters from a file into the kv-store.

    Waits for consul, reads the parameters stored at ``saved_containers_path``
    and passes them, together with this ship's name, to ``_load_from_dict``.
    This is best-effort: any ordinary error is logged with its traceback and
    swallowed.

    Args:
        saved_containers_path: path of the file holding the saved parameters.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        saved_containers = _load_saved_containers_parameters(
            saved_containers_path)
        _load_from_dict(saved_containers, ship)
    except Exception:
        # Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        get_logger().exception('Unable to load from %s', saved_containers_path)
def recover_saved_containers_from_parameters(saved_containers):
    """Load the given container parameters and start recovery from the kv-store.

    A failure while loading the parameters is logged and does not prevent
    the recovery step from running.

    Args:
        saved_containers: saved container parameters, handed to
            ``_load_from_dict`` together with this ship's name.

    Returns:
        Whatever ``recover_containers_from_kv_store()`` returns.
    """
    wait_for_consul_ready()
    try:
        ship_name = get_ship_name()
        _load_from_dict(saved_containers, ship_name)
    except Exception as error:
        get_logger().exception(error)

    return recover_containers_from_kv_store()
def on_post(self, req, resp):
    """Set this ship's name from the posted 'name' parameter.

    The name is rejected when it is empty, equals the string 'None', or is
    already used by one of the other ships.
    """
    requested_name, error = self.get_post_parameter(req, 'name')
    if error:
        return self.status_error(resp, error)

    # Names currently used by the other ships.
    taken_names = []
    for ip in get_other_ship_ips():
        taken_names.append(get_ship_name(ip))

    already_used = requested_name in taken_names
    acceptable = (requested_name
                  and requested_name != 'None'
                  and not already_used)
    if acceptable:
        set_ship_name(requested_name)
        return self.status_ok(resp)

    return self.status_error(
        resp, 'Incorrect ship name: {}'.format(requested_name))
def _get_services_list(filter_microservice_name, filter_env, filter_app_id,
                       filter_local):
    """Fetch services from the kv-store and filter them.

    Args:
        filter_microservice_name: passed through to ``_parse_single_ship``.
        filter_env: passed through to ``_parse_single_ship``.
        filter_app_id: passed through to ``_parse_single_ship``.
        filter_local: when truthy, only the subtree under this ship's name
            is queried instead of the whole 'services' tree.

    Returns:
        The result of ``_parse_single_ship`` or an empty dict when the
        kv-store query yields nothing.
    """
    root_key = 'services'
    lookup_key = ('{}/{}'.format(root_key, get_ship_name())
                  if filter_local else root_key)

    found = kv.kv_get_recurse(lookup_key)
    if found:
        return _parse_single_ship(found, filter_microservice_name,
                                  filter_env, filter_app_id)
    return {}
def _save_runtime_settings():
    """Write the current runtime settings as JSON to the runtime settings file.

    The stored fields are the commander flag, the ship name, the other ship
    IPs, the current datacenter and the dockyard alias list. The file at
    ``consul_config.RUNTIME_SETTINGS_PATH`` is overwritten.
    """
    settings = {}
    settings['is_commander'] = is_ship_commander()
    settings['name'] = get_ship_name()
    settings['ships'] = get_other_ship_ips()
    settings['datacenter'] = get_current_datacenter()
    settings['dockyards'] = alias.get_list()

    with open(consul_config.RUNTIME_SETTINGS_PATH, 'w') as settings_file:
        serialized = json.dumps(settings, sort_keys=True, indent=4)
        settings_file.write(serialized)
def on_post(self, req, resp):
    """Join this ship to the armada reachable at the posted 'host'.

    Only an armada of size one may join. The ship name and the local
    kv-store service entries are captured up front and written back after
    consul has been restarted with the new settings.
    """
    remote_host, error = self.get_post_parameter(req, 'host')
    if error:
        return self.status_error(resp, error)

    # Snapshot the state that has to be restored after the consul restart.
    original_ship_name = get_ship_name()
    services_snapshot = {}
    for service_key in get_local_services_from_kv_store():
        services_snapshot[service_key] = kv.kv_get(service_key)

    size = _get_armada_size()
    if size > 1:
        message = ('Currently only single ship armadas can join the others. '
                   'Your armada has size: {0}.'.format(size))
        return self.status_error(resp, message)

    try:
        remote_agent = consul_query(
            'agent/self', consul_address='{0}:8500'.format(remote_host))
        datacenter = remote_agent['Config']['Datacenter']
    except Exception as query_error:
        get_logger().exception(query_error)
        return self.status_error(
            resp, 'Could not read remote host datacenter address.')

    mode = _get_current_consul_mode()
    if mode == consul_config.ConsulMode.BOOTSTRAP:
        # A bootstrap node switches to client mode and points at the remote
        # host only.
        override_runtime_settings(
            consul_mode=consul_config.ConsulMode.CLIENT,
            ship_ips=[remote_host],
            datacenter=datacenter)
    else:
        override_runtime_settings(
            ship_ips=[remote_host] + get_other_ship_ips(),
            datacenter=datacenter)

    if not _restart_consul():
        return self.status_error(resp, 'Waiting for armada restart timed out.')

    supervisor = xmlrpc.client.Server('http://localhost:9001/RPC2')
    start_output = supervisor.supervisor.startProcessGroup('hermes_init')
    get_logger().info('hermes_init start: %s', start_output)

    # Restore the state captured before the restart.
    set_ship_name(original_ship_name)
    for service_key, service_data in six.iteritems(services_snapshot):
        kv.kv_set(service_key, service_data)
    return self.status_ok(resp)
def _start_container(self, long_container_id):
    """Start a container, record it as started and collect its endpoints.

    Args:
        long_container_id: full Docker container id.

    Returns:
        dict mapping '<advertise address>:<host port>' to the container
        port key reported by docker inspect. Ports without any host
        binding are left out.
    """
    docker_api = docker_client.api(timeout=30)
    docker_api.start(long_container_id)

    agent_self_dict = consul_query('agent/self')
    service_ip = agent_self_dict['Config']['AdvertiseAddr']

    docker_inspect = docker_api.inspect_container(long_container_id)

    ship = get_ship_name()
    container_id = shorten_container_id(long_container_id)
    save_container(ship, container_id, status='started')

    # Docker reports ``null`` for ports that are exposed but not published
    # (and may report ``null`` for the whole map), so both are guarded
    # instead of indexing blindly into the binding list.
    ports = docker_inspect['NetworkSettings']['Ports'] or {}
    service_endpoints = {}
    for container_port, host_bindings in ports.items():
        if not host_bindings:
            continue
        endpoint = '{0}:{1}'.format(service_ip, host_bindings[0]['HostPort'])
        service_endpoints[endpoint] = container_port
    return service_endpoints
def _add_running_services_at_startup():
    """Save services known to the local consul agent but missing from the kv-store.

    Every service reported by 'agent/services' is considered, except the
    'consul' entry, services named 'armada' and service ids containing ':'.
    A service is saved with status 'started' when its kv-store key is not
    among the locally saved containers. This is best-effort: any ordinary
    error is logged with its traceback and swallowed.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        containers_saved_in_kv = get_local_services()
        # NOTE(review): presumably gives services time to register with the
        # agent before it is queried - confirm the reason for this delay.
        sleep(10)
        all_services = consul_query('agent/services')
        # ``pop`` instead of ``del``: a missing 'consul' entry must not abort
        # the whole pass with a KeyError.
        all_services.pop('consul', None)
        for service_id, service_dict in all_services.items():
            if ':' in service_id:
                continue
            if service_dict['Service'] == 'armada':
                continue
            key = create_consul_services_key(ship, service_dict['Service'],
                                             service_id)
            if not containers_saved_in_kv or key not in containers_saved_in_kv:
                save_container(ship, service_id, 'started')
                get_logger().info(
                    'Added running service: {}'.format(service_id))
    except Exception:
        # Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        get_logger().exception('Unable to add running services.')
def on_get(self, req, resp):
    """Respond with one info record per node in the consul catalog.

    Each record holds the ship name, role, armada service address, status
    and version, the armada microservice id and whether the node is the
    current ship. Address and status fall back to the node address and '?'
    when no running armada service is known for the node; the role falls
    back to '?' when it cannot be determined.
    """
    try:
        nodes = consul_query('catalog/nodes')
        result = []
        armada_services = _get_running_armada_services()
        armada_by_ip = _create_ip_to_service(armada_services)
        local_ip = get_ship_ip()

        for node in nodes:
            node_ip = node['Address']
            name = get_ship_name(node_ip)
            armada = armada_by_ip.get(node_ip, {})
            armada_address = armada.get('address', node_ip)
            armada_status = armada.get('status', '?')
            armada_version = get_armada_version(armada_address)

            try:
                role = get_ship_role(node_ip)
            except Exception as role_error:
                get_logger().exception(role_error)
                role = '?'

            is_current = node_ip == local_ip
            result.append(dict(
                name=name,
                role=role,
                address=armada_address,
                status=armada_status,
                version=armada_version,
                microservice_id=armada.get('microservice_id'),
                is_current=is_current,
            ))
    except Exception as error:
        return self.status_exception(resp, 'Could not get armada info.', error)
    else:
        return self.status_ok(resp, {'result': result})
def _update_running_services():
    """Synchronise the kv-store with the catalog and the running containers.

    For every service in the local catalog: its container is saved to the
    kv-store as 'started' when it is not known there yet. If the container
    is not running, its services are deregistered and, for main services
    only, its status is set to 'crashed'. Afterwards every kv-store entry
    read at the start whose container is not running is marked 'crashed'
    and deregistered.
    """
    ship_ip = get_ship_ip()
    try:
        ship_name = get_ship_name(ship_ip)
    except Exception:
        # Fall back to the IP when the name lookup fails. Only ``Exception``
        # is caught so that KeyboardInterrupt/SystemExit still propagate.
        ship_name = ship_ip

    services = _get_local_services_from_catalog()
    running_containers_ids = _get_running_container_ids()
    local_services = get_local_services_from_kv_store()
    # The container id is the last path segment of a kv-store key.
    known_container_ids = {key.split('/')[-1] for key in local_services}

    for service_id, service in services.items():
        container_id, is_subservice = _get_container_id_with_subservice(
            service_id)
        if container_id not in known_container_ids:
            save_container(ship_name, container_id, 'started', ship_ip=ship_ip)
            # Remember the id so that further service ids of the same
            # container do not save it as 'started' again, possibly after
            # it has just been marked 'crashed' below.
            known_container_ids.add(container_id)
            get_logger().info(
                'Saved container in kv-store: {container_id}'.format(
                    container_id=container_id))
        if container_id in running_containers_ids:
            continue
        if not is_subservice:
            update_container_status('crashed', ship=ship_name,
                                    service_name=service['Service'],
                                    container_id=container_id)
            get_logger().info('Set status to "crashed": {container_id}'.format(
                container_id=container_id))
        deregister_services(container_id)

    for service_key in local_services:
        container_id = service_key.split('/')[-1]
        if container_id not in running_containers_ids:
            update_container_status('crashed', key=service_key)
            get_logger().info('Set status to "crashed": {container_id}'.format(
                container_id=container_id))
            deregister_services(container_id)
def on_get(self, req, resp):
    """Respond with the value of ``get_ship_name()`` as a plain-text body."""
    resp.content_type = 'text/plain'
    ship_name = get_ship_name()
    resp.body = ship_name
def GET(self):
    """Return the value of ``get_ship_name()`` called without arguments."""
    ship_name = get_ship_name()
    return ship_name
def get_local_services():
    """Return the services stored for this ship.

    Returns:
        Whatever ``get_services_by_ship`` returns for this ship's name.
    """
    return get_services_by_ship(get_ship_name())