def __get_armada_address(ship_name=None):
    """Build the base HTTP URL of an armada API.

    Without ``ship_name`` the local armada is addressed: plain
    ``http://127.0.0.1`` when ``__are_we_in_armada_container()`` is true,
    otherwise the port of the ``armada`` entry found in ``agent/services``.
    If that query raises ``ConsulException`` a warning is logged and port
    8900 is used instead.

    With ``ship_name`` (an IP according to ``is_ip``, or a name resolved with
    ``ship_name_to_ip``) the ``catalog/service/armada`` entries are searched
    for one whose address equals the ship IP.

    Raises:
        ValueError: if no matching armada service was found.
    """
    if ship_name:
        ship_ip = ship_name if is_ip(ship_name) else ship_name_to_ip(ship_name)
        for catalog_entry in consul_query('catalog/service/armada'):
            if catalog_entry['Address'] == ship_ip:
                return 'http://{0}:{1}'.format(ship_ip, catalog_entry['ServicePort'])
    else:
        if __are_we_in_armada_container():
            return 'http://127.0.0.1'
        try:
            local_services = consul_query('agent/services')
            for local_service in local_services.values():
                if local_service['Service'] == 'armada':
                    return 'http://127.0.0.1:{}'.format(local_service['Port'])
        except ConsulException as e:
            warning = 'Could not get armada port from consul ({}), falling back to 8900.'.format(e)
            get_logger(__file__).warning(warning)
            return 'http://127.0.0.1:8900'
    raise ValueError('Cannot find ship: {0}.'.format(ship_name))
def GET(self):
    """List registered microservice instances.

    Request parameters (read with ``web.input``):
        local: when truthy, only instances whose id appears in the local
            agent's ``agent/services`` are returned.
        microservice_name: when given, only that service name is queried;
            otherwise every name from ``catalog/services`` is.

    Returns:
        ``self.status_ok({"result": [...]})`` with one dict per instance
        (name, address, microservice_id, container_id, status, tags,
        start_timestamp), or ``self.status_error(...)`` if anything fails.
    """
    try:
        get_args = web.input(local=False, microservice_name=None)
        filter_local = bool(get_args.local)
        filter_microservice_name = get_args.microservice_name

        local_microservices_ids = set()
        if filter_local:
            local_microservices_ids = set(consul_query("agent/services").keys())

        if filter_microservice_name:
            microservices_names = [filter_microservice_name]
        else:
            microservices_names = list(consul_query("catalog/services").keys())

        result = []
        for microservice_name in microservices_names:
            if microservice_name == "consul":
                continue

            query = "health/service/{microservice_name}".format(microservice_name=microservice_name)
            for instance in consul_query(query):
                microservice_checks_statuses = set(check["Status"] for check in (instance["Checks"] or []))
                # The most severe status present wins; "-" means no known status.
                microservice_computed_status = "-"
                for possible_status in ["passing", "warning", "critical"]:
                    if possible_status in microservice_checks_statuses:
                        microservice_computed_status = possible_status

                microservice_ip = instance["Node"]["Address"]
                microservice_port = str(instance["Service"]["Port"])
                microservice_id = instance["Service"]["ID"]
                # A service id may carry a ":<suffix>"; the part before it is the container id.
                container_id = microservice_id.split(":")[0]
                microservice_tags = instance["Service"]["Tags"] or []
                microservice_tags_dict = self.__create_dict_from_tags(microservice_tags)

                if filter_local and microservice_id not in local_microservices_ids:
                    continue

                microservice_address = microservice_ip + ":" + microservice_port
                try:
                    microservice_start_timestamp = kv.get("start_timestamp/" + container_id)
                except Exception:
                    # Best effort: a missing or unreadable timestamp must not hide the
                    # service, but KeyboardInterrupt/SystemExit still propagate.
                    microservice_start_timestamp = None

                result.append({
                    "name": microservice_name,
                    "address": microservice_address,
                    "microservice_id": microservice_id,
                    "container_id": container_id,
                    "status": microservice_computed_status,
                    "tags": microservice_tags_dict,
                    "start_timestamp": microservice_start_timestamp,
                })
        return self.status_ok({"result": result})
    except Exception as e:
        return self.status_error(
            "Cannot get the list of services. {exception_class} - {exception}".format(
                exception_class=type(e).__name__, exception=str(e)
            )
        )
def _get_running_services(filter_microservice_name, filter_env, filter_app_id, filter_local):
    """Collect running service instances from consul, keyed by microservice id.

    Args:
        filter_microservice_name: ``fnmatch`` pattern applied to the names
            from ``catalog/services``; falsy means all names.
        filter_env: required value of the ``env`` tag, or None for any.
        filter_app_id: required value of the ``app_id`` tag, or None for any.
        filter_local: when truthy, keep only ids present in ``agent/services``.

    Returns:
        dict mapping microservice id to a dict with name, address,
        microservice_id, container_id, status, tags, start_timestamp and
        single_active_instance.
    """
    if filter_local:
        local_ids = set(consul_query('agent/services').keys())

    candidate_names = list(consul_query('catalog/services').keys())
    if filter_microservice_name:
        candidate_names = fnmatch.filter(candidate_names, filter_microservice_name)

    start_timestamps = kv.kv_get_recurse('start_timestamp/') or {}
    single_active = kv.kv_get_recurse('single_active_instance/')
    single_active_ids = single_active.keys() if single_active else []

    running = {}
    for microservice_name in candidate_names:
        if microservice_name == 'consul':
            continue
        health_query = 'health/service/{microservice_name}'.format(microservice_name=microservice_name)
        for instance in consul_query(health_query):
            check_statuses = set(check['Status'] for check in (instance['Checks'] or []))
            # Most severe status present wins; '-' when none of them is reported.
            status = next(
                (level for level in ('critical', 'warning', 'passing') if level in check_statuses),
                '-')

            node_ip = instance['Node']['Address']
            port_text = str(instance['Service']['Port'])
            microservice_id = instance['Service']['ID']
            container_id = microservice_id.split(':')[0]
            tags = __create_dict_from_tags(instance['Service']['Tags'] or [])

            if filter_env is not None and filter_env != tags.get('env'):
                continue
            if filter_app_id is not None and filter_app_id != tags.get('app_id'):
                continue
            if filter_local and microservice_id not in local_ids:
                continue

            running[microservice_id] = {
                'name': microservice_name,
                'address': node_ip + ':' + port_text,
                'microservice_id': microservice_id,
                'container_id': container_id,
                'status': status,
                'tags': tags,
                'start_timestamp': start_timestamps.get(container_id, None),
                'single_active_instance': microservice_id in single_active_ids,
            }
    return running
def command_list(args):
    """Print the services registered in consul.

    Reads from ``args``: ``service_name`` (restrict to one service),
    ``local`` (only ids present in ``agent/services``), ``env`` / ``app_id``
    (required ``env:<value>`` / ``app_id:<value>`` tags), ``quiet`` (print
    bare container ids instead of a table) and ``uptime`` (add a creation
    time column).
    """
    if args.service_name:
        service_names = [args.service_name]
    else:
        service_names = list(consul_query('catalog/services').keys())

    local_ids = set()
    if args.local:
        local_ids = set(consul_query('agent/services').keys())

    output_rows = []
    if not args.quiet:
        output_header = ('Name', 'Address', 'ID', 'Status', 'Tags')
        if args.uptime:
            output_header += ("Created (UTC)",)
        output_rows = [output_header]

    for service_name in service_names:
        if service_name == 'consul':
            continue

        query = 'health/service/{service_name}'.format(service_name=service_name)
        for instance in consul_query(query):
            service_checks_statuses = set(check['Status'] for check in (instance['Checks'] or []))
            # The most severe status present wins; '-' means no known status.
            service_computed_status = '-'
            for possible_status in ['passing', 'warning', 'critical']:
                if possible_status in service_checks_statuses:
                    service_computed_status = possible_status

            service_ip = instance['Node']['Address']
            service_port = str(instance['Service']['Port'])
            service_id = instance['Service']['ID']
            container_id = service_id.split(':')[0]
            service_tags = instance['Service']['Tags'] or []
            service_tags_pretty = [str(x) for x in sorted(service_tags)] if service_tags else '-'
            service_tags_set = set(service_tags)

            matches_env = (args.env is None) or ('env:' + args.env in service_tags_set)
            matches_app_id = (args.app_id is None) or ('app_id:' + args.app_id in service_tags_set)
            matches_local = not args.local or service_id in local_ids
            if not (matches_env and matches_app_id and matches_local):
                continue

            service_address = service_ip + ':' + service_port
            if args.quiet:
                print(str(container_id))
                continue

            output_row = (service_name, service_address, container_id, service_computed_status,
                          service_tags_pretty)
            if args.uptime:
                try:
                    start_timestamp = kv.get("start_timestamp/" + container_id)
                    creation_time = epoch_to_iso(start_timestamp)
                except Exception:
                    # Best effort column: show a placeholder, but let
                    # KeyboardInterrupt/SystemExit propagate.
                    creation_time = "-"
                output_row += (creation_time,)
            output_rows.append(output_row)

    if not args.quiet:
        # Header stays first; data rows are sorted.
        print_table([output_rows[0]] + sorted(output_rows[1:]))
def get_ship_role(ship_ip):
    """Return the role of the ship at ``ship_ip``: 'ship', 'commander' or 'leader'.

    A ship whose own agent (queried on port 8500) reports ``Config.Server``
    is a 'commander'; it is the 'leader' when ``status/leader`` starts with
    ``"<ship_ip>:"``. Any other ship is a plain 'ship'.
    """
    leader_address = consul_query('status/leader')
    agent_address = '{ship_ip}:8500'.format(ship_ip=ship_ip)
    agent_info = consul_query('agent/self', consul_address=agent_address)
    if not agent_info['Config']['Server']:
        return 'ship'
    if leader_address.startswith(ship_ip + ':'):
        return 'leader'
    return 'commander'
def get_ship_role(ship_ip):
    """Classify the ship at ``ship_ip`` as "ship", "commander" or "leader".

    ``status/leader`` is read first, then ``agent/self`` of the ship's own
    consul agent on port 8500. A non-server agent is a "ship"; a server is
    a "leader" when the leader address begins with ``ship_ip + ":"`` and a
    "commander" otherwise.
    """
    current_leader = consul_query("status/leader")
    remote_agent = consul_query("agent/self", consul_address="{ship_ip}:8500".format(ship_ip=ship_ip))
    is_server = remote_agent["Config"]["Server"]
    if is_server:
        is_leader = current_leader.startswith(ship_ip + ":")
        return "leader" if is_leader else "commander"
    return "ship"
def get_ship_role(ship_ip):
    """Tell whether the ship at ``ship_ip`` is a 'ship', 'commander' or 'leader'.

    Uses the cluster's ``status/leader`` and the ``Config.Server`` flag that
    the ship's own agent (``<ship_ip>:8500``) reports in ``agent/self``.
    """
    leader = consul_query('status/leader')
    self_info = consul_query(
        'agent/self',
        consul_address='{ship_ip}:8500'.format(ship_ip=ship_ip))

    role = 'ship'
    if self_info['Config']['Server']:
        leader_prefix = ship_ip + ':'
        role = 'leader' if leader.startswith(leader_prefix) else 'commander'
    return role
def get_other_ship_ips():
    """Return the addresses of all catalog nodes except the local ship.

    The local ship is identified by ``Config.AdvertiseAddr`` from
    ``agent/self``. This is a best-effort helper: any failure while
    querying consul yields an empty list.
    """
    try:
        ship_ips = [consul_node['Address'] for consul_node in consul_query('catalog/nodes')]
        service_ip = consul_query('agent/self')['Config']['AdvertiseAddr']
        if service_ip in ship_ips:
            ship_ips.remove(service_ip)
        return ship_ips
    except Exception:
        # Deliberately swallowed, but no longer traps KeyboardInterrupt/SystemExit
        # the way the previous bare ``except:`` did.
        return []
def get_matched_containers(microservice_name_or_container_id_prefix):
    """Find catalog instances matching a microservice name or container id prefix.

    Every service from ``catalog/services`` is queried. An instance matches
    by name when its ``ServiceName`` equals the argument, and by id when its
    container id starts with the argument and its ``ServiceID`` has no ':'.
    A failed catalog query is reported with ``print_err`` and skipped.

    Raises:
        ArmadaCommandException: when there are matches both by name and by
            id, more than one match by id, or no match at all.
    """
    wanted = microservice_name_or_container_id_prefix
    by_name, by_id = [], []

    for catalog_name in list(consul_query('catalog/services').keys()):
        query = 'catalog/service/{service_name}'.format(service_name=catalog_name)
        try:
            instances = consul_query(query)
        except Exception as e:
            print_err(
                'WARNING: query "{query}" failed ({exception_class}: {exception})'
                .format(query=query, exception_class=type(e).__name__, exception=e))
            instances = []

        for instance in instances:
            container_id = instance['ServiceID'].split(':')[0]
            if wanted == instance['ServiceName']:
                by_name.append(instance)
            if container_id.startswith(wanted) and ":" not in instance['ServiceID']:
                by_id.append(instance)

    name_count = len(by_name)
    id_count = len(by_id)
    if name_count and id_count:
        raise ArmadaCommandException(
            'Found matching containers with both microservice name ({matched_containers_by_name_count}) '
            'and container_id ({matched_containers_by_id_count}). '
            'Please provide more specific criteria.'.format(
                matched_containers_by_name_count=name_count,
                matched_containers_by_id_count=id_count))
    if id_count > 1:
        raise ArmadaCommandException(
            'There are too many ({matched_containers_by_id_count}) matching containers. '
            'Please provide more specific container_id.'.format(
                matched_containers_by_id_count=id_count))

    matched = by_name + by_id
    if not matched:
        raise ArmadaCommandException(
            'There are no running containers with microservice: '
            '{microservice_name_or_container_id_prefix}'.format(
                microservice_name_or_container_id_prefix=wanted))
    return matched
def __get_armada_address(ship_name=None):
    """Return the HTTP URL of an armada API, or None for a missing local one.

    Without ``ship_name`` the ``armada`` entry of ``agent/services`` is
    combined with ``ARMADA_IP``; None is returned when there is no such
    entry. With ``ship_name`` the ``catalog/service/armada`` entries are
    matched by node name (with or without the 'ship-' prefix) or by address.

    Raises:
        ValueError: if ``ship_name`` was given but no entry matched.
    """
    if ship_name:
        accepted_nodes = (ship_name, 'ship-' + ship_name)
        for entry in consul_query('catalog/service/armada'):
            if entry['Node'] in accepted_nodes or entry['Address'] == ship_name:
                return 'http://{0}:{1}'.format(entry['Address'], entry['ServicePort'])
        raise ValueError('Cannot find ship: {ship_name}.'.format(ship_name=ship_name))

    for local_service in consul_query('agent/services').values():
        if local_service['Service'] != 'armada':
            continue
        return 'http://{0}:{1}'.format(ARMADA_IP, str(local_service['Port']))
    return None
def __get_armada_address(ship_name=None):
    """Return the base HTTP URL of the local or a named ship's armada API.

    Without ``ship_name``: ``http://127.0.0.1`` inside the armada container,
    otherwise 127.0.0.1 with the port of the ``armada`` entry in
    ``agent/services``. With ``ship_name``: the address and port of the
    ``catalog/service/armada`` entry whose node is ``ship_name`` or
    ``'ship-' + ship_name``, or whose address equals ``ship_name``.

    Raises:
        ValueError: if no armada service matched.
    """
    if ship_name:
        node_names = (ship_name, 'ship-' + ship_name)
        for entry in consul_query('catalog/service/armada'):
            if entry['Node'] in node_names or entry['Address'] == ship_name:
                return 'http://{0}:{1}'.format(entry['Address'], entry['ServicePort'])
    elif __are_we_in_armada_container():
        return 'http://127.0.0.1'
    else:
        for local_service in consul_query('agent/services').values():
            if local_service['Service'] == 'armada':
                return 'http://127.0.0.1:{}'.format(local_service['Port'])
    raise ValueError('Cannot find ship: {ship_name}.'.format(ship_name=ship_name))
def POST(self):
    """Join this single-ship armada to the armada reachable at POST parameter 'host'.

    Steps: validate the parameter, refuse when ``_get_armada_size()`` is
    above 1, read the remote datacenter from the remote agent's
    ``agent/self`` (port 8500), override the runtime settings and restart
    consul.

    Returns:
        ``self.status_ok()`` when consul came back after the restart,
        otherwise ``self.status_error(...)`` with the reason.
    """
    consul_host, error = self.get_post_parameter('host')
    if error:
        return self.status_error(error)

    armada_size = _get_armada_size()
    if armada_size > 1:
        return self.status_error(
            'Currently only single ship armadas can join the others. '
            'Your armada has size: {armada_size}.'.format(
                armada_size=armada_size))

    try:
        agent_self_dict = consul_query(
            'agent/self',
            consul_address='{consul_host}:8500'.format(consul_host=consul_host))
        datacenter = agent_self_dict['Config']['Datacenter']
    except Exception:
        # Any query/parse failure is reported to the caller; unlike the former bare
        # ``except:`` this does not trap KeyboardInterrupt/SystemExit.
        return self.status_error(
            'Could not read remote host datacenter address.')

    current_consul_mode = _get_current_consul_mode()
    if current_consul_mode == consul_config.ConsulMode.BOOTSTRAP:
        override_runtime_settings(
            consul_mode=consul_config.ConsulMode.CLIENT,
            ship_ips=[consul_host],
            datacenter=datacenter)
    else:
        override_runtime_settings(ship_ips=[consul_host] + get_other_ship_ips(),
                                  datacenter=datacenter)

    if _restart_consul():
        return self.status_ok()
    return self.status_error('Waiting for armada restart timed out.')
def _add_running_services_at_startup():
    """Save locally running services that are missing from the kv store.

    Waits for consul, sleeps 10 seconds, then walks ``agent/services``.
    The 'consul' entry, ids containing ':' and the 'armada' service are
    skipped. Every remaining service whose key is not among
    ``get_local_services_from_kv_store()`` is saved as 'started'. Any
    exception is logged and swallowed.
    """
    wait_for_consul_ready()
    try:
        ship_ip, ship_name = get_ship_ip_and_name()
        known_keys = get_local_services_from_kv_store()
        sleep(10)

        local_services = consul_query('agent/services')
        local_services.pop('consul', None)
        for service_id, service in six.iteritems(local_services):
            if ':' in service_id or service['Service'] == 'armada':
                continue
            key = create_consul_services_key(ship_name, service['Service'], service_id)
            if known_keys and key in known_keys:
                continue
            save_container(ship_name, service_id, 'started', ship_ip=ship_ip)
            get_logger().info('Added running service: {}'.format(service_id))
    except Exception:
        get_logger().exception('Unable to add running services.')
def _get_armada_size():
    """Return the number of entries in ``catalog/nodes``.

    If the query fails the exception is logged and 0 is returned.
    """
    size = 0
    try:
        size = len(consul_query('catalog/nodes'))
    except Exception as e:
        get_logger().exception(e)
    return size
def get_ship_names():
    """Return ``get_ship_name(address)`` for every node in ``catalog/nodes``.

    Best effort: any failure while querying or resolving names yields an
    empty list.
    """
    try:
        return [get_ship_name(consul_node['Address'])
                for consul_node in consul_query('catalog/nodes')]
    except Exception:
        # Deliberately swallowed, but KeyboardInterrupt/SystemExit now propagate
        # (the previous bare ``except:`` trapped them too).
        return []
def __get_armada_address():
    """Return the base HTTP URL of the local armada API.

    Inside the armada container that is plain ``http://127.0.0.1``;
    otherwise 127.0.0.1 plus the port of the ``armada`` entry in
    ``agent/services``. Returns None when there is no such entry.
    """
    if __are_we_in_armada_container():
        return 'http://127.0.0.1'

    for local_service in consul_query('agent/services').values():
        if local_service['Service'] != 'armada':
            continue
        return 'http://127.0.0.1:{}'.format(local_service['Port'])
    return None
def POST(self):
    """Join this single-ship armada to the armada reachable at POST parameter 'host'.

    Steps: validate the parameter, refuse when ``_get_armada_size()`` is
    above 1, read the remote datacenter from the remote agent's
    ``agent/self`` (port 8500), override the runtime settings, restart
    consul and, on success, start the 'hermes_init' process group through
    the supervisor XML-RPC endpoint.

    Returns:
        ``self.status_ok()`` when consul came back after the restart,
        otherwise ``self.status_error(...)`` with the reason.
    """
    consul_host, error = self.get_post_parameter('host')
    if error:
        return self.status_error(error)

    armada_size = _get_armada_size()
    if armada_size > 1:
        return self.status_error('Currently only single ship armadas can join the others. '
                                 'Your armada has size: {0}.'.format(armada_size))

    try:
        agent_self_dict = consul_query('agent/self', consul_address='{0}:8500'.format(consul_host))
        datacenter = agent_self_dict['Config']['Datacenter']
    except Exception as e:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit propagate;
        # the cause is logged instead of being silently dropped.
        get_logger().exception(e)
        return self.status_error('Could not read remote host datacenter address.')

    current_consul_mode = _get_current_consul_mode()
    if current_consul_mode == consul_config.ConsulMode.BOOTSTRAP:
        override_runtime_settings(consul_mode=consul_config.ConsulMode.CLIENT,
                                  ship_ips=[consul_host],
                                  datacenter=datacenter)
    else:
        override_runtime_settings(ship_ips=[consul_host] + get_other_ship_ips(),
                                  datacenter=datacenter)

    if _restart_consul():
        supervisor_server = xmlrpclib.Server('http://localhost:9001/RPC2')
        hermes_init_output = supervisor_server.supervisor.startProcessGroup('hermes_init')
        get_logger().info('hermes_init start: {}'.format(hermes_init_output))
        return self.status_ok()
    return self.status_error('Waiting for armada restart timed out.')
def POST(self):
    """Join this single-ship armada to the armada reachable at POST parameter 'host'.

    The request is rejected when the parameter is invalid, when
    ``_get_armada_size()`` is above 1, or when the remote agent's
    ``agent/self`` (port 8500) cannot be read. Otherwise the runtime
    settings are overridden and consul is restarted.

    Returns:
        ``self.status_ok()`` when consul came back after the restart,
        otherwise ``self.status_error(...)`` with the reason.
    """
    consul_host, error = self.get_post_parameter('host')
    if error:
        return self.status_error(error)

    armada_size = _get_armada_size()
    if armada_size > 1:
        return self.status_error('Currently only single ship armadas can join the others. '
                                 'Your armada has size: {armada_size}.'.format(armada_size=armada_size))

    try:
        remote_address = '{consul_host}:8500'.format(consul_host=consul_host)
        agent_self_dict = consul_query('agent/self', consul_address=remote_address)
        datacenter = agent_self_dict['Config']['Datacenter']
    except Exception:
        # Narrowed from a bare ``except:``: failures are still reported to the caller,
        # but KeyboardInterrupt/SystemExit are no longer trapped.
        return self.status_error('Could not read remote host datacenter address.')

    current_consul_mode = _get_current_consul_mode()
    if current_consul_mode == consul_config.ConsulMode.BOOTSTRAP:
        override_runtime_settings(consul_mode=consul_config.ConsulMode.CLIENT,
                                  ship_ips=[consul_host],
                                  datacenter=datacenter)
    else:
        override_runtime_settings(ship_ips=[consul_host] + get_other_ship_ips(),
                                  datacenter=datacenter)

    if _restart_consul():
        return self.status_ok()
    return self.status_error('Waiting for armada restart timed out.')
def get_armada_address(ship_name):
    """Return ``"<address>:<port>"`` of the armada service on node ``ship_name``.

    The first ``catalog/service/armada`` entry whose ``Node`` equals
    ``ship_name`` is used; None is returned when there is none.
    """
    for entry in consul_query('catalog/service/armada'):
        if entry['Node'] != ship_name:
            continue
        port_text = str(entry['ServicePort'])
        return entry['Address'] + ':' + port_text
    return None
def wait_for_consul_ready(timeout_seconds=60):
    """Poll consul until the local armada service passes all its checks.

    Once per second the local node name is read from ``agent/self`` and
    looked up in ``health/service/armada``.

    Args:
        timeout_seconds: how long to keep polling. With a non-positive
            value no query is made at all.

    Returns:
        True as soon as every check of the local armada entry is
        'passing', False when the timeout expires first.
    """
    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        time.sleep(1)
        try:
            ship_name = consul_query('agent/self')['Config']['NodeName']
            for health_armada in consul_query('health/service/armada'):
                if health_armada['Node']['Node'] != ship_name:
                    continue
                if all(check['Status'] == 'passing' for check in health_armada['Checks']):
                    return True
        except Exception:
            # Errors are expected while consul is (re)starting: just retry on the next
            # tick. Narrowed from a bare ``except:`` so Ctrl-C / SystemExit can still
            # interrupt the wait.
            pass
    return False
def __get_armada_address(ship_name=None):
    """Return the base HTTP URL of the local or a named ship's armada API.

    Without ``ship_name``: ``http://127.0.0.1`` inside the armada container,
    otherwise 127.0.0.1 with the port of the ``armada`` entry in
    ``agent/services``. With ``ship_name``: the name is resolved with
    ``ship_name_to_ip`` and matched against the addresses in
    ``catalog/service/armada``.

    Raises:
        ValueError: if no armada service matched.
    """
    if ship_name:
        ship_ip = ship_name_to_ip(ship_name)
        for entry in consul_query('catalog/service/armada'):
            if entry['Address'] == ship_ip:
                return 'http://{0}:{1}'.format(ship_ip, entry['ServicePort'])
    elif __are_we_in_armada_container():
        return 'http://127.0.0.1'
    else:
        for local_service in consul_query('agent/services').values():
            if local_service['Service'] == 'armada':
                return 'http://127.0.0.1:{}'.format(local_service['Port'])
    raise ValueError('Cannot find ship: {0}.'.format(ship_name))
def get_ship_ip():
    """Return the advertise IP address of the local ship.

    The value is ``Config.AdvertiseAddr`` from consul's ``agent/self``. It
    can differ from the external IP when the external IP changes after the
    ship was first started.
    """
    return consul_query('agent/self')['Config']['AdvertiseAddr']
def _get_local_services_from_catalog():
    """Return the ``agent/services`` entries without consul and armada.

    The entry keyed 'consul' is dropped, as is every entry whose
    ``Service`` field is 'armada'.
    """
    local_services = consul_query('agent/services')
    local_services.pop('consul', None)

    without_armada = {}
    for service_id, service in local_services.items():
        if service['Service'] != 'armada':
            without_armada[service_id] = service
    return without_armada
def get_armada_address(ship_name):
    """Look up the armada API address (``"<address>:<port>"``) of a ship.

    Scans ``catalog/service/armada`` for the first entry registered on node
    ``ship_name``. Returns None if the node has no armada entry.
    """
    armada_entries = consul_query('catalog/service/armada')
    on_ship = (entry for entry in armada_entries if entry['Node'] == ship_name)
    for entry in on_ship:
        return entry['Address'] + ':' + str(entry['ServicePort'])
    return None
def get_matched_containers(microservice_name_or_container_id_prefix):
    """Return catalog instances selected by microservice name or container id prefix.

    All names from ``catalog/services`` are queried one by one; a query that
    fails is reported through ``print_err`` and treated as empty. An
    instance is a name match when ``ServiceName`` equals the argument, and
    an id match when its container id starts with the argument and its
    ``ServiceID`` contains no ':'.

    Raises:
        ArmadaCommandException: on simultaneous name and id matches, on more
            than one id match, or when nothing matched.
    """
    criterion = microservice_name_or_container_id_prefix
    name_matches = []
    id_matches = []

    service_names = list(consul_query('catalog/services').keys())
    for service_name in service_names:
        query = 'catalog/service/{service_name}'.format(service_name=service_name)
        try:
            instances = consul_query(query)
        except Exception as e:
            warning = 'WARNING: query "{query}" failed ({exception_class}: {exception})'.format(
                query=query, exception_class=type(e).__name__, exception=e)
            print_err(warning)
            continue

        for instance in instances:
            service_id = instance['ServiceID']
            container_id = service_id.split(':')[0]
            instance_name = instance['ServiceName']
            if criterion == instance_name:
                name_matches.append(instance)
            if container_id.startswith(criterion) and ":" not in instance['ServiceID']:
                id_matches.append(instance)

    counts = {
        'matched_containers_by_name_count': len(name_matches),
        'matched_containers_by_id_count': len(id_matches),
    }
    if counts['matched_containers_by_name_count'] and counts['matched_containers_by_id_count']:
        raise ArmadaCommandException(
            'Found matching containers with both microservice name ({matched_containers_by_name_count}) '
            'and container_id ({matched_containers_by_id_count}). '
            'Please provide more specific criteria.'.format(**counts))
    if counts['matched_containers_by_id_count'] > 1:
        raise ArmadaCommandException(
            'There are too many ({matched_containers_by_id_count}) matching containers. '
            'Please provide more specific container_id.'.format(**counts))

    matched_containers = name_matches + id_matches
    if len(matched_containers) == 0:
        raise ArmadaCommandException(
            'There are no running containers with microservice: '
            '{microservice_name_or_container_id_prefix}'.format(
                microservice_name_or_container_id_prefix=criterion))
    return matched_containers
def _restart_consul():
    """Deregister the local node, make consul leave, and wait for it to be ready.

    Returns:
        The result of ``wait_for_consul_ready()``.
    """
    # Services will be registered again by their script 'register_in_service_discovery'.
    node_name = consul_query('agent/self')['Config']['NodeName']
    deregister_payload = json.dumps({'Node': node_name})
    consul_put('catalog/deregister', data=deregister_payload)
    os.system('consul leave')
    return wait_for_consul_ready()
def deregister_services(container_id):
    """Deregister every local service whose id starts with ``container_id``.

    After each deregistration the container's ``start_timestamp/`` kv entry
    is removed; a failure of that removal is logged and ignored.
    """
    local_services = consul_query('agent/services')
    for service_id, _ in local_services.items():
        if not service_id.startswith(container_id):
            continue
        consul_get('agent/service/deregister/{service_id}'.format(service_id=service_id))
        try:
            kv.kv_remove("start_timestamp/" + container_id)
        except Exception as error:
            get_logger().exception(error)
def get_ship_names():
    """Return the ship name of every node listed in ``catalog/nodes``.

    Each node's ``Address`` is translated with ``get_ship_name``. Best
    effort: if anything fails an empty list is returned.
    """
    try:
        catalog_nodes = consul_query('catalog/nodes')
        return [get_ship_name(consul_node['Address']) for consul_node in catalog_nodes]
    except Exception:
        # Still a deliberate swallow, but the former bare ``except:`` also trapped
        # KeyboardInterrupt/SystemExit, which must propagate.
        return []
def deregister_services(container_id):
    """Deregister every local service whose id starts with ``container_id``.

    After each deregistration the container's ``start_timestamp/`` kv entry
    is removed; if that fails the traceback is printed and processing
    continues.
    """
    local_services = consul_query('agent/services')
    matching_ids = [service_id for service_id, _ in local_services.items()
                    if service_id.startswith(container_id)]
    for service_id in matching_ids:
        consul_get('agent/service/deregister/{service_id}'.format(service_id=service_id))
        try:
            kv.kv_remove("start_timestamp/" + container_id)
        except Exception:
            traceback.print_exc()
def kv_get_recurse(key):
    """Fetch every kv entry under the prefix ``key`` and decode its value.

    Args:
        key: kv prefix, queried as ``kv/<key>?recurse=true``.

    Returns:
        None when the query result is None; otherwise a dict mapping each
        entry's key, with the leading ``key`` prefix stripped, to its
        base64-decoded, JSON-parsed value.
    """
    query_result = consul_query('kv/{key}?recurse=true'.format(key=key))
    if query_result is None:
        return None

    prefix_length = len(key)
    decoded = {}
    for item in query_result:
        full_key = item['Key']
        # Strip only the leading prefix. ``str.replace`` would also delete later
        # occurrences of the prefix text inside the key and corrupt it.
        relative_key = full_key[prefix_length:] if full_key.startswith(key) else full_key
        decoded[relative_key] = json.loads(base64.b64decode(item['Value']))
    return decoded
def command_info(args):
    """Print a table of all ships and a summary of the cluster's fault tolerance.

    For every node in ``catalog/nodes`` the armada address, status, version
    and ship role are collected. Roles are counted only for ships whose
    armada status is 'passing'. The summary (no leader / no commander / one
    commander / N tolerated failures) is written to stderr.

    Args:
        args: parsed command line arguments (not used by this command).
    """
    catalog_nodes_dict = consul_query('catalog/nodes')

    output_header = ['Ship name', 'Ship role', 'API address', 'API status', 'Version']
    output_rows = [output_header]
    ship_role_counts = {'ship': 0, 'commander': 0, 'leader': 0, '?': 0}

    for consul_node in catalog_nodes_dict:
        ship_name = consul_node['Node']
        ship_ip = consul_node['Address']

        service_armada_address = get_armada_address(ship_name) or ship_ip
        service_armada_status = get_armada_status(ship_name)
        service_armada_version = get_armada_version(service_armada_address)
        try:
            ship_role = get_ship_role(ship_ip)
        except Exception:
            # An unreachable ship is shown with an unknown role; narrowed from a bare
            # ``except:`` so KeyboardInterrupt/SystemExit propagate.
            ship_role = '?'

        if service_armada_status == 'passing':
            ship_role_counts[ship_role] += 1

        if ship_name.startswith('ship-'):
            ship_name = ship_name[5:]

        output_rows.append([
            ship_name, ship_role, service_armada_address,
            service_armada_status, service_armada_version
        ])

    print_table(output_rows)

    if ship_role_counts['leader'] == 0:
        print('\nERROR: There is no active leader. Armada is not working!', file=sys.stderr)
    elif ship_role_counts['commander'] == 0:
        print('\nWARNING: We cannot survive leader leaving/failure.', file=sys.stderr)
        print('Such configuration should only be used in development environments.', file=sys.stderr)
    elif ship_role_counts['commander'] == 1:
        print('\nWARNING: We can survive leaving of commander but commander failure or leader leave/failure will be '
              'fatal.', file=sys.stderr)
        print('Such configuration should only be used in development environments.', file=sys.stderr)
    else:
        # Floor division: a count of ships must be a whole number. With true division
        # Python 3 printed values such as "1.5 commanders".
        failure_tolerance = ship_role_counts['commander'] // 2
        print('\nWe can survive failure of {0} {1} (including leader).'.format(
            failure_tolerance, 'commander' if failure_tolerance == 1 else 'commanders'),
            file=sys.stderr)
def command_ssh(args):
    """Replace the current process with a ``docker exec`` session in a container.

    The container is selected by ``args.microservice_name``, falling back to
    the ``MICROSERVICE_NAME`` environment variable. A container registered
    with the local agent is entered directly; otherwise the command is run
    over ssh (port 2201, user ``docker``) on the instance's host. Without
    ``args.command`` an interactive ``bash`` with a tty is started.

    Raises:
        ValueError: if no microservice name is available.
        armada_utils.ArmadaCommandException: if more than one container
            matches, or the matched service is not running.
    """
    # ``.get`` instead of indexing: a missing variable must lead to the ValueError
    # below, not to a bare KeyError.
    microservice_name = args.microservice_name or os.environ.get("MICROSERVICE_NAME")
    if not microservice_name:
        raise ValueError("No microservice name supplied.")

    instances = armada_utils.get_matched_containers(microservice_name)
    instances_count = len(instances)
    if instances_count > 1:
        raise armada_utils.ArmadaCommandException(
            "There are too many ({instances_count}) matching containers. "
            "Provide more specific container_id or microservice name.".format(instances_count=instances_count)
        )
    instance = instances[0]
    if "kv_index" in instance:
        raise armada_utils.ArmadaCommandException("Cannot connect to not running service.")

    service_id = instance["ServiceID"]
    container_id = service_id.split(":")[0]

    local_microservices_ids = set(consul_query("agent/services").keys())
    is_local = container_id in local_microservices_ids

    if args.command:
        command = " ".join(args.command)
    else:
        # Default to an interactive shell.
        command = "bash"
        args.tty = True
        args.interactive = True

    tty = "-t" if args.tty else ""
    interactive = "-i" if args.interactive else ""
    term = os.environ.get("TERM") or "dummy"
    # The whole command is quoted so it reaches ``sh -c`` as a single argument.
    command = pipes.quote(command)
    docker_command = (
        "docker exec {interactive} {tty} {container_id} env TERM={term} " "sh -c {command}"
    ).format(interactive=interactive, tty=tty, container_id=container_id, term=term, command=command)

    if is_local:
        print("Connecting to {0}...".format(instance["ServiceName"]))
        ssh_args = shlex.split(docker_command)
    else:
        ssh_host = instance["Address"]
        docker_key_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "keys/docker.key")
        remote_ssh_chunk = "ssh -t {tty} -p 2201 -i {docker_key_file} -o StrictHostKeyChecking=no docker@{ssh_host}".format(
            tty=tty, docker_key_file=docker_key_file, ssh_host=ssh_host
        )
        ssh_args = shlex.split(remote_ssh_chunk)
        ssh_args.extend(("sudo", docker_command))
        print("Connecting to {0} on host {1}...".format(instance["ServiceName"], ssh_host))

    os.execvp(ssh_args[0], ssh_args)
def get_container_ssh_address(container_id):
    """Return ``"<advertise ip>:<host port>"`` for the container's ssh port.

    The host port is the first binding of ``22/tcp`` reported by
    ``inspect_container``; the IP is ``Config.AdvertiseAddr`` from consul's
    ``agent/self``.
    """
    inspect_result = docker_client.api().inspect_container(container_id)
    ssh_bindings = inspect_result['NetworkSettings']['Ports']['22/tcp']
    host_port = ssh_bindings[0]['HostPort']
    advertise_ip = consul_query('agent/self')['Config']['AdvertiseAddr']
    return '{service_ip}:{ssh_port}'.format(service_ip=advertise_ip, ssh_port=host_port)
def command_ssh(args):
    """Replace the current process with a ``docker exec`` session in a container.

    The container is selected by ``args.microservice_name``, falling back to
    the ``MICROSERVICE_NAME`` environment variable. A container registered
    with the local agent is entered directly; otherwise the command is run
    over ssh (port 2201, user ``docker``) on the instance's host. Without
    ``args.command`` an interactive ``bash`` with a tty is started.

    Raises:
        ValueError: if no microservice name is available.
        armada_utils.ArmadaCommandException: if more than one container
            matches.
    """
    # ``.get`` instead of indexing: a missing variable must lead to the ValueError
    # below, not to a bare KeyError.
    microservice_name = args.microservice_name or os.environ.get('MICROSERVICE_NAME')
    if not microservice_name:
        raise ValueError('No microservice name supplied.')

    instances = armada_utils.get_matched_containers(microservice_name)
    instances_count = len(instances)
    if instances_count > 1:
        raise armada_utils.ArmadaCommandException(
            'There are too many ({instances_count}) matching containers. '
            'Provide more specific container_id or microservice name.'.format(
                instances_count=instances_count))
    instance = instances[0]

    service_id = instance['ServiceID']
    container_id = service_id.split(':')[0]

    local_microservices_ids = set(consul_query('agent/services').keys())
    is_local = container_id in local_microservices_ids

    if args.command:
        command = ' '.join(args.command)
    else:
        # Default to an interactive shell.
        command = 'bash'
        args.tty = True
        args.interactive = True

    tty = '-t' if args.tty else ''
    interactive = '-i' if args.interactive else ''
    term = os.environ.get('TERM') or 'dummy'
    # The whole command is quoted so it reaches ``sh -c`` as a single argument.
    command = pipes.quote(command)
    docker_command = ('docker exec {interactive} {tty} {container_id} env TERM={term} '
                      'sh -c {command}').format(
                          interactive=interactive, tty=tty, container_id=container_id,
                          term=term, command=command)

    if is_local:
        print("Connecting to {0}...".format(instance['ServiceName']))
        ssh_args = shlex.split(docker_command)
    else:
        ssh_host = instance['Address']
        docker_key_file = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), '..',
            'keys/docker.key')
        remote_ssh_chunk = (
            'ssh -t {tty} -p 2201 -i {docker_key_file} -o StrictHostKeyChecking=no docker@{ssh_host}'
            .format(tty=tty, docker_key_file=docker_key_file, ssh_host=ssh_host))
        ssh_args = shlex.split(remote_ssh_chunk)
        ssh_args.extend(('sudo', docker_command))
        print("Connecting to {0} on host {1}...".format(
            instance['ServiceName'], ssh_host))

    os.execvp(ssh_args[0], ssh_args)
def get_other_ship_ips():
    """Return the addresses of all catalog nodes except the local ship.

    The local ship's address is taken from ``get_ship_ip()``. Best effort:
    any failure yields an empty list.
    """
    try:
        ship_ips = [consul_node['Address'] for consul_node in consul_query('catalog/nodes')]
        my_ship_ip = get_ship_ip()
        if my_ship_ip in ship_ips:
            ship_ips.remove(my_ship_ip)
        return ship_ips
    except Exception:
        # Deliberately swallowed, but no longer traps KeyboardInterrupt/SystemExit
        # the way the previous bare ``except:`` did.
        return []
def deregister_services(container_id):
    """Deregister every local service whose id starts with ``container_id``.

    After each deregistration two kv entries are removed independently:
    ``start_timestamp/<container_id>`` and
    ``single_active_instance/<service_id>``. A failing removal is logged
    and does not stop the other one.
    """
    local_services = consul_query('agent/services')
    for service_id, _ in local_services.items():
        if not service_id.startswith(container_id):
            continue
        consul_get('agent/service/deregister/{service_id}'.format(service_id=service_id))
        stale_keys = (
            "start_timestamp/" + container_id,
            "single_active_instance/" + service_id,
        )
        for stale_key in stale_keys:
            try:
                kv.kv_remove(stale_key)
            except Exception as error:
                get_logger().exception(error)
def _start_container(self, long_container_id):
    """Start a container and return its published endpoints.

    Args:
        long_container_id: id passed to the docker API ``start`` and
            ``inspect_container`` calls.

    Returns:
        dict mapping ``"<advertise ip>:<host port>"`` to the container port
        key reported by docker (the first host binding of each port is
        used). Ports without a host binding are skipped.
    """
    docker_api = docker_client.api()
    docker_api.start(long_container_id)

    service_ip = consul_query('agent/self')['Config']['AdvertiseAddr']
    docker_inspect = docker_api.inspect_container(long_container_id)

    # Docker reports null instead of a binding list for ports that are exposed but
    # not published (and may report null for the whole map); indexing those used to
    # raise TypeError after the container had already been started.
    port_bindings = docker_inspect['NetworkSettings']['Ports'] or {}

    service_endpoints = {}
    for container_port, host_address in port_bindings.items():
        if not host_address:
            continue
        endpoint = '{0}:{1}'.format(service_ip, host_address[0]['HostPort'])
        service_endpoints[endpoint] = container_port
    return service_endpoints
def get_armada_status(ship_name):
    """Return the health status of the armada service on node ``ship_name``.

    The first ``health/service/armada`` entry for the node is used. The
    result is the most severe of 'critical', 'warning', 'passing' found
    among its checks, or '-' when the node has no entry or none of those
    statuses is reported.
    """
    for health_entry in consul_query('health/service/armada'):
        if health_entry['Node']['Node'] != ship_name:
            continue
        reported = set(check['Status'] for check in (health_entry['Checks'] or []))
        for severity in ('critical', 'warning', 'passing'):
            if severity in reported:
                return severity
        return '-'
    return '-'
def command_ssh(args):
    """Replace the current process with a ``docker exec`` session in a container.

    The container is selected by ``args.microservice_name``, falling back to
    the ``MICROSERVICE_NAME`` environment variable. A container registered
    with the local agent is entered directly; otherwise the command is run
    over ssh (port 2201, user ``docker``) on the instance's host. Without
    ``args.command`` an interactive ``bash`` with a tty is started.

    Raises:
        ValueError: if no microservice name is available.
        armada_utils.ArmadaCommandException: if more than one container
            matches.
    """
    # ``.get`` instead of indexing: a missing variable must lead to the ValueError
    # below, not to a bare KeyError.
    microservice_name = args.microservice_name or os.environ.get('MICROSERVICE_NAME')
    if not microservice_name:
        raise ValueError('No microservice name supplied.')

    instances = armada_utils.get_matched_containers(microservice_name)
    instances_count = len(instances)
    if instances_count > 1:
        raise armada_utils.ArmadaCommandException(
            'There are too many ({instances_count}) matching containers. '
            'Provide more specific container_id or microservice name.'.format(instances_count=instances_count))
    instance = instances[0]

    service_id = instance['ServiceID']
    container_id = service_id.split(':')[0]

    local_microservices_ids = set(consul_query('agent/services').keys())
    is_local = container_id in local_microservices_ids

    if args.command:
        command = ' '.join(args.command)
    else:
        # Default to an interactive shell.
        command = 'bash'
        args.tty = True
        args.interactive = True

    tty = '-t' if args.tty else ''
    interactive = '-i' if args.interactive else ''
    term = os.environ.get('TERM') or 'dummy'
    # The whole command is quoted so it reaches ``sh -c`` as a single argument.
    command = pipes.quote(command)
    docker_command = 'docker exec {interactive} {tty} {container_id} env TERM={term} ' \
                     'sh -c {command}'.format(interactive=interactive, tty=tty, container_id=container_id,
                                              term=term, command=command)

    if is_local:
        print("Connecting to {0}...".format(instance['ServiceName']))
        ssh_args = shlex.split(docker_command)
    else:
        ssh_host = instance['Address']
        docker_key_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'keys/docker.key')
        remote_ssh_chunk = 'ssh -t {tty} -p 2201 -i {docker_key_file} -o StrictHostKeyChecking=no docker@{ssh_host}' \
            .format(tty=tty, docker_key_file=docker_key_file, ssh_host=ssh_host)
        ssh_args = shlex.split(remote_ssh_chunk)
        ssh_args.extend(('sudo', docker_command))
        print("Connecting to {0} on host {1}...".format(instance['ServiceName'], ssh_host))

    os.execvp(ssh_args[0], ssh_args)
def _add_running_services_at_startup(containers_saved_in_kv, ship):
    """Save locally running services that are missing from the kv store.

    Args:
        containers_saved_in_kv: collection of kv keys already saved for this
            ship; may be empty or None.
        ship: ship name used to build the ``ships/<ship>/service/...`` keys.

    The 'consul' entry, service ids containing ':' and the 'armada' service
    are skipped.
    """
    wait_for_consul_ready()
    # wait for registering services
    sleep(10)

    all_services = consul_query('agent/services')
    # ``pop`` with a default: an unconditional ``del`` raised KeyError when the agent
    # did not list a 'consul' entry.
    all_services.pop('consul', None)

    for service_id, service_dict in all_services.items():
        if ':' in service_id:
            continue
        if service_dict['Service'] == 'armada':
            continue
        key = 'ships/{}/service/{}/{}'.format(ship, service_dict['Service'], service_id)
        if not containers_saved_in_kv or key not in containers_saved_in_kv:
            kv.save_service(ship, service_id, 'started')
def __get_armada_address(ship_name=None):
    """Return the base HTTP URL of the local or a named ship's armada API.

    Local case (no ``ship_name``): ``http://127.0.0.1`` inside the armada
    container, otherwise 127.0.0.1 with the port of the ``armada`` entry in
    ``agent/services``; when that query raises ``ConsulException`` a warning
    is logged and port 8900 is used.

    Remote case: ``ship_name`` is used as is when ``is_ip`` accepts it,
    otherwise resolved with ``ship_name_to_ip``, and then matched against
    the addresses in ``catalog/service/armada``.

    Raises:
        ValueError: if no armada service matched.
    """
    not_found = ValueError('Cannot find ship: {0}.'.format(ship_name))

    if ship_name:
        if is_ip(ship_name):
            ship_ip = ship_name
        else:
            ship_ip = ship_name_to_ip(ship_name)
        for entry in consul_query('catalog/service/armada'):
            if entry['Address'] == ship_ip:
                return 'http://{0}:{1}'.format(ship_ip, entry['ServicePort'])
        raise not_found

    if __are_we_in_armada_container():
        return 'http://127.0.0.1'
    try:
        for local_service in consul_query('agent/services').values():
            if local_service['Service'] == 'armada':
                return 'http://127.0.0.1:{}'.format(local_service['Port'])
    except ConsulException as e:
        get_logger(__file__).warning('Could not get armada port from consul ({}), falling back to 8900.'.format(e))
        return 'http://127.0.0.1:8900'
    raise not_found
def command_version(args):
    """Print the version reported by the local armada service.

    Looks up the 'armada' service on the local consul agent and fetches
    ``/version`` from its port on localhost.  Prints "none" when no armada
    service is registered and "error" when the HTTP request fails.

    Args:
        args: parsed command-line arguments (unused here).
    """
    version = "none"
    agent_services_dict = consul_query('agent/services')
    for service in agent_services_dict.values():
        if service['Service'] != 'armada':
            continue
        url = "http://localhost:{port}/version".format(port=service['Port'])
        try:
            # The request itself is inside the try: a connection failure is
            # the realistic error here and should yield "error", not a traceback.
            version = requests.get(url).text
        except (requests.RequestException, AttributeError):
            version = "error"
        # Only the first armada service is queried.
        break
    print(version)
def get_armada_status(ship_name):
    """Return the health status of the armada service on ``ship_name``.

    The result is the most severe status among the checks of the first
    'health/service/armada' entry whose node name equals ``ship_name``:
    'critical' over 'warning' over 'passing'.  '-' is returned when the ship
    is not listed or none of its checks carries one of those statuses.
    """
    no_status = '-'
    for node_health in consul_query('health/service/armada'):
        if node_health['Node']['Node'] != ship_name:
            continue
        reported = {check['Status'] for check in (node_health['Checks'] or [])}
        # Scan from worst to best so the first hit is the one that wins.
        for severity in ('critical', 'warning', 'passing'):
            if severity in reported:
                return severity
        return no_status
    return no_status
def on_post(self, req, resp):
    """Join this single-ship armada to the armada of the posted 'host'.

    Steps: read the 'host' parameter, snapshot the ship name and local
    service entries from the KV store, refuse if this armada has more than
    one ship, read the remote datacenter from the host's consul agent
    (port 8500), override runtime settings, restart consul, then start the
    'hermes_init' supervisor group and restore the ship name and the
    snapshotted KV entries.

    Returns the result of ``self.status_ok`` on success, otherwise of
    ``self.status_error`` with a message describing the failed step.
    """
    consul_host, error = self.get_post_parameter(req, 'host')
    if error:
        return self.status_error(resp, error)

    # Snapshot local state first; it is written back after consul restarts.
    ship = get_ship_name()
    local_services_data = {}
    for service_key in get_local_services_from_kv_store():
        local_services_data[service_key] = kv.kv_get(service_key)

    armada_size = _get_armada_size()
    if armada_size > 1:
        message = ('Currently only single ship armadas can join the others. '
                   'Your armada has size: {0}.'.format(armada_size))
        return self.status_error(resp, message)

    try:
        remote_address = '{0}:8500'.format(consul_host)
        agent_self_dict = consul_query('agent/self', consul_address=remote_address)
        datacenter = agent_self_dict['Config']['Datacenter']
    except Exception as e:
        get_logger().exception(e)
        return self.status_error(resp, 'Could not read remote host datacenter address.')

    if _get_current_consul_mode() == consul_config.ConsulMode.BOOTSTRAP:
        override_runtime_settings(consul_mode=consul_config.ConsulMode.CLIENT,
                                  ship_ips=[consul_host],
                                  datacenter=datacenter)
    else:
        override_runtime_settings(ship_ips=[consul_host] + get_other_ship_ips(),
                                  datacenter=datacenter)

    if not _restart_consul():
        return self.status_error(resp, 'Waiting for armada restart timed out.')

    supervisor_server = xmlrpc.client.Server('http://localhost:9001/RPC2')
    hermes_init_output = supervisor_server.supervisor.startProcessGroup('hermes_init')
    get_logger().info('hermes_init start: %s', hermes_init_output)

    set_ship_name(ship)
    for service_key, service_data in six.iteritems(local_services_data):
        kv.kv_set(service_key, service_data)
    return self.status_ok(resp)
def command_ssh(args):
    """Run a command (default: bash) inside a matched container via a shell.

    The microservice name comes from ``args.microservice_name`` or, failing
    that, from the ``MICROSERVICE_NAME`` environment variable.  For a
    container registered on the local consul agent ``docker exec`` is run
    directly; otherwise the ssh host is obtained from the armada API
    ('ssh-address') of the instance's node and the command is run over ssh.

    Raises:
        ValueError: if no microservice name is available.
        armada_utils.ArmadaCommandException: if more than one container
            matches, or the armada API reports a non-'ok' status.
    """
    # .get() so a missing variable reaches the explicit ValueError below
    # instead of surfacing as an opaque KeyError.
    microservice_name = args.microservice_name or os.environ.get('MICROSERVICE_NAME')
    if not microservice_name:
        raise ValueError('No microservice name supplied.')

    instances = armada_utils.get_matched_containers(microservice_name)
    instances_count = len(instances)
    if instances_count > 1:
        raise armada_utils.ArmadaCommandException(
            'There are too many ({instances_count}) matching containers. '
            'Provide more specific container_id or microservice name.'.format(
                instances_count=instances_count))
    instance = instances[0]

    service_id = instance['ServiceID']
    # Service ids look like "<container_id>[:<suffix>]".
    container_id = service_id.split(':')[0]
    payload = {'container_id': container_id}

    local_microservices_ids = set(consul_query('agent/services').keys())
    is_local = container_id in local_microservices_ids

    if not is_local:
        result = json.loads(armada_api.get('ssh-address', payload, ship_name=instance['Node']))
        if result['status'] != 'ok':
            raise armada_utils.ArmadaCommandException('armada API error: {0}'.format(result['error']))
        # The API answers with "<host>:<port>"; only the host part is used here.
        ssh_host = result['ssh'].split(':')[0]
        docker_key_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'keys/docker.key')

    tty = '-t'
    if args.command:
        command = ' '.join(args.command)
        # Commands starting with 'bash' are run without a tty.
        if command.startswith('bash'):
            tty = ''
    else:
        command = 'bash'

    ssh_command = 'docker exec -i {tty} {container_id} env TERM=$TERM {command}'.format(
        tty=tty, container_id=container_id, command=command)

    if is_local:
        print("Connecting to {0}...".format(instance['ServiceName']))
    else:
        ssh_command = 'ssh -t {tty} -p 2201 -i {docker_key_file} -o StrictHostKeyChecking=no docker@{ssh_host} sudo {ssh_command}'.format(
            tty=tty, docker_key_file=docker_key_file, ssh_host=ssh_host, ssh_command=ssh_command)
        print("Connecting to {0} on host {1}...".format(instance['ServiceName'], ssh_host))

    # NOTE(review): shell=True is kept on purpose: the local shell expands
    # $TERM in the command string.  The command text comes from the invoking
    # user's own arguments and is not escaped.
    subprocess.call(ssh_command, shell=True)
def _start_container(self, long_container_id):
    """Start a docker container and return its published endpoints.

    After starting the container, it is recorded in the KV store as 'started'
    under the current ship name and the shortened container id.

    Returns:
        dict mapping "<advertise_ip>:<host_port>" to the container port key
        reported by docker inspect, one entry per item in
        ``NetworkSettings.Ports`` (first host binding of each port).
    """
    docker_api = docker_client.api(timeout=30)
    docker_api.start(long_container_id)

    # The IP this consul agent advertises is used as the endpoint host.
    service_ip = consul_query('agent/self')['Config']['AdvertiseAddr']
    inspection = docker_api.inspect_container(long_container_id)

    kv.save_container(get_ship_name(), shorten_container_id(long_container_id), status='started')

    port_bindings = inspection['NetworkSettings']['Ports']
    return {
        '{0}:{1}'.format(service_ip, bindings[0]['HostPort']): container_port
        for container_port, bindings in port_bindings.items()
    }
def _start_container(self, long_container_id):
    """Start a docker container and return its published endpoints.

    After starting the container, ``save_service`` is called with the current
    ship name, the shortened container id and status 'started'.

    Returns:
        dict mapping "<advertise_ip>:<host_port>" to the container port key
        reported by docker inspect, one entry per item in
        ``NetworkSettings.Ports`` (first host binding of each port).
    """
    docker_api = docker_client.api(timeout=30)
    docker_api.start(long_container_id)

    # The IP this consul agent advertises is used as the endpoint host.
    service_ip = consul_query('agent/self')['Config']['AdvertiseAddr']
    inspection = docker_api.inspect_container(long_container_id)

    save_service(get_ship_name(), shorten_container_id(long_container_id), status='started')

    port_bindings = inspection['NetworkSettings']['Ports']
    return {
        '{0}:{1}'.format(service_ip, bindings[0]['HostPort']): container_port
        for container_port, bindings in port_bindings.items()
    }
def deregister_not_running_services():
    """Deregister local services whose container is no longer running.

    For every local service id except 'consul' whose container id (the part
    before ':') is not among the running containers, the service is saved in
    the KV store as 'crashed' under the next free index of
    ``service/<name>/`` and then deregistered.
    """
    services_ids = get_local_services_ids()
    containers_ids = get_running_container_ids()
    for service_id in services_ids:
        if service_id == 'consul':
            continue
        container_id = service_id.split(':')[0]
        if container_id in containers_ids:
            continue

        name = consul_query('agent/services')[service_id]['Service']
        params = get_container_parameters(container_id)

        # List once and take the numeric maximum.  Taking the last listed key
        # breaks with lexicographic ordering ("10" sorts before "9"), which
        # would reuse an index that is already taken.
        existing_keys = kv.kv_list('service/{}/'.format(name))
        if existing_keys:
            kv_index = max(int(key.split('/')[2]) for key in existing_keys) + 1
        else:
            kv_index = 0

        kv.save_service(name, kv_index, 'crashed', params, container_id)
        deregister_services(container_id)
def command_info(args):
    """Print a table of all ships and a summary of the armada's fault tolerance.

    One row is printed per node in the consul catalog: ship name (without the
    'ship-' prefix), role, API address, API status and version.  Roles are
    counted only for ships whose armada status is 'passing'; the counts drive
    the error, warning or summary message written to stderr afterwards.

    Args:
        args: parsed command-line arguments (unused here).
    """
    catalog_nodes_dict = consul_query('catalog/nodes')

    output_header = ['Ship name', 'Ship role', 'API address', 'API status', 'Version']
    output_rows = [output_header]
    ship_role_counts = {'ship': 0, 'commander': 0, 'leader': 0, '?': 0}

    for consul_node in catalog_nodes_dict:
        ship_name = consul_node['Node']
        ship_ip = consul_node['Address']

        service_armada_address = get_armada_address(ship_name) or ship_ip
        service_armada_status = get_armada_status(ship_name)
        service_armada_version = get_armada_version(service_armada_address)

        try:
            ship_role = get_ship_role(ship_ip)
        except Exception:
            # Best effort: an unreachable ship is shown with an unknown role.
            # Exception (not a bare except) lets Ctrl-C still interrupt.
            ship_role = '?'

        if service_armada_status == 'passing':
            ship_role_counts[ship_role] += 1

        if ship_name.startswith('ship-'):
            ship_name = ship_name[5:]

        output_rows.append([ship_name, ship_role, service_armada_address,
                            service_armada_status, service_armada_version])

    print_table(output_rows)

    if ship_role_counts['leader'] == 0:
        print('\nERROR: There is no active leader. Armada is not working!', file=sys.stderr)
    elif ship_role_counts['commander'] == 0:
        print('\nWARNING: We cannot survive leader leaving/failure.', file=sys.stderr)
        print('Such configuration should only be used in development environments.', file=sys.stderr)
    elif ship_role_counts['commander'] == 1:
        print('\nWARNING: We can survive leaving of commander but commander failure or leader leave/failure will be '
              'fatal.', file=sys.stderr)
        print('Such configuration should only be used in development environments.', file=sys.stderr)
    else:
        # Floor division: the tolerance is a whole number of ships; true
        # division would print e.g. "1.5 commanders" on Python 3.
        failure_tolerance = ship_role_counts['commander'] // 2
        print('\nWe can survive failure of {0} {1} (including leader).'.format(
            failure_tolerance, 'commander' if failure_tolerance == 1 else 'commanders'), file=sys.stderr)
def _consul_discover(service_name):
    """Return the addresses of non-critical instances of ``service_name``.

    Queries ``health/service/<service_name>`` and collects "<ip>:<port>" for
    every instance that has no check in 'critical' state.

    Returns:
        set of "<node address>:<service port>" strings; empty when the consul
        query fails or no instance qualifies.
    """
    query = 'health/service/{service_name}'.format(service_name=service_name)
    try:
        instances = consul_query(query)
    except ConsulException:
        # Discovery is best effort; without this early return `instances`
        # would be unbound and the loop below would raise NameError.
        return set()

    service_addresses = set()
    for instance in instances:
        # `or []` guards against a missing/None check list.
        checks = instance['Checks'] or []
        if any(check['Status'] == 'critical' for check in checks):
            continue
        service_ip = instance['Node']['Address']
        service_port = instance['Service']['Port']
        service_addresses.add('{service_ip}:{service_port}'.format(
            service_ip=service_ip, service_port=service_port))
    return service_addresses