Пример #1
0
def _get_runtime_settings():
    """Load the runtime settings and derive the consul start-up parameters.

    The base settings file is copied to
    ``consul_config.ORIGINAL_RUNTIME_SETTINGS_PATH`` and parsed from that copy.
    If the override file exists, its JSON content is merged on top. A failure
    in either step is logged and otherwise ignored: a broken base file yields
    empty settings, a broken override file yields no overrides.

    Returns:
        tuple: ``(consul_mode, ship_ips, datacenter, ship_name)``.
    """
    try:
        shutil.copy(consul_config.RUNTIME_SETTINGS_PATH,
                    consul_config.ORIGINAL_RUNTIME_SETTINGS_PATH)
        with open(consul_config.ORIGINAL_RUNTIME_SETTINGS_PATH) as base_file:
            settings = json.load(base_file)
    except Exception as error:
        get_logger().exception(error)
        settings = {}

    try:
        if os.path.isfile(consul_config.OVERRIDE_RUNTIME_SETTINGS_PATH):
            with open(consul_config.OVERRIDE_RUNTIME_SETTINGS_PATH) as override_file:
                settings.update(json.load(override_file))
    except Exception as error:
        get_logger().exception(error)

    ship_ips = settings.get('ships', [])

    # The mode stays BOOTSTRAP unless 'is_commander' is literally True or
    # False; a commander only becomes a SERVER when other ships are listed.
    consul_mode = consul_config.ConsulMode.BOOTSTRAP
    commander_flag = settings.get('is_commander')
    if commander_flag is True:
        if ship_ips and len(ship_ips) > 0:
            consul_mode = consul_config.ConsulMode.SERVER
    elif commander_flag is False:
        consul_mode = consul_config.ConsulMode.CLIENT

    # Without a configured datacenter a random 'dc-<number>' name is generated.
    datacenter = (settings.get('datacenter')
                  or 'dc-' + str(random.randrange(1000000)))

    return consul_mode, ship_ips, datacenter, settings.get('name')
Пример #2
0
 def status_exception(self, message, exception):
     """Log ``exception`` and build a JSON error response describing it.

     The error text combines ``message`` with the exception's class name and
     its string form. The ``Content-Type`` header is set to JSON before the
     response is created by ``_create_response_with_error``.
     """
     get_logger().exception(exception)
     exception_kind = type(exception).__name__
     description = "API exception: {0}. {1} - {2}".format(
         message, exception_kind, str(exception))
     web.header('Content-Type', 'application/json')
     return _create_response_with_error(description)
Пример #3
0
def _get_armada_size():
    """Return the number of entries in consul's ``catalog/nodes`` answer.

    Any error while querying or counting is logged and reported as size 0.
    """
    try:
        return len(consul_query('catalog/nodes'))
    except Exception as error:
        get_logger().exception(error)
    return 0
Пример #4
0
def _parse_single_ship(services_dict, filter_microservice_name, filter_env,
                       filter_app_id):
    try:
        services_list = list(services_dict)
    except AttributeError:
        services_list = None

    result = {}
    if not services_list:
        return result

    if filter_microservice_name:
        services_list = fnmatch.filter(
            services_list, 'services/*/{}/*'.format(filter_microservice_name))

    for service in services_list:
        service_dict = services_dict[service]
        microservice_name = service_dict['ServiceName']
        microservice_status = service_dict['Status']
        microservice_id = service_dict['ServiceID']
        container_id = service_dict['container_id']
        microservice_start_timestamp = service_dict['start_timestamp']
        single_active_instance = service_dict.get('single_active_instance',
                                                  False)
        microservice_version = service_dict.get('microservice_version')
        not_available = 'n/a'

        microservice_tags_dict = {}
        try:
            if service_dict['params']['microservice_env']:
                microservice_tags_dict['env'] = service_dict['params'][
                    'microservice_env']
            if service_dict['params']['microservice_app_id']:
                microservice_tags_dict['app_id'] = service_dict['params'][
                    'microservice_app_id']
        except KeyError as e:
            get_logger().warning(repr(e))

        matches_env = (filter_env is None) or (
            filter_env == microservice_tags_dict.get('env'))
        matches_app_id = (filter_app_id is None) or (
            filter_app_id == microservice_tags_dict.get('app_id'))

        if matches_env and matches_app_id:
            microservice_dict = {
                'name': microservice_name,
                'status': microservice_status,
                'address': not_available,
                'microservice_id': microservice_id,
                'container_id': container_id,
                'tags': microservice_tags_dict,
                'start_timestamp': microservice_start_timestamp,
                'single_active_instance': single_active_instance,
            }
            if microservice_version:
                microservice_dict[
                    'microservice_version'] = microservice_version
            result[microservice_id] = microservice_dict

    return result
Пример #5
0
def recover_containers_from_kv_store():
    """Try to restart every crashed service recorded in the kv store.

    All crashed services are first marked 'recovering'. They are then retried
    in rounds, at most ``RECOVERY_RETRY_LIMIT`` times, sleeping
    ``DELAY_BETWEEN_RECOVER_RETRY_SECONDS`` after each round. A service that
    recovers has its kv entry removed; whatever still fails after the last
    round is marked 'not-recovered'.

    Returns:
        The services that could not be recovered (empty when all succeeded).
    """
    pending = _get_crashed_services()

    for service in pending:
        kv.update_container_status('recovering', key=service)

    attempts_made = 0
    while pending and attempts_made < RECOVERY_RETRY_LIMIT:
        get_logger().info("Recovering containers: %s", json.dumps(pending))
        still_failing = []

        for service in pending:
            parameters = kv.kv_get(service)['params']
            if _recover_container(parameters):
                kv.kv_remove(service)
            else:
                still_failing.append(service)
        sleep(DELAY_BETWEEN_RECOVER_RETRY_SECONDS)
        pending = still_failing
        attempts_made += 1

    for service in pending:
        kv.update_container_status('not-recovered', key=service)

    return pending
Пример #6
0
    def POST(self):
        """Join this ship to the armada that the posted 'host' belongs to.

        Steps: validate the 'host' POST parameter, refuse when the local
        armada has more than one ship, read the remote agent's datacenter,
        override the runtime settings accordingly, then restart consul.

        Returns:
            The ``status_ok()`` response after a successful consul restart,
            otherwise a ``status_error(...)`` response naming the failure.
        """
        consul_host, error = self.get_post_parameter('host')
        if error:
            return self.status_error(error)

        armada_size = _get_armada_size()
        if armada_size > 1:
            return self.status_error('Currently only single ship armadas can join the others. '
                                     'Your armada has size: {0}.'.format(armada_size))

        try:
            agent_self_dict = consul_query('agent/self', consul_address='{0}:8500'.format(consul_host))
            datacenter = agent_self_dict['Config']['Datacenter']
        except Exception:
            # Deliberately broad: any failure talking to the remote agent is
            # reported the same way. `Exception` (not a bare except) keeps
            # KeyboardInterrupt/SystemExit from being swallowed.
            return self.status_error('Could not read remote host datacenter address.')

        current_consul_mode = _get_current_consul_mode()
        if current_consul_mode == consul_config.ConsulMode.BOOTSTRAP:
            # A bootstrap ship joins the remote host as a client.
            override_runtime_settings(consul_mode=consul_config.ConsulMode.CLIENT,
                                      ship_ips=[consul_host],
                                      datacenter=datacenter)
        else:
            # Otherwise the mode is left alone; the new host is added in
            # front of the ships already known.
            override_runtime_settings(ship_ips=[consul_host] + get_other_ship_ips(),
                                      datacenter=datacenter)

        if _restart_consul():
            supervisor_server = xmlrpclib.Server('http://localhost:9001/RPC2')
            hermes_init_output = supervisor_server.supervisor.startProcessGroup('hermes_init')
            get_logger().info('hermes_init start: {}'.format(hermes_init_output))
            return self.status_ok()
        return self.status_error('Waiting for armada restart timed out.')
Пример #7
0
def main():
    """Save the parameters of locally running containers to a file and the kv store.

    Nothing is saved while recovery is incomplete, unless ``--force`` was
    given. Containers whose parameters cannot be read are counted as errors;
    the saved list is not overwritten when it would be empty because of such
    errors. Any unexpected failure prints a traceback and exits with status 1.
    """
    args = _parse_args()
    if not args.force and not _is_recovery_completed():
        get_logger().warning('Recovery is not completed. Aborting saving running containers.')
        return
    saved_containers_path = args.saved_containers_path
    try:
        wait_for_consul_ready()
        containers_ids = get_local_containers_ids()
        containers_parameters_list = []
        errors_count = 0
        for container_id in containers_ids:
            try:
                container_parameters = get_container_parameters(container_id)
                if container_parameters:
                    containers_parameters_list.append(container_parameters)
            except Exception:
                # One unreadable container must not abort the whole save;
                # `Exception` still lets KeyboardInterrupt/SystemExit through.
                errors_count += 1
                get_logger().error('ERROR on getting container parameters for {}:'.format(container_id))
                traceback.print_exc()
        containers_parameters_list.sort()
        # Don't overwrite saved containers' list if it would become empty because of errors.
        if containers_parameters_list or not errors_count:
            _save_containers_parameters_list_in_file(containers_parameters_list, saved_containers_path)
            get_logger().info('Containers have been saved to {}.'.format(saved_containers_path))
            try:
                _save_containers_parameters_list_in_kv_store(containers_parameters_list)
                get_logger().info('Containers have been saved to kv store.')
            except Exception:
                # The file copy already succeeded, so a kv-store failure is
                # reported but not fatal.
                traceback.print_exc()
        else:
            get_logger().info('Aborted saving container because of errors.')
    except Exception:
        traceback.print_exc()
        sys.exit(1)
Пример #8
0
def _load_from_list(saved_containers, ship_name, ship_ip):
    """Record saved containers that are not currently running as 'crashed'.

    Waits for consul, computes the multiset difference between
    ``saved_containers`` and the locally running containers, and saves each
    remaining entry under a freshly generated id for the given ship.
    """
    wait_for_consul_ready()
    currently_running = _get_local_running_containers()
    missing_containers = _multiset_difference(saved_containers, currently_running)
    for parameters in missing_containers:
        get_logger().info('Added service: {}'.format(parameters))
        save_container(ship_name, _generate_id(), 'crashed',
                       params=parameters, ship_ip=ship_ip)
Пример #9
0
def _load_from_list(saved_containers, ship):
    """Store saved-but-not-running containers in the kv store as 'crashed'.

    Waits for consul, then saves every entry of ``saved_containers`` that is
    left after subtracting the locally running containers (multiset
    difference), each under a newly generated id for ``ship``.
    """
    wait_for_consul_ready()
    currently_running = _get_local_running_containers()
    missing_containers = _multiset_difference(saved_containers, currently_running)
    for parameters in missing_containers:
        get_logger().info('Added service: {}'.format(parameters))
        kv.save_container(ship, _generate_id(), 'crashed', params=parameters)
Пример #10
0
def _add_running_services_at_startup():
    """Save services reported by the consul agent that the kv store lacks.

    After waiting for consul, the agent's service list is walked. The
    'consul' entry, ids containing ':' and services named 'armada' are
    skipped. Every other service whose kv key is not already stored is saved
    with status 'started'. Any failure is logged and swallowed.
    """
    wait_for_consul_ready()
    try:
        ship_ip, ship_name = get_ship_ip_and_name()
        stored_keys = get_local_services_from_kv_store()
        # NOTE(review): fixed 10 s pause before querying the agent; the
        # reason is not visible here - presumably lets services register.
        sleep(10)
        agent_services = consul_query('agent/services')
        if 'consul' in agent_services:
            del agent_services['consul']
        for service_id, service_info in six.iteritems(agent_services):
            if ':' in service_id or service_info['Service'] == 'armada':
                continue
            kv_key = create_consul_services_key(
                ship_name, service_info['Service'], service_id)
            if stored_keys and kv_key in stored_keys:
                continue
            save_container(ship_name, service_id, 'started', ship_ip=ship_ip)
            get_logger().info('Added running service: {}'.format(service_id))
    except Exception:
        get_logger().exception('Unable to add running services.')
Пример #11
0
def recover_containers_from_kv_store():
    """Recover crashed services listed in the kv store, retrying on failure.

    Crashed services are marked 'recovering', then attempted round by round
    until none remain or ``RECOVERY_RETRY_LIMIT`` rounds have run. Each round
    ends with a sleep of ``DELAY_BETWEEN_RECOVER_RETRY_SECONDS``. Recovered
    services are removed from the kv store; the rest end up 'not-recovered'.

    Returns:
        The services that were not recovered.
    """
    remaining_services = _get_crashed_services()

    for service in remaining_services:
        kv.update_container_status('recovering', key=service)

    round_number = 0
    while remaining_services and round_number < RECOVERY_RETRY_LIMIT:
        get_logger().info("Recovering containers: %s",
                          json.dumps(remaining_services))
        failed_this_round = []

        for service in remaining_services:
            recovered = _recover_container(kv.kv_get(service)['params'])
            if recovered:
                kv.kv_remove(service)
            else:
                failed_this_round.append(service)
        sleep(DELAY_BETWEEN_RECOVER_RETRY_SECONDS)
        remaining_services = failed_this_round
        round_number += 1

    for service in remaining_services:
        kv.update_container_status('not-recovered', key=service)

    return remaining_services
Пример #12
0
def _load_containers_to_kv_store(saved_containers_path):
    """Load saved container parameters from a file into the kv store.

    Waits for consul, reads the saved containers from
    ``saved_containers_path`` and hands them to ``_load_from_dict`` together
    with this ship's name and IP. Failures are logged, never raised.
    """
    wait_for_consul_ready()
    try:
        ship_ip, ship_name = get_ship_ip_and_name()
        saved_containers = _load_saved_containers_parameters(saved_containers_path)
        _load_from_dict(saved_containers, ship_name, ship_ip)
    except Exception:
        # Best effort by design; `Exception` rather than a bare except so
        # KeyboardInterrupt/SystemExit still propagate.
        get_logger().exception('Unable to load from %s', saved_containers_path)
Пример #13
0
def _is_recovery_completed():
    """Tell whether the recovery marker file exists and contains exactly '1'.

    Returns ``False`` when the file is missing, has any other content, or
    cannot be read (the error is logged).
    """
    try:
        if not os.path.isfile(RECOVERY_COMPLETED_PATH):
            return False
        with open(RECOVERY_COMPLETED_PATH) as marker_file:
            return marker_file.read() == '1'
    except Exception as error:
        get_logger().exception(error)
    return False
Пример #14
0
 def get_get_parameter(self, req, parameter_name):
     """Fetch a single GET parameter from ``req``.

     Returns:
         tuple: ``(value, None)`` on success, or ``(None, error_message)``
         when the parameter cannot be read. The underlying error is logged
         at debug level.
     """
     try:
         value = req.get_parameter(parameter_name)[parameter_name]
     except Exception as error:
         get_logger().debug(error, exc_info=True)
         problem = "Invalid input data - no parameter '{0}'.".format(
             parameter_name)
         return None, problem
     return value, None
Пример #15
0
    def _create_service(self, image_path=None, microservice_name=None, microservice_env=None, microservice_app_id=None,
                        dockyard_user=None, dockyard_password=None, ports=None, environment=None, volumes=None,
                        run_command=None, resource_limits=None, configs=None, **kwargs):
        """Prepare environment and volumes for a microservice and create its container.

        Args:
            image_path: Required. Its name part (second element returned by
                ``split_image_path``) is the fallback microservice name.
            microservice_name, microservice_env, microservice_app_id: Optional;
                each falls back to the matching ``MICROSERVICE_*`` key of
                ``environment`` (the name additionally to the image name).
            dockyard_user, dockyard_password: Passed through to
                ``self._create_container`` and stored in the restart parameters.
            ports, environment, volumes, resource_limits: Optional dicts,
                default ``{}``. A non-empty ``environment`` or ``volumes``
                passed by the caller is mutated in place.
            run_command: Required.
            configs: Optional list, default ``[]``; forwarded to ``process_hermes``.
            **kwargs: Unrecognized keys; only logged as a warning.

        Returns:
            Whatever ``self._create_container`` returns (named
            ``long_container_id`` here).

        Raises:
            ValueError: If ``image_path`` or ``run_command`` is empty.
        """
        # Check required fields in received JSON:
        if not image_path:
            raise ValueError('Field image_path cannot be empty.')
        if not run_command:
            raise ValueError('Field run_command cannot be empty.')

        if kwargs:
            get_logger().warning('JSON data sent to API contains unrecognized keys: {}'.format(list(kwargs.keys())))

        # Set default values:
        environment = environment or {}
        ports = ports or {}
        volumes = volumes or {}
        resource_limits = resource_limits or {}
        configs = configs or []
        image_name = split_image_path(image_path)[1]
        microservice_name = microservice_name or environment.get('MICROSERVICE_NAME') or image_name
        microservice_env = microservice_env or environment.get('MICROSERVICE_ENV')
        microservice_app_id = microservice_app_id or environment.get('MICROSERVICE_APP_ID')

        # Snapshot of the resolved parameters. It is serialised below BEFORE
        # the armada-specific keys are added to `environment`, so the stored
        # restart parameters do not contain those keys.
        restart_parameters = {
            'image_path': image_path,
            'microservice_name': microservice_name,
            'microservice_env': microservice_env,
            'microservice_app_id': microservice_app_id,
            'dockyard_user': dockyard_user,
            'dockyard_password': dockyard_password,
            'ports': ports,
            'environment': environment,
            'volumes': volumes,
            'run_command': run_command,
            'resource_limits': resource_limits,
            'configs': configs,
        }
        # Update environment variables with armada-specific values:
        # NOTE(review): b64encode is given str objects here, which only works
        # on Python 2 - confirm the target interpreter.
        environment['RESTART_CONTAINER_PARAMETERS'] = base64.b64encode(json.dumps(restart_parameters, sort_keys=True))
        environment['ARMADA_RUN_COMMAND'] = base64.b64encode(run_command)
        environment['MICROSERVICE_NAME'] = microservice_name
        if microservice_env:
            environment['MICROSERVICE_ENV'] = microservice_env
        if microservice_app_id:
            environment['MICROSERVICE_APP_ID'] = microservice_app_id
        config_path, hermes_volumes = process_hermes(microservice_name, image_name, microservice_env,
                                                     microservice_app_id, configs)
        if config_path:
            environment['CONFIG_PATH'] = config_path

        # The docker socket path is always mapped to itself; volumes returned
        # by process_hermes (if any) are merged in afterwards.
        volumes[docker_client.DOCKER_SOCKET_PATH] = docker_client.DOCKER_SOCKET_PATH
        volumes.update(hermes_volumes or {})
        long_container_id = self._create_container(
            image_path, microservice_name, ports, environment, volumes,
            dockyard_user, dockyard_password, resource_limits)
        return long_container_id
def recover_saved_containers_from_parameters(saved_containers):
    """Load ``saved_containers`` into the kv store, then run recovery.

    A failure while loading is logged and does not prevent the recovery step.

    Returns:
        The result of ``recover_containers_from_kv_store()``.
    """
    wait_for_consul_ready()
    try:
        _load_from_dict(saved_containers, get_ship_name())
    except Exception as error:
        get_logger().exception(error)

    return recover_containers_from_kv_store()
def _check_if_we_should_recover(saved_containers_path):
    try:
        if int(os.environ.get('DOCKER_START_TIMESTAMP')) > int(os.path.getmtime(saved_containers_path)):
            get_logger().info('Docker daemon restart detected.')
            return True
        else:
            get_logger().info('No need to recover.')
            return False
    except:
        return False
Пример #18
0
def _get_services_list(filter_microservice_name, filter_env, filter_app_id, filter_local):
    """Collect service descriptions from the kv store and apply the filters.

    Args:
        filter_microservice_name: Optional fnmatch-style pattern; only keys
            matching ``ships/*/service/<pattern>/*`` are kept.
        filter_env: When not ``None``, keep only services whose 'env' tag
            equals it.
        filter_app_id: When not ``None``, keep only services whose 'app_id'
            tag equals it.
        filter_local: When truthy, only this ship is consulted; otherwise all
            ships from ``get_ship_names()``.

    Returns:
        dict: ``microservice_id -> description dict`` for matching services.
    """
    ship_list = [get_ship_name()] if filter_local else get_ship_names()
    services_dict = {}
    if not ship_list:
        return {}

    # Merge the per-ship dictionaries; non-dict or empty answers are ignored.
    for ship in ship_list:
        ship_containers = kv.kv_get('containers_parameters_list/{}'.format(ship))
        if ship_containers and isinstance(ship_containers, dict):
            services_dict.update(ship_containers)

    service_keys = services_dict.keys()

    result = {}
    if not service_keys:
        return result

    if filter_microservice_name:
        service_keys = fnmatch.filter(service_keys, 'ships/*/service/{}/*'.format(filter_microservice_name))

    for service_key in service_keys:
        entry = services_dict[service_key]
        name = entry['ServiceName']
        status = entry['Status']
        service_id = entry['ServiceID']
        container_id = entry['container_id']
        start_timestamp = entry['start_timestamp']

        # Tags are optional: a missing key is only logged as a warning.
        tags = {}
        try:
            for tag_name, param_name in (('env', 'microservice_env'),
                                         ('app_id', 'microservice_app_id')):
                if entry['params'][param_name]:
                    tags[tag_name] = entry['params'][param_name]
        except KeyError as e:
            get_logger().warning(repr(e))

        env_ok = (filter_env is None) or (filter_env == tags.get('env'))
        app_id_ok = (filter_app_id is None) or (filter_app_id == tags.get('app_id'))
        if not (env_ok and app_id_ok):
            continue

        result[service_id] = {
            'name': name,
            'status': status,
            'address': 'n/a',
            'microservice_id': service_id,
            'container_id': container_id,
            'tags': tags,
            'start_timestamp': start_timestamp,
        }
    return result
Пример #19
0
def _wait_for_armada_start():
    """Poll ``http://localhost/health`` until it answers 'ok' or ~30 s pass.

    The endpoint is queried once per second. The function returns silently
    as soon as the body equals 'ok'; if the deadline passes first, an error
    is logged. Nothing is raised for request failures.
    """
    timeout_expiration = time.time() + 30
    while time.time() < timeout_expiration:
        time.sleep(1)
        try:
            # Bound each request by the time left (at least 1 s): without a
            # timeout a hanging connection would outlive the 30 s deadline.
            remaining = max(timeout_expiration - time.time(), 1)
            health_status = requests.get('http://localhost/health',
                                         timeout=remaining).text
            if health_status == 'ok':
                return
        except Exception:
            # Armada is not up yet - keep polling. `Exception` (not a bare
            # except) so Ctrl-C / SystemExit can still stop the wait.
            pass
    get_logger().error('Could not connect to armada.')
Пример #20
0
def _load_containers_to_kv_store(saved_containers_path):
    """Load saved container parameters from a file into the kv store.

    The saved data may be a dict or a list; each form is dispatched to its
    own loader (``_load_from_dict`` / ``_load_from_list``). Failures are
    logged, never raised.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        saved_containers = _load_saved_containers_parameters_list(saved_containers_path)
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        # Best effort by design; `Exception` rather than a bare except so
        # KeyboardInterrupt/SystemExit still propagate.
        get_logger().exception('Unable to load from %s', saved_containers_path)
Пример #21
0
def _wait_for_armada_start():
    """Wait up to ~30 seconds for the local armada health check to report 'ok'.

    ``http://localhost/health`` is requested once per second. Returns as soon
    as the response body is 'ok'; logs an error if that never happens before
    the deadline. Request failures are ignored and retried.
    """
    timeout_expiration = time.time() + 30
    while time.time() < timeout_expiration:
        time.sleep(1)
        try:
            # An explicit timeout keeps a stalled request from blocking past
            # the deadline; at least 1 s is always allowed for the attempt.
            seconds_left = max(timeout_expiration - time.time(), 1)
            response = requests.get('http://localhost/health',
                                    timeout=seconds_left)
            if response.text == 'ok':
                return
        except Exception:
            # Not reachable yet - try again. Catching `Exception` instead of
            # everything lets KeyboardInterrupt/SystemExit end the loop.
            pass
    get_logger().error('Could not connect to armada.')
Пример #22
0
def _check_if_we_should_recover(saved_containers_path):
    try:
        if int(os.environ.get('DOCKER_START_TIMESTAMP')) > int(
                os.path.getmtime(saved_containers_path)):
            get_logger().info('Docker daemon restart detected.')
            return True
        else:
            get_logger().info('No need to recover.')
            return False
    except:
        return False
Пример #23
0
def get_armada_version(address):
    """Ask the armada at ``address`` for its version.

    Performs ``GET http://<address>/version`` with a 0.5 s timeout.

    Returns:
        str: The first whitespace-separated token of the response body, or
        the literal ``"error"`` when the request fails (the failure is logged).
    """
    url = "http://{address}/version".format(address=address)
    try:
        response = requests.get(url, timeout=0.5)
        response.raise_for_status()
        return response.text.split()[0]
    except Exception as error:
        get_logger().exception(error)

    return "error"
Пример #24
0
def _recover_saved_containers_from_path(saved_containers_path):
    """Run kv-store recovery and report whether every container came back.

    ``saved_containers_path`` is only used in the error log message here.

    Returns:
        bool: ``True`` when nothing was left unrecovered; ``False`` when some
        containers were not recovered or recovery itself failed.
    """
    wait_for_consul_ready()
    try:
        not_recovered = recover_containers_from_kv_store()
        if not_recovered:
            get_logger().error('Following containers were not recovered: %s', not_recovered)
            return False
        else:
            return True
    except Exception:
        # `Exception` rather than a bare except: KeyboardInterrupt and
        # SystemExit must not be reported as a failed recovery.
        get_logger().exception('Unable to recover from %s.', saved_containers_path)
    return False
Пример #25
0
def _load_containers_to_kv_store(saved_containers_path):
    """Read saved containers from a file and load them into the kv store.

    Dict-shaped data goes to ``_load_from_dict``, anything else to
    ``_load_from_list``. Errors are logged and swallowed.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        saved_containers = _load_saved_containers_parameters_list(
            saved_containers_path)
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        # Best effort by design; `Exception` rather than a bare except so
        # KeyboardInterrupt/SystemExit still propagate.
        get_logger().exception('Unable to load from %s', saved_containers_path)
Пример #26
0
def _recover_saved_containers_from_path(saved_containers_path):
    """Recover containers from the kv store; return ``True`` on full success.

    ``saved_containers_path`` appears only in the error log message.

    Returns:
        bool: ``False`` if any container stayed unrecovered or an error
        occurred during recovery, ``True`` otherwise.
    """
    wait_for_consul_ready()
    try:
        not_recovered = recover_containers_from_kv_store()
        if not_recovered:
            get_logger().error('Following containers were not recovered: %s', not_recovered)
            return False
        else:
            return True
    except Exception:
        # Narrowed from a bare except so that KeyboardInterrupt/SystemExit
        # are not swallowed and logged as a recovery failure.
        get_logger().exception('Unable to recover from %s.', saved_containers_path)
    return False
Пример #27
0
def recover_saved_containers_from_parameters(saved_containers):
    """Load ``saved_containers`` (dict or list form) and run recovery.

    The loader is chosen by the type of ``saved_containers``. A loading
    failure is logged and does not stop the recovery step.

    Returns:
        The result of ``recover_containers_from_kv_store()``.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        loader = (_load_from_dict if isinstance(saved_containers, dict)
                  else _load_from_list)
        loader(saved_containers, ship)
    except Exception as error:
        get_logger().exception(error)

    return recover_containers_from_kv_store()
Пример #28
0
def _fetch_hermes_from_couriers(courier_addresses):
    """Ask every courier to push hermes data to this container.

    Each courier receives a POST to ``/update_hermes`` with this container's
    ssh address and ``HERMES_DIRECTORY`` as a JSON body. A courier that fails
    or answers anything other than 'ok' is logged; the loop then continues
    with the next courier.
    """
    own_ssh_address = get_container_ssh_address(socket.gethostname())
    for courier_address in courier_addresses:
        update_url = 'http://{courier_address}/update_hermes'.format(
            courier_address=courier_address)
        try:
            request_body = json.dumps({'ssh': own_ssh_address, 'path': HERMES_DIRECTORY})
            reply = requests.post(update_url, request_body)
            reply.raise_for_status()
            if reply.text.strip() != 'ok':
                raise Exception('Error response from courier:\n{}'.format(reply.text))
        except Exception as error:
            get_logger().error('Fetching all sources from courier %s failed:', courier_address)
            get_logger().exception(error)
def _recover_saved_containers_from_path(saved_containers_path):
    """Run kv-store recovery and report whether every container came back.

    ``saved_containers_path`` is only used in the error log message here.

    Returns:
        bool: ``True`` when nothing was left unrecovered; ``False`` when some
        containers were not recovered or recovery raised an error (a
        traceback is printed and the error is logged).
    """
    wait_for_consul_ready()
    try:
        not_recovered = recover_containers_from_kv_store()
        if not_recovered:
            get_logger().error('Following containers were not recovered: {}'.format(not_recovered))
            return False
        else:
            return True
    except Exception:
        # `Exception` rather than a bare except: KeyboardInterrupt and
        # SystemExit must not be reported as a failed recovery.
        traceback.print_exc()
        get_logger().error('Unable to recover from {}.'.format(saved_containers_path))
    return False
Пример #30
0
def get_other_ship_ips():
    """List the addresses of all catalog nodes except this ship.

    Returns:
        list: The 'Address' of each node from consul's ``catalog/nodes``,
        with the first occurrence of this ship's own IP removed. An empty
        list is returned (and the error logged) when anything fails.
    """
    try:
        nodes = consul_query('catalog/nodes')
        addresses = [node['Address'] for node in nodes]

        own_ip = get_ship_ip()
        if own_ip in addresses:
            addresses.remove(own_ip)
        return addresses
    except Exception as error:
        get_logger().exception(error)
        return []
def _recover_saved_containers_from_path(saved_containers_path):
    """Recover the containers saved in the file at ``saved_containers_path``.

    The saved parameters are loaded from the file and handed to
    ``recover_saved_containers``.

    Returns:
        bool: ``True`` when every container was recovered; ``False`` when
        some were not, or when loading/recovery raised an error (a traceback
        is printed and the error is logged).
    """
    try:
        saved_containers = _load_saved_containers_parameters_list(saved_containers_path)
        not_recovered = recover_saved_containers(saved_containers)
        if not_recovered:
            get_logger().error('Following containers were not recovered: {}'.format(not_recovered))
            return False
        else:
            return True
    except Exception:
        # Narrowed from a bare except so that KeyboardInterrupt/SystemExit
        # are not swallowed and reported as a recovery failure.
        traceback.print_exc()
        get_logger().error('Unable to recover from {}.'.format(saved_containers_path))
    return False
Пример #32
0
def _fetch_hermes_from_couriers(courier_addresses):
    """Request a hermes update from each courier in ``courier_addresses``.

    For every courier, this container's ssh address and ``HERMES_DIRECTORY``
    are POSTed as JSON to ``http://<courier>/update_hermes``. HTTP errors and
    non-'ok' replies are logged per courier and do not stop the iteration.
    """
    ssh_address = get_container_ssh_address(socket.gethostname())
    for courier_address in courier_addresses:
        endpoint = 'http://{courier_address}/update_hermes'.format(
            courier_address=courier_address)
        try:
            payload = {'ssh': ssh_address, 'path': HERMES_DIRECTORY}
            courier_reply = requests.post(endpoint, json.dumps(payload))
            courier_reply.raise_for_status()
            answered_ok = courier_reply.text.strip() == 'ok'
            if not answered_ok:
                raise Exception('Error response from courier:\n{}'.format(courier_reply.text))
        except Exception as error:
            get_logger().error('Fetching all sources from courier %s failed:', courier_address)
            get_logger().exception(error)
Пример #33
0
def _load_containers_to_kv_store(saved_containers_path):
    """Read saved containers from a file and load them into the kv store.

    Dict-shaped data goes to ``_load_from_dict``, anything else to
    ``_load_from_list``. On failure a traceback is printed and an error is
    logged; nothing is raised.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        saved_containers = _load_saved_containers_parameters_list(
            saved_containers_path)
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        # Best effort by design; `Exception` rather than a bare except so
        # KeyboardInterrupt/SystemExit still propagate.
        traceback.print_exc()
        get_logger().error(
            'Unable to load from {}.'.format(saved_containers_path))
def _load_containers_to_kv_store(saved_containers_path):
    """Sync running services and saved containers into the kv store.

    The keys already stored under ``ships/<ship>/service/`` are listed first.
    Running services are then added via ``_add_running_services_at_startup``,
    and the saved containers read from ``saved_containers_path`` are loaded
    (dict form via ``_load_from_dict``, otherwise ``_load_from_list``). On
    failure a traceback is printed and an error is logged; nothing is raised.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        containers_saved_in_kv = kv.kv_list('ships/{}/service/'.format(ship))
        saved_containers = _load_saved_containers_parameters_list(saved_containers_path)
        _add_running_services_at_startup(containers_saved_in_kv, ship)
        if isinstance(saved_containers, dict):
            _load_from_dict(saved_containers, containers_saved_in_kv, ship)
        else:
            _load_from_list(saved_containers, ship)
    except Exception:
        # Best effort by design; `Exception` rather than a bare except so
        # KeyboardInterrupt/SystemExit still propagate.
        traceback.print_exc()
        get_logger().error('Unable to load from {}.'.format(saved_containers_path))
Пример #35
0
def _clean_up_kv_store():
    """Remove kv entries that refer to containers no longer listed by armada.

    Does nothing until the module-level ``next_kv_clean_up_timestamp`` has
    been reached; that timestamp is advanced at the start of each real run.
    Keys under 'start_timestamp/' and 'single_active_instance/' are removed
    when the container id taken from the key is not among the container ids
    returned by the armada 'list' call.
    """
    global next_kv_clean_up_timestamp
    if time.time() < next_kv_clean_up_timestamp:
        return
    get_logger().info('Cleaning up kv-store:')
    next_kv_clean_up_timestamp = get_next_kv_clean_up_timestamp()

    listed_services = armada_api.get_json('list')
    valid_container_ids = {entry.get('container_id')
                           for entry in listed_services}

    def _remove_stale_keys(prefix, container_id_of):
        # Drop every key under `prefix` whose container id is unknown.
        for key in kv.kv_list(prefix) or []:
            if container_id_of(key) not in valid_container_ids:
                get_logger().info('Removing key: {}'.format(key))
                kv.kv_remove(key)

    # Key tail is the container id.
    _remove_stale_keys('start_timestamp/',
                       lambda key: key.split('/')[-1])
    # Key tail is '<container_id>:<suffix>'; only the id part is compared.
    _remove_stale_keys('single_active_instance/',
                       lambda key: key.split('/')[-1].split(':')[0])
    get_logger().info('Finished cleaning up kv-store.')
Пример #36
0
def main():
    """Dump the kv-store entries of the local services to a file.

    Skipped while recovery is incomplete, unless ``--force`` was given.
    Nothing is written when there are no entries to save. Any error is
    logged and ends the process with exit status 1.
    """
    setup_sentry()
    args = _parse_args()
    saved_containers_path = args.saved_containers_path

    if not (args.force or _is_recovery_completed()):
        get_logger().info(
            'Recovery is not completed. Aborting saving running containers.')
        return

    try:
        wait_for_consul_ready()
        containers_parameters_dict = {
            service_key: kv.kv_get(service_key)
            for service_key in get_local_services()
        }

        if not containers_parameters_dict:
            get_logger().info(
                'Aborted saving container because list is empty.')
            return

        _save_containers_parameters_list_in_file(containers_parameters_dict,
                                                 saved_containers_path)
        get_logger().info(
            'Containers have been saved to {}.'.format(saved_containers_path))

    except Exception as error:
        get_logger().exception(error)
        sys.exit(1)
Пример #37
0
def main():
    """Save this ship's service entries from the kv store to a file.

    Skipped while recovery is incomplete, unless ``--force`` was given. The
    entries are read recursively from ``services/<ship name>`` with their
    full keys. An empty result is not written. Any error is logged and ends
    the process with exit status 1.
    """
    setup_sentry()
    args = _parse_args()
    saved_containers_path = args.saved_containers_path

    if not (args.force or _is_recovery_completed()):
        get_logger().info(
            'Recovery is not completed. Aborting saving running containers.')
        return

    try:
        wait_for_consul_ready()
        ship_prefix = 'services/{}'.format(get_ship_name())
        ship_entries = kv.kv_get_recurse(ship_prefix, strip_keys=False)

        if not ship_entries:
            get_logger().info(
                'Aborted saving container because list is empty.')
            return

        _save_containers_parameters_list_in_file(ship_entries,
                                                 saved_containers_path)
        get_logger().info(
            'Containers have been saved to {}.'.format(saved_containers_path))

    except Exception as error:
        get_logger().exception(error)
        sys.exit(1)
def main():
    """Save this ship's service entries to a file and to the kv store.

    Skipped while recovery is incomplete, unless ``--force`` was given. Every
    key listed under ``ships/<ship>/service/`` is read from the kv store;
    when at least one entry exists the collection is written to
    ``args.saved_containers_path`` and then to the kv store. A kv-store write
    failure only prints a traceback; any other failure prints a traceback
    and exits with status 1.
    """
    args = _parse_args()
    if not args.force and not _is_recovery_completed():
        get_logger().warning('Recovery is not completed. Aborting saving running containers.')
        return
    saved_containers_path = args.saved_containers_path
    try:
        wait_for_consul_ready()
        ship = get_ship_name()
        saved_containers = kv.kv_list('ships/{}/service/'.format(ship))
        containers_parameters_dict = {}
        # The falsy check covers kv_list returning nothing to iterate.
        if saved_containers:
            for container in saved_containers:
                container_dict = kv.kv_get(container)
                containers_parameters_dict[container] = container_dict

        if containers_parameters_dict:
            _save_containers_parameters_list_in_file(containers_parameters_dict, saved_containers_path)
            get_logger().info('Containers have been saved to {}.'.format(saved_containers_path))
            try:
                _save_containers_parameters_list_in_kv_store(containers_parameters_dict)
                get_logger().info('Containers have been saved to kv store.')
            except Exception:
                # The file copy already succeeded, so this is reported but
                # not fatal. `Exception` keeps KeyboardInterrupt/SystemExit
                # from being swallowed.
                traceback.print_exc()
        else:
            get_logger().info('Aborted saving container because of errors.')
    except Exception:
        traceback.print_exc()
        sys.exit(1)
Пример #39
0
def _parse_single_ship(services_dict, filter_microservice_name, filter_env, filter_app_id):
    try:
        services_list = services_dict.keys()
    except AttributeError:
        services_list = None

    result = {}
    if not services_list:
        return result

    if filter_microservice_name:
        services_list = fnmatch.filter(services_list, 'ships/*/service/{}/*'.format(filter_microservice_name))

    for service in services_list:
        service_dict = services_dict[service]
        microservice_name = service_dict['ServiceName']
        microservice_status = service_dict['Status']
        microservice_id = service_dict['ServiceID']
        container_id = service_dict['container_id']
        microservice_start_timestamp = service_dict['start_timestamp']
        single_active_instance = service_dict.get('single_active_instance', False)
        not_available = 'n/a'

        microservice_tags_dict = {}
        try:
            if service_dict['params']['microservice_env']:
                microservice_tags_dict['env'] = service_dict['params']['microservice_env']
            if service_dict['params']['microservice_app_id']:
                microservice_tags_dict['app_id'] = service_dict['params']['microservice_app_id']
        except KeyError as e:
            get_logger().warning(repr(e))

        matches_env = (filter_env is None) or (filter_env == microservice_tags_dict.get('env'))
        matches_app_id = (filter_app_id is None) or (filter_app_id == microservice_tags_dict.get('app_id'))

        if matches_env and matches_app_id:
            microservice_dict = {
                'name': microservice_name,
                'status': microservice_status,
                'address': not_available,
                'microservice_id': microservice_id,
                'container_id': container_id,
                'tags': microservice_tags_dict,
                'start_timestamp': microservice_start_timestamp,
                'single_active_instance': single_active_instance,
            }
            result[microservice_id] = microservice_dict

    return result
Пример #40
0
def main():
    """Run container recovery at startup and mark recovery as completed.

    Recovery runs when ``--force`` is given or when
    ``_check_if_we_should_recover`` says so for the saved containers path.
    Exits with status 1 if some containers could not be recovered. The
    recovery-completed marker file is written in every case, including on
    failure.
    """
    setup_sentry()
    try:
        args = _parse_args()
        _add_running_services_at_startup()
        recovery_needed = args.force or _check_if_we_should_recover(args.saved_containers_path)
        if recovery_needed:
            _load_containers_to_kv_store(args.saved_containers_path)
            failed_containers = recover_containers_from_kv_store()
            if failed_containers:
                get_logger().error("Containers not recovered: %s", json.dumps(failed_containers))
                sys.exit(1)
            get_logger().info("All containers recovered :)")
    finally:
        # Written unconditionally, even when sys.exit(1) was triggered above.
        with open(RECOVERY_COMPLETED_PATH, 'w') as marker_file:
            marker_file.write('1')
Пример #41
0
    def on_post(self, req, resp):
        """Join this ship to the armada reachable at the posted ``host``.

        Only an armada of at most one ship may join another one. The runtime
        settings are overridden to point at the remote host, consul is
        restarted, and afterwards the ship name and the local service entries
        captured before the restart are written back.

        Returns the result of ``status_ok`` on success, otherwise the result
        of ``status_error`` with a description of the failure.
        """
        remote_host, error = self.get_post_parameter(req, 'host')
        if error:
            return self.status_error(resp, error)

        ship_name = get_ship_name()
        # Snapshot taken before the restart; it is replayed into the kv store
        # at the end (presumably because the restart discards local kv data).
        services_snapshot = {}
        for service_key in get_local_services_from_kv_store():
            services_snapshot[service_key] = kv.kv_get(service_key)

        armada_size = _get_armada_size()
        if armada_size > 1:
            too_big_message = ('Currently only single ship armadas can join the others. '
                               'Your armada has size: {0}.'.format(armada_size))
            return self.status_error(resp, too_big_message)

        try:
            remote_agent = consul_query(
                'agent/self', consul_address='{0}:8500'.format(remote_host))
            datacenter = remote_agent['Config']['Datacenter']
        except Exception as e:
            get_logger().exception(e)
            return self.status_error(
                resp, 'Could not read remote host datacenter address.')

        if _get_current_consul_mode() == consul_config.ConsulMode.BOOTSTRAP:
            override_runtime_settings(
                consul_mode=consul_config.ConsulMode.CLIENT,
                ship_ips=[remote_host],
                datacenter=datacenter)
        else:
            override_runtime_settings(
                ship_ips=[remote_host] + get_other_ship_ips(),
                datacenter=datacenter)

        if not _restart_consul():
            return self.status_error(resp, 'Waiting for armada restart timed out.')

        supervisor_rpc = xmlrpc.client.Server('http://localhost:9001/RPC2')
        start_output = supervisor_rpc.supervisor.startProcessGroup('hermes_init')
        get_logger().info('hermes_init start: %s', start_output)
        set_ship_name(ship_name)
        for service_key, service_data in six.iteritems(services_snapshot):
            kv.kv_set(service_key, service_data)
        return self.status_ok(resp)
Пример #42
0
def _recover_saved_containers_from_path(saved_containers_path):
    """Recover containers from the kv store and report whether all came back.

    :param saved_containers_path: only used in the error message that is
        logged when recovery raises.
    :return: ``True`` when every container was recovered, ``False`` when some
        were not or when recovery raised an exception.
    """
    wait_for_consul_ready()
    try:
        not_recovered = recover_containers_from_kv_store()
        if not_recovered:
            get_logger().error(
                'Following containers were not recovered: {}'.format(
                    not_recovered))
            return False
        return True
    except Exception:
        # Narrowed from a bare except so that KeyboardInterrupt/SystemExit
        # are no longer turned into a "not recovered" result.
        traceback.print_exc()
        get_logger().error(
            'Unable to recover from {}.'.format(saved_containers_path))
    return False
def recover_saved_containers(saved_containers):
    """Try to start every saved container that is not currently running.

    Containers still missing after a pass are retried, up to
    ``RECOVERY_RETRY_LIMIT`` passes, waiting
    ``DELAY_BETWEEN_RECOVER_RETRY_SECONDS`` after each pass before the set of
    running containers is read again.

    :param saved_containers: list of container parameters to restore.
    :return: the container parameters that are still not running after the
        last pass (empty when everything was recovered).
    """
    wait_for_consul_ready()
    pending = _multiset_difference(saved_containers, _get_local_running_containers())
    attempts = 0
    while pending and attempts < RECOVERY_RETRY_LIMIT:
        get_logger().info("Recovering containers: {}".format(json.dumps(pending)))
        failed = [parameters for parameters in pending
                  if not _recover_container(parameters)]
        sleep(DELAY_BETWEEN_RECOVER_RETRY_SECONDS)
        # A container reported as failed may still have come up; drop those.
        pending = _multiset_difference(failed, _get_local_running_containers())
        attempts += 1
    return pending
Пример #44
0
def main():
    """Write the consul configuration file and replace this process with consul.

    The configuration is generated from the runtime settings and the ship's
    external IP. When the settings carry no ship name, the external IP is
    used in its place. This function does not return: ``os.execv`` replaces
    the current process with the consul agent.
    """
    setup_sentry()
    consul_mode, ship_ips, datacenter, ship_name = _get_runtime_settings()
    external_ip = get_external_ip()
    effective_name = external_ip if ship_name is None else ship_name
    config_text = consul_config.get_consul_config(
        consul_mode, ship_ips, datacenter, external_ip, effective_name)

    with open(consul_config.CONFIG_PATH, 'w') as config_out:
        config_out.write(config_text)

    command = '/usr/local/bin/consul agent -config-file {config_path}'.format(config_path=consul_config.CONFIG_PATH)
    get_logger().info('RUNNING: %s', command)

    # argv[0] doubles as the executable path handed to execv.
    argv = command.split()
    os.execv(argv[0], argv)
Пример #45
0
def _recover_saved_containers_from_path(saved_containers_path):
    """Recover the containers described in a saved-containers file.

    :param saved_containers_path: path of the file holding the saved
        container parameters.
    :return: ``True`` when every saved container was recovered, ``False``
        when some were not or when loading/recovering raised an exception.
    """
    try:
        saved_containers = _load_saved_containers_parameters_list(
            saved_containers_path)
        not_recovered = recover_saved_containers(saved_containers)
        if not_recovered:
            get_logger().error(
                'Following containers were not recovered: {}'.format(
                    not_recovered))
            return False
        return True
    except Exception:
        # Narrowed from a bare except so that KeyboardInterrupt/SystemExit
        # are no longer turned into a "not recovered" result.
        traceback.print_exc()
        get_logger().error(
            'Unable to recover from {}.'.format(saved_containers_path))
    return False
Пример #46
0
    def _stop_service(self, container_id, force=False):
        """Stop a container and remove its entries from the kv store.

        :param container_id: id of the container to stop.
        :param force: when true, the kv keys are looked up via
            ``get_services_by_ship()`` instead of
            ``get_local_services_from_kv_store()``.
        :raises Exception: when the container is still running after three
            stop attempts. The last exception raised by ``docker_api.stop``
            is re-raised; if no attempt raised, a ``RuntimeError`` is raised.
        """
        if force:
            service_list = get_services_by_ship()
        else:
            service_list = get_local_services_from_kv_store()

        try:
            keys = fnmatch.filter(service_list, '*/{}'.format(container_id))
        except (IndexError, TypeError) as e:
            # E.g. service_list is None; carry on without kv keys.
            get_logger().exception(e)
            keys = []

        if not is_container_running(container_id):
            # Container is already gone: only clean up its registrations.
            for key in keys:
                kv_remove(key)
            try:
                deregister_services(container_id)
            except Exception as e:
                get_logger().exception(e)
            return

        run_command_in_container('supervisorctl stop armada_agent',
                                 container_id)
        trigger_hook('pre-stop', container_id)

        docker_api = docker_client.api()
        last_exception = None
        try:
            deregister_services(container_id)
        except Exception as e:
            get_logger().exception(e)
        for _ in range(3):
            try:
                docker_api.stop(container_id)
            except Exception as e:
                get_logger().debug(e, exc_info=True)
                last_exception = e
            if not is_container_running(container_id):
                for key in keys:
                    kv_remove(key)
                break
        if is_container_running(container_id):
            get_logger().error('Could not stop container: %s',
                               container_id)
            if last_exception is None:
                # docker_api.stop never raised, yet the container is still
                # up. `raise None` would be a TypeError, so raise a
                # meaningful error instead.
                raise RuntimeError(
                    'Could not stop container: {}'.format(container_id))
            raise last_exception
Пример #47
0
def _add_running_services_at_startup():
    """Record in the kv store the locally running services that are missing there.

    Every service reported by the local consul agent is saved with status
    ``'started'`` unless its key already exists under
    ``ships/<ship>/service/``. The ``consul`` and ``armada`` services and
    service ids containing ``':'`` are skipped. Any failure is logged and
    swallowed; this function never raises ordinary exceptions.
    """
    wait_for_consul_ready()
    try:
        ship = get_ship_name()
        containers_saved_in_kv = kv.kv_list('ships/{}/service/'.format(ship))
        # NOTE(review): presumably gives running containers time to register
        # with the agent before it is queried - confirm.
        sleep(10)
        all_services = consul_query('agent/services')
        # The agent does not necessarily list a 'consul' entry; a plain `del`
        # would raise KeyError and abort registering every other service.
        all_services.pop('consul', None)
        for service_id, service_dict in all_services.items():
            # NOTE(review): ids with ':' look like sub-services of a
            # container - confirm.
            if ':' in service_id:
                continue
            if service_dict['Service'] == 'armada':
                continue
            key = 'ships/{}/service/{}/{}'.format(ship, service_dict['Service'], service_id)
            if not containers_saved_in_kv or key not in containers_saved_in_kv:
                kv.save_container(ship, service_id, 'started')
                get_logger().info('Added running service: {}'.format(service_id))
    except Exception:
        # Narrowed from a bare except: KeyboardInterrupt/SystemExit propagate.
        get_logger().exception('Unable to add running services.')
Пример #48
0
 def on_get(self, req, resp, microservice_id):
     """Respond with the port mapping of the container behind a microservice.

     Sets a 404 status and a ``SERVICE_NOT_FOUND`` error body when the
     service does not exist, and a 500 status with the error description when
     reading the port mapping fails.
     """
     if not exists_service(microservice_id):
         not_found_message = 'Could not find service "{microservice_id}"'.format(
             microservice_id=microservice_id)
         resp.status = falcon.HTTP_404
         resp.json = {
             'error': not_found_message,
             'error_id': 'SERVICE_NOT_FOUND',
         }
         return
     try:
         # The container id is the part of the microservice id before ':'.
         container_id, _, _ = microservice_id.partition(':')
         resp.json = get_container_ports_mapping(container_id)
     except Exception as e:
         get_logger().exception(e)
         resp.json = {'error': 'Could not get ports: {}'.format(repr(e))}
         resp.status = falcon.HTTP_500
Пример #49
0
    def _login_to_dockyard(self, docker_api, dockyard_address, dockyard_user, dockyard_password):
        """Log in to a dockyard when both user and password are provided.

        Several spellings of the registry endpoint are tried in order and the
        first successful login wins. If every attempt fails, the exception
        from the first attempt is raised. Without credentials nothing is done.
        """
        if not (dockyard_user and dockyard_password):
            return

        # Workaround for abrupt changes in docker-py library.
        endpoint_candidates = (
            'https://{0}/v1/'.format(dockyard_address),
            'https://{0}'.format(dockyard_address),
            dockyard_address,
        )
        failures = []
        for endpoint in endpoint_candidates:
            try:
                docker_api.login(dockyard_user, dockyard_password, registry=endpoint)
            except Exception as e:
                get_logger().debug(e)
                failures.append(e)
            else:
                return

        raise failures[0]
def _recover_container(container_parameters):
    """Ask the armada API to run one container and report whether it succeeded.

    :param container_parameters: JSON-serializable parameters posted to the
        ``run`` endpoint.
    :return: ``True`` when the response status is ``'ok'``, else ``False``.
    """
    get_logger().info('Recovering: {}...\n'.format(json.dumps(container_parameters)))
    response = armada_api.post('run', container_parameters)
    recovered = response.get('status') == 'ok'
    if recovered:
        get_logger().info('Recovered container: {}'.format(json.dumps(response)))
    else:
        get_logger().error('Could not recover container: {}'.format(json.dumps(response)))
    return recovered
Пример #51
0
def recover_saved_containers(saved_containers):
    """Try to start every saved container that is not currently running.

    Each pass marks the containers to recover as ``'recovering'`` in the kv
    store, then tries to run them. Successfully recovered ones have their
    ``service/<name>/<index>`` key removed; those that fail on the last
    allowed pass are marked ``'not-recovered'``. Up to
    ``RECOVERY_RETRY_LIMIT`` passes are made, with a pause of
    ``DELAY_BETWEEN_RECOVER_RETRY_SECONDS`` after each.

    :param saved_containers: list of container parameter dicts; each must
        contain ``'microservice_name'``.
    :return: the container parameters still not running after the last pass.
    """
    wait_for_consul_ready()
    running_containers = _get_local_running_containers()
    containers_to_be_recovered = _multiset_difference(saved_containers, running_containers)
    recovery_retry_count = 0
    while containers_to_be_recovered and recovery_retry_count < RECOVERY_RETRY_LIMIT:
        get_logger().info("Recovering containers: {}".format(json.dumps(containers_to_be_recovered)))
        containers_not_recovered = []
        # Identical parameter sets are grouped via their canonical JSON form;
        # the index tells apart several instances of the same parameters.
        counter_to_be_recovered = Counter(json.dumps(x, sort_keys=True) for x in containers_to_be_recovered)
        to_be_recovered = []
        for serialized_parameters, count in counter_to_be_recovered.items():
            name = json.loads(serialized_parameters)['microservice_name']
            for index in range(count):
                to_be_recovered.append((serialized_parameters, index))
                kv.save_service(name, index, 'recovering', json.loads(serialized_parameters))

        is_last_attempt = recovery_retry_count == (RECOVERY_RETRY_LIMIT - 1)
        for serialized_parameters, index in to_be_recovered:
            container_parameters = json.loads(serialized_parameters)
            name = container_parameters['microservice_name']
            if not _recover_container(container_parameters):
                containers_not_recovered.append(container_parameters)
                if is_last_attempt:
                    # Parse the serialized form, not the already-parsed dict:
                    # json.loads() on a dict raises TypeError.
                    kv.save_service(name, index, 'not-recovered', json.loads(serialized_parameters))
            else:
                kv.kv_remove('service/{}/{}'.format(name, index))
        sleep(DELAY_BETWEEN_RECOVER_RETRY_SECONDS)
        running_containers = _get_local_running_containers()
        containers_to_be_recovered = _multiset_difference(containers_not_recovered, running_containers)
        recovery_retry_count += 1

    return containers_to_be_recovered
Пример #52
0
    def _stop_service(self, container_id):
        """Stop a container and remove its entry from the kv store.

        :param container_id: id of the container to stop.
        :raises Exception: when the container is still running after three
            stop attempts. The last exception raised by ``docker_api.stop``
            is re-raised; if no attempt raised, a ``RuntimeError`` is raised.
        """
        ship = get_ship_name()
        service_list = kv_list("ships/{}/service/".format(ship))
        try:
            key = fnmatch.filter(service_list, "*/{}".format(container_id))[0]
        except (IndexError, TypeError):
            # No matching key, or kv_list returned nothing iterable.
            key = None

        if not is_container_running(container_id):
            # Container is already gone: only clean up its registrations.
            if key:
                kv_remove(key)
            try:
                deregister_services(container_id)
            except Exception as e:
                get_logger().exception(e)
            return

        run_command_in_container("supervisorctl stop armada_agent", container_id)

        # TODO: Compatibility with old microservice images. Should be removed in future armada version.
        run_command_in_container("supervisorctl stop register_in_service_discovery", container_id)

        docker_api = docker_client.api()
        last_exception = None
        try:
            deregister_services(container_id)
        except Exception as e:
            get_logger().exception(e)
        for _ in range(3):
            try:
                docker_api.stop(container_id)
            except Exception as e:
                get_logger().debug(e, exc_info=True)
                last_exception = e
            if not is_container_running(container_id):
                if key:
                    kv_remove(key)
                break
        if is_container_running(container_id):
            get_logger().error("Could not stop container: %s", container_id)
            if last_exception is None:
                # docker_api.stop never raised, yet the container is still
                # up. `raise None` would be a TypeError, so raise a
                # meaningful error instead.
                raise RuntimeError("Could not stop container: {}".format(container_id))
            raise last_exception
Пример #53
0
def _clean_up_kv_store():
    """Remove kv-store keys that refer to containers no longer listed by armada.

    Does nothing until the time stored in ``next_kv_clean_up_timestamp`` has
    passed; once it runs, the next clean-up time is rescheduled first. Keys
    under ``start_timestamp/`` and ``single_active_instance/`` are removed
    when the container id embedded in them is not in the armada ``list``
    response.
    """
    global next_kv_clean_up_timestamp
    if time.time() < next_kv_clean_up_timestamp:
        return
    get_logger().info('Cleaning up kv-store:')
    next_kv_clean_up_timestamp = get_next_kv_clean_up_timestamp()

    listed_services = armada_api.get_json('list')
    known_container_ids = {entry.get('container_id') for entry in listed_services}

    # (key prefix, how to extract the container id from a key under it)
    clean_up_rules = (
        ('start_timestamp/', lambda kv_key: kv_key.split('/')[-1]),
        ('single_active_instance/', lambda kv_key: kv_key.split('/')[-1].split(':')[0]),
    )
    for prefix, extract_container_id in clean_up_rules:
        for kv_key in kv.kv_list(prefix) or []:
            if extract_container_id(kv_key) in known_container_ids:
                continue
            get_logger().info('Removing key: {}'.format(kv_key))
            kv.kv_remove(kv_key)
    get_logger().info('Finished cleaning up kv-store.')
Пример #54
0
def _get_courier_addresses():
    """Poll service discovery for ``courier`` instances for up to 30 seconds.

    Discovery is attempted once per second and stops as soon as a non-empty
    result is obtained. If the most recent attempt raised, the error is
    logged; if discovery worked but found nothing, that is logged as info.

    :return: the discovered courier addresses, or an empty/falsy collection
        when none were found.
    """
    courier_addresses = set()
    last_exception = None
    deadline = time.time() + 30

    while time.time() < deadline:
        time.sleep(1)
        try:
            courier_addresses = _consul_discover('courier')
            last_exception = None
            if courier_addresses:
                break
        except Exception as e:
            last_exception = e

    if last_exception is not None:
        get_logger().error('Could not determine if courier is running:')
        get_logger().exception(last_exception)
    elif not courier_addresses:
        # The loop only exits early with a non-empty result, so an empty
        # result here means no courier was found before the deadline.
        get_logger().info('No running couriers found.')
    return courier_addresses
Пример #55
0
 def status_exception(self, message, exception):
     """Log an exception and build a JSON error response describing it.

     The error text combines ``message`` with the exception's class name and
     its string form.
     """
     get_logger().exception(exception)
     exception_name = type(exception).__name__
     exception_text = str(exception)
     error_msg = "API exception: {0}. {1} - {2}".format(message, exception_name, exception_text)
     web.header('Content-Type', 'application/json')
     return _create_response_with_error(error_msg)
Пример #56
0
 def status_error(self, message=None):
     """Log an API error and build a JSON error response carrying ``message``."""
     logger = get_logger()
     logger.error('API error: %s', message)
     web.header('Content-Type', 'application/json')
     error_response = _create_response_with_error(message)
     return error_response