def ovs_reset_bridges():
    """
    Run magma-bridge-reset.sh for the configured uplink bridge.

    The bridge name comes from the 'uplink_bridge' key of the pipelined
    service config (default "uplink_br0"). When NAT is enabled (service
    config 'enable_nat', falling back to the mconfig's nat_enabled) the
    SGi interface, management IP and gateway are passed as empty strings;
    because the command line is whitespace-split before execution, those
    empty values do not become arguments.
    """
    pipelined_mconfig = get_mconfig_manager().load_service_mconfig(
        'pipelined', mconfigs_pb2.PipelineD(),
    )
    pipelined_cfg = load_service_config('pipelined')

    # Start from the NAT case and only fill in SGi details for non-NAT
    sgi_iface = ""
    sgi_ip_addr = ""
    sgi_gateway = ""
    nat_enabled = pipelined_cfg.get(
        'enable_nat', pipelined_mconfig.nat_enabled,
    )
    if not nat_enabled:
        sgi_iface = pipelined_cfg['nat_iface']
        sgi_ip_addr = pipelined_cfg.get(
            'sgi_management_iface_ip_addr',
            pipelined_mconfig.sgi_management_iface_ip_addr,
        )
        sgi_gateway = pipelined_cfg.get(
            'sgi_management_iface_gw',
            pipelined_mconfig.sgi_management_iface_gw,
        )

    bridge_name = pipelined_cfg.get('uplink_bridge', "uplink_br0")
    script_args = (bridge_name, sgi_iface, sgi_ip_addr, sgi_gateway)
    command = "magma-bridge-reset.sh -n %s %s %s %s" % script_args

    print("ovs-restart: ", command)
    subprocess.call(command.split())
def test_happy_path_feature_unspecified(self, new_manager_mock):
    """
    Test happy path with no feature flags set on the magmad mconfig
    """
    mock_manager = Mock()
    mock_manager.load_service_mconfig.return_value = MagmaD()
    new_manager_mock.return_value = mock_manager

    selected = get_mconfig_manager()

    self.assertIsInstance(selected, MconfigManagerImpl)
    mock_manager.load_service_mconfig.assert_called_once_with('magmad')
def reload_mconfig(self):
    """
    Refresh the mconfig manager and this service's managed config.

    A LoadConfigError raised along the way is logged as a warning;
    attributes not yet reassigned at that point keep their old values.
    """
    try:
        # The streaming feature flag may have changed, so pick the
        # manager again before loading
        manager = get_mconfig_manager()
        self._mconfig_manager = manager
        self._mconfig = manager.load_service_mconfig(self._name)
    except LoadConfigError as load_err:
        logging.warning(load_err)
def __init__(self, name, empty_mconfig, loop=None):
    """
    Set up logging, configs, the error counter and the gRPC server.

    Args:
        name: service name, stored as self._name and used by the
            config reload calls
        empty_mconfig: initial value of self._mconfig, in place until
            reload_mconfig() replaces it
        loop: event loop to use; when None, asyncio.get_event_loop()
            is used
    """
    self._name = name
    self._port = 0
    self._get_status_callback = None
    self._get_operational_states_cb = None
    self._log_count_handler = MsgCounterHandler()

    # Init logging before doing anything
    logging.basicConfig(
        level=logging.INFO,
        format='[%(asctime)s %(levelname)s %(name)s] %(message)s')
    # Add a handler to count errors
    logging.root.addHandler(self._log_count_handler)

    # Set gRPC polling strategy
    self._set_grpc_poll_strategy()

    # Load the managed config if present
    self._mconfig = empty_mconfig
    self._mconfig_metadata = None
    self._mconfig_manager = get_mconfig_manager()
    self.reload_mconfig()

    self._state = ServiceInfo.STARTING
    self._health = ServiceInfo.APP_UNHEALTHY
    if loop is None:
        loop = asyncio.get_event_loop()
    self._loop = loop
    self._start_time = int(time.time())
    self._register_signal_handlers()

    # Load the service config if present
    self._config = None
    self.reload_config()

    # Count errors
    # The reporter is given the same handler that was attached to the
    # root logger above
    self.log_counter = ServiceLogErrorReporter(
        loop=self._loop,
        service_config=self._config,
        handler=self._log_count_handler,
    )
    self.log_counter.start()

    # Operational States
    self._operational_states = []

    # '0.0.0' stays in place when the distribution lookup below fails
    self._version = '0.0.0'
    # Load the service version if available
    try:
        self._version = pkg_resources.get_distribution('orc8r').version
    except pkg_resources.ResolutionError as e:
        logging.info(e)

    # Register this object as the Service303 servicer on a new server
    self._server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    add_Service303Servicer_to_server(self, self._server)
def test_happy_path_feature_off(self, new_manager_mock):
    """
    Test happy path when the streamer feature flag is explicitly False
    """
    flags_off_mconfig = MagmaD(
        feature_flags={'kafka_config_streamer': False},
    )
    mock_manager = Mock()
    mock_manager.load_service_mconfig.return_value = flags_off_mconfig
    new_manager_mock.return_value = mock_manager

    selected = get_mconfig_manager()

    self.assertIsInstance(selected, MconfigManagerImpl)
    mock_manager.load_service_mconfig.assert_called_once_with('magmad')
def test_happy_path_feature_on(self, new_manager_mock):
    """
    Test happy path when the streamer feature flag is True
    """
    flags_on_mconfig = MagmaD(
        feature_flags={'kafka_config_streamer': True},
    )
    mock_manager = Mock()
    mock_manager.load_service_mconfig.return_value = flags_on_mconfig
    new_manager_mock.return_value = mock_manager

    selected = get_mconfig_manager()

    # A StreamedMconfigManager should have been instantiated; within this
    # test method that instantiation yields the mock instance.
    self.assertIs(selected, mock_manager)
    mock_manager.load_service_mconfig.assert_called_once_with('magmad')
def reload_mconfig(self):
    """
    Refresh the mconfig manager, then reload metadata, the service
    mconfig and the shared mconfig through it.

    The three loads run in that order; a LoadConfigError from any of
    them is logged as a warning and the remaining loads are skipped.
    """
    # The streaming feature flag may have changed, so pick the manager
    # again before loading anything
    manager = get_mconfig_manager()
    self._mconfig_manager = manager
    try:
        self._mconfig_metadata = manager.load_mconfig_metadata()
        self._mconfig = manager.load_service_mconfig(
            self._name, self._mconfig,
        )
        self._shared_mconfig = manager.load_shared_mconfig(
            self._shared_mconfig,
        )
    except LoadConfigError as load_err:
        logging.warning(load_err)
def __init__(self, name, loop=None):
    """
    Set up logging, configs and the gRPC server for the service.

    Args:
        name: service name, stored as self._name and passed to
            load_service_config()
        loop: event loop to use; when None, asyncio.get_event_loop()
            is used
    """
    self._name = name
    self._port = 0
    self._get_status_callback = None
    self._reload_config_callback = None

    # Init logging before doing anything
    logging.basicConfig(
        level=logging.INFO,
        format='[%(asctime)s %(levelname)s %(name)s] %(message)s')

    # Set gRPC polling strategy
    self._set_grpc_poll_strategy()

    # Load the managed config if present
    self._mconfig = None
    self._mconfig_manager = get_mconfig_manager()
    self.reload_mconfig()

    self._state = ServiceInfo.STARTING
    self._health = ServiceInfo.APP_UNHEALTHY
    if loop is None:
        loop = asyncio.get_event_loop()
    self._loop = loop
    self._start_time = int(time.time())
    self._register_signal_handlers()

    # Load the service config if present
    # self._config stays None when loading raises LoadConfigError
    self._config = None
    try:
        self._config = load_service_config(name)
    except LoadConfigError as e:
        logging.warning(e)
    self._setup_logging()

    # '0.0.0' stays in place when the distribution lookup below fails
    self._version = '0.0.0'
    # Load the service version if available
    try:
        self._version = pkg_resources.get_distribution('orc8r').version
    except pkg_resources.ResolutionError as e:
        logging.info(e)

    # Register this object as the Service303 servicer on a new server
    self._server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    add_Service303Servicer_to_server(self, self._server)
def test_new_mconfig_load_error(self, new_manager_mock, old_manager_mock):
    """
    Test feature flag on while the new mconfig manager fails to load
    """
    failing_new_manager = Mock()
    failing_new_manager.load_service_mconfig = Mock(
        side_effect=LoadConfigError('mock'),
    )
    new_manager_mock.return_value = failing_new_manager

    flags_on_mconfig = MagmaD(
        feature_flags={'kafka_config_streamer': True},
    )
    working_old_manager = Mock()
    working_old_manager.load_service_mconfig.return_value = flags_on_mconfig
    old_manager_mock.return_value = working_old_manager

    selected = get_mconfig_manager()

    self.assertIs(selected, failing_new_manager)
    for manager in (failing_new_manager, working_old_manager):
        manager.load_service_mconfig.assert_called_once_with('magmad')
def test_both_mconfigs_load_error(self, new_manager_mock, old_manager_mock):
    """
    Test the case where both mconfig managers fail to load
    """
    failing_new_manager = Mock()
    failing_new_manager.load_service_mconfig = Mock(
        side_effect=LoadConfigError('mock'),
    )
    new_manager_mock.return_value = failing_new_manager

    failing_old_manager = Mock()
    failing_old_manager.load_service_mconfig = Mock(
        side_effect=LoadConfigError('mock2'),
    )
    old_manager_mock.return_value = failing_old_manager

    selected = get_mconfig_manager()

    self.assertIs(selected, failing_old_manager)
    for manager in (failing_new_manager, failing_old_manager):
        manager.load_service_mconfig.assert_called_once_with('magmad')
async def bootstrap_success_cb(certs_generated: bool): nonlocal first_time_bootstrap if first_time_bootstrap: if stream_client: stream_client.start() if sync_rpc_client: sync_rpc_client.start() first_time_bootstrap = False if certs_generated: svcs_to_restart = [] if 'control_proxy' in services: svcs_to_restart.append('control_proxy') # fluent-bit caches TLS client certs in memory, so we need to # restart it whenever the certs change fresh_mconfig = get_mconfig_manager().load_service_mconfig( 'magmad', mconfigs_pb2.MagmaD(), ) dynamic_svcs = fresh_mconfig.dynamic_services or [] if 'td-agent-bit' in dynamic_svcs: svcs_to_restart.append('td-agent-bit') await service_manager.restart_services(services=svcs_to_restart)
def main():
    """
    Main magmad function

    Creates the 'magmad' MagmaService and wires up, in order: the service
    poller and service manager, the metrics collector, the optional config
    streamer and sync RPC clients (both started from the bootstrap success
    callback), the bootstrap manager, the optional kernel version poller,
    the state reporter, the service health watchdog, the optional upgrade
    and network-monitor loops, the RPC servicer and, when
    SDWatchdog.has_notify() is true, the systemd watchdog. It then runs
    the service loop and closes the service once the loop returns.
    """
    service = MagmaService('magmad', mconfigs_pb2.MagmaD())

    # Optionally pipe errors to Sentry
    sentry_init(service_name=service.name)

    logging.info('Starting magmad for UUID: %s', snowflake.make_snowflake())

    # Create service manager
    services = service.config.get('magma_services')
    init_system = service.config.get('init_system', 'systemd')
    registered_dynamic_services = service.config.get(
        'registered_dynamic_services', [],
    )
    enabled_dynamic_services = []
    if service.mconfig is not None:
        enabled_dynamic_services = service.mconfig.dynamic_services

    # Poll the services' Service303 interface
    service_poller = ServicePoller(
        service.loop, service.config,
        enabled_dynamic_services,
    )
    service_poller.start()

    service_manager = ServiceManager(
        services, init_system, service_poller,
        registered_dynamic_services, enabled_dynamic_services,
    )

    # Get metrics service config
    # NOTE(review): the subscripts below fail if 'metricsd' is absent from
    # the service config, since .get() then returns None
    metrics_config = service.config.get('metricsd')
    metrics_services = metrics_config['services']
    collect_interval = metrics_config['collect_interval']
    sync_interval = metrics_config['sync_interval']
    grpc_timeout = metrics_config['grpc_timeout']
    grpc_msg_size = metrics_config.get('max_grpc_msg_size_mb', 4)
    metrics_post_processor_fn = metrics_config.get('post_processing_fn')

    # One ScrapeTarget per configured entry; each entry must provide the
    # 'url', 'name' and 'interval' keys
    metric_scrape_targets = [
        ScrapeTarget(t['url'], t['name'], t['interval'])
        for t in
        metrics_config.get('metric_scrape_targets', [])
    ]

    # Create local metrics collector
    metrics_collector = MetricsCollector(
        services=metrics_services,
        collect_interval=collect_interval,
        sync_interval=sync_interval,
        grpc_timeout=grpc_timeout,
        grpc_max_msg_size_mb=grpc_msg_size,
        loop=service.loop,
        post_processing_fn=get_metrics_postprocessor_fn(
            metrics_post_processor_fn,
        ),
        scrape_targets=metric_scrape_targets,
    )

    # Poll and sync the metrics collector loops
    metrics_collector.run()

    # Start a background thread to stream updates from the cloud
    # (only constructed here; start() is called from bootstrap_success_cb)
    stream_client = None
    if service.config.get('enable_config_streamer', False):
        stream_client = StreamerClient(
            {
                CONFIG_STREAM_NAME: ConfigManager(
                    services, service_manager, service, MconfigManagerImpl(),
                ),
            },
            service.loop,
        )

    # Create sync rpc client with a heartbeat of 30 seconds (timeout = 60s)
    sync_rpc_client = None
    if service.config.get('enable_sync_rpc', False):
        sync_rpc_client = SyncRPCClient(
            service.loop, 30,
            service.config.get('print_grpc_payload', False),
        )

    # Flipped to False by bootstrap_success_cb after its first invocation
    first_time_bootstrap = True

    # This is called when bootstrap succeeds and when _bootstrap_check is
    # invoked but bootstrap is not needed. If it's invoked right after certs
    # are generated, certs_generated is true, control_proxy will restart.
    async def bootstrap_success_cb(certs_generated: bool):
        nonlocal first_time_bootstrap
        if first_time_bootstrap:
            if stream_client:
                stream_client.start()
            if sync_rpc_client:
                sync_rpc_client.start()
            first_time_bootstrap = False
        if certs_generated:
            svcs_to_restart = []
            if 'control_proxy' in services:
                svcs_to_restart.append('control_proxy')

            # fluent-bit caches TLS client certs in memory, so we need to
            # restart it whenever the certs change
            fresh_mconfig = get_mconfig_manager().load_service_mconfig(
                'magmad', mconfigs_pb2.MagmaD(),
            )
            dynamic_svcs = fresh_mconfig.dynamic_services or []
            if 'td-agent-bit' in dynamic_svcs:
                svcs_to_restart.append('td-agent-bit')

            # Called even when the list ended up empty
            await service_manager.restart_services(services=svcs_to_restart)

    # Create bootstrap manager
    bootstrap_manager = BootstrapManager(service, bootstrap_success_cb)

    # Initialize kernel version poller if it is enabled
    kernel_version_poller = None
    if service.config.get('enable_kernel_version_checking', False):
        kernel_version_poller = KernelVersionsPoller(service)
        kernel_version_poller.start()

    # gateway status generator to bundle various information about this
    # gateway into an object.
    gateway_status_factory = GatewayStatusFactory(
        service=service,
        service_poller=service_poller,
        kernel_version_poller=kernel_version_poller,
    )

    # _grpc_client_manager to manage grpc client recycling
    grpc_client_manager = GRPCClientManager(
        service_name="state",
        service_stub=StateServiceStub,
        max_client_reuse=60,
    )

    # Initialize StateReporter
    state_reporter = StateReporter(
        config=service.config,
        mconfig=service.mconfig,
        loop=service.loop,
        bootstrap_manager=bootstrap_manager,
        gw_status_factory=gateway_status_factory,
        grpc_client_manager=grpc_client_manager,
    )

    # Initialize ServiceHealthWatchdog
    service_health_watchdog = ServiceHealthWatchdog(
        config=service.config,
        loop=service.loop,
        service_poller=service_poller,
        service_manager=service_manager,
    )

    # Start _bootstrap_manager
    bootstrap_manager.start_bootstrap_manager()

    # Start all services when magmad comes up
    service.loop.create_task(service_manager.start_services())

    # Start state reporting loop
    state_reporter.start()

    # Start service timeout health check loop
    service_health_watchdog.start()

    # Start upgrade manager loop
    if service.config.get('enable_upgrade_manager', False):
        upgrader = _get_upgrader_impl(service)
        service.loop.create_task(start_upgrade_loop(service, upgrader))

    # Start network health metric collection
    if service.config.get('enable_network_monitor', False):
        service.loop.create_task(metrics_collection_loop(service.config))

    # Create generic command executor
    command_executor = None
    if service.config.get('generic_command_config', None):
        command_executor = get_command_executor_impl(service)

    # Start loop to monitor unattended upgrade status
    service.loop.create_task(monitor_unattended_upgrade_status())

    # Add all servicers to the server
    magmad_servicer = MagmadRpcServicer(
        service,
        services, service_manager, get_mconfig_manager(), command_executor,
        service.loop,
        service.config.get('print_grpc_payload', False),
    )
    magmad_servicer.add_to_server(service.rpc_server)

    if SDWatchdog.has_notify():
        # Create systemd watchdog
        sdwatchdog = SDWatchdog(
            tasks=[bootstrap_manager, state_reporter],
            update_status=True,
        )
        # Start watchdog loop
        service.loop.create_task(sdwatchdog.run())

    # Run the service loop
    service.run()

    # Cleanup the service
    service.close()
def __init__(self, name, empty_mconfig, loop=None):
    """
    Set up logging, configs, the error counter, the service version and
    the gRPC server.

    Args:
        name: service name, stored as self._name and used by the
            config reload calls
        empty_mconfig: initial value of self._mconfig, in place until
            reload_mconfig() replaces it
        loop: event loop to use; when None, asyncio.get_event_loop()
            is used
    """
    self._name = name
    self._port = 0
    self._get_status_callback = None
    self._get_operational_states_cb = None
    self._log_count_handler = MsgCounterHandler()

    # Init logging before doing anything
    logging.basicConfig(
        level=logging.INFO,
        format='[%(asctime)s %(levelname)s %(name)s] %(message)s')
    # Add a handler to count errors
    logging.root.addHandler(self._log_count_handler)

    # Set gRPC polling strategy
    self._set_grpc_poll_strategy()

    # Load the managed config if present
    self._mconfig = empty_mconfig
    self._mconfig_metadata = None
    self._mconfig_manager = get_mconfig_manager()
    self.reload_mconfig()

    self._state = ServiceInfo.STARTING
    self._health = ServiceInfo.APP_UNHEALTHY
    if loop is None:
        loop = asyncio.get_event_loop()
    self._loop = loop
    self._start_time = int(time.time())
    self._register_signal_handlers()

    # Load the service config if present
    self._config = None
    self.reload_config()

    # Count errors
    # The reporter is given the same handler that was attached to the
    # root logger above
    self.log_counter = ServiceLogErrorReporter(
        loop=self._loop,
        service_config=self._config,
        handler=self._log_count_handler,
    )
    self.log_counter.start()

    # Operational States
    self._operational_states = []

    # '0.0.0' stays in place when the distribution lookup below fails
    self._version = '0.0.0'
    # Load the service version if available
    try:
        # Check if service on docker
        if self._config and 'init_system' in self._config \
                and self._config['init_system'] == 'docker':
            # image comes in form of "feg_gateway_python:<IMAGE_TAG>\n"
            # Skip the "feg_gateway_python:" part
            # The pipe is opened in a with-block so it is closed (and the
            # child process reaped) as soon as the tag has been read
            with os.popen(
                'docker ps --filter name=magmad --format "{{.Image}}" | '
                'cut -d ":" -f 2',
            ) as image:
                image_tag = image.read().strip('\n')
            self._version = image_tag
        else:
            self._version = pkg_resources.get_distribution('orc8r').version
    except pkg_resources.ResolutionError as e:
        logging.info(e)

    # Register this object as the Service303 servicer on a new server
    self._server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    add_Service303Servicer_to_server(self, self._server)
def main():
    """
    Main magmad function

    Creates the 'magmad' MagmaService and wires up, in order: the service
    poller and service manager, the optional config/mconfig streamer
    client, the checkin manager, the optional sync RPC client, the
    bootstrap manager, the optional upgrade, network-monitor and systemd
    tailer loops, the RPC servicer and, when SDWatchdog.has_notify() is
    true, the systemd watchdog. It then runs the service loop and closes
    the service once the loop returns.
    """
    service = MagmaService('magmad')

    logging.info('Starting magmad for UUID: %s', snowflake.make_snowflake())

    # Create service manager
    services = service.config['magma_services']
    init_system = service.config.get('init_system', 'systemd')
    registered_dynamic_services = service.config.get(
        'registered_dynamic_services', [])
    enabled_dynamic_services = []
    if service.mconfig is not None:
        enabled_dynamic_services = service.mconfig.dynamic_services

    # Poll the services' Service303 interface
    service_poller = ServicePoller(service.loop, service.config)
    service_poller.start()

    service_manager = ServiceManager(services, init_system, service_poller,
                                     registered_dynamic_services,
                                     enabled_dynamic_services)

    # Start a background thread to stream updates from the cloud
    # (only constructed here; start() is called from bootstrap_success_cb)
    stream_client = None
    if service.config.get('enable_config_streamer', False):
        stream_client = StreamerClient(
            {
                CONFIG_STREAM_NAME: ConfigManager(
                    services, service_manager, service, MconfigManagerImpl(),
                ),
                MCONFIG_VIEW_STREAM_NAME: StreamingMconfigCallback(
                    services, service_manager, service,
                    StreamedMconfigManager(),
                )
            },
            service.loop,
        )

    # Schedule periodic checkins
    checkin_manager = CheckinManager(service, service_poller)

    # Create sync rpc client with a timeout of 60 seconds
    sync_rpc_client = None
    if service.config.get('enable_sync_rpc', False):
        sync_rpc_client = SyncRPCClient(service.loop, 60)

    # Flipped to False by bootstrap_success_cb after its first invocation
    first_time_bootstrap = True

    # This is called when bootstrap succeeds and when _bootstrap_check is
    # invoked but bootstrap is not needed. If it's invoked right after certs
    # are generated, certs_generated is true, control_proxy will restart.
    def bootstrap_success_cb(certs_generated):
        nonlocal first_time_bootstrap
        if first_time_bootstrap:
            if stream_client:
                stream_client.start()
            checkin_manager.try_checkin()
            if sync_rpc_client:
                sync_rpc_client.start()
            first_time_bootstrap = False
        if certs_generated and 'control_proxy' in services:
            # The restart is scheduled as a task on the service loop
            # rather than awaited here
            service.loop.create_task(
                service_manager.restart_services(services=['control_proxy']))

    # Create bootstrap manager
    bootstrap_manager = BootstrapManager(service, bootstrap_success_cb)

    # Forward checkin failures to the bootstrap manager
    def checkin_failure_cb(err_code):
        bootstrap_manager.on_checkin_fail(err_code)
    checkin_manager.set_failure_cb(checkin_failure_cb)

    # Start bootstrap_manager after checkin_manager's callback is set
    bootstrap_manager.start_bootstrap_manager()

    # Start all services when magmad comes up
    service.loop.create_task(service_manager.start_services())

    # Start upgrade manager loop
    if service.config.get('enable_upgrade_manager', False):
        upgrader = _get_upgrader_impl(service)
        service.loop.create_task(start_upgrade_loop(service, upgrader))

    # Start network health metric collection
    if service.config.get('enable_network_monitor', False):
        service.loop.create_task(metrics_collection_loop(service.config))

    # Start the systemd tailer task if it is enabled
    if service.config.get('enable_systemd_tailer', False):
        service.loop.create_task(start_systemd_tailer(service.config))

    # Start loop to monitor unattended upgrade status
    service.loop.create_task(monitor_unattended_upgrade_status(service.loop))

    # Add all servicers to the server
    magmad_servicer = MagmadRpcServicer(
        service,
        services, service_manager, get_mconfig_manager(),
        service.loop,
    )
    magmad_servicer.add_to_server(service.rpc_server)

    if SDWatchdog.has_notify():
        # Create systemd watchdog
        sdwatchdog = SDWatchdog(tasks=[bootstrap_manager, checkin_manager],
                                update_status=True)
        # Start watchdog loop
        service.loop.create_task(sdwatchdog.run())

    # Run the service loop
    service.run()

    # Cleanup the service
    service.close()
def main():
    """
    Main magmad function

    Creates the 'magmad' MagmaService and wires up, in order: the service
    poller and service manager, the metrics collector, the optional config
    streamer client, the checkin manager, the optional sync RPC client,
    the bootstrap manager, the state reporter, the optional upgrade,
    network-monitor and systemd tailer loops, the optional generic command
    executor, the RPC servicer and, when SDWatchdog.has_notify() is true,
    the systemd watchdog. It then runs the service loop and closes the
    service once the loop returns.
    """
    service = MagmaService('magmad', mconfigs_pb2.MagmaD())

    logging.info('Starting magmad for UUID: %s', snowflake.make_snowflake())

    # Create service manager
    services = service.config['magma_services']
    init_system = service.config.get('init_system', 'systemd')
    registered_dynamic_services = service.config.get(
        'registered_dynamic_services', [])
    enabled_dynamic_services = []
    if service.mconfig is not None:
        enabled_dynamic_services = service.mconfig.dynamic_services

    # Poll the services' Service303 interface
    service_poller = ServicePoller(service.loop, service.config)
    service_poller.start()

    service_manager = ServiceManager(services, init_system, service_poller,
                                     registered_dynamic_services,
                                     enabled_dynamic_services)

    # Get metrics service config
    # All keys except 'post_processing_fn' are required; a missing one
    # raises KeyError
    metrics_config = service.config['metricsd']
    metrics_services = metrics_config['services']
    collect_interval = metrics_config['collect_interval']
    sync_interval = metrics_config['sync_interval']
    grpc_timeout = metrics_config['grpc_timeout']
    queue_length = metrics_config['queue_length']
    metrics_post_processor_fn = metrics_config.get('post_processing_fn')

    # Create local metrics collector
    metrics_collector = MetricsCollector(
        metrics_services, collect_interval, sync_interval,
        grpc_timeout, queue_length, service.loop,
        get_metrics_postprocessor_fn(metrics_post_processor_fn),
    )

    # Poll and sync the metrics collector loops
    metrics_collector.run()

    # Start a background thread to stream updates from the cloud
    # (only constructed here; start() is called from bootstrap_success_cb)
    stream_client = None
    if service.config.get('enable_config_streamer', False):
        stream_client = StreamerClient(
            {
                CONFIG_STREAM_NAME: ConfigManager(
                    services, service_manager, service, MconfigManagerImpl(),
                ),
            },
            service.loop,
        )

    # Schedule periodic checkins
    checkin_manager = CheckinManager(service, service_poller)

    # Create sync rpc client with a heartbeat of 30 seconds (timeout = 60s)
    sync_rpc_client = None
    if service.config.get('enable_sync_rpc', False):
        sync_rpc_client = SyncRPCClient(service.loop, 30)

    # Flipped to False by bootstrap_success_cb after its first invocation
    first_time_bootstrap = True

    # This is called when bootstrap succeeds and when _bootstrap_check is
    # invoked but bootstrap is not needed. If it's invoked right after certs
    # are generated, certs_generated is true, control_proxy will restart.
    async def bootstrap_success_cb(certs_generated):
        nonlocal first_time_bootstrap
        if first_time_bootstrap:
            if stream_client:
                stream_client.start()
            await checkin_manager.try_checkin()
            if sync_rpc_client:
                sync_rpc_client.start()
            first_time_bootstrap = False
        if certs_generated and 'control_proxy' in services:
            # The restart is scheduled as a task on the service loop
            # rather than awaited here
            service.loop.create_task(
                service_manager.restart_services(services=['control_proxy']))

    # Create bootstrap manager
    bootstrap_manager = BootstrapManager(service, bootstrap_success_cb)

    # Forward checkin failures to the bootstrap manager
    async def checkin_failure_cb(err_code):
        await bootstrap_manager.on_checkin_fail(err_code)
    checkin_manager.set_failure_cb(checkin_failure_cb)

    # Start bootstrap_manager after checkin_manager's callback is set
    bootstrap_manager.start_bootstrap_manager()

    # Schedule periodic state reporting
    state_manager = StateReporter(service, checkin_manager)
    state_manager.start()

    # Start all services when magmad comes up
    service.loop.create_task(service_manager.start_services())

    # Start upgrade manager loop
    if service.config.get('enable_upgrade_manager', False):
        upgrader = _get_upgrader_impl(service)
        service.loop.create_task(start_upgrade_loop(service, upgrader))

    # Start network health metric collection
    if service.config.get('enable_network_monitor', False):
        service.loop.create_task(metrics_collection_loop(service.config))

    # Start the systemd tailer task if it is enabled
    if service.config.get('enable_systemd_tailer', False):
        service.loop.create_task(start_systemd_tailer(service.config))

    # Create generic command executor
    command_executor = None
    if service.config.get('generic_command_config', None):
        command_executor = get_command_executor_impl(service)

    # Start loop to monitor unattended upgrade status
    service.loop.create_task(monitor_unattended_upgrade_status(service.loop))

    # Add all servicers to the server
    magmad_servicer = MagmadRpcServicer(
        service,
        services, service_manager, get_mconfig_manager(), command_executor,
        service.loop,
    )
    magmad_servicer.add_to_server(service.rpc_server)

    if SDWatchdog.has_notify():
        # Create systemd watchdog
        sdwatchdog = SDWatchdog(tasks=[bootstrap_manager, checkin_manager],
                                update_status=True)
        # Start watchdog loop
        service.loop.create_task(sdwatchdog.run())

    # Run the service loop
    service.run()

    # Cleanup the service
    service.close()