def start(self):
    """Bring the machine agent up.

    Makes sure the unit directories exist on disk, starts the unit
    deployer, fetches the machine state for this agent's machine id,
    optionally watches the units assigned to the machine, and finally
    connects the agent.
    """
    # Create whichever state directories are still missing.
    for directory in (self.units_directory, self.unit_state_directory):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # State manager and deployer used for the lifetime of the agent.
    self.service_state_manager = ServiceStateManager(self.client)
    self.unit_deployer = UnitDeployer(
        self.client,
        self.get_machine_id(),
        self.config["juju_directory"])
    yield self.unit_deployer.start()

    # Look up the state of the machine this agent represents.
    state_manager = MachineStateManager(self.client)
    machine_id = self.get_machine_id()
    self.machine_state = yield state_manager.get_machine_state(machine_id)

    # Only subscribe to unit assignment changes when watches are enabled.
    if self.get_watch_enabled():
        self.machine_state.watch_assigned_units(self.watch_service_units)

    # Connecting the agent is what advertises its presence.
    yield self.machine_state.connect_agent()

    log.info("Machine agent started id:%s" % self.get_machine_id())
def get_ip_address_for_machine(client, provider, machine_id):
    """Look up the DNS name and machine state for a machine id.

    :param client: a connected zookeeper client.
    :param provider: the `MachineProvider` in charge of the juju.
    :param machine_id: machine ID of the desired machine to connect to.

    :return: tuple of the provider machine's ``dns_name`` and the
        `MachineState`, handed back through ``returnValue``.
    """
    # Machine id -> machine state -> provider instance id -> provider machine.
    state = yield MachineStateManager(client).get_machine_state(machine_id)
    provider_instance_id = yield state.get_instance_id()
    machine = yield provider.get_machine(provider_instance_id)
    returnValue((machine.dns_name, state))
def test_initialize(self):
    # After initializing the layout, check the top-level nodes, the
    # environment and machine 0 constraints, the machine's instance id,
    # the provider type and the logged message.
    yield self.layout.initialize()

    top_level_nodes = (
        "/charms", "/services", "/units", "/machines", "/relations",
        "/initialized")
    for node_path in top_level_nodes:
        yield self.assert_existence_and_acl(node_path)

    # To check that the constraints landed correctly, we need the
    # environment config to have been sent, or we won't be able to
    # get a provider to help us construct the appropriate objects.
    yield self.push_default_config(with_constraints=False)

    environment_manager = EnvironmentStateManager(self.client)
    env_constraints = yield environment_manager.get_constraints()
    self.assertEquals(env_constraints, {
        "provider-type": "dummy",
        "ubuntu-series": None,
        "arch": "arm",
        "cpu": None,
        "mem": 512})

    # Machine 0 is expected to carry a complete set of constraints.
    machine_manager = MachineStateManager(self.client)
    machine_state = yield machine_manager.get_machine_state(0)
    machine_constraints = yield machine_state.get_constraints()
    self.assertTrue(machine_constraints.complete)
    self.assertEquals(machine_constraints, {
        "provider-type": "dummy",
        "ubuntu-series": "cranky",
        "arch": "arm",
        "cpu": None,
        "mem": 512})

    instance_id = yield machine_state.get_instance_id()
    self.assertEqual(instance_id, "i-abcdef")

    settings_manager = GlobalSettingsStateManager(self.client)
    provider_type = yield settings_manager.get_provider_type()
    self.assertEqual(provider_type, "dummy")

    logged = self.log.getvalue().strip()
    self.assertEqual(logged, "Initializing zookeeper hierarchy")
def test_initialize(self):
    # After initializing the layout, check the top-level nodes, machine 0
    # and its instance id, the provider type and the logged message.
    yield self.layout.initialize()

    for node_path in ("/charms", "/services", "/units", "/machines",
                      "/relations", "/initialized"):
        yield self.assert_existence_and_acl(node_path)

    # Machine 0 must exist and carry the expected instance id.
    machine_manager = MachineStateManager(self.client)
    machine_state = yield machine_manager.get_machine_state(0)
    self.assertTrue(machine_state)
    instance_id = yield machine_state.get_instance_id()
    self.assertEqual(instance_id, "i-abcdef")

    settings_manager = GlobalSettingsStateManager(self.client)
    provider_type = yield settings_manager.get_provider_type()
    self.assertEqual(provider_type, "dummy")

    logged = self.log.getvalue().strip()
    self.assertEqual(logged, "Initializing zookeeper hierarchy")
def test_initialize(self):
    # Walk through everything layout initialization is checked for here:
    # top-level nodes, machine 0, provider type and the log output.
    yield self.layout.initialize()

    expected_nodes = [
        "/charms",
        "/services",
        "/units",
        "/machines",
        "/relations",
        "/initialized",
    ]
    for node_path in expected_nodes:
        yield self.assert_existence_and_acl(node_path)

    # Machine 0 must exist and carry the expected instance id.
    manager = MachineStateManager(self.client)
    machine_state = yield manager.get_machine_state(0)
    self.assertTrue(machine_state)
    machine_instance_id = yield machine_state.get_instance_id()
    self.assertEqual(machine_instance_id, "i-abcdef")

    settings_manager = GlobalSettingsStateManager(self.client)
    provider_type = yield settings_manager.get_provider_type()
    self.assertEqual(provider_type, "dummy")

    log_output = self.log.getvalue().strip()
    self.assertEqual(log_output, "Initializing zookeeper hierarchy")
def start(self):
    """Bring the machine agent up.

    Makes sure the charm and unit directories exist on disk, fetches the
    machine state for this agent's machine id, optionally watches the
    units assigned to the machine, resolves the deploy factory from the
    provider type, and finally connects the agent.
    """
    # Create whichever directories are still missing.
    required_directories = (
        self.charms_directory,
        self.units_directory,
        self.unit_state_directory)
    for directory in required_directories:
        if not os.path.exists(directory):
            os.makedirs(directory)

    # State managers used for the lifetime of the agent.
    self.service_state_manager = ServiceStateManager(self.client)
    self.charm_state_manager = CharmStateManager(self.client)

    # Look up the state of the machine this agent represents.
    state_manager = MachineStateManager(self.client)
    machine_id = self.get_machine_id()
    self.machine_state = yield state_manager.get_machine_state(machine_id)

    # Only subscribe to unit assignment changes when watches are enabled.
    if self.get_watch_enabled():
        self.machine_state.watch_assigned_units(self.watch_service_units)

    # The provider type decides which deploy factory is used for units.
    global_settings = GlobalSettingsStateManager(self.client)
    self.provider_type = yield global_settings.get_provider_type()
    self.deploy_factory = get_deploy_factory(self.provider_type)

    # Connecting the agent is what advertises its presence.
    yield self.machine_state.connect_agent()

    log.info("Machine agent started id:%s deploy:%r provider:%r" % (
        self.get_machine_id(), self.deploy_factory, self.provider_type))
def start(self):
    """Start the machine agent.

    Ensures the charm, unit and unit-state directories exist, loads the
    machine state for this agent, sets up the assigned-units watch when
    watches are enabled, picks a deploy factory based on the provider
    type, and connects the agent.
    """
    # Directory setup: only create what is not already there.
    for path in (self.charms_directory,
                 self.units_directory,
                 self.unit_state_directory):
        if not os.path.exists(path):
            os.makedirs(path)

    # State managers this agent relies on.
    self.service_state_manager = ServiceStateManager(self.client)
    self.charm_state_manager = CharmStateManager(self.client)

    # Machine state of the machine this agent stands for.
    machines = MachineStateManager(self.client)
    own_machine_id = self.get_machine_id()
    self.machine_state = yield machines.get_machine_state(own_machine_id)

    # Unit assignment watch is optional.
    if self.get_watch_enabled():
        self.machine_state.watch_assigned_units(self.watch_service_units)

    # Provider type -> deploy factory.
    settings_manager = GlobalSettingsStateManager(self.client)
    self.provider_type = yield settings_manager.get_provider_type()
    self.deploy_factory = get_deploy_factory(self.provider_type)

    # Connect the agent, making its presence visible.
    yield self.machine_state.connect_agent()

    started_message = "Machine agent started id:%s deploy:%r provider:%r" % (
        self.get_machine_id(), self.deploy_factory, self.provider_type)
    log.info(started_message)
def constraints_get(env_config, environment, entity_names, log):
    """
    Show the complete set of applicable constraints for each specified entity.

    This will show the final computed values of all constraints (including
    internal constraints which cannot be set directly via set-constraints).
    """
    # NOTE(review): docstring wording is left untouched in case it is
    # surfaced as command help text elsewhere -- confirm before rewording.
    provider = environment.get_machine_provider()
    client = yield provider.connect()
    try:
        yield sync_environment_state(client, env_config, environment.name)

        if not entity_names:
            # No entities requested: report the environment's constraints.
            environment_manager = EnvironmentStateManager(client)
            log.info("Fetching constraints for environment")
            constraints = yield environment_manager.get_constraints()
            result = dict(constraints)
        else:
            result = {}
            machine_manager = MachineStateManager(client)
            service_manager = ServiceStateManager(client)
            for name in entity_names:
                # All digits -> machine; contains "/" -> service unit;
                # anything else -> service.
                if name.isdigit():
                    kind = "machine"
                    entity = yield machine_manager.get_machine_state(name)
                elif "/" in name:
                    kind = "service unit"
                    entity = yield service_manager.get_unit_state(name)
                else:
                    kind = "service"
                    entity = yield service_manager.get_service_state(name)
                log.info("Fetching constraints for %s %s", kind, name)
                constraints = yield entity.get_constraints()
                result[name] = dict(constraints)

        yaml.safe_dump(result, sys.stdout)
    finally:
        # Always release the connection, even when a lookup failed.
        yield client.close()
class ProvisioningAgent(BaseAgent):
    """Agent keeping provider machines in line with the machine states.

    When watches are enabled it reacts to machine and service state
    changes; it also re-checks the last known set of machines on a timer.
    Machine states without a running provider machine get one started,
    provider machines that no processed machine state refers to are shut
    down, and every processed machine is handed to the firewall manager.
    """

    name = "juju-provisoning-agent"

    # Machine ids seen by the most recent machine watch callback.
    _current_machines = ()

    # time in seconds
    machine_check_period = 60

    def get_agent_name(self):
        """Return the agent name, derived from the environment type."""
        return "provision:%s" % (self.environment.type)

    @inlineCallbacks
    def start(self):
        """Configure the environment and, if enabled, install the watches."""
        self._running = True

        self.environment = yield self.configure_environment()
        self.provider = self.environment.get_machine_provider()
        self.machine_state_manager = MachineStateManager(self.client)
        self.service_state_manager = ServiceStateManager(self.client)
        self.firewall_manager = FirewallManager(
            self.client, self.is_running, self.provider)

        if not self.get_watch_enabled():
            log.info("Started provisioning agent without watches enabled")
            return

        self.machine_state_manager.watch_machine_states(
            self.watch_machine_changes)
        self.service_state_manager.watch_service_states(
            self.firewall_manager.watch_service_changes)

        # Kick off the periodic verification of machines.
        from twisted.internet import reactor
        reactor.callLater(
            self.machine_check_period, self.periodic_machine_check)

        log.info("Started provisioning agent")

    def stop(self):
        """Mark the agent as stopped; returns an already-fired deferred."""
        log.info("Stopping provisioning agent")
        self._running = False
        return succeed(True)

    def is_running(self):
        """Tell whether the agent is currently running."""
        return self._running

    @inlineCallbacks
    def configure_environment(self):
        """Load the environment configuration, waiting for it if needed.

        The environment contains the configuration the agent needs to
        interact with its machine provider, in order to do its work. This
        configuration data is deployed lazily over an encrypted connection
        upon first usage.

        The agent waits for this data to exist before completing its
        startup. A watch on the node reloads the environment when its
        data changes.
        """
        try:
            get_d, watch_d = self.client.get_and_watch("/environment")
            environment_data, stat = yield get_d
            watch_d.addCallback(self._on_environment_changed)
        except NoNodeException:
            # The environment node is not there yet: either it showed up
            # in the meantime (retry right away), or we retry once the
            # exists-watch fires.
            exists_d, watch_d = self.client.exists_and_watch("/environment")
            stat = yield exists_d
            if stat:
                environment = yield self.configure_environment()
            else:
                watch_d.addCallback(
                    lambda result: self.configure_environment())
                environment = yield watch_d
            returnValue(environment)

        config = EnvironmentsConfig()
        config.parse(environment_data)
        returnValue(config.get_default())

    @inlineCallbacks
    def _on_environment_changed(self, event):
        """Reload environment and provider unless the node was deleted."""
        if event.type_name == "deleted":
            return
        self.environment = yield self.configure_environment()
        self.provider = self.environment.get_machine_provider()

    def periodic_machine_check(self):
        """Re-process the last known machines, then reschedule itself.

        In addition to the on demand changes to zookeeper states that are
        monitored by L{watch_machine_changes}, the periodic machine check
        performs non zookeeper state related verification by periodically
        checking the current provider machines against the last known
        zookeeper state.

        Primarily this helps in recovering from transient error conditions
        which may have prevented processing of an individual machine
        state, as well as verifying the current state of the provider's
        running machines against the zk state, thus pruning unused
        resources.
        """
        from twisted.internet import reactor

        def reschedule(result):
            # Runs on success and on failure alike (added via addBoth).
            return reactor.callLater(
                self.machine_check_period, self.periodic_machine_check)

        d = self.process_machines(self._current_machines)
        d.addBoth(reschedule)
        return d

    @inlineCallbacks
    def watch_machine_changes(self, old_machines, new_machines):
        """Watch callback: process the machines of the new topology.

        This function is used to subscribe to topology changes, and
        specifically changes to machines within the topology. It performs
        work against the machine provider to ensure that the currently
        running state of the juju cluster corresponds to the topology via
        creation and deletion of machines.

        The subscription utilized is a permanent one, meaning that this
        function will automatically be rescheduled to run whenever a
        topology state change happens that involves machines.

        This function also caches the current set of machines as an agent
        instance attribute.

        @param old_machines machine ids as existed in the previous topology.
        @param new_machines machine ids as exist in the current topology.
        """
        if not self._running:
            raise StopWatcher()

        log.debug("Machines changed old:%s new:%s", old_machines, new_machines)
        self._current_machines = new_machines

        try:
            yield self.process_machines(self._current_machines)
        except Exception:
            # Log and effectively retry later in periodic_machine_check
            log.exception(
                "Got unexpected exception in processing machines,"
                " will retry")

    @concurrent_execution_guard("_processing_machines")
    @inlineCallbacks
    def process_machines(self, current_machines):
        """Ensure the currently running machines correspond to state.

        Every machine state id is processed individually; afterwards all
        provider machines whose instance id was not returned by that
        processing are shut down.

        Wrapped in ``concurrent_execution_guard("_processing_machines")``.
        """
        # XXX this is obviously broken, but the margins of 80 columns prevent
        # me from describing. hint think concurrent agents, and use a lock.

        try:
            provider_machines = yield self.provider.get_machines()
        except ProviderError:
            log.exception("Cannot get machine list")
            return

        # map of instance_id -> machine
        provider_machines = dict(
            (machine.instance_id, machine) for machine in provider_machines)

        instance_ids = []
        for machine_state_id in current_machines:
            try:
                instance_id = yield self.process_machine(
                    machine_state_id, provider_machines)
            except (MachineStateNotFound, ProviderError):
                log.exception("Cannot process machine %s", machine_state_id)
            else:
                instance_ids.append(instance_id)

        # Terminate all unused juju machines running within the cluster.
        unused = set(provider_machines) - set(instance_ids)
        for instance_id in unused:
            log.info("Shutting down machine id:%s ...", instance_id)
            machine = provider_machines[instance_id]
            try:
                yield self.provider.shutdown_machine(machine)
            except ProviderError:
                log.exception("Cannot shutdown machine %s", instance_id)

    @inlineCallbacks
    def process_machine(self, machine_state_id, provider_machine_map):
        """Ensure a provider machine exists for a machine state id.

        * If the machine state has no instance id, or that instance id is
          not among the provider machines, start a machine with the
          state's constraints and record the new instance id.

        * Hand the machine state to the firewall manager.

        Returns (through ``returnValue``) the machine's instance id.
        """
        # fetch the machine state
        machine_state = yield self.machine_state_manager.get_machine_state(
            machine_state_id)
        instance_id = yield machine_state.get_instance_id()

        # Verify a machine id has state and is running, else launch it.
        needs_launch = (
            instance_id is None or instance_id not in provider_machine_map)
        if needs_launch:
            log.info("Starting machine id:%s ...", machine_state.id)
            constraints = yield machine_state.get_constraints()
            machines = yield self.provider.start_machine(
                {"machine-id": machine_state.id, "constraints": constraints})
            instance_id = machines[0].instance_id
            yield machine_state.set_instance_id(instance_id)

        # The firewall manager also needs to be checked for any
        # outstanding retries on this machine
        yield self.firewall_manager.process_machine(machine_state)
        returnValue(instance_id)
class MachineStateManagerTest(StateTestBase): @inlineCallbacks def setUp(self): yield super(MachineStateManagerTest, self).setUp() self.charm_state_manager = CharmStateManager(self.client) self.machine_state_manager = MachineStateManager(self.client) self.service_state_manager = ServiceStateManager(self.client) self.charm_state = yield self.charm_state_manager.add_charm_state( local_charm_id(self.charm), self.charm, "") @inlineCallbacks def add_service(self, service_name): service_state = yield self.service_state_manager.add_service_state( service_name, self.charm_state) returnValue(service_state) @inlineCallbacks def test_add_machine(self): """ Adding a machine state should register it in zookeeper. """ machine_state1 = yield self.machine_state_manager.add_machine_state() machine_state2 = yield self.machine_state_manager.add_machine_state() self.assertEquals(machine_state1.id, 0) self.assertEquals(machine_state1.internal_id, "machine-0000000000") self.assertEquals(machine_state2.id, 1) self.assertEquals(machine_state2.internal_id, "machine-0000000001") children = yield self.client.get_children("/machines") self.assertEquals(sorted(children), ["machine-0000000000", "machine-0000000001"]) topology = yield self.get_topology() self.assertTrue(topology.has_machine("machine-0000000000")) self.assertTrue(topology.has_machine("machine-0000000001")) @inlineCallbacks def test_machine_str_representation(self): """The str(machine) value includes the machine id. """ machine_state1 = yield self.machine_state_manager.add_machine_state() self.assertEqual( str(machine_state1), "<MachineState id:machine-%010d>" % (0)) @inlineCallbacks def test_remove_machine(self): """ Adding a machine state should register it in zookeeper. 
""" machine_state1 = yield self.machine_state_manager.add_machine_state() yield self.machine_state_manager.add_machine_state() removed = yield self.machine_state_manager.remove_machine_state( machine_state1.id) self.assertTrue(removed) children = yield self.client.get_children("/machines") self.assertEquals(sorted(children), ["machine-0000000001"]) topology = yield self.get_topology() self.assertFalse(topology.has_machine("machine-0000000000")) self.assertTrue(topology.has_machine("machine-0000000001")) # Removing a non-existing machine again won't fail, since the end # intention is preserved. This makes dealing with concurrency easier. # However, False will be returned in this case. removed = yield self.machine_state_manager.remove_machine_state( machine_state1.id) self.assertFalse(removed) @inlineCallbacks def test_remove_machine_with_agent(self): """Removing a machine with a connected machine agent should succeed. The removal signals intent to remove a working machine (with an agent) with the provisioning agent to remove it subsequently. """ # Add two machines. machine_state1 = yield self.machine_state_manager.add_machine_state() yield self.machine_state_manager.add_machine_state() # Connect an agent yield machine_state1.connect_agent() # Remove a machine removed = yield self.machine_state_manager.remove_machine_state( machine_state1.id) self.assertTrue(removed) # Verify the second one is still present children = yield self.client.get_children("/machines") self.assertEquals(sorted(children), ["machine-0000000001"]) # Verify the topology state. topology = yield self.get_topology() self.assertFalse(topology.has_machine("machine-0000000000")) self.assertTrue(topology.has_machine("machine-0000000001")) @inlineCallbacks def test_get_machine_and_check_attributes(self): """ Getting a machine state should be possible using both the user-oriented id and the internal id. 
""" yield self.machine_state_manager.add_machine_state() yield self.machine_state_manager.add_machine_state() machine_state = yield self.machine_state_manager.get_machine_state(0) self.assertEquals(machine_state.id, 0) machine_state = yield self.machine_state_manager.get_machine_state("0") self.assertEquals(machine_state.id, 0) yield self.assertFailure( self.machine_state_manager.get_machine_state("a"), MachineStateNotFound) @inlineCallbacks def test_get_machine_not_found(self): """ Getting a machine state which is not available should errback a meaningful error. """ # No state whatsoever. try: yield self.machine_state_manager.get_machine_state(0) except MachineStateNotFound, e: self.assertEquals(e.machine_id, 0) else:
class FirewallManager(object):
    """Manages the opening and closing of ports in the firewall.

    The manager installs watches on machines, on each service's exposed
    flag, on the units of exposed services and on those units' ports.
    Any change ends up in `open_close_ports_on_machine`, which brings the
    ports opened at the provider in line with the ports that exposed
    services' units on that machine have open.
    """

    def __init__(self, client, is_running, provider):
        """Initialize a Firewall Manager.

        :param client: A connected zookeeper client.

        :param is_running: A function (usually a bound method) that
            returns whether the associated agent is still running or
            not.

        :param provider: A machine provider, used for making the
            actual changes in the environment to firewall settings.
        """
        self.machine_state_manager = MachineStateManager(client)
        self.service_state_manager = ServiceStateManager(client)
        self.is_running = is_running
        self.provider = provider

        # Track all currently watched machines, using machine ID.
        self._watched_machines = set()

        # Map service name to either NotExposed or set of exposed unit names.
        # If a service name is present in the dictionary, it means its
        # respective expose node is being watched.
        self._watched_services = {}

        # Machines to retry open_close_ports because of earlier errors
        self._retry_machines_on_port_error = set()

        # Registration of observers for corresponding actions
        self._open_close_ports_observers = set()
        self._open_close_ports_on_machine_observers = set()

    @inlineCallbacks
    def process_machine(self, machine_state):
        """Ensures watch is setup per machine and performs any necessary retry.

        :param machine_state: The machine state of the machine to be checked.

        The watch that is established, via
        :method:`juju.state.machine.MachineState.watch_assigned_units`,
        handles the scenario where a service or service unit is
        removed from the topology. Because the service unit is no
        longer in the topology, the corresponding watch terminates and
        is unable to `open_close_ports` in response to the
        change. However, the specific machine watch will be called in
        this case, and that suffices to determine that its port policy
        should be checked.

        In addition, this method can rely on the fact that the
        provisioning agent periodically rechecks machines so as to
        support retries of security group operations that failed for
        that provider. This method is called by the corresponding
        :method:`juju.agents.provision.ProvisioningAgent.process_machine`
        in the provisioning agent.
        """
        # A machine lands in the retry set when an earlier
        # open_close_ports_on_machine call hit an unexpected exception.
        if machine_state.id in self._retry_machines_on_port_error:
            self._retry_machines_on_port_error.remove(machine_state.id)
            try:
                yield self.open_close_ports_on_machine(machine_state.id)
            except StopWatcher:
                # open_close_ports_on_machine can also be called from
                # a watch, so simply ignore this since it's just used
                # to shutdown a watch in the case of agent shutdown
                pass

        def cb_watch_assigned_units(old_units, new_units):
            """Watch assigned units for changes that possibly require
            port mgmt.
            """
            log.debug("Assigned units for machine %r: old=%r, new=%r",
                      machine_state.id, old_units, new_units)
            return self.open_close_ports_on_machine(machine_state.id)

        # Install the assigned-units watch at most once per machine id.
        if machine_state.id not in self._watched_machines:
            self._watched_machines.add(machine_state.id)
            yield machine_state.watch_assigned_units(cb_watch_assigned_units)

    @inlineCallbacks
    def watch_service_changes(self, old_services, new_services):
        """Manage watching service exposed status.

        This method is called upon every change to the set of services
        currently deployed. All services are then watched for changes
        to their exposed flag setting.

        :param old_services: the set of services before this change.
        :param new_services: the current set of services.
        """
        # Forget removed services; callbacks that still fire for them see
        # no entry (i.e. NotExposed) when they look the service up.
        removed_services = old_services - new_services
        for service_name in removed_services:
            self._watched_services.pop(service_name, None)
        # Services that are already watched are skipped by the helper.
        for service_name in new_services:
            yield self._setup_new_service_watch(service_name)

    @inlineCallbacks
    def _setup_new_service_watch(self, service_name):
        """Sets up the watching of the exposed flag for a new service.

        If `service_name` is not watched (as known by
        `self._watched_services`), adds the watch and a corresponding
        entry in self._watched_services.

        (This dict is necessary because there is currently no way to
        introspect a service for whether it is watched or not.)
        """
        if service_name in self._watched_services:
            return  # already watched
        # Register the entry before the first yield below.
        self._watched_services[service_name] = NotExposed
        try:
            service_state = yield self.service_state_manager.get_service_state(
                service_name)
        except ServiceStateNotFound:
            log.debug("Cannot setup watch, since service %r no longer exists",
                      service_name)
            # Undo the registration made above.
            self._watched_services.pop(service_name, None)
            return

        @inlineCallbacks
        def cb_watch_service_exposed_flag(exposed):
            # Raising StopWatcher is how this callback asks for the watch
            # to be shut down.
            if not self.is_running():
                raise StopWatcher()

            # NOTE(review): the exposed/unexposed state is logged here and
            # once more further down, after the units were processed.
            if exposed:
                log.debug("Service %r is exposed", service_name)
            else:
                log.debug("Service %r is unexposed", service_name)

            try:
                unit_states = yield service_state.get_all_unit_states()
            except StateChanged:
                log.debug("Stopping watch on %r, no longer in topology",
                          service_name)
                raise StopWatcher()
            # Re-evaluate the ports of every unit of the service.
            for unit_state in unit_states:
                yield self.open_close_ports(unit_state)

            if not exposed:
                log.debug("Service %r is unexposed", service_name)
                self._watched_services[service_name] = NotExposed
            else:
                log.debug("Service %r is exposed", service_name)
                # A fresh, empty set of watched unit names; it is filled
                # by the service unit watch set up next.
                self._watched_services[service_name] = set()
                yield self._setup_service_unit_watch(service_state)

        yield service_state.watch_exposed_flag(cb_watch_service_exposed_flag)
        log.debug("Started watch of %r on changes to being exposed",
                  service_name)

    @inlineCallbacks
    def _setup_service_unit_watch(self, service_state):
        """Setup watches on service units of the newly exposed service.

        :param service_state: state of the service whose units are to be
            watched; its `service_name` is the key looked up in
            `self._watched_services`.
        """
        @inlineCallbacks
        def cb_check_service_units(old_service_units, new_service_units):
            # A missing entry is treated like NotExposed: in both cases
            # the watch is stopped.
            watched_units = self._watched_services.get(
                service_state.service_name, NotExposed)
            if not self.is_running() or watched_units is NotExposed:
                raise StopWatcher()

            removed_service_units = old_service_units - new_service_units
            for unit_name in removed_service_units:
                watched_units.discard(unit_name)
                # Re-checked on every iteration, since the loop body
                # yields.
                if not self.is_running():
                    raise StopWatcher()
                try:
                    unit_state = yield service_state.get_unit_state(unit_name)
                except (ServiceUnitStateNotFound, StateChanged):
                    log.debug("Not setting up watch on %r, not in topology",
                              unit_name)
                    continue
                yield self.open_close_ports(unit_state)

            # Start a ports watch for each unit that is not watched yet.
            for unit_name in new_service_units:
                if unit_name not in watched_units:
                    watched_units.add(unit_name)
                    yield self._setup_watch_ports(service_state, unit_name)

        yield service_state.watch_service_unit_states(cb_check_service_units)
        log.debug("Started watch of service units for exposed service %r",
                  service_state.service_name)

    @inlineCallbacks
    def _setup_watch_ports(self, service_state, unit_name):
        """Setup the watching of ports for `unit_name`.

        Does nothing besides logging when the unit state cannot be
        retrieved from `service_state`.
        """
        try:
            unit_state = yield service_state.get_unit_state(unit_name)
        except (ServiceUnitStateNotFound, StateChanged):
            log.debug("Cannot setup watch on %r (no longer exists), ignoring",
                      unit_name)
            return

        @inlineCallbacks
        def cb_watch_ports(value):
            """Permanently watch ports until service is no longer exposed."""
            watched_units = self._watched_services.get(
                service_state.service_name, NotExposed)
            # Stop when the agent stopped, the service is not (or no
            # longer) exposed, or the unit was dropped from the watched
            # set.
            if (not self.is_running() or watched_units is NotExposed or
                unit_name not in watched_units):
                log.debug("Stopping ports watch for %r", unit_name)
                raise StopWatcher()
            yield self.open_close_ports(unit_state)

        yield unit_state.watch_ports(cb_watch_ports)
        log.debug("Started watch of %r on changes to open ports", unit_name)

    def add_open_close_ports_observer(self, observer):
        """Set `observer` for calls to `open_close_ports`.

        :param observer: The callback is called with the corresponding
            :class:`juju.state.service.UnitState`.
        """
        self._open_close_ports_observers.add(observer)

    @inlineCallbacks
    def open_close_ports(self, unit_state):
        """Called upon changes that *may* open/close ports for a service unit.

        :param unit_state: state of the service unit; when it is assigned
            to a machine, that machine's ports are re-evaluated.

        Raises `StopWatcher` when the agent is no longer running or when
        looking up the assigned machine raises `StateChanged`.
        """
        if not self.is_running():
            raise StopWatcher()
        try:
            try:
                machine_id = yield unit_state.get_assigned_machine_id()
            except StateChanged:
                log.debug("Stopping watch, machine %r no longer in topology",
                          unit_state.unit_name)
                raise StopWatcher()
            # A machine id of None skips the port check.
            if machine_id is not None:
                yield self.open_close_ports_on_machine(machine_id)
        finally:
            # Ensure that the observations runs after the
            # corresponding action completes.  In particular, tests
            # that use observation depend on this ordering to ensure
            # that the action has in fact happened before they can
            # proceed.
            # Iterate over a copy of the registered observers.
            observers = list(self._open_close_ports_observers)
            for observer in observers:
                yield observer(unit_state)

    def add_open_close_ports_on_machine_observer(self, observer):
        """Add `observer` for calls to `open_close_ports_on_machine`.

        :param observer: A callback receives the machine id for each call.
        """
        self._open_close_ports_on_machine_observers.add(observer)

    @inlineCallbacks
    def open_close_ports_on_machine(self, machine_id):
        """Called upon changes that *may* open/close ports for a machine.

        :param machine_id: The machine ID of the machine that needs to
             be checked.

        This method supports multiple service units being assigned to a
        machine; all service units are checked each time this is
        called to determine the active set of ports to be opened.

        Raises `StopWatcher` when the agent is no longer running. Any
        other exception is logged and the machine id is remembered for a
        later retry, except for `MachinesNotFound`, which is only logged.
        """
        if not self.is_running():
            raise StopWatcher()
        try:
            machine_state = yield self.machine_state_manager.get_machine_state(
                machine_id)
            instance_id = yield machine_state.get_instance_id()
            machine = yield self.provider.get_machine(instance_id)
            unit_states = yield machine_state.get_all_service_unit_states()
            # Desired ports: the (port, proto) pairs opened by units of
            # exposed services on this machine.
            policy_ports = set()
            for unit_state in unit_states:
                service_state = yield self.service_state_manager.\
                    get_service_state(unit_state.service_name)
                exposed = yield service_state.get_exposed_flag()
                if exposed:
                    ports = yield unit_state.get_open_ports()
                    for port in ports:
                        policy_ports.add((port["port"], port["proto"]))
            # The set differences below assume the provider returns a set
            # of (port, proto) pairs here -- TODO confirm.
            current_ports = yield self.provider.get_opened_ports(
                machine, machine_id)
            to_open = policy_ports - current_ports
            to_close = current_ports - policy_ports
            for port, proto in to_open:
                yield self.provider.open_port(machine, machine_id, port, proto)
            for port, proto in to_close:
                yield self.provider.close_port(
                    machine, machine_id, port, proto)
        except MachinesNotFound:
            log.info("No provisioned machine for machine %r", machine_id)
        except Exception:
            log.exception("Got exception in opening/closing ports, will retry")
            # Picked up again by process_machine for this machine id.
            self._retry_machines_on_port_error.add(machine_id)
        finally:
            # Ensure that the observation runs after the corresponding
            # action completes.  In particular, tests that use
            # observation depend on this ordering to ensure that this
            # action has happened before they can proceed.
            # Iterate over a copy of the registered observers.
            observers = list(self._open_close_ports_on_machine_observers)
            for observer in observers:
                yield observer(machine_id)
class FirewallManager(object):
    """Manages the opening and closing of ports in the firewall.

    The manager sets up watches on machines, on the exposed flag of
    services, on the units of exposed services and on the open ports of
    those units. Whenever one of those watches fires, the port policy of
    the affected machine is recomputed and the provider is asked to open
    or close ports so the machine matches that policy.
    """

    def __init__(self, client, is_running, provider):
        """Initialize a Firewall Manager.

        :param client: A connected zookeeper client.

        :param is_running: A function (usually a bound method) that
            returns whether the associated agent is still running or
            not.

        :param provider: A machine provider, used for making the
            actual changes in the environment to firewall settings.
        """
        self.machine_state_manager = MachineStateManager(client)
        self.service_state_manager = ServiceStateManager(client)
        self.is_running = is_running
        self.provider = provider

        # Track all currently watched machines, using machine ID.
        self._watched_machines = set()

        # Map service name to either NotExposed or set of exposed unit
        # names. If a service name is present in the dictionary, it
        # means its respective expose node is being watched.
        self._watched_services = {}

        # Machines to retry open_close_ports because of earlier errors
        self._retry_machines_on_port_error = set()

        # Registration of observers for corresponding actions
        self._open_close_ports_observers = set()
        self._open_close_ports_on_machine_observers = set()

    @inlineCallbacks
    def process_machine(self, machine_state):
        """Ensures watch is setup per machine and performs any necessary retry.

        :param machine_state: The machine state of the machine to be checked.

        The watch that is established, via
        :method:`juju.state.machine.MachineState.watch_assigned_units`,
        handles the scenario where a service or service unit is
        removed from the topology. Because the service unit is no
        longer in the topology, the corresponding watch terminates and
        is unable to `open_close_ports` in response to the
        change. However, the specific machine watch will be called in
        this case, and that suffices to determine that its port policy
        should be checked.

        In addition, this method can rely on the fact that the
        provisioning agent periodically rechecks machines so as to
        support retries of security group operations that failed for
        that provider. This method is called by the corresponding
        :method:`juju.agents.provision.ProvisioningAgent.process_machine`
        in the provisioning agent.
        """
        machine_id = machine_state.id

        if machine_id in self._retry_machines_on_port_error:
            # Clear the retry marker first; a new failure inside
            # open_close_ports_on_machine re-adds the machine.
            self._retry_machines_on_port_error.remove(machine_id)
            try:
                yield self.open_close_ports_on_machine(machine_id)
            except StopWatcher:
                # open_close_ports_on_machine can also be called from
                # a watch, so simply ignore this since it's just used
                # to shutdown a watch in the case of agent shutdown
                pass

        def cb_watch_assigned_units(old_units, new_units):
            """Watch assigned units for changes possibly require port mgmt.
            """
            log.debug("Assigned units for machine %r: old=%r, new=%r",
                      machine_id, old_units, new_units)
            return self.open_close_ports_on_machine(machine_id)

        if machine_id not in self._watched_machines:
            # Mark the machine as watched before yielding, so another
            # process_machine call made while the watch is being
            # established does not set up a second watch.
            self._watched_machines.add(machine_id)
            yield machine_state.watch_assigned_units(cb_watch_assigned_units)

    @inlineCallbacks
    def watch_service_changes(self, old_services, new_services):
        """Manage watching service exposed status.

        This method is called upon every change to the set of services
        currently deployed. All services are then watched for changes
        to their exposed flag setting.

        :param old_services: the set of services before this change.
        :param new_services: the current set of services.
        """
        # Forget services that went away; their callbacks look the
        # service up in `_watched_services` and stop once it is missing.
        for service_name in old_services - new_services:
            self._watched_services.pop(service_name, None)

        for service_name in new_services:
            yield self._setup_new_service_watch(service_name)

    @inlineCallbacks
    def _setup_new_service_watch(self, service_name):
        """Sets up the watching of the exposed flag for a new service.

        If `service_name` is not watched (as known by
        `self._watched_services`), adds the watch and a corresponding
        entry in self._watched_services.

        (This dict is necessary because there is currently no way to
        introspect a service for whether it is watched or not.)
        """
        if service_name in self._watched_services:
            return  # already watched

        # Register before yielding so a second call for the same
        # service, made while the state lookup is pending, is a no-op.
        self._watched_services[service_name] = NotExposed
        try:
            service_state = yield self.service_state_manager.get_service_state(
                service_name)
        except ServiceStateNotFound:
            log.debug("Cannot setup watch, since service %r no longer exists",
                      service_name)
            self._watched_services.pop(service_name, None)
            return

        @inlineCallbacks
        def cb_watch_service_exposed_flag(exposed):
            if not self.is_running():
                raise StopWatcher()

            # Log the new status once, before the ports are reconciled.
            if exposed:
                log.debug("Service %r is exposed", service_name)
            else:
                log.debug("Service %r is unexposed", service_name)

            try:
                unit_states = yield service_state.get_all_unit_states()
            except StateChanged:
                log.debug("Stopping watch on %r, no longer in topology",
                          service_name)
                raise StopWatcher()
            for unit_state in unit_states:
                yield self.open_close_ports(unit_state)

            if exposed:
                # A fresh set: unit watches are (re)established below.
                self._watched_services[service_name] = set()
                yield self._setup_service_unit_watch(service_state)
            else:
                # Unit and port watches see NotExposed and stop.
                self._watched_services[service_name] = NotExposed

        yield service_state.watch_exposed_flag(cb_watch_service_exposed_flag)
        log.debug("Started watch of %r on changes to being exposed",
                  service_name)

    @inlineCallbacks
    def _setup_service_unit_watch(self, service_state):
        """Setup watches on service units of newly exposed `service_name`."""

        @inlineCallbacks
        def cb_check_service_units(old_service_units, new_service_units):
            watched_units = self._watched_services.get(
                service_state.service_name, NotExposed)
            if not self.is_running() or watched_units is NotExposed:
                raise StopWatcher()

            # Units that disappeared: stop tracking them and re-check the
            # ports of the machine they were assigned to.
            removed_service_units = old_service_units - new_service_units
            for unit_name in removed_service_units:
                watched_units.discard(unit_name)
                if not self.is_running():
                    raise StopWatcher()
                try:
                    unit_state = yield service_state.get_unit_state(unit_name)
                except (ServiceUnitStateNotFound, StateChanged):
                    log.debug("Not setting up watch on %r, not in topology",
                              unit_name)
                    continue
                yield self.open_close_ports(unit_state)

            # Units not yet tracked get a ports watch.
            for unit_name in new_service_units:
                if unit_name not in watched_units:
                    watched_units.add(unit_name)
                    yield self._setup_watch_ports(service_state, unit_name)

        yield service_state.watch_service_unit_states(cb_check_service_units)
        log.debug("Started watch of service units for exposed service %r",
                  service_state.service_name)

    @inlineCallbacks
    def _setup_watch_ports(self, service_state, unit_name):
        """Setup the watching of ports for `unit_name`."""
        try:
            unit_state = yield service_state.get_unit_state(unit_name)
        except (ServiceUnitStateNotFound, StateChanged):
            log.debug("Cannot setup watch on %r (no longer exists), ignoring",
                      unit_name)
            return

        @inlineCallbacks
        def cb_watch_ports(value):
            """Permanently watch ports until service is no longer exposed."""
            watched_units = self._watched_services.get(
                service_state.service_name, NotExposed)
            if (not self.is_running() or watched_units is NotExposed or
                unit_name not in watched_units):
                log.debug("Stopping ports watch for %r", unit_name)
                raise StopWatcher()
            yield self.open_close_ports(unit_state)

        yield unit_state.watch_ports(cb_watch_ports)
        log.debug("Started watch of %r on changes to open ports", unit_name)

    def add_open_close_ports_observer(self, observer):
        """Set `observer` for calls to `open_close_ports`.

        :param observer: The callback is called with the corresponding
            :class:`juju.state.service.UnitState`.
        """
        self._open_close_ports_observers.add(observer)

    @inlineCallbacks
    def open_close_ports(self, unit_state):
        """Called upon changes that *may* open/close ports for a service unit.

        :param unit_state: The unit state whose assigned machine should
            have its ports checked.

        :raises StopWatcher: if the agent is no longer running, or if
            looking up the assigned machine raises `StateChanged`.

        Registered `open_close_ports` observers are called with
        `unit_state` once the attempt has finished, including when it
        ended with an exception. They are not called when the agent is
        not running.
        """
        if not self.is_running():
            raise StopWatcher()
        try:
            try:
                machine_id = yield unit_state.get_assigned_machine_id()
            except StateChanged:
                # NOTE(review): the message says "machine" but the value
                # logged is the unit name.
                log.debug("Stopping watch, machine %r no longer in topology",
                          unit_state.unit_name)
                raise StopWatcher()
            if machine_id is not None:
                yield self.open_close_ports_on_machine(machine_id)
        finally:
            # Ensure that the observations runs after the
            # corresponding action completes.  In particular, tests
            # that use observation depend on this ordering to ensure
            # that the action has in fact happened before they can
            # proceed.
            observers = list(self._open_close_ports_observers)
            for observer in observers:
                yield observer(unit_state)

    def add_open_close_ports_on_machine_observer(self, observer):
        """Add `observer` for calls to `open_close_ports_on_machine`.

        :param observer: A callback receives the machine id for each call.
        """
        self._open_close_ports_on_machine_observers.add(observer)

    @inlineCallbacks
    def open_close_ports_on_machine(self, machine_id):
        """Called upon changes that *may* open/close ports for a machine.

        :param machine_id: The machine ID of the machine that needs to
             be checked.

        :raises StopWatcher: if the agent is no longer running.

        This machine supports multiple service units being assigned to a
        machine; all service units are checked each time this is
        called to determine the active set of ports to be opened.

        `MachinesNotFound` is logged and otherwise ignored. Any other
        exception is logged and the machine is recorded for retry by a
        later `process_machine` call. Registered observers are called
        with `machine_id` in every one of those cases.
        """
        if not self.is_running():
            raise StopWatcher()
        try:
            machine_state = yield self.machine_state_manager.get_machine_state(
                machine_id)
            instance_id = yield machine_state.get_instance_id()
            machine = yield self.provider.get_machine(instance_id)
            unit_states = yield machine_state.get_all_service_unit_states()

            # The policy is the union of the open ports of every unit on
            # this machine whose service is currently exposed.
            policy_ports = set()
            for unit_state in unit_states:
                service_state = (
                    yield self.service_state_manager.get_service_state(
                        unit_state.service_name))
                exposed = yield service_state.get_exposed_flag()
                if exposed:
                    ports = yield unit_state.get_open_ports()
                    policy_ports.update(
                        (port["port"], port["proto"]) for port in ports)

            current_ports = yield self.provider.get_opened_ports(
                machine, machine_id)
            to_open = policy_ports - current_ports
            to_close = current_ports - policy_ports
            for port, proto in to_open:
                yield self.provider.open_port(machine, machine_id, port, proto)
            for port, proto in to_close:
                yield self.provider.close_port(
                    machine, machine_id, port, proto)
        except MachinesNotFound:
            log.info("No provisioned machine for machine %r", machine_id)
        except Exception:
            log.exception("Got exception in opening/closing ports, will retry")
            self._retry_machines_on_port_error.add(machine_id)
        finally:
            # Ensure that the observation runs after the corresponding
            # action completes.  In particular, tests that use
            # observation depend on this ordering to ensure that this
            # action has happened before they can proceed.
            observers = list(self._open_close_ports_on_machine_observers)
            for observer in observers:
                yield observer(machine_id)