def block_until_ping(self, dst_ip):
    predicate = functools.partial(self.ping_predicate, dst_ip)
    utils.wait_until_true(
        predicate,
        exception=FakeMachineException(
            "No ICMP reply obtained from IP address %s" % dst_ip)
    )

def test_l2_agent_restart(self, agent_restart_timeout=20):
    # Environment preparation is effectively the same as connectivity test
    vms = self._prepare_vms_in_single_network()
    vms.ping_all()

    ns0 = vms[0].namespace
    ip1 = vms[1].ip

    agents = [host.l2_agent for host in self.environment.hosts]

    # Restart agents on all nodes simultaneously while pinging across
    # the hosts. The ping has to cross int and phys bridges and travel
    # via the central bridge as the vms are on separate hosts.
    with net_helpers.async_ping(ns0, [ip1], timeout=2,
                                count=agent_restart_timeout) as done:
        LOG.debug("Restarting agents")
        executor = futures.ThreadPoolExecutor(max_workers=len(agents))
        restarts = [agent.restart(executor=executor) for agent in agents]

        futures.wait(restarts, timeout=agent_restart_timeout)

        self.assertTrue(all([r.done() for r in restarts]))
        LOG.debug("Restarting agents - done")

        # It is necessary to give agents time to initialize
        # because some crucial steps (e.g. setting up bridge flows)
        # happen only after RPC is established
        common_utils.wait_until_true(
            done,
            exception=RuntimeError("Could not ping the other VM, L2 agent "
                                   "restart leads to network disruption"))

def _assert_router_does_not_exist(self, router):
    # If the namespace assertion succeeds
    # then the devices and iptable rules have also been deleted,
    # so there's no need to check that explicitly.
    self.assertFalse(self._namespace_exists(router.ns_name))
    common_utils.wait_until_true(
        lambda: not self._metadata_proxy_exists(self.agent.conf, router))

def _wait_until_ipv6_forwarding_has_state(self, ns_name, dev_name, state):

    def _ipv6_forwarding_has_state():
        return ip_lib.get_ipv6_forwarding(
            device=dev_name, namespace=ns_name) == state

    common_utils.wait_until_true(_ipv6_forwarding_has_state)

def _assert_ping_during_agents_restart(
        self, agents, src_namespace, ips, restart_timeout=10,
        ping_timeout=1, count=10):
    with net_helpers.async_ping(
            src_namespace, ips, timeout=ping_timeout,
            count=count) as done:
        LOG.debug("Restarting agents")
        executor = futures.ThreadPoolExecutor(max_workers=len(agents))
        restarts = [agent.restart(executor=executor)
                    for agent in agents]
        futures.wait(restarts, timeout=restart_timeout)
        self.assertTrue(all([r.done() for r in restarts]))
        LOG.debug("Restarting agents - done")

        # It is necessary to give agents time to initialize
        # because some crucial steps (e.g. setting up bridge flows)
        # happen only after RPC is established
        agent_names = ', '.join({agent.process_fixture.process_name
                                 for agent in agents})
        common_utils.wait_until_true(
            done,
            timeout=count * (ping_timeout + 1),
            exception=RuntimeError("Could not ping the other VM, "
                                   "re-starting %s leads to network "
                                   "disruption" % agent_names))

def test_mtu_update(self):
    tenant_id = uuidutils.generate_uuid()

    router = self.safe_client.create_router(tenant_id)
    network = self.safe_client.create_network(tenant_id)
    subnet = self.safe_client.create_subnet(
        tenant_id, network['id'], '20.0.0.0/24', gateway_ip='20.0.0.1')
    self.safe_client.add_router_interface(router['id'], subnet['id'])

    namespace = "%s@%s" % (
        self._get_namespace(router['id']),
        self.environment.hosts[0].l3_agent.get_namespace_suffix(),
    )
    self._assert_namespace_exists(namespace)

    ip = ip_lib.IPWrapper(namespace)
    common_utils.wait_until_true(lambda: ip.get_devices())

    devices = ip.get_devices()
    self.assertEqual(1, len(devices))

    ri_dev = devices[0]
    mtu = ri_dev.link.mtu
    self.assertEqual(1500, mtu)

    mtu -= 1
    network = self.safe_client.update_network(network['id'], mtu=mtu)
    common_utils.wait_until_true(lambda: ri_dev.link.mtu == mtu)

def _test_restart_service_on_sighup(self, service, workers=1):
    """Test that a service correctly (re)starts on receiving SIGHUP.

    1. Start a service with a given number of workers.
    2. Send SIGHUP to the service.
    3. Wait for workers (if any) to (re)start.
    """
    self._start_server(callback=service, workers=workers)
    os.kill(self.service_pid, signal.SIGHUP)

    expected_msg = FAKE_START_MSG * workers * 2

    # Wait for temp file to be created and its size reaching the expected
    # value
    expected_size = len(expected_msg)
    condition = lambda: (os.path.isfile(self.temp_file) and
                         os.stat(self.temp_file).st_size == expected_size)

    utils.wait_until_true(
        condition, timeout=5, sleep=0.1,
        exception=RuntimeError(
            "Timed out waiting for file %(filename)s to be created and "
            "its size become equal to %(size)s." %
            {'filename': self.temp_file,
             'size': expected_size}))

    # Verify that start has been called twice for each worker (one for
    # initial start, and the second one on SIGHUP after children were
    # terminated).
    with open(self.temp_file, 'r') as f:
        res = f.readline()
        self.assertEqual(expected_msg, res)

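# NOTE: the SIGHUP restart tests above and their variants later in this
# section rely on a service whose workers write a start marker to a temp
# file on every (re)start. A minimal, hypothetical sketch of such a
# worker hook (FAKE_START_MSG and temp_file mirror the names the
# assertions read back; the real fixture is more elaborate and forks
# actual worker processes):
FAKE_START_MSG = 'start'


def on_worker_start(temp_file):
    # Called once per worker (re)start; after SIGHUP the file therefore
    # grows to len(FAKE_START_MSG) * workers * 2 bytes.
    with open(temp_file, 'a') as f:
        f.write(FAKE_START_MSG)
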
def test_ha_router_restart_agents_no_packet_lost(self):
    tenant_id = uuidutils.generate_uuid()
    ext_net, ext_sub = self._create_external_network_and_subnet(tenant_id)
    router = self.safe_client.create_router(tenant_id, ha=True,
                                            external_network=ext_net['id'])

    external_vm = self.useFixture(
        machine_fixtures.FakeMachine(
            self.environment.central_external_bridge,
            common_utils.ip_to_cidr(ext_sub['gateway_ip'], 24)))

    common_utils.wait_until_true(
        lambda: len(self.client.list_l3_agent_hosting_routers(
            router['id'])['agents']) == 2,
        timeout=90)

    common_utils.wait_until_true(
        functools.partial(
            self._is_ha_router_active_on_one_agent,
            router['id']),
        timeout=90)

    router_ip = router['external_gateway_info'][
        'external_fixed_ips'][0]['ip_address']

    l3_agents = [host.agents['l3'] for host in self.environment.hosts]
    self._assert_ping_during_agents_restart(
        l3_agents, external_vm.namespace, [router_ip], count=60)

def _assert_dvr_external_device(self, router):
    external_port = router.get_ex_gw_port()
    snat_ns_name = dvr_snat_ns.SnatNamespace.get_snat_ns_name(
        router.router_id)

    # if the agent is in dvr_snat mode, then we have to check
    # that the correct ports and ip addresses exist in the
    # snat_ns_name namespace
    if self.agent.conf.agent_mode == 'dvr_snat':
        device_exists = functools.partial(
            self.device_exists_with_ips_and_mac,
            external_port,
            router.get_external_device_name,
            snat_ns_name)
        utils.wait_until_true(device_exists)
    # if the agent is in dvr mode then the snat_ns_name namespace
    # should not be present at all:
    elif self.agent.conf.agent_mode == 'dvr':
        self.assertFalse(
            self._namespace_exists(snat_ns_name),
            "namespace %s was found but agent is in dvr mode not dvr_snat"
            % (str(snat_ns_name)))
    # if the agent is anything else the test is misconfigured
    # we force a test failure with message
    else:
        self.fail("agent not configured for dvr or dvr_snat")

def _test_metadata_proxy_spawn_kill_with_subnet_create_delete(self):
    network = self.network_dict_for_dhcp(ip_version=6)
    self.configure_dhcp_for_network(network=network)
    pm = self._get_metadata_proxy_process(network)

    # A newly created network with an ipv6 subnet will not have a
    # metadata proxy
    self.assertFalse(pm.active)

    new_network = copy.deepcopy(network)
    dhcp_enabled_ipv4_subnet = self.create_subnet_dict(network.id)
    new_network.subnets.append(dhcp_enabled_ipv4_subnet)
    self.mock_plugin_api.get_network_info.return_value = new_network
    self.agent.refresh_dhcp_helper(network.id)

    # Metadata proxy should be spawned for the newly added subnet
    common_utils.wait_until_true(
        lambda: pm.active,
        timeout=5,
        sleep=0.1,
        exception=RuntimeError("Metadata proxy didn't spawn"))

    self.mock_plugin_api.get_network_info.return_value = network
    self.agent.refresh_dhcp_helper(network.id)

    # Metadata proxy should be killed because network doesn't need it.
    common_utils.wait_until_true(
        lambda: not pm.active,
        timeout=5,
        sleep=0.1,
        exception=RuntimeError("Metadata proxy didn't get killed"))

def test_mtu_update(self):
    # The test case needs access to devices in nested namespaces. ip_lib
    # doesn't support it, and it's probably unsafe to touch the library
    # for testing matters.
    # TODO(jlibosva) revisit when ip_lib supports nested namespaces
    if self.environment.hosts[0].dhcp_agent.namespace is not None:
        self.skipTest("ip_lib doesn't support nested namespaces")

    self.vm.block_until_dhcp_config_done()

    namespace = dhcp_agent._get_namespace_name(
        self.network['id'],
        suffix=self.environment.hosts[0].dhcp_agent.get_namespace_suffix())
    ip = ip_lib.IPWrapper(namespace)

    devices = ip.get_devices()
    self.assertEqual(1, len(devices))

    dhcp_dev = devices[0]
    mtu = dhcp_dev.link.mtu
    self.assertEqual(1450, mtu)

    mtu -= 1
    self.safe_client.update_network(self.network['id'], mtu=mtu)
    common_utils.wait_until_true(lambda: dhcp_dev.link.mtu == mtu)

def test_cleanup_network_namespaces_cleans_dhcp_and_l3_namespaces(self):
    dhcp_namespace = self.useFixture(
        net_helpers.NamespaceFixture(dhcp.NS_PREFIX)).name
    l3_namespace = self.useFixture(
        net_helpers.NamespaceFixture(namespaces.NS_PREFIX)).name
    bridge = self.useFixture(
        net_helpers.VethPortFixture(namespace=dhcp_namespace)).bridge
    self.useFixture(
        net_helpers.VethPortFixture(bridge, l3_namespace))

    # we scope the get_namespaces to our own ones not to affect other
    # tests, as otherwise cleanup will kill them all
    self.get_namespaces.return_value = [l3_namespace, dhcp_namespace]

    # launch processes in each namespace to make sure they're
    # killed during cleanup
    procs_launched = self._launch_processes([l3_namespace, dhcp_namespace])
    # NOTE: assertIsNot would compare identity, which only works for
    # small ints by accident; assertNotEqual is the correct check here.
    self.assertNotEqual(0, procs_launched)
    common_utils.wait_until_true(
        lambda: self._get_num_spawned_procs() == procs_launched,
        timeout=15,
        exception=Exception("Didn't spawn expected number of processes"))

    netns_cleanup.cleanup_network_namespaces(self.conf)

    self.get_namespaces_p.stop()
    namespaces_now = ip_lib.list_network_namespaces()
    procs_after = self._get_num_spawned_procs()
    self.assertEqual(procs_after, 0)
    self.assertNotIn(l3_namespace, namespaces_now)
    self.assertNotIn(dhcp_namespace, namespaces_now)

def test_has_updates(self):
    utils.wait_until_true(lambda: self.monitor.has_updates)
    # clear the event list
    self.monitor.get_events()
    self.useFixture(net_helpers.OVSPortFixture())
    # has_updates after port addition should become True
    utils.wait_until_true(lambda: self.monitor.has_updates is True)

def _test_trunk_creation_helper(self, ports):
    self.setup_agent_and_ports(port_dicts=ports)
    self.wait_until_ports_state(self.ports, up=True)
    self.trunk_br.delete_port(self.trunk_port_name)
    self.wait_until_ports_state(self.ports, up=False)
    common_utils.wait_until_true(
        lambda: not self.trunk_br.bridge_exists(self.trunk_br.br_name))

def _test_controller_timeout_does_not_break_connectivity(
        self, kill_signal=None):
    # Environment preparation is effectively the same as connectivity test
    vms = self._prepare_vms_in_single_network()
    vms.ping_all()

    ns0 = vms[0].namespace
    ip1 = vms[1].ip

    LOG.debug("Stopping agents (hence also OVS bridge controllers)")
    for host in self.environment.hosts:
        if kill_signal is not None:
            host.l2_agent.stop(kill_signal=kill_signal)
        else:
            host.l2_agent.stop()

    # Ping to make sure that 3 x 5 seconds is overcome even under a high
    # load. The time was chosen to match three times the inactivity_probe
    # time, which is the time after which the OVS vswitchd
    # treats the controller as dead and starts managing the bridge
    # by itself when the fail type setting is not set to secure (see
    # ovs-vsctl man page for further details)
    with net_helpers.async_ping(ns0, [ip1], timeout=2, count=25) as done:
        common_utils.wait_until_true(
            done,
            exception=RuntimeError("Networking interrupted after "
                                   "controllers have vanished"))

def test_both_ha_router_lost_gw_connection(self):
    self.agent.conf.set_override(
        'ha_vrrp_health_check_interval', 5)
    self.failover_agent.conf.set_override(
        'ha_vrrp_health_check_interval', 5)

    router1, router2 = self.create_ha_routers()

    master_router, slave_router = self._get_master_and_slave_routers(
        router1, router2)

    self.fail_gw_router_port(master_router)
    self.fail_gw_router_port(slave_router)

    common_utils.wait_until_true(
        lambda: master_router.ha_state == 'master')
    common_utils.wait_until_true(
        lambda: slave_router.ha_state == 'master')

    self.restore_gw_router_port(master_router)

    new_master, new_slave = self._get_master_and_slave_routers(
        master_router, slave_router)

    self.assertEqual(master_router, new_master)
    self.assertEqual(slave_router, new_slave)

def test_new_fip_sends_garp(self):
    next_ip_cidr = net_helpers.increment_ip_cidr(self.machines.ip_cidr, 2)
    expected_ip = str(netaddr.IPNetwork(next_ip_cidr).ip)

    # Create incomplete ARP entry
    self.peer.assert_no_ping(expected_ip)

    has_entry = has_expected_arp_entry(
        self.peer.port.name,
        self.peer.namespace,
        expected_ip,
        self.router.port.link.address)
    self.assertFalse(has_entry)

    self.router.port.addr.add(next_ip_cidr)

    has_arp_entry_predicate = functools.partial(
        has_expected_arp_entry,
        self.peer.port.name,
        self.peer.namespace,
        expected_ip,
        self.router.port.link.address,
    )
    exc = RuntimeError(
        "No ARP entry in %s namespace containing IP address %s and MAC "
        "address %s" % (
            self.peer.namespace,
            expected_ip,
            self.router.port.link.address))
    utils.wait_until_true(
        has_arp_entry_predicate,
        exception=exc)

def block_until_no_ping(self, dst_ip):
    predicate = functools.partial(
        lambda ip: not self.ping_predicate(ip), dst_ip)
    utils.wait_until_true(
        predicate, exception=FakeMachineException(
            "ICMP packets still pass to %s IP address." % dst_ip)
    )

def _wait_for_min_bw_rule_applied(self, vm, min_bw, direction):
    if direction == constants.EGRESS_DIRECTION:
        utils.wait_until_true(
            lambda: vm.bridge.get_egress_min_bw_for_port(
                vm.neutron_port['id']) == min_bw)
    elif direction == constants.INGRESS_DIRECTION:
        self.fail('"%s" direction not implemented'
                  % constants.INGRESS_DIRECTION)

def destroy_state_change_monitor(self, process_monitor):
    pm = self._get_state_change_monitor_process_manager()
    process_monitor.unregister(
        self.router_id, IP_MONITOR_PROCESS_SERVICE)
    pm.disable(sig=str(int(signal.SIGTERM)))
    try:
        common_utils.wait_until_true(lambda: not pm.active,
                                     timeout=SIGTERM_TIMEOUT)
    except common_utils.WaitTimeout:
        pm.disable(sig=str(int(signal.SIGKILL)))

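# NOTE: the disable/wait/escalate sequence above recurs throughout this
# section (see destroy_monitored_metadata_proxy further down). Distilled
# into a standalone sketch, assuming `pm` is a ProcessManager-like
# object exposing `active` and `disable(sig=...)` as in the surrounding
# snippets:
import signal

from neutron.common import utils as common_utils

SIGTERM_TIMEOUT = 5


def stop_process_gracefully(pm):
    # Ask politely first.
    pm.disable(sig=str(int(signal.SIGTERM)))
    try:
        # Poll until the process exits, or give up after the timeout.
        common_utils.wait_until_true(lambda: not pm.active,
                                     timeout=SIGTERM_TIMEOUT)
    except common_utils.WaitTimeout:
        # The process survived SIGTERM; force-kill it.
        pm.disable(sig=str(int(signal.SIGKILL)))
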
def _ensure_port_binding_failed(self, port_id):

    def port_binding_failed():
        port = self.safe_client.client.show_port(port_id)['port']
        return (port[portbindings.VIF_TYPE] ==
                portbindings.VIF_TYPE_BINDING_FAILED)

    common_utils.wait_until_true(port_binding_failed)

def block_until_dhcp_config_done(self):
    utils.wait_until_true(
        lambda: self.ip_configured() and self.gateway_configured(),
        exception=machine_fixtures.FakeMachineException(
            "Address %s or gateway %s not configured properly on "
            "port %s" % (self.ip_cidr, self.gateway_ip, self.port.name)
        )
    )

def _wait_for_bw_rule_applied(self, vm, limit, burst, direction):
    if direction == constants.EGRESS_DIRECTION:
        utils.wait_until_true(
            lambda: vm.bridge.get_egress_bw_limit_for_port(
                vm.port.name) == (limit, burst))
    elif direction == constants.INGRESS_DIRECTION:
        utils.wait_until_true(
            lambda: vm.bridge.get_ingress_bw_limit_for_port(
                vm.port.name) == (limit, burst))

def _spawn_keepalived(self, keepalived_manager):
    keepalived_manager.spawn()
    process = keepalived_manager.get_process()
    common_utils.wait_until_true(
        lambda: process.active,
        timeout=5,
        sleep=0.01,
        exception=RuntimeError(_("Keepalived didn't spawn")))
    return process

def test_router_interface_status(self):
    network = self.create_network()
    subnet = self.create_subnet(network)
    # Add router interface with subnet id
    router = self._create_router(data_utils.rand_name('router-'), True)
    intf = self.create_router_interface(router['id'], subnet['id'])
    status_active = lambda: self.client.show_port(
        intf['port_id'])['port']['status'] == 'ACTIVE'
    utils.wait_until_true(status_active, exception=AssertionError)

def _read_stream(stream, timeout):
    if timeout:
        poller = select.poll()
        poller.register(stream.fileno())
        poll_predicate = functools.partial(poller.poll, 1)
        common_utils.wait_until_true(poll_predicate, timeout, 0.1,
                                     RuntimeError(
                                         'No output in %.2f seconds' %
                                         timeout))
    return stream.readline()

def _ensure_lla_task(self, gw_ifname, ns_name, lla_with_mask):
    # It should not take this long unless the DAD test failed.
    # In that case, the subnet would never be assigned a prefix.
    utils.wait_until_true(functools.partial(self._lla_available,
                                            gw_ifname,
                                            ns_name,
                                            lla_with_mask),
                          timeout=l3_constants.LLA_TASK_TIMEOUT,
                          sleep=2)

def wait_until_bandwidth_limit_rule_applied(bridge, port_vif, rule):
    def _bandwidth_limit_rule_applied():
        bw_rule = bridge.get_egress_bw_limit_for_port(port_vif)
        expected = None, None
        if rule:
            expected = rule.max_kbps, rule.max_burst_kbps
        return bw_rule == expected

    common_utils.wait_until_true(_bandwidth_limit_rule_applied)

def wait_until_bandwidth_limit_rule_applied(check_function, port_vif, rule):
    def _bandwidth_limit_rule_applied():
        bw_rule = check_function(port_vif)
        expected = None, None
        if rule:
            expected = rule.max_kbps, rule.max_burst_kbps
        return bw_rule == expected

    common_utils.wait_until_true(_bandwidth_limit_rule_applied)

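# NOTE: the two variants of wait_until_bandwidth_limit_rule_applied
# above show a refactor from a hard-coded OVS lookup to an injected
# check_function, so the same wait works against any backend that
# exposes a (max_kbps, max_burst_kbps) lookup. Hypothetical call sites
# (the bridge/tc objects below are illustrative stand-ins, not
# confirmed APIs):
#
#     wait_until_bandwidth_limit_rule_applied(
#         ovs_bridge.get_egress_bw_limit_for_port, port_name, rule)
#     wait_until_bandwidth_limit_rule_applied(
#         linux_bridge_tc.get_filters_bw_limits, port_name, rule)
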
def _wait_until_network_rescheduled(self, old_agent):
    def _agent_rescheduled():
        network_agents = self.client.list_dhcp_agent_hosting_networks(
            self.network['id'])['agents']
        if network_agents:
            return network_agents[0]['id'] != old_agent['id']
        return False

    common_utils.wait_until_true(_agent_rescheduled)

def test_interface_added_after_initialization(self):
    for device in self.devices[:len(self.devices) - 1]:
        self.ip_wrapper.add_dummy(device)
    utils.wait_until_true(lambda: self._read_file({}), timeout=30)
    ip_addresses = [
        {'cidr': '192.168.251.21/24', 'event': 'added',
         'name': self.devices[0]},
        {'cidr': '192.168.251.22/24', 'event': 'added',
         'name': self.devices[1]}]
    self._handle_ip_addresses('added', ip_addresses)
    self._check_read_file(ip_addresses)

    self.ip_wrapper.add_dummy(self.devices[-1])
    ip_addresses.append({'cidr': '192.168.251.23/24', 'event': 'added',
                         'name': self.devices[-1]})
    self._handle_ip_addresses('added', [ip_addresses[-1]])
    self._check_read_file(ip_addresses)

def test_qos(self):
    """Basic test that checks that a QoS policy with a bandwidth limit
    rule is applied correctly by sending a file from the instance to
    the test node. The bandwidth is then calculated every ~1 sec from
    the number of bits received divided by the elapsed time.
    """
    NC_PORT = 1234

    self.setup_network_and_server()
    self.check_connectivity(self.fip['floating_ip_address'],
                            CONF.validation.image_ssh_user,
                            self.keypair['private_key'])
    rulesets = [{'protocol': 'tcp',
                 'direction': 'ingress',
                 'port_range_min': NC_PORT,
                 'port_range_max': NC_PORT,
                 'remote_ip_prefix': '0.0.0.0/0'}]
    self.create_secgroup_rules(rulesets)

    ssh_client = ssh.Client(self.fip['floating_ip_address'],
                            CONF.validation.image_ssh_user,
                            pkey=self.keypair['private_key'])
    policy = self.admin_manager.network_client.create_qos_policy(
        name='test-policy',
        description='test-qos-policy',
        shared=True)
    policy_id = policy['policy']['id']
    self.admin_manager.network_client.create_bandwidth_limit_rule(
        policy_id,
        max_kbps=constants.LIMIT_KILO_BITS_PER_SECOND,
        max_burst_kbps=constants.LIMIT_KILO_BITS_PER_SECOND)
    port = self.client.list_ports(
        network_id=self.network['id'],
        device_id=self.server['server']['id'])['ports'][0]
    self.admin_manager.network_client.update_port(
        port['id'], qos_policy_id=policy_id)
    self._create_file_for_bw_tests(ssh_client)
    utils.wait_until_true(
        lambda: self._check_bw(ssh_client,
                               self.fip['floating_ip_address'],
                               port=NC_PORT),
        timeout=120,
        sleep=1)

def test_no_stale_flows_after_port_delete(self):
    def find_drop_flow(ofport, flows):
        for flow in flows.split("\n"):
            if "in_port=%d" % ofport in flow and "actions=drop" in flow:
                return True
        return False

    def num_ports_with_drop_flows(ofports, flows):
        count = 0
        for ofport in ofports:
            if find_drop_flow(ofport, flows):
                count = count + 1
        return count

    # setup
    self.setup_agent_and_ports(port_dicts=self.create_test_ports())
    self.wait_until_ports_state(self.ports, up=True)

    # call port_delete first
    for port in self.ports:
        self.agent.port_delete([], port_id=port['id'])

    portnames = [port["vif_name"] for port in self.ports]
    ofports = [port.ofport
               for port in self.agent.int_br.get_vif_ports()
               if port.port_name in portnames]

    # wait until ports are marked dead, with drop flow
    utils.wait_until_true(
        lambda: num_ports_with_drop_flows(
            ofports,
            self.agent.int_br.dump_flows(
                constants.LOCAL_SWITCHING)) == len(ofports))

    # delete the ports on bridge
    for port in self.ports:
        self.agent.int_br.delete_port(port['vif_name'])
    self.wait_until_ports_state(self.ports, up=False)

    # verify no stale drop flows
    self.assertEqual(
        0,
        num_ports_with_drop_flows(
            ofports,
            self.agent.int_br.dump_flows(constants.LOCAL_SWITCHING)))

def _test_ipv6_router_advts_and_fwd_helper(self, state, enable_v6_gw,
                                           expected_ra,
                                           expected_forwarding):
    # Schedule router to l3 agent, and then add router gateway. Verify
    # that router gw interface is configured to receive Router Advts and
    # IPv6 forwarding is enabled.
    router_info = l3_test_common.prepare_router_data(
        enable_snat=True, enable_ha=True, dual_stack=True, enable_gw=False)
    router = self.manage_router(self.agent, router_info)
    common_utils.wait_until_true(lambda: router.ha_state == 'master')
    if state == 'backup':
        self.fail_ha_router(router)
        common_utils.wait_until_true(lambda: router.ha_state == 'backup')
    _ext_dev_name, ex_port = l3_test_common.prepare_ext_gw_test(
        mock.Mock(), router, dual_stack=enable_v6_gw)
    router_info['gw_port'] = ex_port
    router.process()
    self._assert_ipv6_accept_ra(router, expected_ra)
    self._assert_ipv6_forwarding(router, expected_forwarding)

def start_agent(self, agent, ports=None, unplug_ports=None):
    if unplug_ports is None:
        unplug_ports = []
    if ports is None:
        ports = []
    self.setup_agent_rpc_mocks(agent, unplug_ports)
    polling_manager = polling.InterfacePollingMinimizer()
    self._mock_get_events(agent, polling_manager, ports)
    self.addCleanup(polling_manager.stop)
    polling_manager.start()
    utils.wait_until_true(
        polling_manager._monitor.is_active)
    agent.check_ovs_status = mock.Mock(
        return_value=constants.OVS_NORMAL)
    self.agent_thread = eventlet.spawn(agent.rpc_loop,
                                       polling_manager)

    self.addCleanup(self.stop_agent, agent, self.agent_thread)
    return polling_manager

def test_ha_router_namespace_has_ipv6_forwarding_disabled(self):
    router_info = self.generate_router_info(enable_ha=True)
    router_info[constants.HA_INTERFACE_KEY]['status'] = (
        constants.PORT_STATUS_DOWN)
    router = self.manage_router(self.agent, router_info)
    external_port = router.get_ex_gw_port()
    external_device_name = router.get_external_device_name(
        external_port['id'])

    common_utils.wait_until_true(lambda: router.ha_state == 'backup')
    self._wait_until_ipv6_forwarding_has_state(
        router.ns_name, external_device_name, 0)

    router.router[constants.HA_INTERFACE_KEY]['status'] = (
        constants.PORT_STATUS_ACTIVE)
    self.agent._process_updated_router(router.router)
    common_utils.wait_until_true(lambda: router.ha_state == 'primary')
    self._wait_until_ipv6_forwarding_has_state(
        router.ns_name, external_device_name, 1)

def test_queue_event_multiple_events_notify_method(self):
    def _batch_notifier_dequeue():
        while not self.notifier._pending_events.empty():
            self.notifier._pending_events.get()

    c_mock = mock.patch.object(self.notifier, '_notify',
                               side_effect=_batch_notifier_dequeue).start()
    events = 20
    for i in range(events):
        self.notifier.queue_event('Event %s' % i)
        eventlet.sleep(0)  # yield to let coro execute

    utils.wait_until_true(self.notifier._pending_events.empty,
                          timeout=5)
    # Called twice: when the first thread calls "synced_send" and then,
    # in the same loop, when self._pending_events is not empty(). All
    # self.notifier.queue_event calls are done in just one
    # "batch_interval" (2 secs).
    self.assertEqual(2, c_mock.call_count)

def test_restart_subport_events(self):
    ports = self._fill_trunk_dict()
    self.setup_agent_and_ports(port_dicts=ports)
    self.wait_until_ports_state(self.ports, up=True)

    # restart and simulate a subport delete
    deleted_port = self.ports[2]
    deleted_sp = trunk_manager.SubPort(
        self.trunk_dict['id'], deleted_port['id'])
    self.stop_agent(self.agent, self.agent_thread)
    self.polling_manager.stop()
    self.trunk_dict['sub_ports'] = self.trunk_dict['sub_ports'][:1]
    self.setup_agent_and_ports(port_dicts=ports[:2])

    # NOTE: the port_dicts passed in setup_agent_and_ports is stored in
    # self.ports so we are waiting here only for ports[:2]
    self.wait_until_ports_state(self.ports, up=True)

    common_utils.wait_until_true(
        lambda: (deleted_sp.patch_port_trunk_name not in
                 self.trunk_br.get_port_name_list()))

def wait_until_true(predicate, timeout=WAIT_UNTIL_TRUE_DEFAULT_TIMEOUT,
                    sleep=WAIT_UNTIL_TRUE_DEFAULT_SLEEP, exception=None):
    """Wait until predicate() returns true, and return.

    Raises a TestTimeoutException after timeout seconds, polling once
    every sleep seconds.
    """
    exception = exception or TestTimeoutException
    return n_utils.wait_until_true(predicate, timeout, sleep, exception)

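# NOTE: for reference, the wrapped helper is conceptually just a poll
# loop. A minimal sketch of what n_utils.wait_until_true does (the real
# neutron.common.utils implementation is eventlet-based and raises
# WaitTimeout by default; this version is illustrative only):
import time


def wait_until_true_sketch(predicate, timeout=60, sleep=1,
                           exception=None):
    deadline = time.monotonic() + timeout
    while not predicate():
        if time.monotonic() > deadline:
            # Raise the caller-supplied exception (class or instance),
            # or fall back to a generic timeout error.
            raise exception or TimeoutError(
                'Predicate still false after %s seconds' % timeout)
        time.sleep(sleep)
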
def test_new_fip_sends_garp(self):
    ns_ip_wrapper = ip_lib.IPWrapper(self.router.namespace)
    new_interface = ns_ip_wrapper.add_dummy('new_interface')
    new_interface_cidr = '169.254.152.1/24'
    new_interface.link.set_up()
    new_interface.addr.add(new_interface_cidr)
    self._generate_cmd_opts(monitor_interface='new_interface',
                            cidr=new_interface_cidr)
    self._run_monitor()

    next_ip_cidr = net_helpers.increment_ip_cidr(self.machines.ip_cidr, 2)
    expected_ip = str(netaddr.IPNetwork(next_ip_cidr).ip)

    # Create incomplete ARP entry
    self.peer.assert_no_ping(expected_ip)

    # Wait for ping expiration
    eventlet.sleep(1)

    has_entry = has_expected_arp_entry(
        self.peer.port.name,
        self.peer.namespace,
        expected_ip,
        self.router.port.link.address)
    self.assertFalse(has_entry)

    self.router.port.addr.add(next_ip_cidr)

    has_arp_entry_predicate = functools.partial(
        has_expected_arp_entry,
        self.peer.port.name,
        self.peer.namespace,
        expected_ip,
        self.router.port.link.address,
    )
    exc = RuntimeError(
        "No ARP entry in %s namespace containing IP address %s and MAC "
        "address %s" % (
            self.peer.namespace,
            expected_ip,
            self.router.port.link.address))
    utils.wait_until_true(has_arp_entry_predicate,
                          timeout=15,
                          exception=exc)
    msg = ('Sent GARP to %(cidr)s from %(device)s' %
           {'cidr': expected_ip,
            'device': self.router.port.name})
    self._search_in_file(self.log_file, msg)

def _start_ovn_northd(self):
    if not self.ovsdb_server_mgr:
        return

    def wait_for_northd():
        try:
            self.nb_api.nb_global
        except StopIteration:
            LOG.debug("NB_Global is not ready yet")
            return False

        try:
            next(
                iter(
                    self.sb_api.db_list_rows('SB_Global').execute(
                        check_error=True)))
        except StopIteration:
            LOG.debug("SB_Global is not ready yet")
            return False
        except KeyError:
            # Maintenance worker doesn't register SB_Global therefore
            # we don't need to wait for it
            LOG.debug("SB_Global is not registered in this IDL")

        return True

    timeout = 20
    ovn_nb_db = self.ovsdb_server_mgr.get_ovsdb_connection_path('nb')
    ovn_sb_db = self.ovsdb_server_mgr.get_ovsdb_connection_path('sb')
    LOG.debug("Starting OVN northd")
    self.ovn_northd_mgr = self.useFixture(
        process.OvnNorthd(self.temp_dir,
                          ovn_nb_db, ovn_sb_db,
                          protocol=self._ovsdb_protocol))
    LOG.debug("OVN northd started: %r", self.ovn_northd_mgr)
    n_utils.wait_until_true(
        wait_for_northd, timeout, sleep=1,
        exception=Exception("ovn-northd didn't initialize OVN DBs in %d "
                            "seconds" % timeout))

def test_ha_router_restart_agents_no_packet_lost(self):
    tenant_id = uuidutils.generate_uuid()
    ext_net, ext_sub = self._create_external_network_and_subnet(tenant_id)
    router = self.safe_client.create_router(tenant_id, ha=True,
                                            external_network=ext_net['id'])

    external_vm = self.useFixture(
        machine_fixtures.FakeMachine(
            self.environment.central_bridge,
            common_utils.ip_to_cidr(ext_sub['gateway_ip'], 24)))

    common_utils.wait_until_true(
        lambda: len(
            self.client.list_l3_agent_hosting_routers(
                router['id'])['agents']) == 2,
        timeout=90)

    common_utils.wait_until_true(
        functools.partial(
            self._is_ha_router_active_on_one_agent, router['id']),
        timeout=90)

    router_ip = router['external_gateway_info'][
        'external_fixed_ips'][0]['ip_address']

    # Let's check first if connectivity from external_vm to router's
    # external gateway IP is possible before we restart agents
    external_vm.block_until_ping(router_ip)

    l3_agents = [host.agents['l3'] for host in self.environment.hosts]
    l3_standby_agents = self._get_l3_agents_with_ha_state(
        l3_agents, router['id'], 'standby')
    l3_active_agents = self._get_l3_agents_with_ha_state(
        l3_agents, router['id'], 'active')

    self._assert_ping_during_agents_restart(
        l3_standby_agents, external_vm.namespace, [router_ip], count=60)

    self._assert_ping_during_agents_restart(
        l3_active_agents, external_vm.namespace, [router_ip], count=60)

def _start_server(self, callback, workers, processes_queue=None):
    """Run a given service.

    :param callback: callback that will start the required service
    :param workers: number of service workers
    :returns: list of spawned workers' pids
    """
    self.workers = workers

    # Fork a new process in which server will be started
    pid = os.fork()
    if pid == 0:
        status = 0
        try:
            callback(workers)
        except SystemExit as exc:
            status = exc.code
        except BaseException:
            traceback.print_exc()
            status = 2

        # Really exit
        os._exit(status)

    self.service_pid = pid

    # With multiple workers, wait for the children to fork and return
    # their pids. With a single worker the service is assumed to run in
    # the forked process itself, so there are no child pids to collect.
    if self.workers > 1:
        workers = self._get_workers(10, processes_queue=processes_queue)
        self.assertEqual(len(workers), self.workers)
        return workers

    # Wait for a service to start.
    utils.wait_until_true(self.health_checker, timeout=10, sleep=0.1,
                          exception=RuntimeError(
                              "Failed to start service."))

    return [self.service_pid]

def _wait_for_status(self, expected_statuses, check_call=True):
    call_count = len(expected_statuses)
    update_loadbalancer_status = (
        self._o_driver_lib.update_loadbalancer_status)
    n_utils.wait_until_true(
        lambda: update_loadbalancer_status.call_count == call_count,
        timeout=10)
    if check_call:
        # NOTE(mjozefcz): The updates are sent in parallel and include
        # dicts with unordered lists inside. So we can't simply use
        # assert_has_calls here. Sample structure:
        # {'listeners': [],
        #  'loadbalancers': [{'id': 'a', 'provisioning_status': 'ACTIVE'}],
        #  'members': [{'id': 'b', 'provisioning_status': 'DELETED'},
        #              {'id': 'c', 'provisioning_status': 'DELETED'}],
        #  'pools': [{'id': 'd', 'operating_status': 'ONLINE',
        #             'provisioning_status': 'ACTIVE'}]},
        updated_statuses = []
        for call in update_loadbalancer_status.mock_calls:
            updated_statuses.append(call[1][0])
        calls_found = []
        for expected_status in expected_statuses:
            for updated_status in updated_statuses:
                # Find status update having equal keys
                if (sorted(updated_status.keys()) ==
                        sorted(expected_status.keys())):
                    val_check = []
                    # Within this status update check if all values of
                    # expected keys match.
                    for k, v in expected_status.items():
                        val_check.append(
                            sorted(expected_status[k],
                                   key=lambda x: x['id']) ==
                            sorted(updated_status[k],
                                   key=lambda x: x['id']))
                    if False in val_check:
                        # At least one value doesn't match.
                        continue
                    calls_found.append(expected_status)
                    break
        # Validate if we found all expected calls.
        self.assertCountEqual(expected_statuses, calls_found)

def _assert_port_forwarding_iptables_is_set(self, router_info, pf):
    (interface_name, namespace,
     iptables_manager) = self.fip_pf_ext._get_resource_by_router(
        router_info)
    chain_rule = self.fip_pf_ext._get_fip_rules(
        pf, iptables_manager.wrap_name)[1]
    chain_name = chain_rule[0]
    rule = chain_rule[1]
    rule_tag = 'fip_portforwarding-' + pf.id
    rule_obj = iptable_mng.IptablesRule(
        chain_name, rule, True, False, iptables_manager.wrap_name,
        rule_tag, None)

    def check_chain_rules_set():
        existing_chains = iptables_manager.ipv4['nat'].chains
        if chain_name not in existing_chains:
            return False
        existing_rules = iptables_manager.ipv4['nat'].rules
        return rule_obj in existing_rules

    common_utils.wait_until_true(check_chain_rules_set)

def _wait_for_bw_rule_applied(self, vm, limit, burst, direction):
    if direction == constants.EGRESS_DIRECTION:
        utils.wait_until_true(
            lambda: vm.bridge.get_egress_bw_limit_for_port(
                vm.port.name) == (limit, burst))
    elif direction == constants.INGRESS_DIRECTION:
        utils.wait_until_true(
            lambda: vm.bridge.get_ingress_bw_limit_for_port(
                vm.port.name) == (limit, burst),
            timeout=10)
        br_int_flows = vm.bridge.dump_flows_for_table(
            ovs_constants.LOCAL_SWITCHING)
        expected = (
            'priority=200,reg3=0 '
            'actions=set_queue:%(queue_num)s,'
            'load:0x1->NXM_NX_REG3[0],resubmit(,0)' % {
                'queue_num': ovs_lib.QOS_DEFAULT_QUEUE}
        )
        self.assertIn(expected, br_int_flows)

def _test_restart_service_on_sighup(self, service, workers=1):
    self._start_server(callback=service, workers=workers)
    os.kill(self.service_pid, signal.SIGHUP)

    expected_msg = test_server.FAKE_START_MSG * workers * 2
    expected_size = len(expected_msg)

    utils.wait_until_true(
        lambda: (os.path.isfile(self.temp_file) and
                 os.stat(self.temp_file).st_size == expected_size),
        timeout=5, sleep=0.1,
        exception=RuntimeError(
            "Timed out waiting for file %(filename)s to be created and "
            "its size become equal to %(size)s." %
            {'filename': self.temp_file,
             'size': expected_size}))

    with open(self.temp_file, 'rb') as f:
        res = f.readline()
        self.assertEqual(expected_msg, res)

def test_find_child_pids(self):
    pid = os.getppid()
    child_pids = utils.find_child_pids(pid)
    child_pids_recursive = utils.find_child_pids(pid, recursive=True)
    for _pid in child_pids:
        self.assertIn(_pid, child_pids_recursive)

    cmd = ['sleep', '100']
    process = async_process.AsyncProcess(cmd)
    process.start()
    common_utils.wait_until_true(lambda: process._process.pid,
                                 sleep=0.5, timeout=10)
    self.addCleanup(self._stop_process, process)

    child_pids_after = utils.find_child_pids(pid)
    child_pids_recursive_after = utils.find_child_pids(
        pid, recursive=True)
    self.assertEqual(child_pids, child_pids_after)
    for _pid in child_pids + [process.pid]:
        self.assertIn(_pid, child_pids_recursive_after)

def test__update_device_attributes_subport_delete(self):
    with self.port() as parent, self.port() as childport:
        subport = create_subport_dict(childport['port']['id'])
        trunk = self._create_test_trunk(parent)
        parent['port']['binding:host_id'] = 'host'
        core_plugin = directory.get_plugin()
        core_plugin.update_port(self.context, parent['port']['id'],
                                parent)
        self.trunk_plugin.add_subports(self.context, trunk['id'],
                                       {'sub_ports': [subport]})
        trunk_obj = self._get_trunk_obj(trunk['id'])
        trunk_obj.status = constants.TRUNK_ACTIVE_STATUS
        trunk_obj.update()
        utils.wait_until_true(
            lambda: ports.Port.get_object(
                self.context,
                id=[childport['port']['id']]).device_owner ==
            constants.TRUNK_SUBPORT_OWNER,
            timeout=5)
        subport_obj = ports.Port.get_object(
            self.context, id=[childport['port']['id']])
        self.assertEqual(constants.TRUNK_SUBPORT_OWNER,
                         subport_obj.device_owner)
        self.assertEqual(subport_obj.device_id, trunk['id'])
        self.assertEqual('host', subport_obj.bindings[0]['host'])

        payload = events.DBEventPayload(
            self.context, resource_id=trunk_obj.id,
            states=(None, trunk_obj),
            metadata={'subports': trunk_obj.sub_ports})
        self.trunk_plugin._update_device_attributes(
            resources.SUBPORTS, events.AFTER_DELETE, None, payload)
        time.sleep(0.1)
        utils.wait_until_true(
            lambda: ports.Port.get_object(
                self.context,
                id=[childport['port']['id']]).device_owner == '',
            timeout=5)
        subport_obj = ports.Port.get_object(
            self.context, id=[childport['port']['id']])
        self.assertEqual('', subport_obj.device_owner)
        self.assertEqual('', subport_obj.device_id)
        self.assertEqual('', subport_obj.bindings[0]['host'])

def _test_restart_service_on_sighup(self, service, workers=1): """Test that a service correctly (re)starts on receiving SIGHUP. 1. Start a service with a given number of workers. 2. Send SIGHUP to the service. 3. Wait for workers (if any) to (re)start. """ self._start_server(callback=service, workers=workers) os.kill(self.service_pid, signal.SIGHUP) # After sending SIGHUP it is expected that there will be as many # FAKE_RESET_MSG as number of workers + one additional for main # process expected_msg = (FAKE_START_MSG * workers + FAKE_RESET_MSG * (workers + 1)) # Wait for temp file to be created and its size reaching the expected # value expected_size = len(expected_msg) ret_msg = '' def is_ret_buffer_ok(): nonlocal ret_msg LOG.debug('Checking returned buffer size') while not self._mp_queue.empty(): ret_msg += self._mp_queue.get() LOG.debug('Size of buffer is %s. Expected size: %s', len(ret_msg), expected_size) return len(ret_msg) == expected_size try: utils.wait_until_true(is_ret_buffer_ok, timeout=5, sleep=1) except utils.WaitTimeout: raise RuntimeError('Expected buffer size: %s, current size: %s' % (len(ret_msg), expected_size)) # Verify that start has been called twice for each worker (one for # initial start, and the second one on SIGHUP after children were # terminated). self.assertEqual(expected_msg, ret_msg)
def _test_restart_service_on_sighup(self, service, workers=1): """Test that a service correctly (re)starts on receiving SIGHUP. 1. Start a service with a given number of workers. 2. Send SIGHUP to the service. 3. Wait for workers (if any) to (re)start. """ self._start_server(callback=service, workers=workers) os.kill(self.service_pid, signal.SIGHUP) expected_msg = FAKE_START_MSG * workers * 2 # Wait for temp file to be created and its size reaching the expected # value expected_size = len(expected_msg) condition = lambda: (os.path.isfile(self.temp_file) and os.stat( self.temp_file).st_size == expected_size) try: utils.wait_until_true(condition, timeout=5, sleep=1) except utils.TimerTimeout: if not os.path.isfile(self.temp_file): raise RuntimeError( "Timed out waiting for file %(filename)s to be created" % {'filename': self.temp_file}) else: raise RuntimeError( "Expected size for file %(filename)s: %(size)s, current " "size: %(current_size)s" % { 'filename': self.temp_file, 'size': expected_size, 'current_size': os.stat(self.temp_file).st_size }) # Verify that start has been called twice for each worker (one for # initial start, and the second one on SIGHUP after children were # terminated). with open(self.temp_file, 'rb') as f: res = f.readline() self.assertEqual(expected_msg, res)
def _assert_harouter_fip_is_set(self, router_info, fip_pf):
    (interface_name, namespace,
     iptables_manager) = self.fip_pf_ext._get_resource_by_router(
        router_info)
    keepalived_pm = router_info.keepalived_manager.get_process()
    utils.get_conf_file_name(keepalived_pm.pids_path,
                             keepalived_pm.uuid,
                             keepalived_pm.service_pid_fname)
    conf_path = os.path.join(keepalived_pm.pids_path,
                             keepalived_pm.uuid,
                             'keepalived.conf')
    regex = "%s dev %s" % (fip_pf, interface_name)
    pattern = re.compile(regex)

    def check_harouter_fip_is_set():
        if re.findall(pattern, utils.get_value_from_file(conf_path)):
            return True
        return False

    common_utils.wait_until_true(check_harouter_fip_is_set)

def destroy_monitored_metadata_proxy(cls, monitor, uuid, conf, ns_name):
    monitor.unregister(uuid, METADATA_SERVICE_NAME)
    pm = cls._get_metadata_proxy_process_manager(uuid, conf,
                                                 ns_name=ns_name)
    pm.disable(sig=str(int(signal.SIGTERM)))
    try:
        common_utils.wait_until_true(lambda: not pm.active,
                                     timeout=SIGTERM_TIMEOUT)
    except common_utils.WaitTimeout:
        LOG.warning('Metadata process %s did not finish after SIGTERM '
                    'signal in %s seconds, sending SIGKILL signal',
                    pm.pid, SIGTERM_TIMEOUT)
        pm.disable(sig=str(int(signal.SIGKILL)))

    # Delete metadata proxy config and PID files.
    HaproxyConfigurator.cleanup_config_file(uuid, cfg.CONF.state_path)
    linux_utils.delete_if_exists(pm.get_pid_file_name(),
                                 run_as_root=True)

    cls.monitors.pop(uuid, None)

def stop(self, block=False, kill_signal=None, kill_timeout=None):
    """Halt the process and watcher threads.

    :param block: Block until the process has stopped.
    :param kill_signal: Number of the signal that will be sent to the
                        process when terminating it
    :param kill_timeout: If given, the process will be killed with
                         SIGKILL if it is still running when the
                         timeout is reached
    :raises utils.WaitTimeout if blocking is True and the process
            did not stop in time.
    """
    kill_signal = kill_signal or getattr(signal, 'SIGKILL', signal.SIGTERM)
    if self._is_running:
        LOG.debug('Halting async process [%s].', self.cmd)
        self._kill(kill_signal, kill_timeout)
    else:
        raise AsyncProcessException(_('Process is not running.'))

    if block:
        common_utils.wait_until_true(lambda: not self.is_active())

def test_create_bridges(self):
    bridges_added = []
    bridges_to_monitor = ['br01', 'br02', 'br03']
    bridges_to_create = ['br01', 'br02', 'br03', 'br04', 'br05']
    self.ovs = ovs_lib.BaseOVS()
    self.ovs.ovsdb.idl_monitor.start_bridge_monitor(bridges_to_monitor)
    self.addCleanup(self._delete_bridges, bridges_to_create)

    for bridge in bridges_to_create:
        self.ovs.add_bridge(bridge)

    def retrieve_bridges(bridges_added):
        bridges_added += self.ovs.ovsdb.idl_monitor.bridges_added
        return len(bridges_added)

    common_utils.wait_until_true(
        lambda: retrieve_bridges(bridges_added) == len(bridges_to_monitor),
        timeout=5)
    bridges_added.sort()
    self.assertEqual(bridges_to_monitor, bridges_added)
    self.assertEqual([], self.ovs.ovsdb.idl_monitor.bridges_added)

def test_add_and_remove_multiple_ips(self):
    # NOTE(ralonsoh): testing [1], adding multiple IPs.
    # [1] https://bugs.launchpad.net/neutron/+bug/1832307
    utils.wait_until_true(lambda: self._read_file({}), timeout=30)
    self.ip_wrapper.add_dummy(self.devices[0])
    ip_addresses = []
    for i in range(100):
        _cidr = str(netaddr.IPNetwork('192.168.252.1/32').ip + i) + '/32'
        ip_addresses.append({'cidr': _cidr, 'event': 'added',
                             'name': self.devices[0]})

    self._handle_ip_addresses('added', ip_addresses)
    self._check_read_file(ip_addresses)

    for i in range(100):
        _cidr = str(netaddr.IPNetwork('192.168.252.1/32').ip + i) + '/32'
        ip_addresses.append({'cidr': _cidr, 'event': 'removed',
                             'name': self.devices[0]})

    self._handle_ip_addresses('removed', ip_addresses)
    self._check_read_file(ip_addresses)

def _test_restart_service_on_sighup(self, service, workers=1): """Test that a service correctly (re)starts on receiving SIGHUP. 1. Start a service with a given number of workers. 2. Send SIGHUP to the service. 3. Wait for workers (if any) to (re)start. """ self._start_server(callback=service, workers=workers) os.kill(self.service_pid, signal.SIGHUP) # After sending SIGHUP it is expected that there will be as many # FAKE_RESET_MSG as number of workers + one additional for main # process expected_msg = (FAKE_START_MSG * workers + FAKE_RESET_MSG * (workers + 1)) # Wait for temp file to be created and its size reaching the expected # value expected_size = len(expected_msg) condition = lambda: (os.path.isfile(self.temp_file) and os.stat( self.temp_file).st_size == expected_size) utils.wait_until_true( condition, timeout=5, sleep=0.1, exception=RuntimeError( "Timed out waiting for file %(filename)s to be created and " "its size become equal to %(size)s." % { 'filename': self.temp_file, 'size': expected_size })) # Verify that start has been called twice for each worker (one for # initial start, and the second one on SIGHUP after children were # terminated). with open(self.temp_file, 'rb') as f: res = f.readline() self.assertEqual(expected_msg, res)
def _test_metadata_proxy_spawn_kill_with_subnet_create_delete(self):
    network = self.network_dict_for_dhcp(
        ip_version=lib_const.IP_VERSION_6, dhcp_enabled=False)
    self.configure_dhcp_for_network(network=network)
    pm = self._get_metadata_proxy_process(network)

    self.assertFalse(pm.active)

    new_network = copy.deepcopy(network)
    dhcp_enabled_ipv4_subnet = self.create_subnet_dict(network.id)
    new_network.subnets.append(dhcp_enabled_ipv4_subnet)
    self.mock_plugin_api.get_network_info.return_value = new_network
    fixed_ip_mock = mock.Mock(ip_address='192.168.10.2',
                              subnet_id=dhcp_enabled_ipv4_subnet.id)
    dhcp_port_mock = mock.Mock(
        dns_assignment={},
        extra_dhcp_opts=[],
        fixed_ips=[fixed_ip_mock],
        id=new_network.ports[0].id,
        mac_address=str(self._DHCP_PORT_MAC_ADDRESS))
    self.mock_plugin_api.get_dhcp_port.return_value = dhcp_port_mock
    self.mock_plugin_api.update_dhcp_port.return_value = dhcp_port_mock
    self.agent.refresh_dhcp_helper(network.id)

    # Metadata proxy should be spawned for the newly added subnet
    common_utils.wait_until_true(
        lambda: pm.active,
        timeout=5,
        sleep=0.1,
        exception=RuntimeError("Metadata proxy didn't spawn"))

    self.mock_plugin_api.get_network_info.return_value = network
    self.agent.refresh_dhcp_helper(network.id)

    # Metadata proxy should be killed because network doesn't need it.
    common_utils.wait_until_true(
        lambda: not pm.active,
        timeout=5,
        sleep=0.1,
        exception=RuntimeError("Metadata proxy didn't get killed"))

def test_cleanup_network_namespaces_cleans_dhcp_and_l3_namespaces(self):
    dhcp_namespace = self.useFixture(
        net_helpers.NamespaceFixture(dhcp.NS_PREFIX)).name
    l3_namespace = self.useFixture(
        net_helpers.NamespaceFixture(namespaces.NS_PREFIX)).name
    bridge = self.useFixture(
        net_helpers.VethPortFixture(namespace=dhcp_namespace)).bridge
    self.useFixture(
        net_helpers.VethPortFixture(bridge, l3_namespace))

    # we scope the get_namespaces to our own ones not to affect other
    # tests, as otherwise cleanup will kill them all
    self.get_namespaces.return_value = [l3_namespace, dhcp_namespace]

    # launch processes in each namespace to make sure they're
    # killed during cleanup
    procs_launched = self._launch_processes([l3_namespace, dhcp_namespace])
    self.assertNotEqual(0, procs_launched)
    try:
        common_utils.wait_until_true(
            lambda: self._get_num_spawned_procs() == procs_launched,
            timeout=15)
    except eventlet.Timeout:
        num_spawned_procs = self._get_num_spawned_procs()
        err_str = ("Expected number/spawned number: {0}/{1}\nProcess "
                   "information:\n".format(procs_launched,
                                           num_spawned_procs))
        cmd = ['ps', '-f', '-u', 'root']
        err_str += utils.execute(cmd, run_as_root=True)
        raise Exception(err_str)

    netns_cleanup.cleanup_network_namespaces(self.conf)

    self.get_namespaces_p.stop()
    namespaces_now = ip_lib.list_network_namespaces()
    procs_after = self._get_num_spawned_procs()
    self.assertEqual(procs_after, 0)
    self.assertNotIn(l3_namespace, namespaces_now)
    self.assertNotIn(dhcp_namespace, namespaces_now)

def test_agent_events(self, m_pb_created, m_pb_deleted):
    lswitchport_name = self._create_logical_switch_port()
    self.sb_api.lsp_bind(lswitchport_name, self.chassis_name).execute(
        check_error=True, log_errors=True)

    def pb_created():
        if m_pb_created.call_count < 1:
            return False
        args = m_pb_created.call_args[0]
        self.assertEqual('update', args[0])
        self.assertEqual(self.chassis_name, args[1].chassis[0].name)
        self.assertFalse(args[2].chassis)
        return True

    n_utils.wait_until_true(
        pb_created,
        timeout=10,
        exception=Exception(
            "PortBindingChassisCreatedEvent didn't happen on port "
            "binding."))

    self.sb_api.lsp_unbind(lswitchport_name).execute(check_error=True,
                                                     log_errors=True)

    def pb_deleted():
        if m_pb_deleted.call_count < 1:
            return False
        args = m_pb_deleted.call_args[0]
        self.assertEqual('update', args[0])
        self.assertFalse(args[1].chassis)
        self.assertEqual(self.chassis_name, args[2].chassis[0].name)
        return True

    n_utils.wait_until_true(
        pb_deleted,
        timeout=10,
        exception=Exception(
            "PortBindingChassisDeletedEvent didn't happen on port "
            "unbind."))