def main():
    """etcd driver main entry point.

    Connects a UNIX stream socket to the path given as the first
    command-line argument, hands that socket to an ``EtcdDriver`` and then
    waits for the driver to finish, checking about once a second whether
    the parent process has changed.

    Implementation note: this is implemented as a function to allow it to be
    imported and executed from the pyinstaller launcher.  Without the extra
    indirection, pyilauncher would deadlock when it tried to import this
    module.
    """
    original_ppid = os.getppid()
    common.default_logging(gevent_in_use=False,
                           syslog_executable_name="calico-felix-etcd")

    sock_to_felix = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        felix_socket_path = sys.argv[1]
        sock_to_felix.connect(felix_socket_path)
    except:  # noqa: E722 - log whatever went wrong, then re-raise it as-is
        _log.exception("Failed to connect to Felix")
        raise

    etcd_driver = driver.EtcdDriver(sock_to_felix)
    etcd_driver.start()

    while True:
        if etcd_driver.join(timeout=1):
            # join() returned a true value, so stop polling.
            break

        current_ppid = os.getppid()
        if current_ppid != 1 and current_ppid == original_ppid:
            # Still owned by the process that started us; keep waiting.
            continue

        # Defensive, just in case we don't get a socket error: the parent
        # PID has changed (or is now 1), indicating that Felix has died.
        _log.critical("Process adopted, assuming felix has died")
        etcd_driver.stop()
        break

    _log.critical("Driver shutting down.")
def main():
    """Felix entry point: set up logging, load config, run the main greenlet.

    Any exception raised along the way (``BaseException`` is caught, so this
    includes ``SystemExit`` and ``KeyboardInterrupt``) is logged and the
    process is then terminated with ``os._exit(1)``.
    """
    try:
        # Begin with the default logging parameters; the configuration may
        # refine them later.
        common.default_logging()

        # FIXME: old felix used argparse but that's not in Python 2.6, so the
        # configuration path is hard-coded.
        try:
            config = Config("/etc/calico/felix.cfg")
        except:
            # Best effort only: try to open a log file so that the failure
            # is recorded somewhere, ignoring any error from doing so, and
            # then let the exception continue on its way.
            try:
                debug = logging.DEBUG
                common.complete_logging("/var/log/calico/felix.log",
                                        debug, debug, debug)
            except:
                pass
            raise

        _log.info("Felix initializing")
        main_greenlet = gevent.spawn(_main_greenlet, config)
        main_greenlet.join()  # Should never return
    except BaseException:
        # Ask the OS to terminate the process outright, so that a stray
        # background thread cannot keep us alive.
        _log.exception("Felix exiting due to exception")
        os._exit(1)
        raise  # Unreachable but keeps the linter happy about the broad except.
def main():
    """
    Felix entry point: parse the command line, then run the agent forever.

    The configuration file path is taken from -c/--config-file, falling back
    to 'felix.cfg'.

    Command-line handling is done before entering the catch-all handler so
    that argparse's own exits (--help, usage errors) end the process normally
    with argparse's exit status, rather than being logged as an uncaught
    exception and converted into os._exit(1).

    Anything raised after that point is logged and the process is ended with
    os._exit(1), so this function does not return normally.
    """
    #*********************************************************************#
    #* This is the default configuration path - we expect in most cases  *#
    #* that the configuration file path is passed in on the command      *#
    #* line.                                                             *#
    #*********************************************************************#
    CONFIG_FILE_PATH = 'felix.cfg'

    #*********************************************************************#
    #* argparse signals --help and bad arguments by raising SystemExit.  *#
    #* No zmq context exists yet, so letting that propagate is safe.     *#
    #*********************************************************************#
    parser = argparse.ArgumentParser(description='Felix (Calico agent)')
    parser.add_argument('-c', '--config-file', dest='config_file')
    args = parser.parse_args()
    config_path = args.config_file or CONFIG_FILE_PATH

    try:
        # Initialise the logging with default parameters.
        common.default_logging()

        # Create an instance of the Felix agent and start it running.
        agent = FelixAgent(config_path, zmq.Context())
        while True:
            agent.run()

    except BaseException:
        #*****************************************************************#
        #* Log the exception then terminate.  We cannot call sys.exit    *#
        #* here because sometimes we hang on exit processing deep inside *#
        #* zmq (when the exception that causes termination was caused by *#
        #* a socket error).  Everything is caught deliberately.          *#
        #*****************************************************************#
        log.exception("Felix exiting after uncaught exception")
        os._exit(1)
def test_startup(self):
    """
    Test that creating an agent leaves the stubs in the expected state.

    Builds a FelixAgent on a stub zmq context, then checks the stub iptables
    and ipsets state against the expected global rules and checks the
    agent's hostname.
    """
    common.default_logging()
    felix_agent = felix.FelixAgent(config_path, stub_zmq.Context())

    set_expected_global_rules()
    stub_fiptables.check_state(expected_iptables)
    stub_ipsets.check_state(expected_ipsets)

    self.assertEqual(felix_agent.hostname, "test_hostname")
def main():
    """Read messages from the parent process and log each one.

    The parent passes two pipes as file descriptors 3 (parent -> us) and
    4 (us -> parent).  Messages are read from the first pipe in an endless
    loop: config updates are applied to a local ``Config`` object, every
    other known message type is logged at INFO level with a description
    matching its type, and unknown types are logged as errors.

    This function never returns; it only ends if something it calls raises.
    """
    common.default_logging(gevent_in_use=False)

    # The parent process sends us communication pipes as FD 3 and 4.  Open
    # those as ordinary buffered binary files (gevent is not in use here, so
    # they are not wrapped in anything gevent-aware).
    pipe_from_parent = os.fdopen(3, 'rb', -1)
    pipe_to_parent = os.fdopen(4, 'wb', -1)

    reader = MessageReader(pipe_from_parent)
    # Not used by the loop below; constructed as in the original code.
    writer = MessageWriter(pipe_to_parent)

    config = Config()

    # Human-readable name for each message type that is only logged.
    # NOTE(review): assumes the MSG_TYPE_* constants are hashable (e.g.
    # strings) - confirm against their definitions.
    descriptions = {
        MSG_TYPE_IPSET_DELTA: "IP set delta",
        MSG_TYPE_IPSET_REMOVED: "IP set removed",
        MSG_TYPE_IPSET_UPDATE: "IP set update",
        MSG_TYPE_WL_EP_UPDATE: "Workload endpoint update",
        MSG_TYPE_WL_EP_REMOVE: "Workload endpoint remove",
        MSG_TYPE_HOST_EP_UPDATE: "Host endpoint update",
        MSG_TYPE_HOST_EP_REMOVE: "Host endpoint remove",
        MSG_TYPE_HOST_METADATA_UPDATE: "Host metadata update",
        MSG_TYPE_HOST_METADATA_REMOVE: "Host metadata remove",
        MSG_TYPE_IPAM_POOL_UPDATE: "IPAM pool update",
        MSG_TYPE_IPAM_POOL_REMOVE: "IPAM pool remove",
        MSG_TYPE_POLICY_UPDATE: "Policy update",
        MSG_TYPE_POLICY_REMOVED: "Policy removed",
        MSG_TYPE_PROFILE_UPDATE: "Profile update",
        MSG_TYPE_PROFILE_REMOVED: "Profile removed",
        MSG_TYPE_IN_SYNC: "In sync",
    }

    while True:
        for msg_type, msg, seq_no in reader.new_messages():
            _log.info("New %s message (#%s)", msg_type, seq_no)

            if msg_type == MSG_TYPE_CONFIG_UPDATE:
                # The only message that is acted on rather than just logged.
                config.update_from(msg.config)
                continue

            description = descriptions.get(msg_type)
            if description is None:
                _log.error("Unexpected message %r %s", msg_type, msg)
            else:
                _log.info("%s message: %s", description, msg)
def main():
    """Felix entry point: set up default logging and run the main greenlet.

    If spawning or joining the greenlet raises an ``Exception``, it is
    logged and the process is terminated with ``os._exit(1)``.
    """
    # Logging starts out with the default parameters.
    common.default_logging(gevent_in_use=True)

    try:
        main_greenlet = gevent.spawn(_main_greenlet)
        main_greenlet.join()  # Should never return
    except Exception:
        # Ask the OS to terminate the process outright, so that a stray
        # background thread cannot keep us alive.
        _log.exception("Felix exiting due to exception")
        os._exit(1)
        raise  # Unreachable but keeps the linter happy about the broad except.
def test_no_work(self):
    """
    Test starting up, and sending no work at all.

    Queues a single poll result with nothing added to it, runs the agent
    once, and checks the stub iptables and ipsets state against the expected
    global rules.
    """
    common.default_logging()

    zmq_context = stub_zmq.Context()
    felix_agent = felix.FelixAgent(config_path, zmq_context)

    zmq_context.add_poll_result(0)
    felix_agent.run()

    set_expected_global_rules()
    stub_fiptables.check_state(expected_iptables)
    stub_ipsets.check_state(expected_ipsets)
def main():
    """Felix entry point: parse options, load config, run the main greenlet.

    The configuration file path comes from -c/--config-file and defaults to
    /etc/calico/felix.cfg.  A failure to load the configuration is logged and
    re-raised.  An ``Exception`` from the main greenlet is logged and the
    process is then terminated with ``os._exit(1)``.
    """
    # Logging starts out with the default parameters.
    common.default_logging(gevent_in_use=True)

    # Work out which configuration file to read.
    option_parser = optparse.OptionParser()
    option_parser.add_option('-c', '--config-file', dest='config_file',
                             help="configuration file to use",
                             default="/etc/calico/felix.cfg")
    options = option_parser.parse_args()[0]

    try:
        config = Config(options.config_file)
    except Exception:
        # This is a config loading error, not merely bad command-line
        # parameters (optparse reports those with SystemExit).  Best effort:
        # try to open a log file first, ignoring any error from doing so.
        try:
            debug = logging.DEBUG
            common.complete_logging("/var/log/calico/felix.log",
                                    debug, debug, debug,
                                    gevent_in_use=True)
        except Exception:
            pass

        # Record the failure with logging in whatever state it reached, then
        # re-raise it, taking Felix down.
        _log.exception("Exception loading configuration")
        raise

    _log.info("Felix initializing")

    try:
        main_greenlet = gevent.spawn(_main_greenlet, config)
        main_greenlet.join()  # Should never return
    except Exception:
        # Ask the OS to terminate the process outright, so that a stray
        # background thread cannot keep us alive.
        _log.exception("Felix exiting due to exception")
        os._exit(1)
        raise  # Unreachable but keeps the linter happy about the broad except.
""" import logging import os import socket import sys from prometheus_client import start_http_server from calico.etcddriver import driver from calico import common _log = logging.getLogger(__name__) last_ppid = os.getppid() common.default_logging(gevent_in_use=False, syslog_executable_name="calico-felix-etcd") felix_sck = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) try: felix_sck.connect(sys.argv[1]) except: _log.exception("Failed to connect to Felix") raise etcd_driver = driver.EtcdDriver(felix_sck) etcd_driver.start() while not etcd_driver.join(timeout=1): parent_pid = os.getppid() # Defensive, just in case we don't get a socket error, check if the
""" import logging import os import socket import sys from prometheus_client import start_http_server from calico.etcddriver import driver from calico import common _log = logging.getLogger(__name__) last_ppid = os.getppid() common.default_logging(gevent_in_use=False, syslog_executable_name="calico-felix-etcd") felix_sck = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) try: felix_sck.connect(sys.argv[1]) except: _log.exception("Failed to connect to Felix") raise etcd_driver = driver.EtcdDriver(felix_sck) etcd_driver.start() while not etcd_driver.join(timeout=1): parent_pid = os.getppid() # Defensive, just in case we don't get a socket error, check if the # parent PID has changed, indicating that Felix has died.
def main():
    """ACL Manager entry point: read config, set up logging, build components.

    The config file path comes from -c/--config-file, falling back to
    'acl_manager.cfg'.  After logging is configured, a ZeroMQ context is
    created and the ACL store, network store, publisher, rule processor and
    network subscriber are constructed and wired together.
    """
    # Parse command line args.
    arg_parser = argparse.ArgumentParser(description='Calico ACL Manager')
    arg_parser.add_argument('-c', '--config-file', dest='config_file')
    cli_args = arg_parser.parse_args()

    # Read config file; the dict supplies defaults for the options it names.
    config = ConfigParser.ConfigParser({
        'LogFilePath': None,
        'LogSeverityFile': 'INFO',
        'LogSeveritySys': 'ERROR',
        'LogSeverityScreen': 'ERROR',
        'LocalAddress': '*',
    })
    config.read(cli_args.config_file or 'acl_manager.cfg')

    plugin_address = config.get('global', 'PluginAddress')
    local_address = config.get('global', 'LocalAddress')
    log_file_path = config.get('log', 'LogFilePath')
    severity_names = [
        config.get('log', option_name)
        for option_name in ('LogSeverityFile',
                            'LogSeveritySys',
                            'LogSeverityScreen')
    ]

    # Convert log level names (case-insensitively) into python log levels.
    level_by_name = {
        "none": None,
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warn": logging.WARNING,
        "warning": logging.WARNING,
        "err": logging.ERROR,
        "error": logging.ERROR,
        "crit": logging.CRITICAL,
        "critical": logging.CRITICAL,
    }
    file_level, syslog_level, stream_level = [
        level_by_name[severity.lower()] for severity in severity_names
    ]

    # Configure logging.
    common.default_logging()
    common.complete_logging(logfile=log_file_path,
                            file_level=file_level,
                            syslog_level=syslog_level,
                            stream_level=stream_level)

    log.error("ACL Manager starting (version: %s)",
              pkg_resources.get_distribution('calico'))

    # Create ZeroMQ context.
    zmq_context = zmq.Context()
    log.info("pyzmq version is %s" % zmq.pyzmq_version())

    # Create the two stores, then the components that connect them.
    acl_store = ACLStore()
    network_store = NetworkStore()

    acl_publisher = ACLPublisher(zmq_context, acl_store, local_address)
    acl_store.start(acl_publisher)

    rule_processor = RuleProcessor(acl_store, network_store)
    network_store.add_processor(rule_processor)

    subscriber = NetworkSubscriber(zmq_context, network_store, plugin_address)
def test_resync(self):
    """
    Test the resync flows.

    Drives a FelixAgent through a scripted sequence of stub poll results and
    checks when resync requests are (and are not) sent on TYPE_EP_REQ:
    after start-up, after the resync interval has passed, and after
    endpoint-created messages have been delivered.
    """
    common.default_logging()
    context = stub_zmq.Context()
    agent = felix.FelixAgent(config_path, context)

    #*********************************************************************#
    #* Set the resync timeout to 5 seconds, and the KEEPALIVE timeout to *#
    #* much more.                                                        *#
    #*********************************************************************#
    agent.config.RESYNC_INT_SEC = 5
    agent.config.CONN_TIMEOUT_MS = 50000
    agent.config.CONN_KEEPALIVE_MS = 50000

    # Get started.
    # NOTE(review): the add_poll_result() argument looks like the stub clock
    # time in milliseconds - confirm against stub_zmq.
    context.add_poll_result(0)
    agent.run()

    # Now we should have got a resync request.
    resync_req = context.sent_data[TYPE_EP_REQ].pop()
    log.debug("Resync request : %s" % resync_req)
    self.assertFalse(context.sent_data_present())
    resync_id = resync_req['resync_id']
    resync_rsp = { 'type': "RESYNCSTATE",
                   'endpoint_count': "0",
                   'rc': "SUCCESS",
                   'message': "hello" }

    # Deliver the resync response (reporting zero endpoints).
    poll_result = context.add_poll_result(1000)
    poll_result.add(TYPE_EP_REQ, resync_rsp)
    agent.run()
    # nothing yet
    self.assertFalse(context.sent_data_present())

    poll_result = context.add_poll_result(5999)
    agent.run()
    # nothing yet - 4999 ms since last request
    self.assertFalse(context.sent_data_present())

    poll_result = context.add_poll_result(6001)
    agent.run()

    # We should have got another resync request.
    resync_req = context.sent_data[TYPE_EP_REQ].pop()
    log.debug("Resync request : %s" % resync_req)
    self.assertFalse(context.sent_data_present())
    resync_id = resync_req['resync_id']
    # This time the response reports two endpoints.
    resync_rsp = { 'type': "RESYNCSTATE",
                   'endpoint_count': "2",
                   'rc': "SUCCESS",
                   'message': "hello" }

    # No more resyncs until enough data has arrived.
    poll_result = context.add_poll_result(15000)
    poll_result.add(TYPE_EP_REQ, resync_rsp)
    agent.run()
    self.assertFalse(context.sent_data_present())

    # Send an endpoint created message to Felix.
    endpoint_id = str(uuid.uuid4())
    log.debug("Build first endpoint created : %s" % endpoint_id)
    mac = stub_utils.get_mac()
    suffix = endpoint_id[:11]
    # tap is not referenced again in this test.
    tap = "tap" + suffix
    addr = '1.2.3.4'
    endpoint_created_req = { 'type': "ENDPOINTCREATED",
                             'endpoint_id': endpoint_id,
                             'resync_id': resync_id,
                             'issued': futils.time_ms(),
                             'mac': mac,
                             'state': Endpoint.STATE_ENABLED,
                             'addrs': [{ 'gateway': "1.2.3.1", 'addr': addr }] }

    poll_result = context.add_poll_result(15001)
    poll_result.add(TYPE_EP_REP, endpoint_created_req)
    agent.run()

    # We stop using sent_data_present, since there are ACL requests around.
    endpoint_created_rsp = context.sent_data[TYPE_EP_REP].pop()
    self.assertEqual(endpoint_created_rsp['rc'], "SUCCESS")
    self.assertFalse(context.sent_data[TYPE_EP_REQ])

    # Send a second endpoint created message to Felix - triggers another resync.
    endpoint_id = str(uuid.uuid4())
    log.debug("Build second endpoint created : %s" % endpoint_id)
    mac = stub_utils.get_mac()
    suffix = endpoint_id[:11]
    # tap is not referenced again in this test.
    tap = "tap" + suffix
    addr = '1.2.3.5'
    endpoint_created_req = { 'type': "ENDPOINTCREATED",
                             'endpoint_id': endpoint_id,
                             'resync_id': resync_id,
                             'issued': futils.time_ms(),
                             'mac': mac,
                             'state': Endpoint.STATE_ENABLED,
                             'addrs': [{ 'gateway': "1.2.3.1", 'addr': addr }] }

    poll_result = context.add_poll_result(15002)
    poll_result.add(TYPE_EP_REP, endpoint_created_req)
    agent.run()

    endpoint_created_rsp = context.sent_data[TYPE_EP_REP].pop()
    self.assertEqual(endpoint_created_rsp['rc'], "SUCCESS")
    self.assertFalse(context.sent_data[TYPE_EP_REQ])

    # No more resyncs until at least 5000 ms after the last response.
    poll_result = context.add_poll_result(20000)
    poll_result.add(TYPE_EP_REQ, resync_rsp)
    agent.run()
    self.assertFalse(context.sent_data[TYPE_EP_REQ])

    # We should have got another resync request.
    poll_result = context.add_poll_result(20003)
    poll_result.add(TYPE_EP_REP, endpoint_created_req)
    agent.run()
    resync_req = context.sent_data[TYPE_EP_REQ].pop()
    log.debug("Resync request : %s" % resync_req)
    self.assertFalse(context.sent_data[TYPE_EP_REQ])
def test_main_flow(self):
    """
    Test starting up and going through some of the basic flow.

    Answers the initial resync request, creates one endpoint, applies a set
    of ACLs to it (including deliberately invalid rules), then creates and
    destroys a second endpoint, checking the stub iptables and ipsets state
    against the expected state along the way.
    """
    common.default_logging()
    context = stub_zmq.Context()
    agent = felix.FelixAgent(config_path, context)
    context.add_poll_result(0)
    agent.run()

    # Now we want to reply to the RESYNC request.
    resync_req = context.sent_data[TYPE_EP_REQ].pop()
    log.debug("Resync request : %s" % resync_req)
    self.assertFalse(context.sent_data_present())
    resync_id = resync_req['resync_id']
    resync_rsp = { 'type': "RESYNCSTATE",
                   'endpoint_count': 1,
                   'rc': "SUCCESS",
                   'message': "hello" }

    poll_result = context.add_poll_result(50)
    poll_result.add(TYPE_EP_REQ, resync_rsp)
    agent.run()

    # Felix expects one endpoint created message - give it what it wants
    endpoint_id = str(uuid.uuid4())
    log.debug("Build first endpoint created : %s" % endpoint_id)
    mac = stub_utils.get_mac()
    # The first 11 characters of the endpoint ID are used as the suffix for
    # the tap name and for the rule / ipset names checked below.
    suffix = endpoint_id[:11]
    tap = "tap" + suffix
    addr = '1.2.3.4'
    endpoint_created_req = { 'type': "ENDPOINTCREATED",
                             'endpoint_id': endpoint_id,
                             'resync_id': resync_id,
                             'issued': futils.time_ms(),
                             'mac': mac,
                             'state': Endpoint.STATE_ENABLED,
                             'addrs': [{ 'gateway': "1.2.3.1", 'addr': addr }] }

    poll_result = context.add_poll_result(100)
    poll_result.add(TYPE_EP_REP, endpoint_created_req)
    agent.run()

    # The tap interface is only registered with the device stub after the
    # ENDPOINTCREATED message has been delivered.
    log.debug("Create tap interface %s" % tap)
    tap_obj = stub_devices.TapInterface(tap)
    stub_devices.add_tap(tap_obj)
    poll_result = context.add_poll_result(150)
    agent.run()

    #*********************************************************************#
    #* As soon as that endpoint has been made to exist, we should see an *#
    #* ACL request coming through, and a response to the endpoint        *#
    #* created. We send a reply to that now.                             *#
    #*********************************************************************#
    endpoint_created_rsp = context.sent_data[TYPE_EP_REP].pop()
    self.assertEqual(endpoint_created_rsp['rc'], "SUCCESS")

    acl_req = context.sent_data[TYPE_ACL_REQ].pop()
    self.assertFalse(context.sent_data_present())
    self.assertEqual(acl_req['endpoint_id'], endpoint_id)

    # The GETACLSTATE response is queued here; agent.run() is not called
    # until the ACLUPDATE below has been added to this same poll result.
    acl_rsp = {'type': "GETACLSTATE", 'rc': "SUCCESS", 'message': ""}
    poll_result = context.add_poll_result(200)
    poll_result.add(TYPE_ACL_REQ, acl_rsp)

    # Check the rules are what we expect.
    set_expected_global_rules()
    add_endpoint_rules(suffix, tap, addr, None, mac)
    stub_fiptables.check_state(expected_iptables)
    add_endpoint_ipsets(suffix)
    stub_ipsets.check_state(expected_ipsets)

    # OK - now try giving it some ACLs, and see if they get applied correctly.
    acls = get_blank_acls()
    acls['v4']['outbound'].append({ 'cidr': "0.0.0.0/0", 'protocol': "icmp" })
    acls['v4']['outbound'].append({ 'cidr': "1.2.3.0/24", 'protocol': "tcp" })
    acls['v4']['outbound'].append({ 'cidr': "0.0.0.0/0",
                                    'protocol': "tcp",
                                    'port': "80" })
    acls['v4']['inbound'].append({ 'cidr': "1.2.2.0/24", 'protocol': "icmp" })
    acls['v4']['inbound'].append({ 'cidr': "0.0.0.0/0",
                                   'protocol': "tcp",
                                   'port': "8080" })
    acls['v4']['inbound'].append({ 'cidr': "2.4.6.8/32",
                                   'protocol': "udp",
                                   'port': "8080" })
    acls['v4']['inbound'].append({'cidr': "1.2.3.3/32"})
    acls['v4']['inbound'].append({ 'cidr': "3.6.9.12/32",
                                   'protocol': "tcp",
                                   'port': ['10', '50'] })
    acls['v4']['inbound'].append({ 'cidr': "5.4.3.2/32",
                                   'protocol': "icmp",
                                   'icmp_type': "3",
                                   'icmp_code': "2" })
    acls['v4']['inbound'].append({ 'cidr': "5.4.3.2/32",
                                   'protocol': "icmp",
                                   'icmp_type': "9" })
    acls['v4']['inbound'].append({ 'cidr': "5.4.3.2/32",
                                   'protocol': "icmp",
                                   'icmp_type': "blah" })

    # We include a couple of invalid rules that Felix will just ignore (and log).
    acls['v4']['inbound'].append({ 'cidr': "4.3.2.1/32",
                                   'protocol': "tcp",
                                   'port': ['blah', 'blah'] })
    acls['v4']['inbound'].append({ 'cidr': "4.3.2.1/32",
                                   'protocol': "tcp",
                                   'port': ['1', '2', '3'] })
    acls['v4']['inbound'].append({ 'cidr': "4.3.2.1/32",
                                   'protocol': "tcp",
                                   'port': 'flibble' })
    acls['v4']['inbound'].append({'protocol': "tcp"})
    acls['v4']['inbound'].append({'cidr': "4.3.2.1/32", 'port': "123"})
    acls['v4']['inbound'].append({ 'cidr': "4.3.2.1/32",
                                   'protocol': "icmp",
                                   'icmp_code': "blah" })
    acls['v4']['inbound'].append({ 'cidr': "4.3.2.1/32",
                                   'protocol': "icmp",
                                   'port': "1" })
    acls['v4']['inbound'].append({ 'cidr': "4.3.2.1/32",
                                   'protocol': "rsvp",
                                   'port': "1" })

    acl_req = {'type': "ACLUPDATE", 'acls': acls}
    poll_result.add(TYPE_ACL_SUB, acl_req, endpoint_id)
    agent.run()

    # The iptables expectations are unchanged by the ACL update; only the
    # expected ipset members below are new.
    stub_fiptables.check_state(expected_iptables)
    expected_ipsets.add("felix-from-icmp-" + suffix, "0.0.0.0/1")
    expected_ipsets.add("felix-from-icmp-" + suffix, "128.0.0.0/1")
    expected_ipsets.add("felix-from-port-" + suffix, "1.2.3.0/24,tcp:0")
    expected_ipsets.add("felix-from-port-" + suffix, "0.0.0.0/1,tcp:80")
    expected_ipsets.add("felix-from-port-" + suffix, "128.0.0.0/1,tcp:80")

    expected_ipsets.add("felix-to-icmp-" + suffix, "1.2.2.0/24")
    expected_ipsets.add("felix-to-port-" + suffix, "0.0.0.0/1,tcp:8080")
    expected_ipsets.add("felix-to-port-" + suffix, "128.0.0.0/1,tcp:8080")
    expected_ipsets.add("felix-to-port-" + suffix, "2.4.6.8/32,udp:8080")
    expected_ipsets.add("felix-to-addr-" + suffix, "1.2.3.3/32")
    expected_ipsets.add("felix-to-port-" + suffix, "3.6.9.12/32,tcp:10-50")
    expected_ipsets.add("felix-to-port-" + suffix, "5.4.3.2/32,icmp:3/2")
    expected_ipsets.add("felix-to-port-" + suffix, "5.4.3.2/32,icmp:9/0")
    expected_ipsets.add("felix-to-port-" + suffix, "5.4.3.2/32,icmp:blah")

    stub_ipsets.check_state(expected_ipsets)

    # Add another endpoint, and check the state.
    endpoint_id2 = str(uuid.uuid4())
    log.debug("Build second endpoint created : %s" % endpoint_id2)
    mac2 = stub_utils.get_mac()
    suffix2 = endpoint_id2[:11]
    tap2 = "tap" + suffix2
    addr2 = '1.2.3.5'
    # Unlike the first request, this one carries no 'resync_id' key.
    endpoint_created_req = { 'type': "ENDPOINTCREATED",
                             'endpoint_id': endpoint_id2,
                             'issued': futils.time_ms(),
                             'mac': mac2,
                             'state': Endpoint.STATE_ENABLED,
                             'addrs': [{ 'gateway': "1.2.3.1", 'addr': addr2 }] }

    # This time the tap interface is registered before the agent runs.
    poll_result = context.add_poll_result(250)
    poll_result.add(TYPE_EP_REP, endpoint_created_req)
    tap_obj2 = stub_devices.TapInterface(tap2)
    stub_devices.add_tap(tap_obj2)
    agent.run()

    # Check that we got what we expected - i.e. a success response, a GETACLSTATE,
    # and the rules in the right state.
    endpoint_created_rsp = context.sent_data[TYPE_EP_REP].pop()
    self.assertEqual(endpoint_created_rsp['rc'], "SUCCESS")
    acl_req = context.sent_data[TYPE_ACL_REQ].pop()
    self.assertEqual(acl_req['endpoint_id'], endpoint_id2)
    self.assertFalse(context.sent_data_present())

    add_endpoint_rules(suffix2, tap2, addr2, None, mac2)
    stub_fiptables.check_state(expected_iptables)
    add_endpoint_ipsets(suffix2)
    stub_ipsets.check_state(expected_ipsets)

    # OK, finally wind down with an ENDPOINTDESTROYED message for that second endpoint.
    endpoint_destroyed_req = { 'type': "ENDPOINTDESTROYED",
                               'endpoint_id': endpoint_id2,
                               'issued': futils.time_ms() }

    poll_result = context.add_poll_result(300)
    poll_result.add(TYPE_EP_REP, endpoint_destroyed_req)
    stub_devices.del_tap(tap2)
    agent.run()

    # Rebuild and recheck the state: the expected rules are reset to the
    # global rules plus the first endpoint only.
    set_expected_global_rules()
    add_endpoint_rules(suffix, tap, addr, None, mac)
    stub_fiptables.check_state(expected_iptables)