Exemplo n.º 1
0
def main():
    """Supervise the daemons which run on this node.

    Logs the configuration, clears stale locks and restarts queues, forks the
    'resources' daemon, performs the one-off egress bridge and NAT setup when
    this node is the network node, starts every remaining daemon listed in
    DAEMON_IMPLEMENTATIONS, restores instances, and then loops forever
    reaping exited children and restarting any daemon which is not running.

    This function does not return in the supervisor process. Forked children
    exit via sys.exit() once their Monitor's run() returns.
    """
    global DAEMON_IMPLEMENTATIONS
    global DAEMON_PIDS

    # Function-scope import: only needed so that forked children can exit.
    import sys

    setproctitle.setproctitle(daemon.process_name('main'))

    # Log configuration on startup
    for key, value in config.dict().items():
        LOG.info('Configuration item %s = %s' % (key, value))

    daemon.set_log_level(LOG, 'main')

    # Check in early and often, also reset processing queue items
    db.clear_stale_locks()
    db.see_this_node()
    db.restart_queues()

    def _start_daemon(d):
        # Fork a child to run daemon d and record the child's pid in the
        # parent.
        pid = os.fork()
        if pid == 0:
            # We are the child. Once the monitor returns we must exit,
            # otherwise the child would fall through and start acting as a
            # second supervisor (recording pid 0, auditing daemons, etc).
            DAEMON_IMPLEMENTATIONS[d].Monitor(d).run()
            sys.exit(0)

        DAEMON_PIDS[pid] = d
        LOG.withField('pid', pid).info('Started %s' % d)

    # Resource usage publisher, we need this early because scheduling decisions
    # might happen quite early on.
    _start_daemon('resources')

    # If I am the network node, I need some setup
    if util.is_network_node():
        # Bootstrap the floating network in the Networks table
        floating_network = db.get_network('floating')
        if not floating_network:
            db.create_floating_network(config.get('FLOATING_NETWORK'))
            floating_network = net.from_db('floating')

        subst = {
            'physical_bridge':
            util.get_safe_interface_name('phy-br-%s' %
                                         config.get('NODE_EGRESS_NIC')),
            'physical_nic':
            config.get('NODE_EGRESS_NIC')
        }

        if not util.check_for_interface(subst['physical_bridge']):
            # NOTE(mikal): Adding the physical interface to the physical bridge
            # is considered outside the scope of the orchestration software as
            # it will cause the node to lose network connectivity. So instead
            # all we do is create a bridge if it doesn't exist and the wire
            # everything up to it. We can do egress NAT in that state, even if
            # floating IPs don't work.
            with util.RecordedOperation('create physical bridge', None):
                # No locking as read only
                ipm = db.get_ipmanager('floating')
                subst['master_float'] = ipm.get_address_at_index(1)
                subst['netmask'] = ipm.netmask

                util.create_interface(subst['physical_bridge'], 'bridge', '')
                util.execute(None,
                             'ip link set %(physical_bridge)s up' % subst)
                util.execute(
                    None, 'ip addr add %(master_float)s/%(netmask)s '
                    'dev %(physical_bridge)s' % subst)

                # Forward traffic in both directions between the bridge and
                # the egress NIC, and masquerade what leaves via the NIC.
                util.execute(
                    None, 'iptables -A FORWARD -o %(physical_nic)s '
                    '-i %(physical_bridge)s -j ACCEPT' % subst)
                util.execute(
                    None, 'iptables -A FORWARD -i %(physical_nic)s '
                    '-o %(physical_bridge)s -j ACCEPT' % subst)
                util.execute(
                    None, 'iptables -t nat -A POSTROUTING '
                    '-o %(physical_nic)s -j MASQUERADE' % subst)

    def _audit_daemons():
        # Start any daemon we have no pid for, then restart any daemon whose
        # recorded pid no longer exists.
        running_daemons = set(DAEMON_PIDS.values())

        for d in DAEMON_IMPLEMENTATIONS:
            if d not in running_daemons:
                _start_daemon(d)

        # Iterate over a snapshot because _start_daemon() adds entries to
        # DAEMON_PIDS, and mutating a dict while iterating it raises
        # RuntimeError.
        for pid, d in list(DAEMON_PIDS.items()):
            if not psutil.pid_exists(pid):
                LOG.warning('%s pid is missing, restarting' % d)
                # Forget the dead pid, otherwise every later audit would see
                # it as missing again and fork yet another copy.
                del DAEMON_PIDS[pid]
                _start_daemon(d)

    _audit_daemons()
    restore_instances()

    while True:
        time.sleep(10)

        try:
            # Reap every child which has exited, without blocking.
            wpid, _ = os.waitpid(-1, os.WNOHANG)
            while wpid != 0:
                LOG.warning('%s died (pid %d)' %
                            (DAEMON_PIDS.get(wpid, 'unknown'), wpid))
                # The child may not be one we track (logged as 'unknown'
                # above), so do not assume the key exists.
                DAEMON_PIDS.pop(wpid, None)
                wpid, _ = os.waitpid(-1, os.WNOHANG)
        except ChildProcessError:
            # os.waitpid() raises this when there are no child processes at
            # all; the audit below will start them again.
            pass

        _audit_daemons()
        db.see_this_node()
Exemplo n.º 2
0
from flask_jwt_extended import get_jwt_identity

from shakenfist.daemons import daemon
from shakenfist.external_api import base as api_base
from shakenfist import db
from shakenfist.instance import Instance
from shakenfist.ipmanager import IPManager
from shakenfist import logutil
from shakenfist import net
from shakenfist.networkinterface import NetworkInterface


# Module-level logger (and its handler) for this file. The log level is then
# set using the 'api' daemon name -- presumably looked up from per-daemon
# configuration; confirm in daemon.set_log_level().
LOG, HANDLER = logutil.setup(__name__)
daemon.set_log_level(LOG, 'api')


def metadata_putpost(meta_type, owner, key, value):
    """Set a single metadata key on a namespace, instance or network.

    The existing metadata for (meta_type, owner) is read, updated and
    persisted again while holding the matching 'metadata' lock.

    Args:
        meta_type: one of 'namespace', 'instance' or 'network'.
        owner: identifier of the object which owns the metadata.
        key: metadata key to set; must be truthy.
        value: metadata value to store; must be truthy.

    Returns:
        The result of api_base.error() when validation fails (500 for an
        unknown meta_type, 400 for a missing key or value), otherwise None.
    """
    valid_types = ('namespace', 'instance', 'network')
    if meta_type not in valid_types:
        return api_base.error(500, 'invalid meta_type %s' % meta_type)

    if not key:
        return api_base.error(400, 'no key specified')

    if not value:
        return api_base.error(400, 'no value specified')

    with db.get_lock('metadata', meta_type, owner, op='Metadata update'):
        existing = db.get_metadata(meta_type, owner)
        # Nothing stored yet is reported as None, so start from empty.
        metadata = {} if existing is None else existing
        metadata[key] = value
        db.persist_metadata(meta_type, owner, metadata)
Exemplo n.º 3
0
def main():
    """Supervise the daemons which run on this node.

    Marks the node as created, logs the configuration, clears stale locks and
    restarts queues, forks the 'resources' daemon, performs the one-off egress
    bridge and NAT setup when this node is the network node, starts every
    remaining daemon listed in DAEMON_IMPLEMENTATIONS and restores instances.

    It then loops, reaping exited children every five seconds. While the node
    is not stopping or stopped, missing daemons are restarted. Once the node
    is stopping, SIGTERM is sent once to every daemon and the function returns
    after all of them have exited, having set the node state to stopped.
    """
    global DAEMON_IMPLEMENTATIONS
    global DAEMON_PIDS

    LOG.info('Starting...')
    setproctitle.setproctitle(
        daemon.process_name('main') + '-v%s' % util_general.get_version())

    # If you ran this, it means we're not shutting down any more
    n = Node.new(config.NODE_NAME, config.NODE_MESH_IP)
    n.state = Node.STATE_CREATED

    # Log configuration on startup
    for key, value in config.dict().items():
        LOG.info('Configuration item %s = %s' % (key, value))

    daemon.set_log_level(LOG, 'main')

    # Check in early and often, also reset processing queue items.
    etcd.clear_stale_locks()
    Node.observe_this_node()
    etcd.restart_queues()

    def _start_daemon(d):
        # Fork a child to run daemon d and record the child's pid in the
        # parent.
        pid = os.fork()
        if pid == 0:
            # We are the child. Always exit once the monitor is done so the
            # child never falls through into the supervisor's own code.
            try:
                DAEMON_IMPLEMENTATIONS[d].Monitor(d).run()
                sys.exit(0)
            except Exception as e:
                util_general.ignore_exception('daemon creation', e)
                sys.exit(1)

        DAEMON_PIDS[pid] = d
        LOG.with_field('pid', pid).info('Started %s' % d)

    # Resource usage publisher, we need this early because scheduling decisions
    # might happen quite early on.
    _start_daemon('resources')

    # If I am the network node, I need some setup
    if config.NODE_IS_NETWORK_NODE:
        # Bootstrap the floating network in the Networks table
        floating_network = net.Network.from_db('floating')
        if not floating_network:
            floating_network = net.Network.create_floating_network(
                config.FLOATING_NETWORK)

        subst = {
            'egress_bridge': util_network.get_safe_interface_name(
                'egr-br-%s' % config.NODE_EGRESS_NIC),
            'egress_nic': config.NODE_EGRESS_NIC
        }

        if not util_network.check_for_interface(subst['egress_bridge']):
            # NOTE(mikal): Adding the physical interface to the physical bridge
            # is considered outside the scope of the orchestration software as
            # it will cause the node to lose network connectivity. So instead
            # all we do is create a bridge if it doesn't exist and the wire
            # everything up to it. We can do egress NAT in that state, even if
            # floating IPs don't work.
            with util_general.RecordedOperation('create physical bridge', None):
                # No locking as read only
                ipm = IPManager.from_db('floating')
                subst['master_float'] = ipm.get_address_at_index(1)
                subst['netmask'] = ipm.netmask

                # We need to copy the MTU of the interface we are bridging to
                # or weird networking things happen.
                mtu = util_network.get_interface_mtu(config.NODE_EGRESS_NIC)

                util_network.create_interface(
                    subst['egress_bridge'], 'bridge', '', mtu=mtu)

                util_process.execute(None,
                                     'ip link set %(egress_bridge)s up' % subst)
                util_process.execute(None,
                                     'ip addr add %(master_float)s/%(netmask)s '
                                     'dev %(egress_bridge)s' % subst)

                # Forward traffic in both directions between the bridge and
                # the egress NIC, and masquerade what leaves via the NIC.
                util_process.execute(None,
                                     'iptables -A FORWARD -o %(egress_nic)s '
                                     '-i %(egress_bridge)s -j ACCEPT' % subst)
                util_process.execute(None,
                                     'iptables -A FORWARD -i %(egress_nic)s '
                                     '-o %(egress_bridge)s -j ACCEPT' % subst)
                util_process.execute(None,
                                     'iptables -t nat -A POSTROUTING '
                                     '-o %(egress_nic)s -j MASQUERADE' % subst)

    def _audit_daemons():
        # Start any daemon we have no pid for, then restart any daemon whose
        # recorded pid no longer exists.
        running_daemons = set(DAEMON_PIDS.values())

        for d in DAEMON_IMPLEMENTATIONS:
            if d not in running_daemons:
                _start_daemon(d)

        # Iterate over a snapshot because _start_daemon() adds entries to
        # DAEMON_PIDS.
        for pid, d in list(DAEMON_PIDS.items()):
            if not psutil.pid_exists(pid):
                LOG.warning('%s pid is missing, restarting' % d)
                # Forget the dead pid, otherwise every later audit would see
                # it as missing again and fork yet another copy.
                del DAEMON_PIDS[pid]
                _start_daemon(d)

    _audit_daemons()
    restore_instances()

    # True until SIGTERM has been sent to the daemons as part of shutdown.
    running = True
    while True:
        time.sleep(5)

        try:
            # Reap every child which has exited, without blocking.
            wpid, _ = os.waitpid(-1, os.WNOHANG)
            while wpid != 0:
                LOG.warning('%s exited (pid %d)'
                            % (DAEMON_PIDS.get(wpid, 'unknown'), wpid))
                DAEMON_PIDS.pop(wpid, None)
                wpid, _ = os.waitpid(-1, os.WNOHANG)

        except ChildProcessError:
            # We get this if there are no child processes
            pass

        n = Node.from_db(config.NODE_NAME)
        if n.state.value not in [Node.STATE_STOPPING, Node.STATE_STOPPED]:
            _audit_daemons()
            Node.observe_this_node()

        elif not DAEMON_PIDS:
            # Shutting down and every daemon has been reaped, we are done.
            n.state = Node.STATE_STOPPED
            return

        else:
            # Shutting down with daemons still alive. Ask them to stop, but
            # only once; later passes just wait for them to be reaped.
            if running:
                for pid, d in DAEMON_PIDS.items():
                    try:
                        os.kill(pid, signal.SIGTERM)
                        LOG.info('Sent SIGTERM to %s (pid %s)' % (d, pid))
                    except OSError as e:
                        LOG.warning(
                            'Failed to send SIGTERM to %s: %s' % (pid, e))

            running = False