Example #1
0
    def test__unshare_network_simple(self):
        """Tests unshare network sequence.
        """
        # Access protected module _create_supervision_tree
        # pylint: disable=W0212
        manifest = {
            'name': 'proid.test#0',
            'uniqueid': 'ID1234',
            'environment': 'dev',
            'network': {
                'veth': 'id1234.0',
                'vip': '192.168.1.1',
                'gateway': '192.168.254.254',
            },
            'host_ip': '172.31.81.67',
            'shared_ip': True,
            'ephemeral_ports': [],
            'endpoints': [
                {
                    'real_port': '5007',
                    'proto': 'tcp',
                    'port': '22',
                    'type': 'infra',
                },
                {
                    'real_port': '5013',
                    'proto': 'udp',
                    'port': '12345',
                },
            ],
        }
        app = utils.to_obj(manifest)
        unique_name = appmgr.app_unique_name(app)

        appmgr.run._unshare_network(self.app_env, app)

        # The 'infra' endpoint must be whitelisted in the infra services set.
        treadmill.iptables.add_ip_set.assert_has_calls([
            mock.call(treadmill.iptables.SET_INFRA_SVC, '192.168.1.1,tcp:22'),
        ])

        # One DNAT rule per endpoint; creation order is not significant.
        expected_rules = [
            mock.call(
                rule=firewall.DNATRule('tcp', '172.31.81.67', '5007',
                                       '192.168.1.1', '22'),
                owner=unique_name,
            ),
            mock.call(
                rule=firewall.DNATRule('udp', '172.31.81.67', '5013',
                                       '192.168.1.1', '12345'),
                owner=unique_name,
            ),
        ]
        self.app_env.rules.create_rule.assert_has_calls(expected_rules,
                                                        any_order=True)

        treadmill.newnet.create_newnet.assert_called_with(
            'id1234.0',
            '192.168.1.1',
            '192.168.254.254',
            '172.31.81.67',
        )
Example #2
0
    def test__create_root_dir(self):
        """Test creation on the container root directory."""
        # Access protected module _create_root_dir
        # pylint: disable=W0212
        app = utils.to_obj(
            {
                'proid': 'myproid',
                'name': 'myproid.test#0',
                'uniqueid': 'ID1234',
                'environment': 'dev',
                'disk': '100G',
            }
        )
        app_unique_name = appmgr.app_unique_name(app)
        container_dir = os.path.join(self.root, 'apps', app_unique_name)
        mock_ld_client = self.app_env.svc_localdisk.make_client.return_value
        localdisk = {
            'block_dev': '/dev/foo',
        }
        mock_ld_client.wait.return_value = localdisk
        treadmill.appmgr.run._create_root_dir(self.app_env,
                                              container_dir,
                                              '/some/root_dir',
                                              app)

        treadmill.fs.chroot_init.assert_called_with('/some/root_dir')
        treadmill.fs.create_filesystem.assert_called_with('/dev/foo')
        # BUGFIX: this line used to *call* the mock
        # (`treadmill.fs.mount_filesystem('/dev/foo', '/some/root_dir')`)
        # instead of asserting on it, so the mount was never verified.
        treadmill.fs.mount_filesystem.assert_called_with('/dev/foo',
                                                         '/some/root_dir')
        treadmill.fs.make_rootfs.assert_called_with('/some/root_dir',
                                                    'myproid')
        treadmill.fs.configure_plugins.assert_called_with(
            self.root,
            '/some/root_dir',
            app
        )
        treadmill.fs.mkdir_safe.assert_called_with('/some/root_dir/.etc')
        treadmill.fs.mount_bind.assert_called_with(
            '/some/root_dir',
            '/treadmill',
            '/test_treadmill',
        )
        # BUGFIX: `assert_call_with` is not a Mock assertion method -- it
        # silently auto-created a child mock and always "passed".
        # NOTE(review): expected destination is '.etc/resolve.conf' (sic,
        # not 'resolv.conf') -- confirm against _create_root_dir.
        shutil.copyfile.assert_called_with(
            os.path.join(self.app_env.root, 'etc/resolv.conf'),
            '/some/root_dir/.etc/resolve.conf'
        )
        treadmill.subproc.check_call.assert_has_calls([
            mock.call(
                [
                    'mount', '-n', '--bind',
                    os.path.join(self.app_env.root, 'etc/resolv.conf'),
                    '/etc/resolv.conf'
                ]
            )
        ])
Example #3
0
    def test__unshare_network_complex(self):
        """Test unshare network advanced sequence (ephemeral/passthrough)."""
        # Access protected module _create_supervision_tree
        # pylint: disable=W0212
        manifest = {
            'name': 'myproid.test#0',
            'environment': 'dev',
            'uniqueid': 'ID1234',
            'network': {
                'veth': 'id1234.0',
                'vip': '192.168.0.2',
                'gateway': '192.168.254.254'
            },
            'shared_ip': False,
            'endpoints': [
                {
                    'name': 'ssh',
                    'port': 54321,
                    'real_port': 54321,
                    'type': 'infra',
                },
            ],
            'ephemeral_ports': [10000, 10001, 10002],
            'passthrough': ['xxx', 'yyy', 'zzz'],
        }
        app = utils.to_obj(manifest)
        unique_name = appmgr.app_unique_name(app)
        hosts_to_ip = {
            'xxx': '4.4.4.4',
            'yyy': '5.5.5.5',
            'zzz': '5.5.5.5',
        }
        # KeyError on an unexpected host, same as the real resolver failing.
        socket.gethostbyname.side_effect = hosts_to_ip.__getitem__
        self.app_env.rules.get_rules.return_value = set()

        treadmill.appmgr.run._unshare_network(self.app_env, app)

        # One DNAT rule for the endpoint plus one per ephemeral port,
        # followed by one passthrough rule per resolved source IP.
        dnat_calls = [
            mock.call(rule=firewall.DNATRule('172.31.81.67', port,
                                             '192.168.0.2', port),
                      owner=unique_name)
            for port in (54321, 10000, 10001, 10002)
        ]
        passthrough_calls = [
            mock.call(rule=firewall.PassThroughRule(src_ip, '192.168.0.2'),
                      owner=unique_name)
            for src_ip in ('4.4.4.4', '5.5.5.5')
        ]
        self.app_env.rules.create_rule.assert_has_calls(
            dnat_calls + passthrough_calls
        )

        # Check that infra services + ephemeral ports are in the same set.
        treadmill.iptables.add_ip_set.assert_has_calls([
            mock.call(treadmill.iptables.SET_INFRA_SVC, entry)
            for entry in (
                '192.168.0.2,tcp:54321',
                '192.168.0.2,tcp:10000',
                '192.168.0.2,tcp:10001',
                '192.168.0.2,tcp:10002',
            )
        ])

        treadmill.newnet.create_newnet.assert_called_with(
            'id1234.0',
            '192.168.0.2',
            '192.168.254.254',
            None,
        )
Example #4
0
def configure(tm_env, event):
    """Creates directory necessary for starting the application.

    This operation is idem-potent (it can be repeated).

    The directory layout is::

        - (treadmill root)
          - apps
            - (app unique name)
              - app.yml
                run
                finish

    The 'run' script is responsible for creating container environment
    and starting svscan inside the container.

    The 'finish' script is invoked when container terminates and will
    deallocate any resources (NAT rules, etc) that were allocated for the
    container.

    :param tm_env:
        Node environment (provides ``apps_dir``, ``cleanup_dir``,
        ``app_events_dir`` and the cgroup/localdisk/network service
        wrappers used below).
    :param event:
        Path to the application manifest event file.
    :returns:
        Path to the configured container directory, or ``None`` when the
        event file is already gone.
    :raises KeyError:
        If the app's proid does not exist in the passwd database.
    """
    # R0915: Need to refactor long function into smaller pieces.
    #
    # pylint: disable=R0915

    # Load the app from the event
    try:
        manifest_data = app_manifest.load(tm_env, event)
    except IOError:
        # File is gone. Nothing to do.
        _LOGGER.exception("No event to load: %r", event)
        return

    # Freeze the app data into a namedtuple object
    app = utils.to_obj(manifest_data)

    # Check the identity we are going to run as. It needs to exist on the
    # host or we will fail later on as we try to seteuid.
    try:
        pwd.getpwnam(app.proid)

    except KeyError:
        _LOGGER.exception('Unable to find proid %r in passwd database.',
                          app.proid)
        raise

    # Generate a unique name for the app
    uniq_name = appmgr.app_unique_name(app)

    # Create the app's running directory
    container_dir = os.path.join(tm_env.apps_dir, uniq_name)

    # We assume it is a 'resume' if the container directory already exists.
    is_resume = False
    try:
        os.makedirs(container_dir)
    except OSError as err:
        if err.errno == errno.EEXIST:
            _LOGGER.info('Resuming container %r', uniq_name)
            is_resume = True
        else:
            raise

    # Copy the event as 'manifest.yml' in the container dir
    shutil.copyfile(
        event,
        os.path.join(container_dir, 'manifest.yml')
    )

    # Setup the service clients
    cgroup_client = tm_env.svc_cgroup.make_client(
        os.path.join(container_dir, 'cgroups')
    )
    localdisk_client = tm_env.svc_localdisk.make_client(
        os.path.join(container_dir, 'localdisk')
    )
    network_client = tm_env.svc_network.make_client(
        os.path.join(container_dir, 'network')
    )

    # Store the app in the container_dir
    app_yml = os.path.join(container_dir, _APP_YML)
    with open(app_yml, 'w') as f:
        yaml.dump(manifest_data, stream=f)

    # Generate resources requests

    # Cgroup
    cgroup_req = {
        'memory': app.memory,
        'cpu': app.cpu,
    }
    # Local Disk
    localdisk_req = {
        'size': app.disk,
    }
    # Network
    network_req = {
        'environment': app.environment,
    }

    # On a fresh configure we request new resources; on resume we only
    # refresh the existing requests.
    if not is_resume:
        cgroup_client.create(uniq_name, cgroup_req)
        localdisk_client.create(uniq_name, localdisk_req)

    else:
        cgroup_client.update(uniq_name, cgroup_req)
        localdisk_client.update(uniq_name, localdisk_req)

    # A network resource is only requested for apps that do not use the
    # host's shared network.
    if not app.shared_network:
        if not is_resume:
            network_client.create(uniq_name, network_req)
        else:
            network_client.update(uniq_name, network_req)

    # Mark the container as defaulting to down state
    utils.touch(os.path.join(container_dir, 'down'))

    # Generate the supervisor's run script
    app_run_cmd = ' '.join([
        os.path.join(treadmill.TREADMILL, 'bin', 'treadmill'),
        'sproc', 'run', container_dir
    ])

    run_out_file = os.path.join(container_dir, 'run.out')

    utils.create_script(os.path.join(container_dir, 'run'),
                        'supervisor.run_no_log',
                        log_out=run_out_file,
                        cmd=app_run_cmd)

    _init_log_file(run_out_file,
                   os.path.join(tm_env.apps_dir, "%s.run.out" % uniq_name))

    # Unique name for the link, based on creation time.
    cleanup_link = os.path.join(tm_env.cleanup_dir, uniq_name)
    finish_cmd = '/bin/ln -snvf %s %s' % (container_dir, cleanup_link)

    # The finish script simply hands the container over to the cleanup
    # directory via the symlink created above.
    utils.create_script(os.path.join(container_dir, 'finish'),
                        'supervisor.finish',
                        service=app.name, proid=None,
                        cmds=[finish_cmd])

    appevents.post(
        tm_env.app_events_dir,
        events.ConfiguredTraceEvent(
            instanceid=app.name,
            uniqueid=app.uniqueid
        )
    )
    return container_dir
Example #5
0
def _cleanup_network(tm_env, app, network_client):
    """Cleanup the network part of a container.
    """
    # Generate a unique name for the app
    unique_name = appmgr.app_unique_name(app)

    try:
        app_network = network_client.get(unique_name)

    except services.ResourceServiceError:
        _LOGGER.warning('network never allocated')
        return

    if app_network is None:
        _LOGGER.info('Network resource already freed')
        return

    vip = app_network['vip']

    # Unconfigure passthrough
    if hasattr(app, 'passthrough'):
        _LOGGER.info('Deleting passthrough for: %r', app.passthrough)
        # Resolve all the hosts
        # FIXME: There is no guarantee the hosts will resolve to
        #        the same IPs as they did during creation.
        pass_ips = {socket.gethostbyname(host) for host in app.passthrough}
        for pass_ip in pass_ips:
            tm_env.rules.unlink_rule(
                rule=firewall.PassThroughRule(src_ip=pass_ip,
                                              dst_ip=vip),
                owner=unique_name,
            )

    # Remove each endpoint's DNAT rule and, for 'infra' endpoints, drop
    # the entry from the infra services whitelist set.
    for endpoint in app.endpoints:
        tm_env.rules.unlink_rule(
            rule=firewall.DNATRule(proto=endpoint.proto,
                                   orig_ip=app.host_ip,
                                   orig_port=endpoint.real_port,
                                   new_ip=vip,
                                   new_port=endpoint.port),
            owner=unique_name,
        )
        if getattr(endpoint, 'type', None) == 'infra':
            _LOGGER.debug('removing %s:%s from infra services set',
                          vip, endpoint.port)
            iptables.rm_ip_set(
                iptables.SET_INFRA_SVC,
                '{ip},{proto}:{port}'.format(ip=vip,
                                             proto=endpoint.proto,
                                             port=endpoint.port))

    # Release the ephemeral port ranges, tcp first then udp.
    for proto in ('tcp', 'udp'):
        _cleanup_ports(tm_env, unique_name, vip,
                       getattr(app.ephemeral_ports, proto), proto)

    # Terminate any entries in the conntrack table
    iptables.flush_conntrack_table(vip)
    # Cleanup network resources
    network_client.delete(unique_name)
Example #6
0
def _cleanup(tm_env, zkclient, container_dir, app):
    """Cleanup a container that actually ran.

    Kills the remaining processes, archives the container's root filesystem
    and metrics, releases the localdisk/network/cgroup resources, and
    finally tars up the container directory and hands the archive to
    ``_send_container_archive``.

    :param tm_env:
        Node environment (provides the resource service wrappers and the
        ``metrics_dir`` used below).
    :param zkclient:
        Client passed through to ``_send_container_archive``.
    :param container_dir:
        Path to the container directory being destroyed.
    :param app:
        Application manifest object for this container.
    """
    # Too many branches.
    #
    # pylint: disable=R0912

    rootdir = os.path.join(container_dir, 'root')
    # Generate a unique name for the app
    unique_name = appmgr.app_unique_name(app)
    # Create service clients
    cgroup_client = tm_env.svc_cgroup.make_client(
        os.path.join(container_dir, 'cgroups'))
    localdisk_client = tm_env.svc_localdisk.make_client(
        os.path.join(container_dir, 'localdisk'))
    network_client = tm_env.svc_network.make_client(
        os.path.join(container_dir, 'network'))

    # Make sure all processes are killed
    # FIXME(boysson): Should we use `kill_apps_in_cgroup` instead?
    _kill_apps_by_root(rootdir)

    # Setup the archive filename that will hold this container's data
    filetime = utils.datetime_utcnow().strftime('%Y%m%d_%H%M%S%f')
    archive_filename = os.path.join(
        container_dir, '{instance_name}_{hostname}_{timestamp}.tar'.format(
            instance_name=appmgr.appname_task_id(app.name),
            hostname=sysinfo.hostname(),
            timestamp=filetime))

    # Tar up container root filesystem if archive list is in manifest.
    # Archiving is best-effort: any failure is logged and cleanup proceeds.
    try:
        localdisk = localdisk_client.get(unique_name)
        fs.archive_filesystem(localdisk['block_dev'], rootdir,
                              archive_filename, app.archive)
    except services.ResourceServiceError:
        _LOGGER.warning('localdisk never allocated')
    except subprocess.CalledProcessError:
        _LOGGER.exception('Unable to archive root device of %r', unique_name)
    except:  # pylint: disable=W0702
        _LOGGER.exception('Unknown exception while archiving %r', unique_name)

    # Destroy the volume (a missing resource file is fine - it may already
    # be gone).
    try:
        localdisk = localdisk_client.delete(unique_name)
    except (IOError, OSError) as err:
        if err.errno == errno.ENOENT:
            pass
        else:
            raise

    # Network resources only exist for non-shared-network containers.
    if not app.shared_network:
        _cleanup_network(tm_env, app, network_client)

    # Add metrics to archive
    rrd_file = os.path.join(
        tm_env.metrics_dir, 'apps',
        '{name}-{instanceid}-{uniqueid}.rrd'.format(
            name=app.app,
            instanceid=app.task,
            uniqueid=app.uniqueid,
        ))
    rrdutils.flush_noexc(rrd_file)
    _copy_metrics(rrd_file, container_dir)

    # Cleanup our cgroup resources (ENOENT means already removed)
    try:
        cgroup_client.delete(unique_name)
    except (IOError, OSError) as err:
        if err.errno == errno.ENOENT:
            pass
        else:
            raise

    # Best-effort: local log archival must not abort the cleanup.
    try:
        _archive_logs(tm_env, container_dir)
    except Exception:  # pylint: disable=W0703
        _LOGGER.exception('Unexpected exception storing local logs.')

    # Append or create the tarball with folders outside of container
    # Compress and send the tarball to HCP
    try:
        archive_filename = fs.tar(sources=container_dir,
                                  target=archive_filename,
                                  compression='gzip').name
        _send_container_archive(zkclient, app, archive_filename)
    except:  # pylint: disable=W0702
        _LOGGER.exception("Failed to update archive")