示例#1
0
    def test_server_to_entry(self):
        """Tests conversion between a server dictionary and its LDAP entry."""
        server_dict = {
            '_id': 'xxx',
            'cell': 'yyy',
            'partition': 'p',
            'traits': ['a', 'b', 'c'],
            'data': {'a': '1', 'b': '2'},
        }
        entry = {
            'server': ['xxx'],
            'cell': ['yyy'],
            'partition': ['p'],
            'trait': ['a', 'b', 'c'],
            'data': ['{"a": "1", "b": "2"}'],
        }

        # Forward direction: dictionary -> LDAP entry.
        converted = admin.Server(None).to_entry(server_dict)
        self.assertEqual(converted, entry)

        # Reverse direction: LDAP entry -> dictionary.
        restored = admin.Server(None).from_entry(entry)
        self.assertEqual(restored, server_dict)
示例#2
0
 def _list(cell, traits, partition):
     """List servers"""
     server_admin = admin.Server(context.GLOBAL.ldap.conn)
     # Filter handed to the admin API; traits are merged by cli.combine.
     query = {
         'cell': cell,
         'traits': cli.combine(traits),
         'partition': partition,
     }
     found = server_admin.list(query)
     cli.out(formatter(found))
示例#3
0
    def test_server_to_entry(self):
        """Tests conversion between a server dictionary and its LDAP entry."""
        srv = {
            '_id': 'xxx',
            'cell': 'yyy',
            'traits': ['a', 'b', 'c'],
        }

        ldap_entry = {
            'server': ['xxx'],
            'cell': ['yyy'],
            'trait': ['a', 'b', 'c'],
        }

        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in recent Python versions.
        self.assertEqual(ldap_entry, admin.Server(None).to_entry(srv))
        self.assertEqual(srv, admin.Server(None).from_entry(ldap_entry))
示例#4
0
    def configure(cell, traits, server, partition, data):
        """Create, get or modify server configuration"""
        server_admin = admin.Server(context.GLOBAL.ldap.conn)

        # Collect only the attributes that were actually supplied.
        changes = {}
        if cell:
            changes['cell'] = cell
        if traits:
            changes['traits'] = cli.combine(traits)
        if partition:
            # A lone '-' is stored as None.
            changes['partition'] = None if partition == '-' else partition
        if data:
            # Likewise, ['-'] is stored as None.
            changes['data'] = None if data == ['-'] else data

        if changes:
            # Attempt a create first and fall back to an update when the
            # entry is already there.
            try:
                server_admin.create(server, changes)
            except ldap3.LDAPEntryAlreadyExistsResult:
                server_admin.update(server, changes)

        # Always finish by displaying the server record.
        try:
            record = server_admin.get(server)
            cli.out(formatter(record))
        except ldap3.LDAPNoSuchObjectResult:
            click.echo('Server does not exist: %s' % server, err=True)
示例#5
0
    def scale_cmd(count, partition):
        """Scale nodes to specified count."""
        # '-' and '_default' both select the default partition, which is
        # passed to the autoscale helpers as None.
        if partition in ('-', '_default'):
            partition = None
        # Name under which the selected partition is counted and printed.
        partition_key = partition if partition else '_default'

        cell = context.GLOBAL.cell
        admin_srv = admin.Server(context.GLOBAL.ldap.conn)

        # Count servers per partition. A server without a partition value is
        # counted under '_default' so that it matches the lookup key above
        # and so that the keys stay sortable and printable (a None key would
        # break both sorted() and the format call below).
        count_by_partition = collections.Counter()
        for srv in admin_srv.list({'cell': cell}):
            srv_partition = srv.get('partition')
            if srv_partition is None:
                srv_partition = '_default'
            count_by_partition[srv_partition] += 1

        current_count = count_by_partition.get(partition_key, 0)

        # Only scale when a count was given and it differs from the current
        # size of the partition.
        if count not in {None, current_count}:
            if count > current_count:
                autoscale.create_n_servers(count - current_count, partition)
            else:
                autoscale.delete_n_servers(current_count - count, partition)

            # Report the requested size rather than re-querying.
            count_by_partition[partition_key] = count

        for part in sorted(count_by_partition):
            print('{: <32}: {}'.format(part, count_by_partition[part]))
示例#6
0
    def configure(cell, traits, server, partition, data):
        """Create, get or modify server configuration"""
        server_admin = admin.Server(context.GLOBAL.ldap.conn)

        # Collect only the attributes that were actually supplied.
        changes = {}
        if cell:
            changes['cell'] = cell
        if traits:
            changes['traits'] = cli.combine(traits)
        if partition:
            # A lone '-' is stored as None.
            changes['partition'] = None if partition == '-' else partition
        if data:
            # Here data names a file whose JSON content becomes the value.
            with io.open(data, 'rb') as data_file:
                changes['data'] = json.load(data_file)

        if changes:
            # Attempt a create first and fall back to an update when the
            # entry is already there.
            try:
                server_admin.create(server, changes)
            except ldap_exceptions.LDAPEntryAlreadyExistsResult:
                server_admin.update(server, changes)

        # Always finish by displaying the server record.
        try:
            record = server_admin.get(server)
            cli.out(formatter(record))
        except ldap_exceptions.LDAPNoSuchObjectResult:
            cli.bad_exit('Server does not exist: %s', server)
示例#7
0
def sync_servers():
    """Store the ids of all servers known to the admin API.

    The full, unfiltered server list is fetched and the ``_id`` of each
    entry is passed as data to ``zkutils.ensure_exists`` for the path
    ``z.path.globals('servers')``.
    """
    _LOGGER.info('Sync servers.')
    every_server = admin.Server(context.GLOBAL.ldap.conn).list({})
    zkutils.ensure_exists(
        context.GLOBAL.zk.conn,
        z.path.globals('servers'),
        data=[entry['_id'] for entry in every_server],
    )
示例#8
0
 def _list(cell, features):
     """Displays servers list."""
     server_admin = admin.Server(context.GLOBAL.ldap.conn)
     # Filter handed to the admin API; features are merged by cli.combine.
     query = {'cell': cell, 'features': cli.combine(features)}
     found = server_admin.list(query)
     cli.out(formatter(found))
示例#9
0
 def test_create(self):
     """Checks that server.create() does a dirty get of the same server."""
     hostname = 'foo.somewhere.in.xx.com'
     mocked_admin = admin.Server(None)
     new_attrs = {
         'cell': 'ny-999-cell',
         'partition': 'xxx'
     }
     self.svr.create(hostname, new_attrs)
     mocked_admin.get.assert_called_with(hostname, dirty=True)
示例#10
0
 def _list(cell, traits, label):
     """List servers"""
     server_admin = admin.Server(context.GLOBAL.ldap.conn)
     # Filter handed to the admin API; traits are merged by cli.combine.
     query = {'cell': cell, 'traits': cli.combine(traits), 'label': label}
     found = server_admin.list(query)
     cli.out(formatter(found))
示例#11
0
        def _check(conn, **_kwargs):
            """Server state: """
            # Map of server id -> partition for every server of this cell
            # returned by the admin API.
            admin_srv = admin.Server(context.GLOBAL.ldap.conn)
            servers_in_ldap = {
                server['_id']: server['partition']
                for server in admin_srv.list({'cell': context.GLOBAL.cell})
            }

            # Child node names under the three server-related paths.
            zkclient = context.GLOBAL.zk.conn
            presence = set(zkclient.get_children(z.SERVER_PRESENCE))
            in_zk = set(zkclient.get_children(z.SERVERS))
            blacked_out = set(zkclient.get_children(z.BLACKEDOUT_SERVERS))

            conn.execute("""
                CREATE TABLE servers (
                    name text,
                    partition text,
                    in_ldap,
                    in_zk,
                    up integer,
                    blackout integer,
                    presence integer
                )
                """)

            # Every server seen in either source gets exactly one row.
            all_servers = set(servers_in_ldap) | in_zk
            # Result of the telnet probe, keyed by server name.
            up = {server: checkout.telnet(server) for server in all_servers}

            rows = [
                (
                    name,
                    servers_in_ldap.get(name),
                    name in servers_in_ldap,
                    name in in_zk,
                    # Use the probe result itself: every server is a key of
                    # `up`, so a membership test would always be true.
                    bool(up[name]),
                    name in blacked_out,
                    name in presence,
                )
                for name in all_servers
            ]

            conn.executemany(
                """
                INSERT INTO servers(
                    name,
                    partition,
                    in_ldap,
                    in_zk,
                    up,
                    blackout,
                    presence
                ) values(?, ?, ?, ?, ?, ?, ?)
                """, rows)

            return _metadata()
示例#12
0
def sync_server_topology():
    """Register the cell's servers in pod/rack buckets via the masterapi.

    Every server listed by the admin API for the current cell is placed in
    a rack bucket nested in a pod bucket. Afterwards, servers that the
    masterapi lists but the admin API does not are deleted, unless they
    still appear under the server presence path.
    """
    # Cells are composed of buckets. The topology is ~1000 servers per pod
    # with each pod composed of racks, each ~40 servers.
    def _server_pod_rack(servername):
        # With no real topology information available, the server name is
        # hashed and the hash decides the placement in a fictive topology
        # of at most 4 pods, each with 16 racks.
        digest = hashlib.md5(servername.encode()).digest()
        hash_value = int.from_bytes(digest, 'big')  # 128 bit number
        low_bits = 128 - 2
        # The top 2 bits select the pod.
        pod = hash_value >> low_bits
        # The remaining bits, modulo 16, select the rack.
        rack = (hash_value % (1 << low_bits)) % 16
        return (pod, rack)

    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    cell_servers = server_admin.list({'cell': context.GLOBAL.cell})
    zkclient = context.GLOBAL.zk.conn

    for entry in cell_servers:
        name = entry['_id']
        part = entry.get('partition')

        pod_no, rack_no = _server_pod_rack(name)
        pod_id = 'pod:{:04X}'.format(pod_no)
        rack_id = 'rack:{:04X}'.format(rack_no)

        _LOGGER.info('Update: %r(partition:%r) -> %r, %r',
                     name, part, pod_id, rack_id)

        masterapi.create_bucket(zkclient, pod_id, parent_id=None)
        masterapi.cell_insert_bucket(zkclient, pod_id)
        masterapi.create_bucket(zkclient, rack_id, parent_id=pod_id)
        masterapi.create_server(zkclient, name, rack_id, partition=part)

    known_to_ldap = {entry['_id'] for entry in cell_servers}
    known_to_zk = set(masterapi.list_servers(zkclient))
    present = set(zkclient.get_children(z.SERVER_PRESENCE))
    for stale in known_to_zk - known_to_ldap:
        if stale not in present:
            _LOGGER.info('Delete: %s', stale)
            masterapi.delete_server(zkclient, stale)
            continue
        _LOGGER.warning('%s not in LDAP but node still present, skipping.',
                        stale)
示例#13
0
def delete_servers_by_name(servers):
    """Delete the named hosts and then their admin API server records.

    :param servers: iterable of server hostnames.
    """
    ipa = awscontext.GLOBAL.ipaclient
    ec2 = awscontext.GLOBAL.ec2

    _LOGGER.info('Deleting servers: %r', servers)

    # Hosts are removed through the host manager first.
    hostmanager.delete_hosts(ipa_client=ipa, ec2_conn=ec2, hostnames=servers)

    # Then each matching server record is deleted.
    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    for name in servers:
        server_admin.delete(name)
示例#14
0
    def test_list(self):
        """Checks the filter that server.list() passes to the admin list."""
        self.svr.list(None, None)
        mocked_admin = admin.Server(None)
        self.assertTrue(mocked_admin.list.called)

        # (positional args, keyword args, filter expected by the admin call)
        cases = [
            (('some-cell', None), {}, {'cell': 'some-cell'}),
            ((), {'partition': 'xxx'}, {}),
            (('some-cell', 'xxx'), {}, {'cell': 'some-cell'}),
        ]
        for args, kwargs, expected_filter in cases:
            self.svr.list(*args, **kwargs)
            mocked_admin.list.assert_called_with(expected_filter)
示例#15
0
def _resolve_partition_threshold(cell, partition, value):
    """Turn a percentage threshold into a server count for a partition.

    :param cell: cell whose servers are listed.
    :param partition: partition whose servers are counted.
    :param value: threshold, as a percentage of the counted servers.
    :returns: the truncated count, but never less than _MINIMUM_THRESHOLD.
    """
    cell_servers = admin.Server(context.GLOBAL.ldap.conn).list({'cell': cell})

    in_partition = [
        entry for entry in cell_servers if entry['partition'] == partition
    ]
    total = len(in_partition)

    limit = int((value / 100.0) * total)
    _LOGGER.debug('Total/limit: %s/%s', total, limit)

    if limit < _MINIMUM_THRESHOLD:
        return _MINIMUM_THRESHOLD
    return limit
示例#16
0
def test():
    """Create the server checkout test classes.

    Server names are read once, up front, from the admin API and from the
    Zookeeper client; one test method per server is then attached to the
    returned classes through the ``chk.T`` decorator.

    :returns: list with the two generated ``unittest.TestCase`` classes.
    """
    admin_srv = admin.Server(context.GLOBAL.ldap.conn)
    cell = context.GLOBAL.cell

    # Ids of the servers the admin API lists for this cell.
    ldap_servers = [item['_id'] for item in admin_srv.list({'cell': cell})]

    zkclient = context.GLOBAL.zk.conn

    # Snapshot of the server-related child nodes.
    configured_servers = zkclient.get_children(z.SERVERS)
    up_servers = [presence.server_hostname(node)
                  for node in zkclient.get_children(z.SERVER_PRESENCE)]
    blackedout_servers = zkclient.get_children(z.BLACKEDOUT_SERVERS)
    rebooted_servers = zkclient.get_children(z.REBOOTS)

    class LdapSyncTest(unittest.TestCase):
        """Checks LDAP to Zookeeper server sync."""

    # One test per admin API server: it must also be a configured server.
    # NOTE(review): the docstring placeholders are presumably filled in by
    # chk.T from its keyword arguments - confirm.
    for server in ldap_servers:
        @chk.T(LdapSyncTest,
               server=server, configured_servers=configured_servers)
        def _test_server_configured(self, server, configured_servers):
            """Check if server is synced between LDAP and Zk: {server}."""
            self.assertIn(server, configured_servers)

    class ServerTest(unittest.TestCase):
        """Checks server(s) are up and alive."""

    # Configured servers that are neither blacked out nor listed under the
    # reboots path are the ones expected to be up.
    expected_up = (
        set(configured_servers) -
        set(blackedout_servers) -
        set(rebooted_servers)
    )

    # Two tests per expected server: presence and an open port 22.
    for server in expected_up:
        @chk.T(ServerTest, server=server, up_servers=up_servers)
        def _test_server_up(self, server, up_servers):
            """Check if server is up: {server}."""
            self.assertIn(server, up_servers)

        @chk.T(ServerTest, server=server)
        def _test_server_ssh(self, server):
            """Check if SSH port is open: {server}."""
            self.assertTrue(chk.telnet(server, 22))

    # TODO: implement test that for each partition sum of available capacity
    #       is not below partition threshold.

    return [LdapSyncTest, ServerTest]
示例#17
0
def delete_n_servers(count, partition=None):
    """Delete the first ``count`` servers of a partition, ordered by name.

    :param count: number of servers to delete.
    :param partition: partition used to filter the server listing.
    """
    ipa = awscontext.GLOBAL.ipaclient
    ec2 = awscontext.GLOBAL.ec2

    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    query = {'cell': context.GLOBAL.cell, 'partition': partition}
    candidates = server_admin.list(query)

    # The victims are the names that sort first.
    doomed = sorted(entry['_id'] for entry in candidates)[:count]
    hostmanager.delete_hosts(ipa_client=ipa, ec2_conn=ec2, hostnames=doomed)

    for name in doomed:
        server_admin.delete(name)
示例#18
0
def _state():
    """Build a _STATE record describing apps and servers.

    Apps come from the state API; an app with a truthy ``host`` counts as
    running, any other app as pending. Servers come from the admin API
    listing for the current cell.

    Fields of the result:

    running       number of running apps
    pending       number of pending apps
    busy_srv_cnt  number of distinct hosts that run at least one app
    idle_servers  list of cell servers that run no app
    """
    state_apis = context.GLOBAL.state_api()
    apps = restclient.get(state_apis, _STATE_URL).json()

    server_admin = admin.Server(context.GLOBAL.ldap.conn)

    hosts = [app['host'] for app in apps]
    busy = {host for host in hosts if host}
    running = sum(1 for host in hosts if host)
    pending = len(hosts) - running

    _LOGGER.info('Apps: running: %s, pending: %s', running, pending)

    cell_servers = server_admin.list({'cell': context.GLOBAL.cell})
    idle = {entry['_id'] for entry in cell_servers} - busy
    _LOGGER.info('Servers: busy: %s, idle: %s', len(busy), len(idle))

    return _STATE(
        running=running,
        pending=pending,
        busy_srv_cnt=len(busy),
        idle_servers=list(idle),
    )
示例#19
0
def sync_servers():
    """Register every server of the cell under a placeholder topology.

    Each server goes into the 'rack:unknown' bucket, itself placed in
    'building:unknown', with empty traits and no partition.
    """
    cell_servers = admin.Server(context.GLOBAL.ldap.conn).list(
        {'cell': context.GLOBAL.cell}
    )

    for entry in cell_servers:
        name = entry['_id']

        # No real topology is known here, so fixed bucket names are used.
        rack_id = 'rack:unknown'
        building_id = 'building:unknown'

        master.create_bucket(context.GLOBAL.zk.conn, building_id, None)
        master.cell_insert_bucket(context.GLOBAL.zk.conn, building_id)
        master.create_bucket(context.GLOBAL.zk.conn, rack_id, building_id)
        master.create_server(context.GLOBAL.zk.conn, name, rack_id)
        master.update_server_attrs(context.GLOBAL.zk.conn, name,
                                   traits=[], partition=None)
示例#20
0
    def configure(cell, features, server):
        """Get or modify server configuration."""
        server_admin = admin.Server(context.GLOBAL.ldap.conn)

        # Collect only the attributes that were actually supplied.
        changes = {}
        if cell:
            changes['cell'] = cell
        if features:
            changes['features'] = cli.combine(features)

        if changes:
            # Attempt a create first and fall back to an update when the
            # entry is already there.
            try:
                server_admin.create(server, changes)
            except ldap3.LDAPEntryAlreadyExistsResult:
                server_admin.update(server, changes)

        # Always finish by displaying the server record.
        try:
            record = server_admin.get(server)
            cli.out(formatter(record))
        except ldap3.LDAPNoSuchObjectResult:
            click.echo('Server does not exist: %s' % server, err=True)
示例#21
0
def _run_sync():
    """Repeatedly sync Zookeeper with LDAP; runs with the lock held.

    Each pass syncs app groups, partitions, cell allocations and the global
    server list, then initialises the servers plugin, and finally sleeps
    for 60 seconds. The loop never returns.
    """
    while True:
        # App groups.
        groups = admin.AppGroup(context.GLOBAL.ldap.conn).list({})
        _sync_collection(context.GLOBAL.zk.conn, groups, z.path.appgroup(),
                         _match_appgroup)

        # Partitions of the current cell.
        cell_admin = admin.Cell(context.GLOBAL.ldap.conn)
        cell_partitions = cell_admin.partitions(context.GLOBAL.cell)
        _sync_partitions(context.GLOBAL.zk.conn, cell_partitions)

        # Allocations of the current cell.
        alloc_admin = admin.CellAllocation(context.GLOBAL.ldap.conn)
        cell_allocs = alloc_admin.list({'cell': context.GLOBAL.cell})
        _sync_allocations(context.GLOBAL.zk.conn, cell_allocs)

        # Global (unfiltered) server list.
        every_server = admin.Server(context.GLOBAL.ldap.conn).list({})
        zkutils.ensure_exists(
            context.GLOBAL.zk.conn,
            z.path.globals('servers'),
            data=[entry['_id'] for entry in every_server])

        # Servers - because they can have custom topology - are loaded
        # from the plugin. A missing plugin is only logged.
        try:
            plugin = importlib.import_module(
                'treadmill.plugins.sproc.servers')
            plugin.init()
        except ImportError as err:
            _LOGGER.warning(
                'Unable to load treadmill.plugins.sproc.servers: %s', err)

        time.sleep(60)
示例#22
0
 def _get_server_info():
     """Return the admin API server listing for the current cell."""
     server_admin = admin.Server(context.GLOBAL.ldap.conn)
     cell_filter = {'cell': context.GLOBAL.cell}
     return server_admin.list(cell_filter)
示例#23
0
 def test_get(self):
     """Checks that server.get() forwards the hostname to the admin get."""
     hostname = 'foo.somewhere.in.xx.com'
     mocked_admin = admin.Server(None)
     self.svr.get(hostname)
     mocked_admin.get.assert_called_with(hostname)
示例#24
0
 def delete(servers):
     """Delete server(s)."""
     # One admin object is shared by all deletions, which run in the order
     # the servers were given.
     server_admin = admin.Server(context.GLOBAL.ldap.conn)
     for name in servers:
         server_admin.delete(name)
示例#25
0
def create_n_servers(count, partition=None):
    """Create ``count`` hosts and record each as a server of the cell.

    Instance settings (image, size, subnets, security group, host groups,
    instance profile, disk size) are read from the ``data`` of the current
    cell's admin record. Hosts are created one at a time; every returned
    hostname is printed and stored with the cell and partition.

    :param count: number of hosts to create.
    :param partition: partition stored on the new server records; it also
        appears in the hostname template ('node' is used when it is falsy).
    """
    ipa_client = awscontext.GLOBAL.ipaclient
    ec2_conn = awscontext.GLOBAL.ec2
    sts_conn = awscontext.GLOBAL.sts
    ipa_domain = awscontext.GLOBAL.ipa_domain
    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    cell_admin = admin.Cell(context.GLOBAL.ldap.conn)
    settings = cell_admin.get(context.GLOBAL.cell)['data']

    # An image value that is not already an AMI id is treated as an image
    # name and looked up among the images owned by the calling account.
    image_id = settings['image']
    if not image_id.startswith('ami-'):
        owner = sts_conn.get_caller_identity().get('Account')
        image = ec2client.get_image(ec2_conn, owners=[owner], name=image_id)
        image_id = image['ImageId']

    instance_type = settings['size']
    subnets = settings['subnets']
    secgroup_id = settings['secgroup']
    hostgroups = settings['hostgroups']
    instance_profile = settings['instance_profile']
    disk_size = int(settings['disk_size'])
    # The literal '{time}' placeholder is left in the template.
    hostname_template = '{}-{}-{}'.format(
        context.GLOBAL.cell, partition or 'node', '{time}'
    )

    instance_vars = {
        'treadmill_cell': context.GLOBAL.cell,
        'treadmill_ldap': ','.join(context.GLOBAL.ldap.url),
        'treadmill_ldap_suffix': context.GLOBAL.ldap_suffix,
        'treadmill_dns_domain': context.GLOBAL.dns_domain,
        'treadmill_isa': 'node',
        'treadmill_profile': 'aws',
        'treadmill_krb_realm': krb5.get_host_realm(sysinfo.hostname())[0],
    }

    # FIXME: Add Partition: $partition to tags when Autoscaler is cell aware
    tags = [{'Key': 'Cell', 'Value': context.GLOBAL.cell}]

    for _ in range(count):
        created = hostmanager.create_host(
            ipa_client=ipa_client,
            ec2_conn=ec2_conn,
            image_id=image_id,
            count=1,
            disk=disk_size,
            domain=ipa_domain,
            key=None,
            secgroup_ids=secgroup_id,
            instance_type=instance_type,
            subnets=subnets,
            role='node',
            instance_vars=instance_vars,
            instance_profile=instance_profile,
            hostgroups=hostgroups,
            hostname=hostname_template,
            ip_address=None,
            eni=None,
            tags=tags,
        )

        # Count is one, but it is more robust to treat it as list.
        for new_host in created:
            print(new_host)
            server_admin.create(
                new_host,
                {'cell': context.GLOBAL.cell, 'partition': partition},
            )
示例#26
0
 def delete(hostname):
     """Delete the server record for ``hostname`` through the admin API."""
     _LOGGER.info('removing %s from LDAP', hostname)
     server_admin = admin.Server(context.GLOBAL.ldap.conn)
     server_admin.delete(hostname)
示例#27
0
 def list():
     """Return the set of ``_id`` values of all server records.

     The admin API is queried with an empty filter; no further filtering
     is applied here.
     """
     _LOGGER.info('fetched server list from LDAP')
     server_admin = admin.Server(context.GLOBAL.ldap.conn)
     records = server_admin.list({})
     return {record.get("_id") for record in records}
示例#28
0
 def _admin_svr():
     """Build a new admin.Server bound to the global LDAP connection."""
     ldap_conn = context.GLOBAL.ldap.conn
     return admin.Server(ldap_conn)