Exemplo n.º 1
0
def get_server(zkclient, server_id, placement=False):
    """Fetch the stored data of a server, optionally merged with placement.

    The server node is read through ``zkutils.get_default`` with ``{}`` as
    the third argument (presumably the fallback used when the node is
    missing -- confirm against zkutils). When ``placement`` is true, the
    server's placement node is read too and, if it holds anything truthy,
    merged into the result with ``dict.update``.
    """
    server = zkutils.get_default(zkclient, z.path.server(server_id), {})
    if not placement:
        return server

    extra = zkutils.get_default(zkclient, z.path.placement(server_id))
    if extra:
        server.update(extra)

    return server
Exemplo n.º 2
0
    def _watch_task_instances(instance_ids):
        """Ensure each instance of the task is either watched or loaded.

        For every id, the full instance name ``<task>#<instance_id>`` is
        built. Names already present in ``cell_state.tasks`` are skipped.
        Scheduled instances get a watch via ``watch_task_instance``; for all
        others the task node is read once and stored in ``cell_state.tasks``.

        Always returns True.
        """
        for instance_id in instance_ids:
            full_name = '#'.join((task, instance_id))

            # Already known: a watch is in place or the data was acquired.
            if full_name in cell_state.tasks:
                continue

            # While the preloaded ``scheduled`` collection is non-empty it is
            # used for the lookup (first-load optimization); once it has been
            # cleared, Zookeeper is asked directly.
            is_scheduled = (
                full_name in scheduled
                if scheduled
                else zkclient.exists(z.path.scheduled(full_name))
            )

            if not is_scheduled:
                task_data = zkutils.get_default(zkclient,
                                                z.path.task(full_name))
                cell_state.tasks[full_name] = task_data
                continue

            watch_task_instance(zkclient, cell_state, full_name)

        return True
Exemplo n.º 3
0
    def load_allocations(self):
        """Populate the allocation tree and the assignment map.

        Reads the list stored at ``z.ALLOCATIONS``. For each entry the
        allocation is located by walking from the partition's root
        allocation down through the name components (split on ``/`` or
        ``:``), tagging every visited node with the partition label. The
        leaf is then updated with the entry's capacity, rank and optional
        max-utilization, and every assignment pattern of the entry is
        registered in ``self.assignments`` as ``(priority, allocation)``.
        """
        entries = zkutils.get_default(self.zkclient, z.ALLOCATIONS,
                                      default={})
        if not entries:
            return

        for entry in entries:
            label = entry.get('partition')
            name = entry['name']
            _LOGGER.info('Loading allocation: %s, label: %s', name, label)

            # Walk (creating or fetching, as get_sub_alloc decides) the chain
            # of sub-allocations, labelling each node on the way down.
            node = self.cell.partitions[label].allocation
            node.label = label
            for component in re.split('[/:]', name):
                node = node.get_sub_alloc(component)
                node.label = label

            limits = resources(entry)
            node.update(limits, entry['rank'], entry.get('max-utilization'))

            for rule in entry.get('assignments', []):
                # Pattern is suffixed with '[#]' and ten '[0-9]' classes.
                pattern = ''.join((rule['pattern'], '[#]', '[0-9]' * 10))
                priority = rule['priority']
                _LOGGER.info('Assignment: %s - %s', pattern, priority)
                self.assignments[pattern] = (priority, node)
Exemplo n.º 4
0
    def process_events(self, events):
        """Callback invoked on state change/admin event.

        ``events`` is an iterable of event node names. Names of the form
        ``<prio>-<event>-<seq #>`` are dispatched, in sorted
        ``(prio, seq, event)`` order, to the loader matching the event
        resource. Afterwards every node listed in ``events`` is deleted,
        including names that did not match the expected form.
        """
        # Events are sequential nodes in the form <prio>-<event>-<seq #>
        #
        # They are processed in order of (prio, seq_num, event).
        # NOTE(review): the components are compared as strings, so the
        # ordering is lexicographic -- this assumes prio and seq are
        # zero-padded to a fixed width; confirm against the event producer.
        ordered = []
        for event in events:
            if not re.match(r'\d+\-\w+\-\d+$', event):
                continue
            prio, resource, seq = event.split('-')
            ordered.append((prio, seq, resource))
        ordered.sort()

        for prio, seq, resource in ordered:
            _LOGGER.info('event: %s %s %s', prio, seq, resource)
            node_name = '-'.join([prio, resource, seq])
            if resource == 'allocations':
                # TODO: changing allocations has potential of complete
                #                reshuffle, so while inefficient, reload
                #                all apps as well.
                self.load_allocations()
                self.load_apps()
            elif resource == 'apps':
                # The event node contains list of apps to be re-evaluated.
                apps = zkutils.get_default(self.zkclient,
                                           z.path.event(node_name),
                                           default=[])
                for app in apps:
                    self.load_app(app)
            elif resource == 'cell':
                self.load_cell()
            elif resource == 'servers':
                servers = zkutils.get_default(self.zkclient,
                                              z.path.event(node_name),
                                              default=[])
                if not servers:
                    # If not specified, reload all. Use union of servers in
                    # the model and in zookeeper, so that servers known to
                    # both sides are reloaded too (a symmetric difference
                    # would silently skip them).
                    servers = (set(self.servers.keys())
                               | set(self.zkclient.get_children(z.SERVERS)))
                self.reload_servers(servers)
            elif resource == 'identity_groups':
                self.load_identity_groups()
            else:
                # Logger.warn is a deprecated alias; use warning().
                _LOGGER.warning('Unsupported event resource: %s', resource)

        for node in events:
            _LOGGER.info('Deleting event: %s', z.path.event(node))
            zkutils.ensure_deleted(self.zkclient, z.path.event(node))
Exemplo n.º 5
0
def _save_version(zkclient, hostname, version):
    """Record ``version`` as the newest entry of the host's version history.

    The existing history (an empty list when nothing truthy is stored) gets
    ``version`` inserted at the front, and only the first ``_MAX_VERSIONS``
    entries are written back.
    """
    history_node = z.path.version_history(hostname)
    history = zkutils.get_default(zkclient, history_node) or []
    history.insert(0, version)
    zkutils.put(zkclient, history_node, history[:_MAX_VERSIONS])
Exemplo n.º 6
0
    def load_placement_data(self):
        """Restore identity and placement expiry of placed apps.

        For every app in the cell that has a server, the app's placement
        node under that server is read. When the read yields something other
        than None, the stored ``identity`` is forced onto the app and
        ``placement_expiry`` is set from ``expires`` (0 when absent).
        """
        for appname, app in self.cell.apps.items():
            if not app.server:
                continue

            saved = zkutils.get_default(
                self.zkclient, z.path.placement(app.server, appname))
            if saved is None:
                continue

            app.force_set_identity(saved.get('identity'))
            app.placement_expiry = saved.get('expires', 0)
Exemplo n.º 7
0
 def _watch_finished(finished):
     """Load data of newly seen /finished nodes.

     Instances already present in ``cell_state.finished`` are left
     untouched; for every other instance the finished node is read and
     stored under the instance name.
     """
     for name in finished:
         if name not in cell_state.finished:
             cell_state.finished[name] = zkutils.get_default(
                 zkclient, z.path.finished(name), {})
Exemplo n.º 8
0
    def _watch_finished(finished):
        """Synchronize ``cell_state.finished`` with the /finished nodes.

        Instances that are listed in ``finished`` but not yet known are
        loaded; known instances that are no longer listed are dropped.
        """
        known = set(cell_state.finished)
        wanted = set(finished)

        # Newly appeared instances: read and remember their data.
        for name in wanted.difference(known):
            cell_state.finished[name] = zkutils.get_default(
                zkclient, z.path.finished(name), {})

        # Instances that disappeared: forget them.
        for name in known.difference(wanted):
            del cell_state.finished[name]
Exemplo n.º 9
0
def _list_server_blackouts(zkclient, fmt):
    """Print blacked-out servers, newest first, formatted by ``fmt``.

    Each child of ``z.BLACKEDOUT_SERVERS`` is read together with its
    metadata; nodes that vanish in between are skipped. The partition is
    looked up only when ``fmt`` contains ``%p`` and the version (codepath)
    only when it contains ``%v``. Missing partition, version or reason are
    printed as ``-``.
    """
    want_partition = '%p' in fmt
    want_version = '%v' in fmt

    rows = []
    for server in zkclient.get_children(z.BLACKEDOUT_SERVERS):
        try:
            reason, stat = zkutils.get_with_metadata(
                zkclient, z.path.blackedout_server(server))
        except kazoo.client.NoNodeError:
            continue

        partition = None
        if want_partition:
            info = zkutils.get_default(zkclient, z.path.server(server))
            if info:
                partition = info.get('partition') or None

        version = None
        if want_version:
            info = zkutils.get_default(zkclient, z.path.version(server))
            if info:
                version = info.get('codepath') or None

        rows.append((stat.created, server, partition, version, reason))

    # [%t] %h %r will be printed as below
    # [Thu, 05 May 2016 02:59:58 +0000] <hostname> -
    formatter = _gen_formatter(('t', 'h', 'p', 'v', 'r'), fmt)

    # Sort ascending by tuple, then walk backwards.
    for created, server, partition, version, reason in sorted(rows)[::-1]:
        line = formatter.format(utils.strftime_utc(created), server,
                                partition or '-', version or '-',
                                reason or '-')
        cli.out(line)
Exemplo n.º 10
0
    def load_identity_groups(self):
        """Synchronize the cell's identity groups with Zookeeper.

        Groups known to the cell but no longer listed under
        ``z.IDENTITY_GROUPS`` are removed. Every listed group whose node
        holds truthy data is (re)configured with its ``count`` (0 when the
        key is absent).
        """
        listed = set(self.zkclient.get_children(z.IDENTITY_GROUPS))
        stale = set(self.cell.identity_groups.keys()).difference(listed)

        _LOGGER.info('Removing identities: %r', stale)
        for name in stale:
            self.cell.remove_identity_group(name)

        for name in listed:
            ident = zkutils.get_default(self.zkclient,
                                        z.path.identity_group(name))
            if not ident:
                continue

            count = ident.get('count', 0)
            _LOGGER.info('Configuring identity: %s, %s', name, count)
            self.cell.configure_identity_group(name, count)
Exemplo n.º 11
0
def _create_ephemeral_with_retry(zkclient, path, data):
    """Create an ephemeral node, retrying while the node already exists.

    Up to ``_EPHEMERAL_RETRY_COUNT`` attempts are made. After each
    ``NodeExistsError`` the current content of the node is logged and the
    function sleeps ``_EPHEMERAL_RETRY_INTERVAL`` before trying again.

    Returns whatever ``zkutils.create`` returns on success; raises
    ``exc.ContainerSetupError`` (reason PRESENCE) once all attempts failed.
    """
    prev_data = None
    for _attempt in range(_EPHEMERAL_RETRY_COUNT):
        try:
            created = zkutils.create(zkclient, path, data,
                                     acl=[_SERVERS_ACL], ephemeral=True)
        except kazoo.client.NodeExistsError:
            prev_data = zkutils.get_default(zkclient, path)
            _LOGGER.warning('Node exists, will retry: %s, data: %r', path,
                            prev_data)
            time.sleep(_EPHEMERAL_RETRY_INTERVAL)
        else:
            return created

    raise exc.ContainerSetupError('%s:%s' % (path, prev_data),
                                  app_abort.AbortedReason.PRESENCE)
Exemplo n.º 12
0
def _create_ephemeral_with_retry(zkclient, path, data):
    """Create ephemeral node with retry.

    Makes up to five attempts. When the node already exists, its current
    data and metadata are logged and the function sleeps
    ``_EPHEMERAL_RETRY_INTERVAL`` before the next attempt.

    Returns the result of ``zkutils.create`` on success. Raises
    ``exc.ContainerSetupError`` carrying the path and the last data seen
    in the conflicting node when every attempt failed.
    """
    prev_data = None
    for _ in range(5):
        try:
            return zkutils.create(zkclient,
                                  path,
                                  data,
                                  acl=[_SERVERS_ACL],
                                  ephemeral=True)
        except kazoo.client.NodeExistsError:
            prev_data, metadata = zkutils.get_default(zkclient,
                                                      path,
                                                      need_metadata=True)
            # Logger.warn is a deprecated alias; use warning().
            _LOGGER.warning(
                'Node exists, will retry: %s, data: %r, metadata: %r',
                path, prev_data, metadata)
            time.sleep(_EPHEMERAL_RETRY_INTERVAL)

    raise exc.ContainerSetupError('presence.%s:%s' % (path, prev_data))
Exemplo n.º 13
0
    def load_bucket(self, bucketname):
        """Return the bucket with the given name, loading it if needed.

        A bucket already present in ``self.buckets`` is returned as is.
        Otherwise its node is read, a ``scheduler.Bucket`` is built (level
        defaults to the part of the name before the first ``:``), cached,
        and -- when the data names a parent -- attached to that parent,
        which is loaded recursively.
        """
        # Do not load twice.
        if bucketname in self.buckets:
            return self.buckets[bucketname]

        _LOGGER.info('loading bucket: %s', bucketname)
        info = zkutils.get_default(self.zkclient,
                                   z.path.bucket(bucketname),
                                   default={})

        node = scheduler.Bucket(
            bucketname,
            traits=info.get('traits', 0),
            level=info.get('level', bucketname.split(':')[0]),
        )
        # Cache before resolving the parent, as the recursive call below
        # consults self.buckets.
        self.buckets[bucketname] = node

        parent_name = info.get('parent')
        if not parent_name:
            return node

        self.load_bucket(parent_name).add_node(node)
        return node
Exemplo n.º 14
0
    def app_cmd(app, reason, clear):
        """Show, add or clear application blackouts.

        With ``app`` given, the entry is either removed (``clear``) or set
        with the mandatory ``reason`` and the current time; the updated map
        is stored and an ``apps_blacklist`` event is created. In every case
        the resulting blackout list is printed, sorted by app.
        """
        zkclient = context.GLOBAL.zk.conn
        blacklist = zkutils.get_default(zkclient, z.BLACKEDOUT_APPS) or {}

        if app:
            if clear:
                blacklist.pop(app, None)
            elif reason:
                blacklist[app] = {'reason': reason, 'when': time.time()}
            else:
                raise click.UsageError('--reason is required.')

            zkutils.put(zkclient, z.BLACKEDOUT_APPS, data=blacklist)
            masterapi.create_event(zkclient, 0, 'apps_blacklist', None)

        for name, details in sorted(blacklist.items()):
            stamp = utils.strftime_utc(details['when'])
            cli.out('[%s] %s %s', stamp, name, details['reason'])
Exemplo n.º 15
0
    def load_app(self, appname):
        """Load (or refresh) one application from its scheduled node.

        When the scheduled node holds nothing truthy the app is removed from
        the cell. Otherwise priority and allocation come from
        ``find_assignment``, with the manifest's ``priority`` taking
        precedence unless it is -1. An app already in the cell only has its
        priority and data retention timeout refreshed; a new app is built
        from the manifest. Either way the app is added to the allocation.
        """
        # TODO: need to check if app is blacklisted.
        manifest = zkutils.get_default(self.zkclient,
                                       z.path.scheduled(appname))
        if not manifest:
            self.cell.remove_app(appname)
            return

        priority, allocation = self.find_assignment(appname)
        if 'priority' in manifest:
            requested = int(manifest['priority'])
            # -1 means "keep the priority from the assignment".
            if requested != -1:
                priority = requested

        # TODO: From scheduler perspective it is theoretically
        #                possible to update data retention timeout.
        data_retention = get_data_retention(manifest)
        lease = get_lease(manifest)

        app = self.cell.apps.get(appname)
        if not app:
            app = scheduler.Application(
                appname,
                priority,
                resources(manifest),
                affinity=manifest.get('affinity'),
                affinity_limits=manifest.get('affinity_limits', None),
                identity_group=manifest.get('identity_group'),
                schedule_once=manifest.get('schedule_once'),
                data_retention_timeout=data_retention,
                lease=lease,
            )
        else:
            app.priority = priority
            app.data_retention_timeout = data_retention

        self.cell.add_app(allocation, app)
Exemplo n.º 16
0
    def adjust_server_state(self, servername, readonly=False):
        """Reconcile a server's state with its presence and saved placement.

        Unknown servers are ignored. The state saved in the server's
        placement node is restored first (``down`` as of now when nothing is
        saved). Then, if the presence node is missing the server is marked
        down; if it is present the server is marked up unless it is frozen.
        Unless ``readonly`` is set, the resulting state is written back to
        the placement node.
        """
        server = self.servers.get(servername)
        if not server:
            return

        is_up = self.zkclient.exists(z.path.server_presence(servername))
        placement_node = z.path.placement(servername)

        # Restore state as it was stored in server placement node.
        #
        # zkutils.get_default returns a tuple if need_metadata is True; by
        # default it is False, so a dict is returned. pylint complains about
        # it, and it should be fixed in zkutils.
        #
        # pylint: disable=R0204
        saved = zkutils.get_default(self.zkclient, placement_node)
        if not saved:
            saved = {'state': 'down', 'since': time.time()}

        server.set_state(scheduler.State(saved['state']), saved['since'])

        # Presence wins over the saved state, except that a frozen server
        # which is present stays frozen.
        if not is_up:
            server.state = scheduler.State.down
        elif server.state is not scheduler.State.frozen:
            server.state = scheduler.State.up

        # Record server state:
        state, since = server.get_state()
        if readonly:
            return

        zkutils.put(self.zkclient, placement_node, {
            'state': state.value,
            'since': since
        })
Exemplo n.º 17
0
 def get_default(self, path, default=None):
     """Read ``path`` via zkutils using this object's Zookeeper client.

     ``default`` is forwarded unchanged to ``zkutils.get_default``.
     """
     client = self.zkclient
     return zkutils.get_default(client, path, default=default)
Exemplo n.º 18
0
def get_scheduled_stats(zkclient):
    """Return the data stored at ``z.SCHEDULED_STATS``.

    Per the original description this is the count of scheduled apps by
    proid. ``{}`` is passed to ``zkutils.get_default`` as the third
    argument (presumably the fallback when the node is missing).
    """
    stats = zkutils.get_default(zkclient, z.SCHEDULED_STATS, {})
    return stats
Exemplo n.º 19
0
def get_app(zkclient, app_id):
    """Look up the scheduled app details stored for ``app_id``.

    The node path is resolved with ``_app_node`` and read through
    ``zkutils.get_default`` without an explicit fallback value.
    """
    node = _app_node(app_id)
    return zkutils.get_default(zkclient, node)